[ { "id": "01wSNY5T60", "title": "Are Compressed Language Models Less Subgroup Robust?", "track": "main", "status": "Short Main", "tldr": "", "abstract": "To reduce the inference cost of large language models, model compression is increasingly used to create smaller scalable models. However, little is known about their robustness to minority subgroups defined by the labels and attributes of a dataset. In this paper, we investigate the effects of 18 different compression methods and settings on the subgroup robustness of BERT language models. We show that worst-group performance does not depend on model size alone, but also on the compression method used. Additionally, we find that model compression does not always worsen the performance on minority subgroups. Altogether, our analysis serves to further research into the subgroup robustness of model compression.", "keywords": "Language Model Compression;Subgroup Robustness", "primary_area": "", "supplementary_material": "", "author": "Leonidas Gee;Andrea Zugarini;Novi Quadrianto", "authorids": "~Leonidas_Gee1;~Andrea_Zugarini1;~Novi_Quadrianto1", "gender": ";M;M", "homepage": ";;http://www.sussex.ac.uk/profiles/335583", "dblp": ";198/0918;http://dblp.uni-trier.de/pers/hd/q/Quadrianto:Novi", "google_scholar": ";leSVEswAAAAJ;I-rLzGcAAAAJ", "or_profile": "~Leonidas_Gee1;~Andrea_Zugarini1;~Novi_Quadrianto1", "aff": ";Expert.ai Srl;Monash Indonesia", "aff_domain": ";expert.ai;monash.edu", "position": ";Researcher;Full Professor", "bibtex": "@inproceedings{\ngee2023are,\ntitle={Are Compressed Language Models Less Subgroup Robust?},\nauthor={Leonidas Gee and Andrea Zugarini and Novi Quadrianto},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=01wSNY5T60}\n}", "github": "", "project": "", "reviewers": "GktP;RMg6;gpj9", "site": "https://openreview.net/forum?id=01wSNY5T60", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;3;3", "excitement": "2;3;3", "reproducibility": "3;4;3", "correctness": "3;3;3", "rating_avg": 3.0, "confidence_avg": 3.0, "excitement_avg": 2.6666666666666665, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": ";https://it.linkedin.com/in/andrea-zugarini-930a8898;", "aff_unique_index": "0;1", "aff_unique_norm": "Expert.ai;Monash University", "aff_unique_dep": ";", "aff_unique_url": "https://www.expert.ai;https://www.monash.edu.id", "aff_unique_abbr": "Expert.ai;Monash", "aff_campus_unique_index": "1", "aff_campus_unique": ";Indonesia", "aff_country_unique_index": "0;1", "aff_country_unique": "Italy;Indonesia" }, { "id": "05vb8rwGct", "title": "Towards Informative Few-Shot Prompt with Maximum Information Gain for In-Context Learning", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Large Language models (LLMs) possess the capability to engage In-context Learning (ICL) by leveraging a few demonstrations pertaining to a new downstream task as conditions. However, this particular learning paradigm suffers from high instability stemming from substantial variances induced by factors such as the input distribution of selected examples, their ordering, and prompt formats. In this work, we demonstrate that even when all these factors are held constant, the random selection of examples still results in high variance. 
Consequently, we aim to explore the informative ability of data examples by quantifying the Information Gain (IG) obtained in prediction after observing a given example candidate. Then we propose to sample those with maximum IG. Additionally, we identify the presence of template bias, which can lead to unfair evaluations of IG during the sampling process. To mitigate this bias, we introduce Calibration Before Sampling strategy. The experimental results illustrate that our proposed method can yield an average relative improvement of 14.3\\% across six classification tasks using three LLMs.", "keywords": "Large Language Model;In-context Learning;Information Gain", "primary_area": "", "supplementary_material": "", "author": "Hongfu Liu;Ye Wang", "authorids": "~Hongfu_Liu4;~Ye_Wang3", "gender": "M;M", "homepage": "https://waffle-liu.github.io/;https://smcnus.comp.nus.edu.sg/", "dblp": "32/9075-2;44/6292-7", "google_scholar": "6xFZDEcAAAAJ;https://scholar.google.com.sg/citations?user=CdgLLL8AAAAJ", "or_profile": "~Hongfu_Liu4;~Ye_Wang3", "aff": "National University of Singapore;National University of Singapore", "aff_domain": "nus.edu.sg;nus.edu.sg", "position": "PhD student;Associate Professor", "bibtex": "@inproceedings{\nliu2023towards,\ntitle={Towards Informative Few-Shot Prompt with Maximum Information Gain for In-Context Learning},\nauthor={Hongfu Liu and Ye Wang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=05vb8rwGct}\n}", "github": "", "project": "", "reviewers": "CxLY;iLKr;wbuo", "site": "https://openreview.net/forum?id=05vb8rwGct", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;3", "excitement": "3;2;4", "reproducibility": "4;3;4", "correctness": "4;2;3", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.0, "replies_avg": 5, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-0123-1260", "linkedin": "hongfu-liu-38585b184/;", "aff_unique_index": "0;0", "aff_unique_norm": "National University of Singapore", "aff_unique_dep": "", "aff_unique_url": "https://www.nus.edu.sg", "aff_unique_abbr": "NUS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Singapore" }, { "id": "06oozRd4jU", "title": "Graph vs. Sequence: An Empirical Study on Knowledge Forms for Knowledge-Grounded Dialogue", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Knowledge-grounded dialogue is a task of generating an informative response based on both the dialogue history and external knowledge source. In general, there are two forms of knowledge: manually annotated knowledge graphs and knowledge text from website. From various evaluation viewpoints, each type of knowledge has advantages and downsides. To further distinguish the principles and determinants from the intricate factors, we conduct a thorough experiment and study on the task to answer three essential questions. The questions involve the choice of appropriate knowledge form, the degree of mutual effects between knowledge and the model selection, and the few-shot performance of knowledge. 
Supported by statistical shreds of evidence, we offer conclusive solutions and sensible suggestions for directions and standards of future research.", "keywords": "Knowledge-Grounded Dialogue;Empirical Study", "primary_area": "", "supplementary_material": "", "author": "Yizhe Yang;Heyan Huang;Yuhang Liu;Yang Gao", "authorids": "~Yizhe_Yang1;~Heyan_Huang1;~Yuhang_Liu7;~Yang_Gao2", "gender": "M;F;M;F", "homepage": ";https://cs.bit.edu.cn/szdw/jsml/js/hhy/index.htm;;https://cs.bit.edu.cn/szdw/jsml/bssds/78c31a2505434740a51076b614742941.htm", "dblp": ";27/8686;;89/4402-16", "google_scholar": "VqTU-GYAAAAJ;;jANV-KcAAAAJ;CJwLwzQAAAAJ", "or_profile": "~Yizhe_Yang1;~Heyan_Huang1;~Yuhang_Liu7;~Yang_Gao2", "aff": "Beijing Institute of Technology;Beijing Institute of Technology;Beijing Institute of Technology;Beijing Institute of Technology", "aff_domain": "bit.edu.cn;bit.edu.cn;bit.edu.cn;bit.edu.cn", "position": "PhD student;Full Professor;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nyang2023graph,\ntitle={Graph vs. Sequence: An Empirical Study on Knowledge Forms for Knowledge-Grounded Dialogue},\nauthor={Yizhe Yang and Heyan Huang and Yuhang Liu and Yang Gao},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=06oozRd4jU}\n}", "github": "", "project": "", "reviewers": "E5Fq;eXkB;Ez2J", "site": "https://openreview.net/forum?id=06oozRd4jU", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;4", "excitement": "2;4;3", "reproducibility": "3;4;4", "correctness": "4;4;3", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-0320-7520;0000-0002-9558-7611;", "linkedin": ";;;", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Beijing Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "http://www.bit.edu.cn/", "aff_unique_abbr": "BIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "0C5C70C3n8", "title": "Mitigating Intrinsic Named Entity-Related Hallucinations of Abstractive Text Summarization", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Abstractive text summarization (ATS) is both important and challenging. Recent studies have shown that ATS still faces various forms of hallucination. Our study also indicates that a significant portion of hallucinations is named entity-related. They might appear in different forms, such as mistaken entities and erroneous entity references. The underlying causes implicit in data are complex: data samples pose varying learning conditions. Despite recent research efforts dedicated to named entity-related hallucinations, the solutions have not adequately addressed the varying learning conditions posed by data. This paper aims to bridge the gap in pursuit of reducing intrinsic named entity-related hallucinations. To do so, we propose an adaptive margin ranking loss to facilitate two entity-alignment learning methods to tackle them. Our experiment results show that our methods improve the used baseline model on automatic evaluation scores. 
The human evaluation also indicates that our methods jointly reduce the intrinsic named entity-related hallucinations considerably compared to the used baseline model.", "keywords": "Abstractive text summarization;named entity-related hallucinations;adaptive margin ranking loss", "primary_area": "", "supplementary_material": "", "author": "Jianbin Shen;Junyu Xuan;Christy Jie Liang", "authorids": "~Jianbin_Shen1;~Junyu_Xuan1;~Christy_Jie_Liang1", "gender": ";M;F", "homepage": ";https://www.uts.edu.au/staff/junyu.xuan;https://profiles.uts.edu.au/Jie.Liang", "dblp": ";08/10768;", "google_scholar": ";https://scholar.google.com.au/citations?user=POQ_yJUAAAAJ;5RRyg60AAAAJ", "or_profile": "~Jianbin_Shen1;~Junyu_Xuan1;~Christy_Jie_Liang1", "aff": "University of Technology Sydney;University of Technology Sydney;", "aff_domain": "uts.edu.au;uts.edu.au;", "position": "PhD student;Assistant Professor;", "bibtex": "@inproceedings{\nshen2023mitigating,\ntitle={Mitigating Intrinsic Named Entity-Related Hallucinations of Abstractive Text Summarization},\nauthor={Jianbin Shen and Junyu Xuan and Christy Jie Liang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=0C5C70C3n8}\n}", "github": "", "project": "", "reviewers": "n18X;oSii;sW6c;VoaE", "site": "https://openreview.net/forum?id=0C5C70C3n8", "pdf_size": 0, "rating": "3;3;3;3", "confidence": "3;3;4;4", "excitement": "3;4;3;3", "reproducibility": "4;4;4;4", "correctness": "3;3;4;2", "rating_avg": 3.0, "confidence_avg": 3.5, "excitement_avg": 3.25, "reproducibility_avg": 4.0, "correctness_avg": 3.0, "replies_avg": 14, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0009-0004-7201-5604;;", "linkedin": ";;", "aff_unique_index": "0;0", "aff_unique_norm": "University of Technology Sydney", "aff_unique_dep": "", "aff_unique_url": "https://www.uts.edu.au", "aff_unique_abbr": "UTS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Australia" }, { "id": "0DkaimvWs0", "title": "Contrastive Pre-training for Personalized Expert Finding", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Expert finding could help route questions to potential suitable users to answer in Community Question Answering (CQA) platforms.\nHence it is essential to learn accurate representations of experts and questions according to the question text articles. Recently the pre-training and fine-tuning paradigms are powerful for natural language understanding, which has the potential for better question modeling and expert finding. Inspired by this, we propose a CQA-domain Contrastive Pre-training framework for Expert Finding, named CPEF, which could learn more comprehensive question representations. Specifically, considering that there is semantic complementation between question titles and bodies, during the domain pre-training phase, we propose a title-body contrastive learning task to enhance question representations, which directly treats the question title and the corresponding body as positive samples of each other, instead of designing extra data-augmentation strategies. Furthermore, a personalized tuning network is proposed to inject the personalized preferences of different experts during the fine-tuning phase. 
Extensive experimental results on six real-world datasets demonstrate that our method could achieve superior performance for expert finding.", "keywords": "Expert Finding;Recommender Systems;Community Question Answering;Pre-training", "primary_area": "", "supplementary_material": "", "author": "Qiyao Peng;Hongtao Liu;Zhepeng Lv;Qing Yang;Wenjun Wang", "authorids": "~Qiyao_Peng1;~Hongtao_Liu1;~Zhepeng_Lv1;~Qing_Yang11;~Wenjun_Wang4", "gender": ";M;F;M;M", "homepage": ";;https://www.duxiaoman.com/;https://www.duxiaoman.com/index;", "dblp": ";;;47/3749;21/5941-2.html", "google_scholar": ";https://scholar.google.com/citations?hl=zh-CN;;;https://scholar.google.com/citations?hl=zh-CN", "or_profile": "~Qiyao_Peng1;~Hongtao_Liu1;~Zhepeng_Lv1;~Qing_Yang11;~Wenjun_Wang4", "aff": ";Du Xiaoman Financial;Du Xiaoman Technology(BeiJing);Du Xiaoman Technology(BeiJing);Tianjin University", "aff_domain": ";duxiaoman.com;duxiaoman.com;duxiaoman.com;tju.edu.cn", "position": ";Researcher;Researcher;Principal Researcher;Full Professor", "bibtex": "@inproceedings{\npeng2023contrastive,\ntitle={Contrastive Pre-training for Personalized Expert Finding},\nauthor={Qiyao Peng and Hongtao Liu and Zhepeng Lv and Qing Yang and Wenjun Wang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=0DkaimvWs0}\n}", "github": "", "project": "", "reviewers": "L924;hASg;QLNj", "site": "https://openreview.net/forum?id=0DkaimvWs0", "pdf_size": 0, "rating": "2;2;2", "confidence": "4;4;5", "excitement": "3;3;4", "reproducibility": "4;4;3", "correctness": "3;3;4", "rating_avg": 2.0, "confidence_avg": 4.333333333333333, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;", "linkedin": ";;;;", "aff_unique_index": "0;1;1;2", "aff_unique_norm": "Du Xiaoman Financial;Du Xiaoman Technology;Tianjin University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.duxiaoman.com;;http://www.tju.edu.cn", "aff_unique_abbr": "DXF;;TJU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "0DyJbE93XO", "title": "A Thorough Examination on Zero-shot Dense Retrieval", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Recent years have witnessed the significant advance in dense retrieval (DR) based on powerful pre-trained language models (PLM). DR models have achieved excellent performance in several benchmark datasets, while they are shown to be not as competitive as traditional sparse retrieval models (e.g., BM25) in a zero-shot retrieval setting. However, in the related literature, there still lacks a detailed and comprehensive study on zero-shot retrieval. In this paper, we present the first thorough examination of the zero-shot capability of DR models. We aim to identify the key factors and analyze how they affect zero-shot retrieval performance. In particular, we discuss the effect of several key factors related to source training set, analyze the potential bias from the target dataset, and review and compare existing zero-shot DR models. 
Our findings provide important evidence to better understand and develop zero-shot DR models.", "keywords": "Zero-shot dense retrieval; Information retrieval", "primary_area": "", "supplementary_material": "", "author": "Ruiyang Ren;Yingqi Qu;Jing Liu;Xin Zhao;Qifei Wu;Yuchen DIng;Hua Wu;Haifeng Wang;Ji-Rong Wen", "authorids": "~Ruiyang_Ren1;~Yingqi_Qu1;~Jing_Liu7;~Xin_Zhao10;~Qifei_Wu1;~Yuchen_DIng1;~Hua_Wu4;~Haifeng_Wang3;~Ji-Rong_Wen1", "gender": "M;;M;M;M;M;M;M;F", "homepage": "https://rui-yang.ren;;https://legendarydan.github.io;https://gsai.ruc.edu.cn/addons/teacher/index/info.html?user_id=5&ruccode=20140041&ln=cn;https://github.com/ylf-Ng;http://www.baidu.com;https://haifengwang.net/;https://gsai.ruc.edu.cn/english/jrwen;https://wuhuanlp.github.io/", "dblp": "265/6402;276/9174.html;72/2590-22;https://dblp.uni-trier.de/pid/52/8700.html;;;10/5209-1.html;w/JRWen;27/6045-3", "google_scholar": "KpIEBYMAAAAJ;;_NtB74oAAAAJ;JNhNacoAAAAJ;;;jgy4jCAAAAAJ;tbxCHJgAAAAJ;9X2ThuAAAAAJ", "or_profile": "~Ruiyang_Ren1;~Yingqi_Qu1;~Jing_Liu7;~Xin_Zhao10;~Qifei_Wu1;~Yuchen_DIng1;~Haifeng_Wang3;~Ji-Rong_Wen1;~hua_wu1", "aff": "Renmin University of China;;Baidu;Renmin University of China;Beijing University of Posts and Telecommunications;;Baidu;Renmin University of China;Baidu", "aff_domain": "ruc.edu.cn;;baidu.com;ruc.edu.cn;bupt.edu.cn;;baidu.com;ruc.edu.cn;baidu.com", "position": "PhD student;;Researcher;Full Professor;MS student;;CTO;Full Professor;Principal Researcher", "bibtex": "@inproceedings{\nren2023a,\ntitle={A Thorough Examination on Zero-shot Dense Retrieval},\nauthor={Ruiyang Ren and Yingqi Qu and Jing Liu and Xin Zhao and Qifei Wu and Yuchen DIng and Hua Wu and Haifeng Wang and Ji-Rong Wen},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=0DyJbE93XO}\n}", "github": "", "project": "", "reviewers": "19Ji;8buS;SQPY", "site": "https://openreview.net/forum?id=0DyJbE93XO", "pdf_size": 0, "rating": "3;3;3", "confidence": "1;2;3", "excitement": "3;3;3", "reproducibility": "2;3;3", "correctness": "3;3;4", "rating_avg": 3.0, "confidence_avg": 2.0, "excitement_avg": 3.0, "reproducibility_avg": 2.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 9, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0000-0002-8333-6196;;;0000-0002-0672-7468;0000-0002-9777-9676;0000-0001-8254-1561", "linkedin": ";;;;;;;;", "aff_unique_index": "0;1;0;2;1;0;1", "aff_unique_norm": "Renmin University of China;Baidu;Beijing University of Posts and Telecommunications", "aff_unique_dep": ";Baidu, Inc.;", "aff_unique_url": "http://www.ruc.edu.cn;https://www.baidu.com;http://www.bupt.edu.cn/", "aff_unique_abbr": "RUC;Baidu;BUPT", "aff_campus_unique_index": "1", "aff_campus_unique": ";Beijing", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "0EQ4z8n5rp", "title": "Global Voices, Local Biases: Socio-Cultural Prejudices across Languages", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Human biases are ubiquitous but not uniform: disparities exist across linguistic, cultural, and societal borders. As large amounts of recent literature suggest, language models (LMs) trained on human data can reflect and often amplify the effects of these social biases. However, the vast majority of existing studies on bias are heavily skewed towards Western and European languages. 
\nIn this work, we scale the Word Embedding Association Test (WEAT) to 24 languages, enabling broader studies and yielding interesting findings about LM bias. We additionally enhance this data with culturally relevant information for each language, capturing local contexts on a global scale. \nFurther, to encompass more widely prevalent societal biases, we examine new bias dimensions across toxicity, ableism, and more. \nMoreover, we delve deeper into the Indian linguistic landscape, conducting a comprehensive regional bias analysis across six prevalent Indian languages. \nFinally, we highlight the significance of these social biases and the new dimensions through an extensive comparison of embedding methods, reinforcing the need to address them in pursuit of more equitable language models.", "keywords": "bias;multilinguality;language models", "primary_area": "", "supplementary_material": "", "author": "Anjishnu Mukherjee;Chahat Raj;Ziwei Zhu;Antonios Anastasopoulos", "authorids": "~Anjishnu_Mukherjee1;~Chahat_Raj1;~Ziwei_Zhu1;~Antonios_Anastasopoulos1", "gender": "M;F;M;M", "homepage": "https://iamshnoo.github.io;https://chahatraj.github.io;https://zziwei.github.io/;http://www.cs.gmu.edu/~antonis/", "dblp": "339/6660;295/2494;159/9916;148/9479", "google_scholar": "3849YpIAAAAJ;K8EKC4gAAAAJ;3S6pM7wAAAAJ;g_G_SNAAAAAJ", "or_profile": "~Anjishnu_Mukherjee1;~Chahat_Raj1;~Ziwei_Zhu1;~Antonios_Anastasopoulos1", "aff": "George Mason University;George Mason University;George Mason University;George Mason University", "aff_domain": "gmu.edu;gmu.edu;gmu.edu;gmu.edu", "position": "PhD student;PhD student;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nmukherjee2023global,\ntitle={Global Voices, Local Biases: Socio-Cultural Prejudices across Languages},\nauthor={Anjishnu Mukherjee and Chahat Raj and Ziwei Zhu and Antonios Anastasopoulos},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=0EQ4z8n5rp}\n}", "github": "", "project": "", "reviewers": "LVwA;LAHL;L4qs", "site": "https://openreview.net/forum?id=0EQ4z8n5rp", "pdf_size": 0, "rating": "5;5;5", "confidence": "3;3;4", "excitement": "4;4;3", "reproducibility": "3;4;2", "correctness": "4;4;3", "rating_avg": 5.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.0, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0003-4012-8466;0000-0003-0083-6812;0000-0002-3990-4774;0000-0002-8544-246X", "linkedin": "anjishnumukherjee/;chahatraj/;;", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "George Mason University", "aff_unique_dep": "", "aff_unique_url": "https://www.gmu.edu", "aff_unique_abbr": "GMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "0GO8Dtl8lJ", "title": "Unleashing the Multilingual Encoder Potential: Boosting Zero-Shot Performance via Probability Calibration", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "Pretrained multilingual encoder models can directly perform zero-shot multilingual tasks or linguistic probing by reformulating the input examples into cloze-style prompts.\n This is accomplished by predicting the probabilities of the label words at the masked token position, without requiring any updates to the model parameters.\n However, the performance 
of this method is limited by the model's bias toward predicting label words which frequently occurred during the pretraining. \n These words typically receive high probabilities. \n To address this issue, we combine the models with calibration techniques which modify the probabilities of label words predicted by the models.\nWe first validate the effectiveness of a proposed simple calibration method together with other existing techniques on monolingual encoders in both zero- and few-shot scenarios.\nWe subsequently employ these calibration techniques on multilingual encoders, resulting in substantial performance improvements across a wide range of tasks.", "keywords": "prompt learning;multilingual encoders;calibration", "primary_area": "", "supplementary_material": "", "author": "Ercong Nie;Helmut Schmid;Hinrich Schuetze", "authorids": "~Ercong_Nie1;~Helmut_Schmid1;~Hinrich_Schuetze3", "gender": "M;M;M", "homepage": "https://cis.lmu.de/~nie;https://www.cis.uni-muenchen.de/~schmid/;https://www.cis.uni-muenchen.de/schuetze/", "dblp": "336/4767;79/3420;s/HinrichSchutze", "google_scholar": "dx00mD4AAAAJ;https://scholar.google.de/citations?hl=de;", "or_profile": "~Ercong_Nie1;~Helmut_Schmid1;~Hinrich_Schuetze3", "aff": "Ludwig-Maximilians-Universit\u00e4t M\u00fcnchen;Center for Information and Language Processing;Center for Information and Language Processing", "aff_domain": "lmu.de;cis.lmu.de;lmu.de", "position": "PhD student;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nnie2023unleashing,\ntitle={Unleashing the Multilingual Encoder Potential: Boosting Zero-Shot Performance via Probability Calibration},\nauthor={Ercong Nie and Helmut Schmid and Hinrich Schuetze},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=0GO8Dtl8lJ}\n}", "github": "", "project": "", "reviewers": "Pst8;LfMb;3ZFj", "site": "https://openreview.net/forum?id=0GO8Dtl8lJ", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;5", "excitement": "3;3;4", "reproducibility": "4;3;5", "correctness": "3;3;3", "rating_avg": 3.0, "confidence_avg": 4.333333333333333, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.0, "correctness_avg": 3.0, "replies_avg": 10, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0003-1453-4460;;", "linkedin": "ercong-nie-6375a5104/;;", "aff_unique_index": "0;1;1", "aff_unique_norm": "Ludwig-Maximilians-Universit\u00e4t M\u00fcnchen;Center for Information and Language Processing", "aff_unique_dep": ";", "aff_unique_url": "https://www.lmu.de;", "aff_unique_abbr": "LMU;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0", "aff_country_unique": "Germany;" }, { "id": "0JepdeBcDk", "title": "An Attribution Method for Siamese Encoders", "track": "main", "status": "Short Main", "tldr": "", "abstract": "Despite the success of Siamese encoder models such as sentence\ntransformers (ST), little is known about the aspects of inputs they\npay attention to. A barrier is that their predictions cannot be\nattributed to individual features, as they compare two inputs rather\nthan processing a single one.\nThis paper derives a local attribution method for Siamese encoders by generalizing\nthe principle of integrated gradients to models with multiple inputs.\nThe output takes the form of feature-pair attributions and in case of STs it can be reduced to a token--token matrix. 
\nOur method involves the introduction of integrated Jacobians and inherits the advantageous formal properties of integrated gradients: it accounts for the model's full computation graph and is guaranteed to converge to the actual prediction.\nA pilot study shows that in case of STs few token pairs can dominate\npredictions and that STs preferentially focus on nouns and verbs.\nFor accurate predictions, however, they need to attend to the majority of tokens and parts of speech.", "keywords": "feature attribution;interpretability;explainability;siamese encoder;sentence transformer;integrated gradients;integrated Jacobians", "primary_area": "", "supplementary_material": "", "author": "Lucas Moeller;Dmitry Nikolaev;Sebastian Pad\u00f3", "authorids": "~Lucas_Moeller1;~Dmitry_Nikolaev1;~Sebastian_Pad\u00f32", "gender": "M;M;M", "homepage": ";https://dnikolaev.com;https://nlpado.de/~sebastian", "dblp": "325/5400;264/5979;p/SebastianPado", "google_scholar": "Gqt9NnQAAAAJ;Myl8EpkAAAAJ;vKqag_AAAAAJ", "or_profile": "~Lucas_Moeller1;~Dmitry_Nikolaev1;~Sebastian_Pado1", "aff": "University of Stuttgart;University of Stuttgart, Universit\u00e4t Stuttgart;University of Stuttgart, Universit\u00e4t Stuttgart", "aff_domain": "ims.uni-stuttgart.de;ims.uni-stuttgart.de;ims.uni-stuttgart.de", "position": "PhD student;Postdoc;Professor", "bibtex": "@inproceedings{\nmoeller2023an,\ntitle={An Attribution Method for Siamese Encoders},\nauthor={Lucas Moeller and Dmitry Nikolaev and Sebastian Pad{\\'o}},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=0JepdeBcDk}\n}", "github": "", "project": "", "reviewers": "Ug1E;4954;PsQ5", "site": "https://openreview.net/forum?id=0JepdeBcDk", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;2;4", "excitement": "4;3;3", "reproducibility": "3;3;4", "correctness": "4;4;3", "rating_avg": 4.0, "confidence_avg": 3.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0001-7921-7883;0000-0002-3034-9794;", "linkedin": "lucas-moeller-a031b71aa/;dmitry-nikolaev-9421405a/;", "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Stuttgart", "aff_unique_dep": "", "aff_unique_url": "https://www.uni-stuttgart.de", "aff_unique_abbr": "USTuttgart", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Germany" }, { "id": "0KYSlQdMu6", "title": "TacoPrompt: A Collaborative Multi-Task Prompt Learning Method for Self-Supervised Taxonomy Completion", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Automatic taxonomy completion aims to attach the emerging concept to an appropriate pair of hypernym and hyponym in the existing taxonomy. Existing methods suffer from the overfitting to leaf-only problem caused by imbalanced leaf and non-leaf samples when training the newly initialized classification head. Besides, they only leverage subtasks, namely attaching the concept to its hypernym or hyponym, as auxiliary supervision for representation learning yet neglect the effects of subtask results on the final prediction. To address the aforementioned limitations, we propose TacoPrompt, a Collaborative Multi-Task Prompt Learning Method for Self-Supervised Taxonomy Completion. 
First, we perform triplet semantic matching using the prompt learning paradigm to effectively learn non-leaf attachment ability from imbalanced training samples. Second, we design the result context to relate the final prediction to the subtask results by a contextual approach, enhancing prompt-based multi-task learning. Third, we leverage a two-stage retrieval and re-ranking approach to improve the inference efficiency. Experimental results on three datasets show that TacoPrompt achieves state-of-the-art taxonomy completion performance. Codes are available at https://github.com/cyclexu/TacoPrompt.", "keywords": "Taxonomy completion;prompt learning;self-supervised learning;multi-task learning", "primary_area": "", "supplementary_material": "", "author": "Hongyuan Xu;Ciyi Liu;Yuhang Niu;Yunong Chen;Xiangrui Cai;Yanlong Wen;Xiaojie Yuan", "authorids": "~Hongyuan_Xu1;~Ciyi_Liu1;~Yuhang_Niu1;~Yunong_Chen1;~Xiangrui_Cai1;~Yanlong_Wen1;~Xiaojie_Yuan1", "gender": "M;M;M;M;M;;", "homepage": ";https://github.com/virtual-world-oss;https://github.com/nyh-a;https://chenyunong.com/;https://dbis.nankai.edu.cn/2023/0322/c12139a506911/page.htm;;https://dbis.nankai.edu.cn/2023/0322/c12139a506919/page.htm", "dblp": "220/2889;;;274/3157;137/0504;;79/2280", "google_scholar": "https://scholar.google.com/citations?view_op=list_works;;;;Y9vuweEAAAAJ;;", "or_profile": "~Hongyuan_Xu1;~Ciyi_Liu1;~Yuhang_Niu1;~Yunong_Chen1;~Xiangrui_Cai1;~Yanlong_Wen1;~Xiaojie_Yuan1", "aff": "Nankai University;Nankai University;Nankai University;Nankai University;Nankai University;;Nankai University", "aff_domain": "nku.nankai.edu.cn;nankai.edu.cn;nankai.edu.cn;nankai.edu.cn;nankai.edu.cn;;nankai.edu.cn", "position": "PhD student;Undergrad student;Undergrad student;MS student;Associate Professor;;Full Professor", "bibtex": "@inproceedings{\nxu2023tacoprompt,\ntitle={TacoPrompt: A Collaborative Multi-Task Prompt Learning Method for Self-Supervised Taxonomy Completion},\nauthor={Hongyuan Xu and Ciyi Liu and Yuhang Niu and Yunong Chen and Xiangrui Cai and Yanlong Wen and Xiaojie Yuan},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=0KYSlQdMu6}\n}", "github": "", "project": "", "reviewers": "Z6sr;6PQe;tsdR;j9Kp", "site": "https://openreview.net/forum?id=0KYSlQdMu6", "pdf_size": 0, "rating": "4;4;4;4", "confidence": "3;4;5;3", "excitement": "3;3;4;2", "reproducibility": "4;3;4;3", "correctness": "3;3;4;3", "rating_avg": 4.0, "confidence_avg": 3.75, "excitement_avg": 3.0, "reproducibility_avg": 3.5, "correctness_avg": 3.25, "replies_avg": 14, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;;0000-0002-5876-6856", "linkedin": ";;;;;;", "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "Nankai University", "aff_unique_dep": "", "aff_unique_url": "http://www.nankai.edu.cn", "aff_unique_abbr": "NKU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "0LXEvcD3dB", "title": "SpeechGPT: Empowering Large Language Models with Intrinsic Cross-Modal Conversational Abilities", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Multi-modal large language models are regarded as a crucial step towards Artificial General Intelligence~(AGI) and have garnered significant interest with the emergence of ChatGPT. 
However, current speech-language models typically adopt the cascade paradigm, preventing inter-modal knowledge transfer. In this paper, we propose SpeechGPT, a large language model with intrinsic cross-modal conversational abilities, capable of perceiving and generating multi-modal content. With discrete speech representations, we construct SpeechInstruct, the first large-scale cross-modal speech instruction dataset. Additionally, we employ a three-stage training strategy that includes modality-adaptation pre-training, cross-modal instruction fine-tuning, and chain-of-modality instruction fine-tuning. The experimental results demonstrate that SpeechGPT has an impressive capacity to follow cross-modal human instructions and highlight the potential of handling multiple modalities with one model. Code and models are available in \\url{https://github.com/0nutation/SpeechGPT}. Demos are shown in \\url{https://0nutation.github.io/SpeechGPT.github.io/}.", "keywords": "large language model;speech;multi-modal", "primary_area": "", "supplementary_material": "", "author": "Dong Zhang;Shimin Li;Xin Zhang;Jun Zhan;Pengyu Wang;Yaqian Zhou;Xipeng Qiu", "authorids": "~Dong_Zhang9;~Shimin_Li1;~Xin_Zhang36;~Jun_Zhan2;~Pengyu_Wang2;~Yaqian_Zhou1;~Xipeng_Qiu1", "gender": "M;M;M;M;M;F;M", "homepage": ";;https://github.com/ZhangXInFD;https://junzhan2000.github.io/;;;https://xpqiu.github.io/", "dblp": ";;;;14/3832-6;34/389-1.html;69/1395", "google_scholar": "ScVbeu0AAAAJ;0xxkGjMAAAAJ;https://scholar.google.com/citations?hl=en;GfrhPE8AAAAJ;https://scholar.google.co.jp/citations?user=NGniJS0AAAAJ;;Pq4Yp_kAAAAJ", "or_profile": "~Dong_Zhang9;~Shimin_Li1;~Xin_Zhang36;~Jun_Zhan2;~Pengyu_Wang2;~Yaqian_Zhou1;~Xipeng_Qiu1", "aff": "Fudan University;Fudan University;Fudan University;Fudan University;Fudan University;Fudan University;Fudan University", "aff_domain": "fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn", "position": "MS student;PhD student;MS student;MS student;PhD student;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nzhang2023speechgpt,\ntitle={Speech{GPT}: Empowering Large Language Models with Intrinsic Cross-Modal Conversational Abilities},\nauthor={Dong Zhang and Shimin Li and Xin Zhang and Jun Zhan and Pengyu Wang and Yaqian Zhou and Xipeng Qiu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=0LXEvcD3dB}\n}", "github": "", "project": "", "reviewers": "mbz5;9UAb;bU86", "site": "https://openreview.net/forum?id=0LXEvcD3dB", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;3;4", "excitement": "4;4;4", "reproducibility": "3;3;3", "correctness": "3;3;3", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 4.0, "reproducibility_avg": 3.0, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;;0000-0001-7163-5247", "linkedin": ";;;;;;", "aff_unique_index": "0;0;0;0;0;0;0", "aff_unique_norm": "Fudan University", "aff_unique_dep": "", "aff_unique_url": "https://www.fudan.edu.cn", "aff_unique_abbr": "Fudan", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "0M2m9GUTLN", "title": "Fair Text Classification with Wasserstein Independence", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Group fairness is a central research topic in text classification, 
where reaching fair treatment between sensitive groups (e.g. women vs. men) remains an open challenge. This paper presents a novel method for mitigating biases in neural text classification, agnostic to the model architecture. Considering the difficulty to distinguish fair from unfair information in a text encoder, we take inspiration from adversarial training to induce Wasserstein independence between representations learned to predict our target label and the ones learned to predict some sensitive attribute. \nOur approach provides two significant advantages. Firstly, it does not require annotations of sensitive attributes in both testing and training data. This is more suitable for real-life scenarios compared to existing methods that require annotations of sensitive attributes at train time. Secondly, our approach exhibits a comparable or better fairness-accuracy trade-off compared to existing methods.", "keywords": "Text Classification;Fairness;Wasserstein", "primary_area": "", "supplementary_material": "", "author": "Thibaud Leteno;Antoine Gourru;Charlotte Laclau;R\u00e9mi Emonet;Christophe Gravier", "authorids": "~Thibaud_Leteno1;~Antoine_Gourru1;~Charlotte_Laclau2;~R\u00e9mi_Emonet1;~Christophe_Gravier1", "gender": "Not Specified;M;F;M;M", "homepage": "https://fr.linkedin.com/in/thibaud-leteno-398221182;http://antoinegourru.com;https://laclauc.github.io/index.html;https://home.heeere.com;", "dblp": ";219/8435;153/2640;53/2975;93/1485", "google_scholar": ";uc5M9lEAAAAJ;https://scholar.google.fr/citations?user=47i5TpcAAAAJ;https://scholar.google.fr/citations?hl=fr;", "or_profile": "~Thibaud_Leteno1;~Antoine_Gourru1;~Charlotte_Laclau2;~R\u00e9mi_Emonet1;~Christophe_Gravier1", "aff": "Universit\u00e9 Jean Monnet;Universit\u00e9 Jean Monnet;T\u00e9lecom Paris;University Jean Monnet;University Jean Monnet", "aff_domain": "univ-st-etienne.fr;univ-st-etienne.fr;telecom-paris.fr;univ-st-etienne.fr;univ-st-etienne.fr", "position": "PhD student;Associate Professor;Associate Professor;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nleteno2023fair,\ntitle={Fair Text Classification with Wasserstein Independence},\nauthor={Thibaud Leteno and Antoine Gourru and Charlotte Laclau and R{\\'e}mi Emonet and Christophe Gravier},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=0M2m9GUTLN}\n}", "github": "", "project": "", "reviewers": "6C1r;nGpp;Xr2p", "site": "https://openreview.net/forum?id=0M2m9GUTLN", "pdf_size": 0, "rating": "5;5;5", "confidence": "3;4;4", "excitement": "3;4;3", "reproducibility": "3;5;4", "correctness": "3;5;4", "rating_avg": 5.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.0, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0000-0002-1870-1329;0000-0001-8586-6302", "linkedin": ";;;remi-emonet-0722216/;christophe-gravier-09311112", "aff_unique_index": "0;0;1;2;2", "aff_unique_norm": "Universit\u00e9 Jean Monnet;T\u00e9l\u00e9com Paris;University Jean Monnet", "aff_unique_dep": ";;", "aff_unique_url": "https://www.univ-jean-monnet.fr;https://www.telecom-paris.fr;https://www.univ-jean-monnet.fr", "aff_unique_abbr": "UJM;T\u00e9l\u00e9com Paris;UJM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "France" }, { "id": "0ODPaEbHxG", "title": "Measuring Pointwise 
$\\mathcal{V}$-Usable Information In-Context-ly", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "In-context learning (ICL) is a new learning paradigm that has gained popularity along with the development of large language models. In this work, we adapt a recently proposed hardness metric, pointwise $\\mathcal{V}$-usable information (PVI), to an in-context version (in-context PVI). Compared to the original PVI, in-context PVI is more efficient in that it requires only a few exemplars and does not require fine-tuning. We conducted a comprehensive empirical analysis to evaluate the reliability of in-context PVI. Our findings indicate that in-context PVI estimates exhibit similar characteristics to the original PVI. Specific to the in-context setting, we show that in-context PVI estimates remain consistent across different exemplar selections and numbers of shots. The variance of in-context PVI estimates across different exemplar selections is insignificant, which suggests that in-context PVI estimates are stable. Furthermore, we demonstrate how in-context PVI can be employed to identify challenging instances. Our work highlights the potential of in-context PVI and provides new insights into the capabilities of ICL.", "keywords": "Hardness;Pointwise V-Usable Information;In-Context Learning", "primary_area": "", "supplementary_material": "", "author": "Sheng Lu;Shan Chen;Yingya Li;Danielle Bitterman;Guergana K Savova;Iryna Gurevych", "authorids": "~Sheng_Lu1;~Shan_Chen1;~Yingya_Li1;~Danielle_Bitterman1;~Guergana_K_Savova1;~Iryna_Gurevych1", "gender": "M;M;;F;F;", "homepage": ";https://shanchen.dev;;https://aim.hms.harvard.edu/;;", "dblp": ";;205/9311;281/1619;;", "google_scholar": ";;;aCFYAEsAAAAJ;9538Cr4AAAAJ;", "or_profile": "~Sheng_Lu1;~Shan_Chen1;~Yingya_Li1;~Danielle_Bitterman1;~Guergana_K_Savova1;~Iryna_Gurevych1", "aff": "Technische Universit\u00e4t Darmstadt;Maastricht University ;Harvard University;Harvard University;Harvard University;", "aff_domain": "tu-darmstadt.de;maastrichtuniversity.nl;harvard.edu;harvard.edu;harvard.edu;", "position": "PhD student;PhD student;Postdoc;Assistant Professor;Full Professor;", "bibtex": "@inproceedings{\nlu2023measuring,\ntitle={Measuring Pointwise \\${\\textbackslash}mathcal\\{V\\}\\$-Usable Information In-Context-ly},\nauthor={Sheng Lu and Shan Chen and Yingya Li and Danielle Bitterman and Guergana K Savova and Iryna Gurevych},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=0ODPaEbHxG}\n}", "github": "", "project": "", "reviewers": "pfMK;Ydp3;jyth", "site": "https://openreview.net/forum?id=0ODPaEbHxG", "pdf_size": 0, "rating": "4;4;4", "confidence": "2;4;4", "excitement": "3;4;3", "reproducibility": "4;4;4", "correctness": "3;3;3", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.0, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-8696-4024;;;;0000-0002-5887-200X;", "linkedin": ";;yingyali;;;", "aff_unique_index": "0;1;2;2;2", "aff_unique_norm": "Technische Universit\u00e4t Darmstadt;Maastricht University;Harvard University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.tu-darmstadt.de;https://www.maastrichtuniversity.nl;https://www.harvard.edu", "aff_unique_abbr": "TUD;MU;Harvard", "aff_campus_unique_index": "", "aff_campus_unique": "", 
"aff_country_unique_index": "0;1;2;2;2", "aff_country_unique": "Germany;Netherlands;United States" }, { "id": "0OtGfwj8eB", "title": "Reinforcement Replaces Supervision: Query focused Summarization using Deep Reinforcement Learning", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Query-focused Summarization (QfS) deals with systems that generate summaries from document(s) based on a query. Motivated by the insight that Reinforcement Learning (RL) provides a generalization to Supervised Learning (SL) for Natural Language Generation, and thereby performs better (empirically) than SL, we use an RL-based approach for this task of QfS. Additionally, we also resolve the conflict of employing RL in Transformers with Teacher Forcing. We develop multiple Policy Gradient networks, trained on various reward signals: ROUGE, BLEU, and Semantic Similarity, which lead to a $\\mathit{10}$-point improvement over the State-of-the-Art approach on the ROUGE-L metric for a benchmark dataset (ELI5). We also show performance of our approach in zero-shot setting for another benchmark dataset (DebatePedia) -- our approach leads to results comparable to baselines, which were specifically trained on DebatePedia. To aid the RL training, we propose a better semantic similarity reward, enabled by a novel Passage Embedding scheme developed using Cluster Hypothesis. Lastly, we contribute a gold-standard test dataset to further research in QfS and Long-form Question Answering (LfQA).", "keywords": "reinforcement learning for long text;query focused summarization;passage embedding;long form question answering", "primary_area": "", "supplementary_material": "", "author": "Swaroop Nath;Pushpak Bhattacharyya;Harshad Khadilkar", "authorids": "~Swaroop_Nath1;~Pushpak_Bhattacharyya1;~Harshad_Khadilkar1", "gender": "M;M;M", "homepage": "https://www.cse.iitb.ac.in/~swaroopnath/;https://www.cse.iitb.ac.in/~pb/;https://www.aero.iitb.ac.in/home/node/172", "dblp": "362/2973;p/PushpakBhattacharyya;144/2689", "google_scholar": "pfMk3uMAAAAJ;https://scholar.google.com.tw/citations?user=vvg-pAkAAAAJ;y9efmmsAAAAJ", "or_profile": "~Swaroop_Nath1;~Pushpak_Bhattacharyya1;~Harshad_Khadilkar1", "aff": "Indian Institute of Technology Bombay, Indian Institute of Technology, Bombay;Indian Institute of Technology, Bombay, Dhirubhai Ambani Institute Of Information and Communication Technology;Tata Consultancy Services Limited, India", "aff_domain": "cse.iitb.ac.in;iitb.ac.in;tcs.com", "position": "MS student;Full Professor;Principal Researcher", "bibtex": "@inproceedings{\nnath2023reinforcement,\ntitle={Reinforcement Replaces Supervision: Query focused Summarization using Deep Reinforcement Learning},\nauthor={Swaroop Nath and Pushpak Bhattacharyya and Harshad Khadilkar},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=0OtGfwj8eB}\n}", "github": "", "project": "", "reviewers": "ydP6;G1x8;VnCj", "site": "https://openreview.net/forum?id=0OtGfwj8eB", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;3;4", "excitement": "4;3;3", "reproducibility": "3;3;1", "correctness": "4;3;3", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 2.3333333333333335, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0003-3601-778X", "linkedin": 
";pushpakbh/?originalSubdomain=in;https://linkedin.com/in/harshad-khadilkar-80609959", "aff_unique_index": "0;1;2", "aff_unique_norm": "Indian Institute of Technology Bombay;Indian Institute of Technology, Bombay;Tata Consultancy Services Limited", "aff_unique_dep": ";;", "aff_unique_url": "https://www.iitb.ac.in;https://www.iitb.ac.in;https://www.tcs.com", "aff_unique_abbr": "IIT Bombay;IIT Bombay;TCS", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Bombay;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "India" }, { "id": "0Rdp7a3y2H", "title": "Adversarial Text Generation by Search and Learning", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Recent research has shown that evaluating the robustness of natural language processing models using textual attack methods is significant. However, most existing text attack methods only use heuristic replacement strategies or language models to generate replacement words at the word level. The blind pursuit of high attack success rates makes it difficult to ensure the quality of the generated adversarial text. As a result, adversarial text is often difficult for humans to understand. In fact, many methods that perform well in terms of text attacks often generate adversarial text with poor quality. To address this important gap, our work treats black-box text attack as an unsupervised text generation problem and proposes a search and learning framework for Adversarial Text Generation by Search and Learning (ATGSL) and develops three adversarial attack methods (ATGSL-SA, ATGSL-BM, ATGSL-FUSION) for black box text attacks. We first apply a heuristic search attack algorithm (ATGSL-SA) and a linguistic thesaurus to generate adversarial samples with high semantic similarity. After this process, we train a conditional generative model to learn from the search results while smoothing out search noise. Moreover, we design an efficient ATGSL-BM attack algorithm based on the text generator. Furthermore, we propose a hybrid attack method (ATGSL-FUSION) that integrates the advantages of ATGSL-SA and ATGSL-BM to enhance attack effectiveness. 
Our proposed attack algorithms are significantly superior to the most advanced methods in terms of attack efficiency and adversarial text quality.", "keywords": "Adversarial robustness;Adversarial training;Unsupervised Text Generation", "primary_area": "", "supplementary_material": "", "author": "Guoyi Li;Bingkang Shi;Zongzhen Liu;Dehan Kong;Yulei Wu;Xiaodan Zhang;Longtao Huang;Honglei Lyu", "authorids": "~Guoyi_Li2;~Bingkang_Shi1;~Zongzhen_Liu1;~Dehan_Kong1;~Yulei_Wu1;~Xiaodan_Zhang3;~Longtao_Huang2;~Honglei_Lyu1", "gender": "M;M;F;M;;;M;", "homepage": "https://blog.csdn.net/Littlewhite520?spm=1000.2123.3001.5343;https://github.com/BingkangShi;https://www.linkedin.com/in/zongzhen-liu-5165446b;;;https://people.ucas.ac.cn/~iiezxd;http://people.ucas.edu.cn/~huanglongtao?language=en;https://www.semanticscholar.org/author/Honglei-Lv/", "dblp": "65/8028;361/7564.html;139/4171;;;29/2631-2;76/10119;342/7824", "google_scholar": ";;;Y6wbzl4AAAAJ;;;EQDfV9cAAAAJ;", "or_profile": "~Guoyi_Li2;~Bingkang_Shi1;~Zongzhen_Liu1;~Dehan_Kong1;~Yulei_Wu1;~Xiaodan_Zhang3;~Longtao_Huang2;~Honglei_Lyu1", "aff": "Institute of Information Engineering, Chinese Academy of Sciences; Institute of Information Engineering, Chinese Academy of Sciences;Chinese Academy of Sciences;imean.ai;;University of Chinese Academy of Sciences;Alibaba Group;Institute of Information Engineering, Chinese Academy of Sciences", "aff_domain": "iie.ac.cn;iie.ac.cn;ac.cn;imean.ai;;ucas.ac.cn;alibaba-inc.com;iie.ac.cn", "position": "PhD student;PhD student;PhD student;Researcher;;Full Professor;Researcher;Associate Professor", "bibtex": "@inproceedings{\nli2023adversarial,\ntitle={Adversarial Text Generation by Search and Learning},\nauthor={Guoyi Li and Bingkang Shi and Zongzhen Liu and Dehan Kong and Yulei Wu and Xiaodan Zhang and Longtao Huang and Honglei Lyu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=0Rdp7a3y2H}\n}", "github": "", "project": "", "reviewers": "xqBr;9DV4;UFnh", "site": "https://openreview.net/forum?id=0Rdp7a3y2H", "pdf_size": 0, "rating": "3;3;3", "confidence": "2;4;2", "excitement": "3;3;4", "reproducibility": "2;4;5", "correctness": "3;4;4", "rating_avg": 3.0, "confidence_avg": 2.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.6666666666666665, "replies_avg": 9, "authors#_avg": 8, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0009-0009-5389-139X;;;;;;", "linkedin": ";;zongzhen-liu-5165446b;dehan-kong-0355a8181/;;;;", "aff_unique_index": "0;0;0;1;2;3;0", "aff_unique_norm": "Chinese Academy of Sciences;imean.ai;University of Chinese Academy of Sciences;Alibaba Group", "aff_unique_dep": "Institute of Information Engineering;;;", "aff_unique_url": "http://www.cas.cn;https://www.imean.ai;http://www.ucas.ac.cn;https://www.alibaba.com", "aff_unique_abbr": "CAS;;UCAS;Alibaba", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;0;0;0", "aff_country_unique": "China;United States" }, { "id": "0SF6Kr1lrx", "title": "Leap-of-Thought: Accelerating Transformers via Dynamic Token Routing", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Computational inefficiency in transformers has been a long-standing challenge, hindering the deployment in resource-constrained or real-time applications. 
One promising approach to mitigate this limitation is to progressively remove less significant tokens, given that the sequence length strongly contributes to the inefficiency. However, this approach entails a potential risk of losing crucial information due to the irrevocable nature of token removal. In this paper, we introduce Leap-of-Thought (LoT), a novel token reduction approach that dynamically routes tokens within layers. Unlike previous work that irrevocably discards tokens, LoT enables tokens to `leap' across layers. This ensures that all tokens remain accessible in subsequent layers while reducing the number of tokens processed within layers. We achieve this by pairing the transformer with dynamic token routers, which learn to selectively process tokens essential for the task. Evaluation results clearly show that LoT achieves a substantial improvement in computational efficiency. Specifically, LoT attains up to 25x faster inference time without a significant loss in accuracy", "keywords": "transformer;language models;token routing;token pruning;input length reduction", "primary_area": "", "supplementary_material": "", "author": "Yeachan Kim;Junho Kim;Jun-Hyung Park;Mingyu Lee;SangKeun Lee", "authorids": "~Yeachan_Kim3;~Junho_Kim6;~Jun-Hyung_Park1;~Mingyu_Lee1;~SangKeun_Lee1", "gender": "M;M;;M;M", "homepage": "https://sites.google.com/view/yeachan/;;https://www.jhpark.info;https://sites.google.com/view/mingyulee92/;http://dilab.korea.ac.kr", "dblp": "224/6085;;16/716;;73/3458-1", "google_scholar": "zyOyBzwAAAAJ;8BpIZoUAAAAJ;https://scholar.google.com/citations?hl=en;d7FBdkQAAAAJ;BGSUpLgAAAAJ", "or_profile": "~Yeachan_Kim3;~Junho_Kim6;~Jun-Hyung_Park1;~Mingyu_Lee1;~SangKeun_Lee1", "aff": "Korea University;Korea University;Korea University;Korea University;Korea University", "aff_domain": "korea.ac.kr;korea.ac.kr;korea.ac.kr;korea.ac.kr;korea.ac.kr", "position": "PhD student;PhD student;PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nkim2023leapofthought,\ntitle={Leap-of-Thought: Accelerating Transformers via Dynamic Token Routing},\nauthor={Yeachan Kim and Junho Kim and Jun-Hyung Park and Mingyu Lee and SangKeun Lee},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=0SF6Kr1lrx}\n}", "github": "", "project": "", "reviewers": "K4aR;iScN;9vaB", "site": "https://openreview.net/forum?id=0SF6Kr1lrx", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;3;4", "excitement": "3;3;3", "reproducibility": "4;4;4", "correctness": "4;3;4", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.0, "reproducibility_avg": 4.0, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0002-7900-3743;;0000-0002-6249-8217", "linkedin": "yeachan-kim-8719281aa/;junho-kim-637383253/;jun-hyung-park-901a62252;mingyu-lee-329338197/;", "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Korea University", "aff_unique_dep": "", "aff_unique_url": "https://www.korea.ac.kr", "aff_unique_abbr": "KU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "South Korea" }, { "id": "0SIyWZEOmJ", "title": "The Linearity of the Effect of Surprisal on Reading Times across Languages", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "In psycholinguistics, surprisal theory posits that the amount of online 
processing effort expended by a human comprehender per word positively correlates with the surprisal of that word given its preceding context. In addition to this overall correlation, more importantly, the specific quantitative form taken by the processing effort as a function of surprisal offers insights into the underlying cognitive mechanisms of language processing. Focusing on English, previous studies have looked into the linearity of surprisal on reading times. Here, we extend the investigation by examining eyetracking corpora of seven languages: Danish, Dutch, English, German, Japanese, Mandarin, and Russian. We find evidence for superlinearity in some languages, but the results are highly sensitive to which language model is used to estimate surprisal.", "keywords": "psycholinguistics;surprisal theory;linearity;reading time;cross-linguistic", "primary_area": "", "supplementary_material": "", "author": "Weijie Xu;Jason Sejin Chon;Tianran Liu;Richard Futrell", "authorids": "~Weijie_Xu2;~Jason_Sejin_Chon1;~Tianran_Liu1;~Richard_Futrell2", "gender": "M;M;;Not Specified", "homepage": "https://weijiexu-charlie.github.io/;https://profiles.stanford.edu/jason-chon;;http://socsci.uci.edu/~rfutrell", "dblp": ";;;169/3172", "google_scholar": "UrPCWf8AAAAJ;;;BzI4ynUAAAAJ", "or_profile": "~Weijie_Xu2;~Jason_Sejin_Chon1;~Tianran_Liu1;~Richard_Futrell2", "aff": "University of California, Irvine;Stanford University;;University of California, Irvine", "aff_domain": "uci.edu;stanford.edu;;uci.edu", "position": "PhD student;Undergrad student;;Associate Professor", "bibtex": "@inproceedings{\nxu2023the,\ntitle={The Linearity of the Effect of Surprisal on Reading Times across Languages},\nauthor={Weijie Xu and Jason Sejin Chon and Tianran Liu and Richard Futrell},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=0SIyWZEOmJ}\n}", "github": "", "project": "", "reviewers": "fm4s;JsrJ;1TwT", "site": "https://openreview.net/forum?id=0SIyWZEOmJ", "pdf_size": 0, "rating": "4;4;4", "confidence": "5;4;4", "excitement": "4;4;4", "reproducibility": "4;4;4", "correctness": "3;4;3", "rating_avg": 4.0, "confidence_avg": 4.333333333333333, "excitement_avg": 4.0, "reproducibility_avg": 4.0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;", "linkedin": ";;;", "aff_unique_index": "0;1;0", "aff_unique_norm": "University of California, Irvine;Stanford University", "aff_unique_dep": ";", "aff_unique_url": "https://www.uci.edu;https://www.stanford.edu", "aff_unique_abbr": "UCI;Stanford", "aff_campus_unique_index": "0;1;0", "aff_campus_unique": "Irvine;Stanford", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "0ULLuIRdcu", "title": "ClimateBERT-NetZero: Detecting and Assessing Net Zero and Reduction Targets", "track": "main", "status": "Short Main", "tldr": "", "abstract": "Public and private actors struggle to assess the vast amounts of information about sustainability commitments made by various institutions. To address this problem, we create a novel tool for automatically detecting corporate and national net zero and reduction targets in three steps. First, we introduce an expert-annotated data set with 3.5K text samples. Second, we train and release ClimateBERT-NetZero, a natural language classifier to detect whether a text contains a net zero or reduction target. 
Third, we showcase its analysis potential with two use cases: We first demonstrate how ClimateBERT-NetZero can be combined with conventional question-answering (Q\\&A) models to analyze the ambitions displayed in net zero and reduction targets. Furthermore, we employ the ClimateBERT-NetZero model on quarterly earning call transcripts and outline how communication patterns evolve over time. Our experiments demonstrate promising pathways for extracting and analyzing net zero and emission reduction targets at scale.", "keywords": "ClimateBERT;BERT;climate change;net zero", "primary_area": "", "supplementary_material": "", "author": "Tobias Schimanski;Julia Bingler;Mathias Kraus;Camilla Hyslop;Markus Leippold", "authorids": "~Tobias_Schimanski1;~Julia_Bingler1;~Mathias_Kraus1;~Camilla_Hyslop1;~Markus_Leippold1", "gender": "M;F;M;;M", "homepage": ";;https://www.data-analytics.rw.fau.eu/;;https://www.bf.uzh.ch/de/persons/leippold-markus", "dblp": ";;;;", "google_scholar": ";https://scholar.google.com/citations?hl=de;https://scholar.google.ch/citations?user=lgVr4w8AAAAJ;;-Ta9boQAAAAJ", "or_profile": "~Tobias_Schimanski1;~Julia_Bingler1;~Mathias_Kraus1;~Camilla_Hyslop1;~Markus_Leippold1", "aff": "University of Zurich;University of Oxford;FAU Erlangen-Nuremberg;University of Oxford;University of Zurich", "aff_domain": "uzh.ch;smithschool.ox.ac.uk;fau.de;ox.ac.uk;uzh.ch", "position": "PhD student;Postdoc;Assistant Professor;PhD student;Full Professor", "bibtex": "@inproceedings{\nschimanski2023climatebertnetzero,\ntitle={Climate{BERT}-NetZero: Detecting and Assessing Net Zero and Reduction Targets},\nauthor={Tobias Schimanski and Julia Bingler and Mathias Kraus and Camilla Hyslop and Markus Leippold},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=0ULLuIRdcu}\n}", "github": "", "project": "", "reviewers": "vRSM;8w4H;Qtwv", "site": "https://openreview.net/forum?id=0ULLuIRdcu", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;5;3", "excitement": "3;4;2", "reproducibility": "3;4;4", "correctness": "3;4;3", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-3802-509X;;;0009-0003-4809-9652;", "linkedin": ";;;;markus-leippold-578bb95/", "aff_unique_index": "0;1;2;1;0", "aff_unique_norm": "University of Zurich;University of Oxford;Friedrich-Alexander-Universit\u00e4t Erlangen-N\u00fcrnberg", "aff_unique_dep": ";;", "aff_unique_url": "https://www.unizh.ch;https://www.ox.ac.uk;https://www.fau.de", "aff_unique_abbr": "UZH;Oxford;FAU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2;1;0", "aff_country_unique": "Switzerland;United Kingdom;Germany" }, { "id": "0VQImEvjPJ", "title": "NormDial: A Comparable Bilingual Synthetic Dialog Dataset for Modeling Social Norm Adherence and Violation", "track": "main", "status": "Short Main", "tldr": "", "abstract": "Social norms fundamentally shape interpersonal communication. We present NormDial, a high-quality dyadic dialogue dataset with turn-by-turn annotations of social norm adherences and violations for Chinese and American cultures. 
Introducing the task of social norm observance detection, our dataset is synthetically generated in both Chinese and English using a human-in-the-loop pipeline by prompting large language models with a small collection of expert-annotated social norms. We show that our generated dialogues are of high quality through human evaluation and further evaluate the performance of existing large language models on this task. Our findings point towards new directions for understanding the nuances of social norms as they manifest in conversational contexts that span across languages and cultures.", "keywords": "social norms;resources and evaluation;large language models", "primary_area": "", "supplementary_material": "", "author": "Oliver Li;Mallika Subramanian;Arkadiy Saakyan;Sky CH-Wang;Smaranda Muresan", "authorids": "~Oliver_Li1;~Mallika_Subramanian1;~Arkadiy_Saakyan1;~Sky_CH-Wang1;~Smaranda_Muresan3", "gender": "M;F;M;M;", "homepage": "https://github.com/Aochong-Li/aochong-li.github.io;https://mallika2011.github.io/;https://asaakyan.github.io/;https://skywang.me;http://www.cs.columbia.edu/~smara/", "dblp": "308/3312;;294/5397;301/9138;44/70", "google_scholar": "rZ186jcAAAAJ;xr5RlokAAAAJ;oPegqXQAAAAJ;6lHNfVoAAAAJ;Esbx2VcAAAAJ", "or_profile": "~Oliver_Li1;~Mallika_Subramanian1;~Arkadiy_Saakyan1;~Sky_CH-Wang1;~Smaranda_Muresan3", "aff": "Columbia University;Columbia University;Amazon;Columbia University;Columbia University", "aff_domain": "columbia.edu;columbia.edu;amazon.com;columbia.edu;columbia.edu", "position": "Undergrad student;MS student;Intern;PhD student;Principal Researcher", "bibtex": "@inproceedings{\nli2023normdial,\ntitle={NormDial: A Comparable Bilingual Synthetic Dialog Dataset for Modeling Social Norm Adherence and Violation},\nauthor={Oliver Li and Mallika Subramanian and Arkadiy Saakyan and Sky CH-Wang and Smaranda Muresan},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=0VQImEvjPJ}\n}", "github": "", "project": "", "reviewers": "Gicu;q6Mj;nowG", "site": "https://openreview.net/forum?id=0VQImEvjPJ", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;3;4", "excitement": "2;3;3", "reproducibility": "2;2;2", "correctness": "3;3;4", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 2.6666666666666665, "reproducibility_avg": 2.0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;", "linkedin": ";mallika-subramanian-2607b116b;;skychwang/;", "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "Columbia University;Amazon", "aff_unique_dep": ";Amazon.com, Inc.", "aff_unique_url": "https://www.columbia.edu;https://www.amazon.com", "aff_unique_abbr": "Columbia;Amazon", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "0W2aSP6y3x", "title": "Vision-Enhanced Semantic Entity Recognition in Document Images via Visually-Asymmetric Consistency Learning", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Extracting meaningful entities belonging to predefined categories from Visually-rich Form-like Documents (VFDs) is a challenging task. Visual and layout features such as font, background, color, and bounding box location and size provide important cues for identifying entities of the same type. 
However, existing models commonly train a visual encoder with weak cross-modal supervision signals, resulting in a limited capacity to capture these non-textual features and suboptimal performance. In this paper, we propose a novel Visually-Asymmetric coNsistenCy Learning (VANCL) approach that addresses the above limitation by enhancing the model's ability to capture fine-grained visual and layout features through the incorporation of color priors. Experimental results on benchmark datasets show that our approach substantially outperforms the strong LayoutLM series baseline, demonstrating the effectiveness of our approach. Additionally, we investigate the effects of different color schemes on our approach, providing insights for optimizing model performance. We believe our work will inspire future research on multimodal information extraction.", "keywords": "Visually rich documents;Information extraction;Consistency Learning;Multimodality", "primary_area": "", "supplementary_material": "", "author": "Hao Wang;Xiahua Chen;Rui Wang;Chenhui Chu", "authorids": "~Hao_Wang23;~Xiahua_Chen1;~Rui_Wang10;~Chenhui_Chu1", "gender": "M;M;M;M", "homepage": "https://hint-lab.github.io/people/wang_hao;http://cxhgh.github.io;http://researchmap.jp/chu/?lang=en;https://wangruinlp.github.io/", "dblp": "181/2812;;126/8755;w/RuiWang15", "google_scholar": ";;https://scholar.google.co.jp/citations?user=6ef0qbgAAAAJ;oTU0v5IAAAAJ", "or_profile": "~Hao_Wang23;~Xiahua_Chen1;~Chenhui_Chu1;~Rui_Wang7", "aff": "Shanghai University;Shanghai University;Kyoto University;Shanghai Jiaotong University", "aff_domain": "shu.edu.cn;shu.edu.cn;kyoto-u.ac.jp;sjtu.edu.cn", "position": "Assistant Professor;MS student;Associate Professor;Associate Professor", "bibtex": "@inproceedings{\nwang2023visionenhanced,\ntitle={Vision-Enhanced Semantic Entity Recognition in Document Images via Visually-Asymmetric Consistency Learning},\nauthor={Hao Wang and Xiahua Chen and Rui Wang and Chenhui Chu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=0W2aSP6y3x}\n}", "github": "", "project": "", "reviewers": "XaQk;XvBS;uW1H;SYbT", "site": "https://openreview.net/forum?id=0W2aSP6y3x", "pdf_size": 0, "rating": "4;4;4;4", "confidence": "3;3;3;4", "excitement": "4;3;4;4", "reproducibility": "4;3;4;4", "correctness": "4;2;3;5", "rating_avg": 4.0, "confidence_avg": 3.25, "excitement_avg": 3.75, "reproducibility_avg": 3.75, "correctness_avg": 3.5, "replies_avg": 14, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-1089-9828;;0000-0001-9848-6384;0000-0001-8007-2503", "linkedin": ";;;", "aff_unique_index": "0;0;1;2", "aff_unique_norm": "Shanghai University;Kyoto University;Shanghai Jiao Tong University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.shu.edu.cn;https://www.kyoto-u.ac.jp;https://www.sjtu.edu.cn", "aff_unique_abbr": "SHU;Kyoto U;SJTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "China;Japan" }, { "id": "0aiFUPYan3", "title": "VER: Unifying Verbalizing Entities and Relations", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Entities and relationships between entities are vital in the real world. Essentially, we understand the world by understanding entities and relations. 
For instance, to understand a field, e.g., computer science, we need to understand the relevant concepts, e.g., machine learning, and the relationships between concepts, e.g., machine learning and artificial intelligence. To understand a person, we should first know who he/she is and how he/she is related to others. To understand entities and relations, humans may refer to natural language descriptions. For instance, when learning a new scientific term, people usually start by reading its definition in dictionaries or encyclopedias. To know the relationship between two entities, humans tend to create a sentence to connect them. In this paper, we propose VER: a unified model for Verbalizing Entities and Relations. Specifically, we attempt to build a system that takes any entity or entity set as input and generates a sentence to represent entities and relations. Extensive experiments demonstrate that our model can generate high-quality sentences describing entities and entity relationships and facilitate various tasks on entities and relations, including definition modeling, relation modeling, and generative commonsense reasoning.", "keywords": "definition modeling;relation modeling;entity relationships", "primary_area": "", "supplementary_material": "", "author": "Jie Huang;Kevin Chang", "authorids": "~Jie_Huang3;~Kevin_Chang1", "gender": ";M", "homepage": "https://jeffhj.github.io/;https://siebelschool.illinois.edu/about/people/faculty/kcchang", "dblp": "29/6643-9;c/KCCChang", "google_scholar": "GIoPkMoAAAAJ;https://scholar.google.com.tw/citations?user=sugWZ6MAAAAJ", "or_profile": "~Jie_Huang3;~Kevin_Chang1", "aff": "University of Illinois, Urbana Champaign;University of Illinois, Urbana Champaign", "aff_domain": "illinois.edu;illinois.edu", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\nhuang2023ver,\ntitle={{VER}: Unifying Verbalizing Entities and Relations},\nauthor={Jie Huang and Kevin Chang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=0aiFUPYan3}\n}", "github": "", "project": "", "reviewers": "hSHA;W9C6;ZaBz", "site": "https://openreview.net/forum?id=0aiFUPYan3", "pdf_size": 0, "rating": "4;4;4", "confidence": "5;4;2", "excitement": "4;3;3", "reproducibility": "4;3;4", "correctness": "4;4;3", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0003-0997-6803", "linkedin": "jie-huang-4b0104151/;", "aff_unique_index": "0;0", "aff_unique_norm": "University of Illinois Urbana-Champaign", "aff_unique_dep": "", "aff_unique_url": "https://illinois.edu", "aff_unique_abbr": "UIUC", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Urbana-Champaign", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "0b2chPXfVG", "title": "Orca: A Few-shot Benchmark for Chinese Conversational Machine Reading Comprehension", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "The conversational machine reading comprehension (CMRC) task aims to answer questions in conversations, which has been a hot research topic in recent years because of its wide applications. However, existing CMRC benchmarks in which each conversation is assigned a static passage are inconsistent with real scenarios. 
Thus, models' comprehension ability in real scenarios is hard to evaluate reasonably. To this end, we propose the first Chinese CMRC benchmark \\textbf{Orca} and further provide zero-shot/few-shot settings to evaluate models' generalization ability across diverse domains. We collect 831 hot-topic driven conversations with 4,742 turns in total. Each turn of a conversation is assigned a response-related passage, aiming to evaluate models' comprehension ability more reasonably. The topics of the conversations are collected from a social media platform and cover 33 domains, keeping them consistent with real scenarios. Importantly, answers in Orca are all well-annotated natural responses rather than the specific spans or short phrases used in previous datasets.\nBesides, we implement three strong baselines to tackle the challenges in Orca. The results indicate that our CMRC benchmark poses a great challenge.", "keywords": "dataset;conversational machine reading comprehension", "primary_area": "", "supplementary_material": "", "author": "Nuo Chen;Hongguang Li;Junqing He;Yinan Bao;Xinshi Lin;Qi Yang;Jianfeng Liu;Ruyi Gan;Jiaxing Zhang;Baoyuan Wang;Jia Li", "authorids": "~Nuo_Chen1;~Hongguang_Li2;~Junqing_He1;~Yinan_Bao1;~Xinshi_Lin1;~Qi_Yang6;~Jianfeng_Liu2;~Ruyi_Gan1;~Jiaxing_Zhang1;~Baoyuan_Wang3;~Jia_Li4", "gender": "M;;F;;M;;M;M;M;M;M", "homepage": "https://jerrynchen.github.io/;;;;http://linxinshi.net;;;https://github.com/ganzhiruyi;https://idea.edu.cn/en/person/zhangjiaxing.html;;https://sites.google.com/view/lijia", "dblp": "135/5622-1;;203/9352;;;;20/4145-2;;;41/8869;23/6950-9", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;;CNqDVoMAAAAJ;;;https://scholar.google.com.hk/citations?user=uIYLtG4AAAAJ;;y994nEsAAAAJ;ozXuhOUAAAAJ;https://scholar.google.co.jp/citations?user=OWa5rOEAAAAJ;1gSbcYoAAAAJ", "or_profile": "~Nuo_Chen1;~Hongguang_Li2;~Junqing_He1;~Yinan_Bao1;~Xinshi_Lin1;~Qi_Yang6;~Jianfeng_Liu2;~Ruyi_Gan1;~Jiaxing_Zhang1;~Baoyuan_Wang3;~Jia_Li4", "aff": "Hong Kong University of Science and Technology;;International Digital Econemy Academy;;;International Academy of Digital Economy;xiaobing.ai;University of Science and Technology of China;IDEA;Xiaobing.ai;Hong Kong University of Science and Technology (Guangzhou)", "aff_domain": "hkust.edu;;idea.edu.cn;;;idea.edu.cn;xiaobing.ai;ustc.edu.cn;idea.edu.cn;xiaobing.ai;ust.hk", "position": "PhD student;;Researcher;;;Researcher;Researcher;PhD student;Principal Researcher;Researcher;Assistant Professor", "bibtex": "@inproceedings{\nchen2023orca,\ntitle={Orca: A Few-shot Benchmark for Chinese Conversational Machine Reading Comprehension},\nauthor={Nuo Chen and Hongguang Li and Junqing He and Yinan Bao and Xinshi Lin and Qi Yang and Jianfeng Liu and Ruyi Gan and Jiaxing Zhang and Baoyuan Wang and Jia Li},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=0b2chPXfVG}\n}", "github": "", "project": "", "reviewers": "fQ1z;vj1q;e1ov", "site": "https://openreview.net/forum?id=0b2chPXfVG", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;4;4", "excitement": "3;3;3", "reproducibility": "4;4;3", "correctness": "3;3;3", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 11, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;0000-0003-0127-6167;;;;;0000-0002-6362-4385", "linkedin": 
";;;;;;jianfeng-liu-9539897b;;;;", "aff_unique_index": "0;1;2;3;4;5;3;0", "aff_unique_norm": "Hong Kong University of Science and Technology;International Digital Economy Academy;International Academy of Digital Economy;Xiaobing.AI;University of Science and Technology of China;Institute of Electrical and Electronics Engineers", "aff_unique_dep": ";;;;;", "aff_unique_url": "https://www.ust.hk;;;https://xiaobing.ai;http://www.ustc.edu.cn;https://www.ieee.org", "aff_unique_abbr": "HKUST;;;;USTC;IEEE", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;0;2;0;0", "aff_country_unique": "China;;United States" }, { "id": "0bderX6zwr", "title": "FFAEval: Evaluating Dialogue System via Free-For-All Ranking", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Evaluating open-domain dialogue systems is currently an open question. Automatic evaluation metrics have shown poor correlation with human assessment in dialogue generation tasks. Human evaluation, which involves annotators for multi-dimension scoring, is trustworthy but time-consuming. In this work, we propose FFAEval, a reliable and efficient human evaluation framework using Free-For-All ranking approach. By sharing the dialogue history, the framework enables annotators to converse with multiple dialogue systems simultaneously in a single-blind, multi-turn manner. The subsequent free-for-all allows annotators to select the most favourable model in each turn from among all the participating dialogue systems. The final performance of each model is represented by calculating the TrueSkill score derived from the free-for-all competition. Our empirical study on English and Chinese dialogue systems demonstrates that FFAEval achieves a strong correlation with score-based human assessment compared to existing evaluation methods. We further prove the efficiency and stability of our framework in additional experiments. The source code and data are available on Github.", "keywords": "Human Evaluation;Dialogue System Evaluation", "primary_area": "", "supplementary_material": "", "author": "Zeyao Ma;Zijun Yao;Jing Zhang;Jifan Yu;Xiaohan Zhang;Juanzi Li;Jie Tang", "authorids": "~Zeyao_Ma1;~Zijun_Yao2;~Jing_Zhang24;~Jifan_Yu2;~Xiaohan_Zhang6;~Juanzi_Li1;~Jie_Tang1", "gender": "M;M;;M;F;;", "homepage": "https://github.com/Kaka23333;https://transirius.github.io/;https://xiaojingzi.github.io/;https://yujifan0326.github.io/;;;", "dblp": ";134/4025-2;05/3499-1.html;239/6130.html;;;", "google_scholar": ";B4LmHSUAAAAJ;T7Wa3GQAAAAJ;https://scholar.google.com.tw/citations?hl=zh-CN;https://scholar.google.com.hk/citations?user=RKyE8o0AAAAJ;;", "or_profile": "~Zeyao_Ma1;~Zijun_Yao2;~Jing_Zhang24;~Jifan_Yu2;~Xiaohan_Zhang6;~Juanzi_Li1;~Jie_Tang1", "aff": "Beijing University of Posts and Telecommunications;Tsinghua University;Renmin University of China;;Beijing Knowledge Atlas Technology Co., Ltd. 
;;", "aff_domain": "bupt.edu.cn;tsinghua.edu.cn;ruc.edu.cn;;zhipuai.cn;;", "position": "Undergrad student;MS student;Associate Professor;;Researcher;;", "bibtex": "@inproceedings{\nma2023ffaeval,\ntitle={{FFAE}val: Evaluating Dialogue System via Free-For-All Ranking},\nauthor={Zeyao Ma and Zijun Yao and Jing Zhang and Jifan Yu and Xiaohan Zhang and Juanzi Li and Jie Tang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=0bderX6zwr}\n}", "github": "", "project": "", "reviewers": "giRW;nRyd;ojyE;H8nE", "site": "https://openreview.net/forum?id=0bderX6zwr", "pdf_size": 0, "rating": "5;5;5;5", "confidence": "4;4;4;4", "excitement": "2;2;3;4", "reproducibility": "3;4;2;4", "correctness": "3;4;3;4", "rating_avg": 5.0, "confidence_avg": 4.0, "excitement_avg": 2.75, "reproducibility_avg": 3.25, "correctness_avg": 3.5, "replies_avg": 14, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-0288-9283;;0000-0003-3430-4048;0000-0003-3295-7758;;", "linkedin": ";%E5%AD%90%E4%BF%8A-%E5%A7%9A-313188209/;;;;;", "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Beijing University of Posts and Telecommunications;Tsinghua University;Renmin University of China;Beijing Knowledge Atlas Technology Co., Ltd.", "aff_unique_dep": ";;;", "aff_unique_url": "http://www.bupt.edu.cn/;https://www.tsinghua.edu.cn;http://www.ruc.edu.cn;", "aff_unique_abbr": "BUPT;THU;RUC;", "aff_campus_unique_index": "0", "aff_campus_unique": "Beijing;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "0duz9dhwRc", "title": "Stance Detection on Social Media with Background Knowledge", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Identifying users' stances regarding specific targets/topics is a significant route to learning public opinion from social media platforms. Most existing studies of stance detection strive to learn stance information about specific targets from the context, in order to determine the user's stance on the target. However, in real-world scenarios, we usually have a certain understanding of a target when we express our stance on it. In this paper, we investigate stance detection from a novel perspective, where the background knowledge of the targets is taken into account for better stance detection. To be specific, we categorize background knowledge into two categories: episodic knowledge and discourse knowledge, and propose a novel Knowledge-Augmented Stance Detection (KASD) framework. For episodic knowledge, we devise a heuristic retrieval algorithm based on the topic to retrieve the Wikipedia documents relevant to the sample. Further, we construct a prompt for ChatGPT to filter the Wikipedia documents to derive episodic knowledge. For discourse knowledge, we construct a prompt for ChatGPT to paraphrase the hashtags, references, etc., in the sample, thereby injecting discourse knowledge into the sample. 
Experimental results on four benchmark datasets demonstrate that our KASD achieves state-of-the-art performance in in-target and zero-shot stance detection.", "keywords": "Stance Detection;Knowledge Augmentation;Background Knowledge", "primary_area": "", "supplementary_material": "", "author": "Ang Li;Bin Liang;Jingqian Zhao;Bowen Zhang;Min Yang;Ruifeng Xu", "authorids": "~Ang_Li18;~Bin_Liang6;~Jingqian_Zhao1;~Bowen_Zhang5;~Min_Yang6;~Ruifeng_Xu1", "gender": "M;M;M;M;F;M", "homepage": "https://leon-francis.github.io/;https://binliang-nlp.github.io/;;;https://minyang.me/;http://faculty.hitsz.edu.cn/xuruifeng", "dblp": ";71/6053-4;;85/7433-5;02/1640-7;93/5407-1", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;djpQeLEAAAAJ;;2O1BOpEAAAAJ;_wop6KgAAAAJ;mObXnNIAAAAJ", "or_profile": "~Ang_Li18;~Bin_Liang6;~Jingqian_Zhao1;~Bowen_Zhang5;~Min_Yang6;~Ruifeng_Xu1", "aff": "Harbin Institute of Technology;The Chinese University of Hong Kong;Harbin Institute of Technology;Shenzhen Technology University;Shenzhen Institutes of Advanced Technology, Chinese Academy of Sciences, Chinese Academy of Sciences;Harbin Institute of Technology", "aff_domain": "hit.edu.cn;cuhk.edu.hk;hit.edu.cn;sztu.edu.cn;siat.ac.cn;hit.edu.cn", "position": "MS student;Postdoc;Undergrad student;Assistant Professor;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nli2023stance,\ntitle={Stance Detection on Social Media with Background Knowledge},\nauthor={Ang Li and Bin Liang and Jingqian Zhao and Bowen Zhang and Min Yang and Ruifeng Xu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=0duz9dhwRc}\n}", "github": "", "project": "", "reviewers": "d5bz;sQss;ZhbM", "site": "https://openreview.net/forum?id=0duz9dhwRc", "pdf_size": 0, "rating": "4;4;4", "confidence": "5;4;3", "excitement": "3;4;4", "reproducibility": "5;4;4", "correctness": "3;4;4", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.333333333333333, "correctness_avg": 3.6666666666666665, "replies_avg": 10, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0003-4472-4919;0000-0001-7234-1347;0009-0007-6777-5467;0000-0002-3581-9476;;0000-0002-4009-5679", "linkedin": ";;;;;", "aff_unique_index": "0;1;0;2;3;0", "aff_unique_norm": "Harbin Institute of Technology;Chinese University of Hong Kong;Shenzhen Technology University;Chinese Academy of Sciences", "aff_unique_dep": ";;;Shenzhen Institutes of Advanced Technology", "aff_unique_url": "http://www.hit.edu.cn/;https://www.cuhk.edu.hk;https://www.sztu.edu.cn;http://www.cas.cn", "aff_unique_abbr": "HIT;CUHK;;CAS", "aff_campus_unique_index": "0;1;0;3;0", "aff_campus_unique": "Harbin;Hong Kong SAR;;Shenzhen", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "0eWQVWvPgu", "title": "Unveiling the Power of Argument Arrangement in Online Persuasive Discussions", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Previous research on argumentation in online discussions has largely focused on examining individual comments and neglected the interactive nature of discussions. In line with previous work, we represent individual comments as sequences of semantic argumentative unit types. 
However, because it is intuitively necessary for dialogical argumentation to address the opposing viewpoints, we extend this model by clustering type sequences into different argument arrangement patterns and representing discussions as sequences of these patterns. These sequences of patterns are a symbolic representation of argumentation strategies that capture the overall structure of discussions. Using this novel approach, we conduct an in-depth analysis of the strategies in 34,393 discussions from the online discussion forum Change My View and show that our discussion model is effective for persuasiveness prediction, outperforming LLM-based classifiers on the same data. Our results provide valuable insights into argumentation dynamics in online discussions and, through the presented prediction procedure, are of practical importance for writing assistance and persuasive text generation systems.", "keywords": "Argument Mining;Persuasive Dialogues;Persuasion Strategies;ChangeMyView", "primary_area": "", "supplementary_material": "", "author": "Nailia Mirzakhmedova;Johannes Kiesel;Khalid Al Khatib;Benno Stein", "authorids": "~Nailia_Mirzakhmedova1;~Johannes_Kiesel1;~Khalid_Al_Khatib1;~Benno_Stein2", "gender": ";;M;", "homepage": ";https://kiesels.de/johannes;https://khalid-alkhatib.github.io/;https://weimar.webis.de", "dblp": ";118/3606;31/8936;69/4806-1", "google_scholar": ";https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=en;GCAVWqgAAAAJ", "or_profile": "~Nailia_Mirzakhmedova1;~Johannes_Kiesel1;~Khalid_Al_Khatib1;~Benno_Stein2", "aff": "Bauhaus Universit\u00e4t Weimar;Bauhaus Universit\u00e4t Weimar;University of Groningen;Bauhaus Universit\u00e4t Weimar", "aff_domain": "uni-weimar.de;uni-weimar.de;rug.nl;uni-weimar.de", "position": "PhD student;Postdoc;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nmirzakhmedova2023unveiling,\ntitle={Unveiling the Power of Argument Arrangement in Online Persuasive Discussions},\nauthor={Nailia Mirzakhmedova and Johannes Kiesel and Khalid Al Khatib and Benno Stein},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=0eWQVWvPgu}\n}", "github": "", "project": "", "reviewers": "HDqE;Sf3y;w6PD", "site": "https://openreview.net/forum?id=0eWQVWvPgu", "pdf_size": 0, "rating": "3;3;3", "confidence": "5;2;2", "excitement": "2;2;3", "reproducibility": "4;3;4", "correctness": "3;3;3", "rating_avg": 3.0, "confidence_avg": 3.0, "excitement_avg": 2.3333333333333335, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-8143-1405;0000-0002-1617-6508;0009-0006-7255-5349;0000-0001-9033-2217", "linkedin": ";johannes-kiesel-b25032b7/;khalid-alkhatib/;", "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Bauhaus Universit\u00e4t Weimar;University of Groningen", "aff_unique_dep": ";", "aff_unique_url": "https://www.bauhaus-university.de;https://www.rug.nl", "aff_unique_abbr": "BUW;RUG", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Weimar;", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "Germany;Netherlands" }, { "id": "0hTPJBnncc", "title": "MQuAKE: Assessing Knowledge Editing in Language Models via Multi-Hop Questions", "track": "main", "status": "Long Main", "tldr": "", "abstract": "The information stored in large language models (LLMs) falls out of date quickly, and retraining 
from scratch is often not an option. This has recently given rise to a range of techniques for injecting new facts through updating model weights. Current evaluation paradigms are extremely limited, mainly validating the recall of edited facts, but changing one fact should cause rippling changes to the model's related beliefs. If we edit the UK Prime Minister to now be Rishi Sunak, then we should get a different answer to Who is married to the British Prime Minister? In this work, we present a benchmark MQuAKE (Multi-hop Question Answering for Knowledge Editing) comprising multi-hop questions that assess whether edited models correctly answer questions where the answer should change as an entailed consequence of edited facts. While we find that current knowledge-editing approaches can recall edited facts accurately, they fail catastrophically on the constructed multi-hop questions. We thus propose a simple memory-based approach, MeLLo, which stores all edited facts externally while prompting the language model iteratively to generate answers that are consistent with the edited facts. While MQuAKE remains challenging, we show that MeLLo scales well with LLMs (up to 175B) and outperforms previous model editors by a large margin.", "keywords": "Knowledge Editing;Multi-hop Question Answering;Language Models", "primary_area": "", "supplementary_material": "", "author": "Zexuan Zhong;Zhengxuan Wu;Christopher D Manning;Christopher Potts;Danqi Chen", "authorids": "~Zexuan_Zhong1;~Zhengxuan_Wu1;~Christopher_D_Manning1;~Christopher_Potts1;~Danqi_Chen1", "gender": "M;M;M;M;F", "homepage": "https://www.cs.princeton.edu/~zzhong/;https://cs.stanford.edu/~wuzhengx/;https://nlp.stanford.edu/~manning/;http://web.stanford.edu/~cgpotts/;https://www.cs.princeton.edu/~danqic/", "dblp": "218/7257;234/4650;m/ChristopherDManning;13/2617;87/7949", "google_scholar": ";CBvE6lwAAAAJ;1zmDOdwAAAAJ;3j08YoAAAAAJ;sVR8ktkAAAAJ", "or_profile": "~Zexuan_Zhong1;~Zhengxuan_Wu1;~Christopher_D_Manning1;~Christopher_Potts1;~Danqi_Chen1", "aff": "Princeton University;Stanford University;Computer Science Department, Stanford University;Stanford University;Princeton University", "aff_domain": "princeton.edu;stanford.edu;cs.stanford.edu;stanford.edu;cs.princeton.edu", "position": "PhD student;PhD student;Full Professor;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nzhong2023mquake,\ntitle={{MQ}u{AKE}: Assessing Knowledge Editing in Language Models via Multi-Hop Questions},\nauthor={Zexuan Zhong and Zhengxuan Wu and Christopher D Manning and Christopher Potts and Danqi Chen},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=0hTPJBnncc}\n}", "github": "", "project": "", "reviewers": "ipS4;i7oa;ojrC;uapW", "site": "https://openreview.net/forum?id=0hTPJBnncc", "pdf_size": 0, "rating": "5;5;5;5", "confidence": "3;5;4;4", "excitement": "4;4;3;4", "reproducibility": "5;5;4;4", "correctness": "4;4;4;4", "rating_avg": 5.0, "confidence_avg": 4.0, "excitement_avg": 3.75, "reproducibility_avg": 4.5, "correctness_avg": 4.0, "replies_avg": 14, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0001-6155-649X;0000-0002-7978-6055;", "linkedin": ";;christopher-manning-011575/;;", "aff_unique_index": "0;1;1;1;0", "aff_unique_norm": "Princeton University;Stanford University", "aff_unique_dep": ";", "aff_unique_url": "https://www.princeton.edu;https://www.stanford.edu", "aff_unique_abbr": 
"Princeton;Stanford", "aff_campus_unique_index": "1;1;1", "aff_campus_unique": ";Stanford", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "0hyn6MJmnP", "title": "TADI: Topic-aware Attention and Powerful Dual-encoder Interaction for Recall in News Recommendation", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "News recommendation is one of the widest commercialization in natural language processing research area, which aims to recommend news according to user interests. New recall plays an important role in news recommendation. It is to recall candidates from a very large news database. Recent researches of news recall mostly adopt dual-encoder architecture as it provides a much faster recall scheme, and they encode each word equally. However, these works remain two challenges: irrelevant word distraction and weak dual-encoder interaction. Therefore, we propose a model Topic-aware Attention and powerful Dual-encoder Interaction for Recall in news recommendation (TADI). To avoid irrelevant word distraction, TADI designs a Topic-aware Attention (TA) which weights words according to news topics. To enhance dual-encoder interaction, TADI provides a cheap yet powerful interaction module, namely Dual-encoder Interaction (DI). DI helps dual encoders interact powerfully based on two aux targets. After performance comparisons between TADI and state-of-the-arts in a series of experiments, we verify the effectiveness of TADI.", "keywords": "news recommendation;recommendation", "primary_area": "", "supplementary_material": "", "author": "Junxiang Jiang", "authorids": "~Junxiang_Jiang1", "gender": "M", "homepage": "", "dblp": "https://dblp.uni-trier.de/pid/187/0078.html", "google_scholar": "", "or_profile": "~Junxiang_Jiang1", "aff": "Baidu", "aff_domain": "baidu.com", "position": "Researcher", "bibtex": "@inproceedings{\njiang2023tadi,\ntitle={{TADI}: Topic-aware Attention and Powerful Dual-encoder Interaction for Recall in News Recommendation},\nauthor={Junxiang Jiang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=0hyn6MJmnP}\n}", "github": "", "project": "", "reviewers": "fcXJ;Gomy;o5wg", "site": "https://openreview.net/forum?id=0hyn6MJmnP", "pdf_size": 0, "rating": "2;2;2", "confidence": "4;4;5", "excitement": "4;3;3", "reproducibility": "4;2;3", "correctness": "4;3;3", "rating_avg": 2.0, "confidence_avg": 4.333333333333333, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 1, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "", "linkedin": "", "aff_unique_index": "0", "aff_unique_norm": "Baidu", "aff_unique_dep": "Baidu, Inc.", "aff_unique_url": "https://www.baidu.com", "aff_unique_abbr": "Baidu", "aff_country_unique_index": "0", "aff_country_unique": "China" }, { "id": "0iRgUfkwp3", "title": "Causal Intervention-based Few-Shot Named Entity Recognition", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Few-shot named entity recognition (NER) systems aim to recognize new classes of entities with limited labeled samples. However, these systems face a significant challenge of overfitting compared to tasks with abundant samples. This overfitting is mainly caused by the spurious correlation resulting from the bias in selecting a few samples. 
To address this issue, we propose a causal intervention-based few-shot NER method in this paper. Our method, based on the prototypical network, intervenes in the context to block the backdoor path between context and label. In the one-shot scenario, where no additional context is available for intervention, we employ incremental learning to intervene on the prototype, which also helps mitigate catastrophic forgetting. Our experiments on various benchmarks demonstrate that our approach achieves new state-of-the-art results.", "keywords": "Causal Intervention;Few-Shot Learning;Named Entity Recognition", "primary_area": "", "supplementary_material": "", "author": "Zhen Yang;Yongbin Liu;Chunping Ouyang", "authorids": "~Zhen_Yang14;~Yongbin_Liu1;~Chunping_Ouyang1", "gender": "F;M;F", "homepage": ";;https://jsjxy.usc.edu.cn/info/2022/4741.htm", "dblp": ";79/9544;", "google_scholar": ";9sXgL3MAAAAJ;", "or_profile": "~Zhen_Yang14;~Yongbin_Liu1;~Chunping_Ouyang1", "aff": "University of South China;University of South China;University of South China", "aff_domain": "usc.edu.cn;usc.edu.cn;usc.edu.cn", "position": "MS student;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nyang2023causal,\ntitle={Causal Intervention-based Few-Shot Named Entity Recognition},\nauthor={Zhen Yang and Yongbin Liu and Chunping Ouyang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=0iRgUfkwp3}\n}", "github": "", "project": "", "reviewers": "c6Vr;56nu;djTo", "site": "https://openreview.net/forum?id=0iRgUfkwp3", "pdf_size": 0, "rating": "4;4;4", "confidence": "5;3;3", "excitement": "3;4;3", "reproducibility": "4;3;4", "correctness": "2;3;3", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 2.6666666666666665, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0001-5966-1784;;", "linkedin": ";;", "aff_unique_index": "0;0;0", "aff_unique_norm": "South China University", "aff_unique_dep": "", "aff_unique_url": "http://www.scu.edu.cn", "aff_unique_abbr": "SCU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "0ii51brFyn", "title": "Enhanced Simultaneous Machine Translation with Word-level Policies", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Recent years have seen remarkable advances in the field of Simultaneous Machine Translation (SiMT) due to the introduction of innovative policies that dictate whether to READ or WRITE at each step of the translation process. However, a common assumption in many existing studies is that operations are carried out at the subword level, even though the standard unit for input and output in most practical scenarios is typically at the word level. This paper demonstrates that policies devised and validated at the subword level are surpassed by those operating at the word level, which process multiple subwords to form a complete word in a single step. Additionally, we suggest a method to boost SiMT models using language models (LMs), wherein the proposed word-level policy plays a vital role in addressing the subword disparity between LMs and SiMT models. 
Code is available at https://github.com/xl8-ai/WordSiMT.", "keywords": "Simultaneous Machine Translation;word-level policies", "primary_area": "", "supplementary_material": "", "author": "Kang Kim;Hankyu Cho", "authorids": "~Kang_Kim2;~Hankyu_Cho1", "gender": "M;M", "homepage": ";", "dblp": ";", "google_scholar": ";mrPdU5oAAAAJ", "or_profile": "~Kang_Kim2;~Hankyu_Cho1", "aff": "XL8;XL8 Inc.", "aff_domain": "xl8.ai;xl8.ai", "position": "Researcher;Researcher", "bibtex": "@inproceedings{\nkim2023enhanced,\ntitle={Enhanced Simultaneous Machine Translation with Word-level Policies},\nauthor={Kang Kim and Hankyu Cho},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=0ii51brFyn}\n}", "github": "", "project": "", "reviewers": "5oLi;CvhJ;tsh7", "site": "https://openreview.net/forum?id=0ii51brFyn", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;3;4", "excitement": "3;4;4", "reproducibility": "3;4;4", "correctness": "3;4;4", "rating_avg": 5.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.6666666666666665, "replies_avg": 10, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0003-4849-8138", "linkedin": "kang-kim-a6354210/;", "aff_unique_index": "0;1", "aff_unique_norm": "XL8;XL8 Inc.", "aff_unique_dep": ";", "aff_unique_url": ";", "aff_unique_abbr": ";", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "1", "aff_country_unique": ";United States" }, { "id": "0isMLQIUpQ", "title": "Is ChatGPT the ultimate Data Augmentation Algorithm?", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "In the aftermath of GPT-3.5, commonly known as ChatGPT, research have attempted to assess its capacity for lowering annotation cost, either by doing zero-shot learning, generating new data, or replacing human annotators. Some studies have also investigated its use for data augmentation (DA), but only in limited contexts, which still leaves the question of how ChatGPT performs compared to state-of-the-art algorithms. In this paper, we use ChatGPT to create new data both with paraphrasing and with zero-shot generation, and compare it to seven other algorithms. 
We show that while ChatGPT performs exceptionally well on some simpler data, it overall does not perform better than the other algorithms, yet it demands much greater involvement from the practitioner, since ChatGPT often refuses to answer because of sensitive content in the datasets.", "keywords": "Data augmentation;ChatGPT;GPT-3.5;classification;T5", "primary_area": "", "supplementary_material": "", "author": "Fr\u00e9d\u00e9ric Piedboeuf;Philippe Langlais", "authorids": "~Fr\u00e9d\u00e9ric_Piedboeuf1;~Philippe_Langlais2", "gender": ";M", "homepage": ";http://www-labs.iro.umontreal.ca/~felipe/brand_new_home/creative-design/public_html/index.php?lg=en", "dblp": ";66/1102", "google_scholar": "https://scholar.google.ca/citations?user=TerngKQAAAAJ;VHd-kDEAAAAJ", "or_profile": "~Fr\u00e9d\u00e9ric_Piedboeuf1;~Philippe_Langlais2", "aff": "CTA;Universit\u00e9 de Montr\u00e9al", "aff_domain": "ena.ca;umontreal.ca", "position": "Researcher;Full Professor", "bibtex": "@inproceedings{\npiedboeuf2023is,\ntitle={Is Chat{GPT} the ultimate Data Augmentation Algorithm?},\nauthor={Fr{\\'e}d{\\'e}ric Piedboeuf and Philippe Langlais},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=0isMLQIUpQ}\n}", "github": "", "project": "", "reviewers": "Fjbw;WQnu;FG9i", "site": "https://openreview.net/forum?id=0isMLQIUpQ", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;2;5", "excitement": "2;3;3", "reproducibility": "3;2;4", "correctness": "3;2;3", "rating_avg": 3.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 2.6666666666666665, "reproducibility_avg": 3.0, "correctness_avg": 2.6666666666666665, "replies_avg": 9, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-7319-1595", "linkedin": "fr%C3%A9d%C3%A9ric-piedboeuf-31ba72126/;", "aff_unique_index": "0;1", "aff_unique_norm": "Chicago Transit Authority;Universit\u00e9 de Montr\u00e9al", "aff_unique_dep": ";", "aff_unique_url": "https://www.transitchicago.com;https://www.umontreal.ca", "aff_unique_abbr": "CTA;UdeM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "United States;Canada" }, { "id": "0juZSwZLA4", "title": "ScdNER: Span-Based Consistency-Aware Document-Level Named Entity Recognition", "track": "main", "status": "Short Main", "tldr": "", "abstract": "Document-level NER approaches use global information via word-based key-value memory for accurate and consistent predictions. However, such global information on word level can introduce noise when the same word appears in different token sequences and has different labels. This work proposes a two-stage document-level NER model, ScdNER, for more accurate and consistent predictions via adaptive span-level global feature fusion. In the first stage, ScdNER trains a binary classifier to predict if a token sequence is an entity with a probability. Via a span-based key-value memory, the probabilities are further used to obtain the entity's global features with reduced impact of non-entity sequences. The second stage predicts the entity types using a gate mechanism to balance its local and global information, leading to adaptive global feature fusion.
Experiments on benchmark datasets from scientific, biomedical, and general domains show the effectiveness of the proposed methods.", "keywords": "named entity recognition;span-based;document-level;consistency-aware", "primary_area": "", "supplementary_material": "", "author": "Ying Wei;Qi Li", "authorids": "~Ying_Wei4;~Qi_Li14", "gender": "F;F", "homepage": "https://www.cs.iastate.edu/yingwei;https://sites.google.com/iastate.edu/qili/", "dblp": ";181/2688-12", "google_scholar": ";Gvld0foAAAAJ", "or_profile": "~Ying_Wei4;~Qi_Li14", "aff": "Iowa State University;Iowa State University", "aff_domain": "iastate.edu;iastate.edu", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\nwei2023scdner,\ntitle={Scd{NER}: Span-Based Consistency-Aware Document-Level Named Entity Recognition},\nauthor={Ying Wei and Qi Li},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=0juZSwZLA4}\n}", "github": "", "project": "", "reviewers": "G1FQ;3NNy;iSfy", "site": "https://openreview.net/forum?id=0juZSwZLA4", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;3", "excitement": "3;3;3", "reproducibility": "3;4;4", "correctness": "3;3;4", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-3136-2157", "linkedin": ";", "aff_unique_index": "0;0", "aff_unique_norm": "Iowa State University", "aff_unique_dep": "", "aff_unique_url": "https://www.iastate.edu", "aff_unique_abbr": "ISU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "0kseDcA5Nm", "title": "Give Me the Facts! A Survey on Factual Knowledge Probing in Pre-trained Language Models", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Pre-trained Language Models (PLMs) are trained on vast unlabeled data, rich in world knowledge. This fact has sparked the interest of the community in quantifying the amount of factual knowledge present in PLMs, as this explains their performance on downstream tasks, and potentially justifies their use as knowledge bases. In this work, we survey methods and datasets that are used to probe PLMs for factual knowledge. 
Our contributions are: (1) We propose a categorization scheme for factual probing methods that is based on how their inputs, outputs and the probed PLMs are adapted; (2) We provide an overview of the datasets used for factual probing; (3) We synthesize insights about knowledge retention and prompt optimization in PLMs, analyze obstacles to adopting PLMs as knowledge bases and outline directions for future work.", "keywords": "factual knowledge probing", "primary_area": "", "supplementary_material": "", "author": "Paul Youssef;Osman Alperen Kora\u015f;Meijie Li;J\u00f6rg Schl\u00f6tterer;Christin Seifert", "authorids": "~Paul_Youssef1;~Osman_Alperen_Kora\u015f1;~Meijie_Li2;~J\u00f6rg_Schl\u00f6tterer1;~Christin_Seifert1", "gender": ";M;F;;", "homepage": ";https://github.com/osmalpkoras/;https://wispermed.com/author/meijie-li/;;", "dblp": ";;;160/1725;", "google_scholar": "https://scholar.google.de/citations?user=VhMWcqYAAAAJ;;;5A2TGRgAAAAJ;", "or_profile": "~Paul_Youssef1;~Osman_Alperen_Kora\u015f1;~Meijie_Li2;~J\u00f6rg_Schl\u00f6tterer1;~Christin_Seifert1", "aff": "University of Duisburg-Essen;Universit\u00e4t Duisburg-Essen;Universit\u00e4t Duisburg-Essen;Universit\u00e4t Duisburg-Essen;", "aff_domain": "uni-due.de;uni-due.de;uni-duisburg-essen.de;uni-duisburg-essen.de;", "position": "PhD student;PhD student;PhD student;Junior Research Group Lead;", "bibtex": "@inproceedings{\nyoussef2023give,\ntitle={Give Me the Facts! A Survey on Factual Knowledge Probing in Pre-trained Language Models},\nauthor={Paul Youssef and Osman Alperen Kora{\\c{s}} and Meijie Li and J{\\\"o}rg Schl{\\\"o}tterer and Christin Seifert},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=0kseDcA5Nm}\n}", "github": "", "project": "", "reviewers": "ASH9;nj8g;avt8", "site": "https://openreview.net/forum?id=0kseDcA5Nm", "pdf_size": 0, "rating": "2;2;2", "confidence": "1;1;4", "excitement": "3;4;3", "reproducibility": "4;0;5", "correctness": "4;3;3", "rating_avg": 2.0, "confidence_avg": 2.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0009-0004-4953-4553;;;0000-0002-3678-0390;", "linkedin": ";;;;", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Duisburg-Essen", "aff_unique_dep": "", "aff_unique_url": "https://www.uni-due.de", "aff_unique_abbr": "UDE", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Germany" }, { "id": "0lE7w8RJDw", "title": "Learning Knowledge-Enhanced Contextual Language Representations for Domain Natural Language Understanding", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Knowledge-Enhanced Pre-trained Language Models (KEPLMs) improve the performance of various downstream NLP tasks by injecting knowledge facts from large-scale Knowledge Graphs (KGs). However, existing methods for pre-training KEPLMs with relational triples are difficult to be adapted to close domains due to the lack of sufficient domain graph semantics. In this paper, we propose a Knowledge-enhanced language representation learning framework for various closed domains (KANGAROO) via capturing the implicit graph structure among the entities. 
Specifically, since the entity coverage rates of closed-domain KGs can be relatively low and may exhibit the global sparsity phenomenon for knowledge injection, we consider not only the shallow relational representations of triples but also the hyperbolic embeddings of deep hierarchical entity-class structures for effective knowledge fusion. Moreover, as two closed-domain entities under the same entity-class often have locally dense neighbor subgraphs counted by max point biconnected component, we further propose a data augmentation strategy based on contrastive learning over subgraphs to construct hard negative samples of higher quality. This makes the underlying KEPLMs better distinguish the semantics of these neighboring entities to further complement the global semantic sparsity. In the experiments, we evaluate KANGAROO over various knowledge-aware and general NLP tasks in both full and few-shot learning settings, significantly outperforming various KEPLM training paradigms in closed domains.", "keywords": "Closed-domain;Pre-trained Language Model;Knowledge Graph", "primary_area": "", "supplementary_material": "", "author": "Taolin Zhang;Ruyao Xu;Chengyu Wang;Zhongjie Duan;Cen Chen;Minghui Qiu;Dawei Cheng;Xiaofeng He;Weining Qian", "authorids": "~Taolin_Zhang2;~Ruyao_Xu1;~Chengyu_Wang1;~Zhongjie_Duan1;~Cen_Chen1;~Minghui_Qiu1;~Dawei_Cheng1;~Xiaofeng_He2;~Weining_Qian1", "gender": "F;M;M;F;M;M;M;;M", "homepage": "https://github.com/RheaRia;https://chywang.github.io/;https://github.com/Artiprocher;https://sites.google.com/site/chencenpersonalwebsite/;https://sites.google.com/site/qiumh0727/;http://cs1.tongji.edu.cn/~dawei/;;;", "dblp": ";135/5147-1;;152/6215-1.html;132/3541;135/6864;;55/3364;270/2482-1", "google_scholar": ";_AVfRnQAAAAJ;;https://scholar.google.com.sg/citations?user=3Mn4S9UAAAAJ;https://scholar.google.com.sg/citations?user=xcqJyMgAAAAJ;4UD20ukAAAAJ;;;06Ctg4UAAAAJ", "or_profile": "~Ruyao_Xu1;~Chengyu_Wang1;~Zhongjie_Duan1;~Cen_Chen1;~Minghui_Qiu1;~Dawei_Cheng1;~Xiaofeng_He2;~Weining_Qian1;~taolin_zhang1", "aff": "East China Normal University;Alibaba Group;East China Normal University;East China Normal University;ByteDance;Tongji University;East China Normal University;East China Normal University;East China Normal University", "aff_domain": "ecnu.edu.cn;alibaba-inc.com;ecnu.edu.cn;dase.ecnu.edu.cn;bytedance.com;tongji.edu.cn;ecnu.edu.cn;ecnu.edu.cn;ecnu.edu.cn", "position": "MS student;Researcher;PhD student;Associate Professor;Researcher;Associate Professor;Full Professor;Full Professor;PhD student", "bibtex": "@inproceedings{\nzhang2023learning,\ntitle={Learning Knowledge-Enhanced Contextual Language Representations for Domain Natural Language Understanding},\nauthor={Taolin Zhang and Ruyao Xu and Chengyu Wang and Zhongjie Duan and Cen Chen and Minghui Qiu and Dawei Cheng and Xiaofeng He and Weining Qian},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=0lE7w8RJDw}\n}", "github": "", "project": "", "reviewers": "fcFt;Z6FT;m3BN", "site": "https://openreview.net/forum?id=0lE7w8RJDw", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;3;4", "excitement": "4;4;4", "reproducibility": "3;4;4", "correctness": "3;4;4", "rating_avg": 5.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 4.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.6666666666666665, "replies_avg": 10, "authors#_avg": 9, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, 
"orcid": ";;;0000-0003-0325-1705;;0000-0002-5877-7387;0000-0002-6911-348X;;", "linkedin": ";;;;;;;;", "aff_unique_index": "0;1;0;0;2;3;0;0;0", "aff_unique_norm": "East China Normal University;Alibaba Group;ByteDance;Tongji University", "aff_unique_dep": ";;;", "aff_unique_url": "http://www.ecnu.edu.cn;https://www.alibaba.com;https://www.bytedance.com;https://www.tongji.edu.cn", "aff_unique_abbr": "ECNU;Alibaba;ByteDance;Tongji", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "0n92zm014A", "title": "Self-ICL: Zero-Shot In-Context Learning with Self-Generated Demonstrations", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Large language models (LLMs) have exhibited striking in-context learning (ICL) ability to adapt to target tasks with a few input-output demonstrations.\nFor better ICL, different methods are proposed to select representative demonstrations from existing training corpora.\nHowever, such settings are not aligned with real-world practices, as end-users usually query LMs without access to demonstration pools.\nIn this work, we introduce Self-ICL---a simple framework which bootstraps LMs' intrinsic capabilities to perform zero-shot ICL.\nGiven a test input, Self-ICL first prompts the model to generate pseudo-inputs.\nNext, the model predicts pseudo-labels for the pseudo-inputs via zero-shot prompting.\nFinally, we perform ICL for the test input with the pseudo-input-label pairs as demonstrations.\nEvaluation on 23 BIG-Bench Hard tasks shows Self-ICL outperforms zero-shot baselines on both average accuracy and head-to-head comparison.\nMoreover, with zero-shot chain-of-thought, Self-ICL achieves results comparable to using real demonstrations.\nAdditionally, we conduct a range of analyses to validate Self-ICL's effectiveness and provide insights for its behaviors under different settings.", "keywords": "in-context learning;zero-shot;bootstrapping", "primary_area": "", "supplementary_material": "", "author": "Wei-Lin Chen;Cheng-Kuang Wu;Yun-Nung Chen;Hsin-Hsi Chen", "authorids": "~Wei-Lin_Chen1;~Cheng-Kuang_Wu1;~Yun-Nung_Chen1;~Hsin-Hsi_Chen2", "gender": ";M;M;F", "homepage": "https://wlchen0206.github.io/;https://brian-ckwu.github.io/;http://nlg.csie.ntu.edu.tw/advisor.php;http://vivianchen.idv.tw", "dblp": "72/7187;88/415;84/3130.html;04/9878", "google_scholar": "https://scholar.google.com.tw/citations?user=Hrbne1wAAAAJ;hc_e7rsAAAAJ;CRth4q4AAAAJ;https://scholar.google.com.tw/citations?user=jQLg-_UAAAAJ", "or_profile": "~Wei-Lin_Chen1;~Cheng-Kuang_Wu1;~Hsin-Hsi_Chen2;~Vivian_Chen1", "aff": "National Taiwan University;National Taiwan University;National Taiwan University;Department of Computer Science and Informational Engineering, National Taiwan University", "aff_domain": "ntu.edu.tw;csie.ntu.edu.tw;ntu.edu.tw;csie.ntu.edu.tw", "position": "MS student;MS student;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nchen2023selficl,\ntitle={Self-{ICL}: Zero-Shot In-Context Learning with Self-Generated Demonstrations},\nauthor={Wei-Lin Chen and Cheng-Kuang Wu and Yun-Nung Chen and Hsin-Hsi Chen},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=0n92zm014A}\n}", "github": "", "project": "", "reviewers": "aCDP;EFGJ;B1V1", "site": "https://openreview.net/forum?id=0n92zm014A", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;3;3", "excitement": "3;3;4", 
"reproducibility": "4;4;4", "correctness": "4;3;3", "rating_avg": 5.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-0740-0846;0000-0001-9757-9423;", "linkedin": ";cheng-kuang-wu-062214219/;;", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "National Taiwan University", "aff_unique_dep": "", "aff_unique_url": "https://www.ntu.edu.tw", "aff_unique_abbr": "NTU", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Taiwan", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "0sDieI5GJh", "title": "QUADRo: Dataset and Models for QUestion-Answer Database Retrieval", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "An effective approach to design automated Question Answering (QA) systems is to efficiently retrieve answers from pre-computed databases containing question/answer pairs. \nOne of the main challenges to this design is the lack of training/testing data. Existing resources are limited in size and topics and either do not consider answers (question-question similarity only) or their quality in the annotation process. \nTo fill this gap, we introduce a novel open-domain annotated resource to train and evaluate models for this task. The resource consists of 15,211 input questions. Each question is paired with 30 similar question/answer pairs, resulting in a total of 443,000 annotated examples. The binary label associated with each pair indicates the relevance with respect to the input question. \nFurthermore, we report extensive experimentation to test the quality and properties of our resource with respect to various key aspects of QA systems, including answer relevance, training strategies, and models input configuration.", "keywords": "question answering;semantic similarity;nlp application;question answering database;question answering resources;question ranking and retrieval", "primary_area": "", "supplementary_material": "", "author": "Stefano Campese;Ivano Lauriola;Alessandro Moschitti", "authorids": "~Stefano_Campese1;~Ivano_Lauriola1;~Alessandro_Moschitti2", "gender": "M;M;M", "homepage": ";;http://disi.unitn.it/moschitti/", "dblp": "240/3488;;54/2140.html", "google_scholar": "BKtmswoAAAAJ;P74hLDIAAAAJ;https://scholar.google.com/citations?hl=en", "or_profile": "~Stefano_Campese1;~Ivano_Lauriola1;~Alessandro_Moschitti2", "aff": "University of Trento;Amazon AI;Amazon AGI", "aff_domain": "unitn.it;amazon.com;amazon.com", "position": "PhD student;Researcher;Principal Researcher", "bibtex": "@inproceedings{\ncampese2023quadro,\ntitle={{QUADR}o: Dataset and Models for {QU}estion-Answer Database Retrieval},\nauthor={Stefano Campese and Ivano Lauriola and Alessandro Moschitti},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=0sDieI5GJh}\n}", "github": "", "project": "", "reviewers": "XhtM;pEq3;VGzs", "site": "https://openreview.net/forum?id=0sDieI5GJh", "pdf_size": 0, "rating": "4;4;4", "confidence": "2;4;4", "excitement": "4;4;3", "reproducibility": "2;3;4", "correctness": "4;3;4", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.0, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, 
"corr_rating_correctness": 0.0, "orcid": ";;0000-0003-2216-8034", "linkedin": ";;alessandro-moschitti-10999a4/", "aff_unique_index": "0;1;1", "aff_unique_norm": "University of Trento;Amazon", "aff_unique_dep": ";Amazon AI", "aff_unique_url": "https://www.unitn.it;https://www.amazon.com", "aff_unique_abbr": "UniTN;Amazon", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1", "aff_country_unique": "Italy;United States" }, { "id": "0tEed0ZiFX", "title": "Learning Semantic Role Labeling from Compatible Label Sequences", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Semantic role labeling (SRL) has multiple disjoint label sets, e.g., VerbNet and PropBank. Creating these datasets is challenging, therefore a natural question is how to use each one to help the other. Prior work has shown that cross-task interaction helps, but only explored multitask learning so far. A common issue with multi-task setup is that argument sequences are still separately decoded, running the risk of generating structurally inconsistent label sequences (as per lexicons like Semlink). In this paper, we eliminate such issue with a framework that jointly models VerbNet and PropBank labels as one sequence. In this setup, we show that enforcing Semlink constraints during decoding constantly improves the overall F1. With special input constructions, our joint model infers VerbNet arguments from given PropBank arguments with over 99 F1. For learning, we propose a constrained marginal model that learns with knowledge defined in Semlink to further benefit from the large amounts of PropBank-only data. On the joint benchmark based on CoNLL05, our models achieve state-of-the-art F1's, outperforming the prior best in-domain model by 3.5 (VerbNet) and 0.8 (PropBank). 
For out-of-domain generalization, our models surpass the prior best by 3.4 (VerbNet) and 0.2 (PropBank).", "keywords": "SRL", "primary_area": "", "supplementary_material": "", "author": "Tao Li;Ghazaleh Kazeminejad;Susan Windisch Brown;Vivek Srikumar;Martha Palmer", "authorids": "~Tao_Li11;~Ghazaleh_Kazeminejad1;~Susan_Windisch_Brown1;~Vivek_Srikumar1;~Martha_Palmer1", "gender": "M;F;;;F", "homepage": "https://www.cs.utah.edu/~tli/;;https://verbs.colorado.edu/brownsw/;https://svivek.com;https://www.colorado.edu/faculty/palmer-martha/", "dblp": "75/4601-39;;46/8156;37/44;p/MarthaStonePalmer.html", "google_scholar": "C1-ACVEAAAAJ;vC59Y2AAAAAJ;https://scholar.google.com/scholar?hl=en;TsTUfOIAAAAJ;pxc_-XYAAAAJ", "or_profile": "~Tao_Li11;~Ghazaleh_Kazeminejad1;~Susan_Windisch_Brown1;~Vivek_Srikumar1;~Martha_Palmer1", "aff": "Google DeepMind;University of Colorado at Boulder;University of Colorado at Boulder;University of Utah;University of Colorado at Boulder", "aff_domain": "google.com;colorado.edu;colorado.edu;utah.edu;colorado.edu", "position": "Researcher;Researcher;Researcher;Associate Professor;Researcher", "bibtex": "@inproceedings{\nli2023learning,\ntitle={Learning Semantic Role Labeling from Compatible Label Sequences},\nauthor={Tao Li and Ghazaleh Kazeminejad and Susan Windisch Brown and Vivek Srikumar and Martha Palmer},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=0tEed0ZiFX}\n}", "github": "", "project": "", "reviewers": "wA9b;bCiH;Evjp", "site": "https://openreview.net/forum?id=0tEed0ZiFX", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;4;4", "excitement": "3;3;3", "reproducibility": "4;5;4", "correctness": "4;4;4", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.0, "reproducibility_avg": 4.333333333333333, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;0000-0001-9864-6974", "linkedin": ";ghazaleh-kazeminejad/;susan-brown-93bb379/;;https://www.linkedin.com/feed/?trk=homepage-basic_signin-form_submit", "aff_unique_index": "0;1;1;2;1", "aff_unique_norm": "Google;University of Colorado;University of Utah", "aff_unique_dep": "Google DeepMind;;", "aff_unique_url": "https://deepmind.com;https://www.colorado.edu;https://www.utah.edu", "aff_unique_abbr": "DeepMind;CU;Utah", "aff_campus_unique_index": "1;1;1", "aff_campus_unique": ";Boulder", "aff_country_unique_index": "0;1;1;1;1", "aff_country_unique": "United Kingdom;United States" }, { "id": "0u3O7Ju21x", "title": "Calibrated Seq2seq Models for Efficient and Generalizable Ultra-fine Entity Typing", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Ultra-fine entity typing plays a crucial role in information extraction by predicting fine-grained semantic types for entity mentions in text. However, this task poses significant challenges due to the massive number of entity types in the output space. The current state-of-the-art approaches, based on standard multi-label classifiers or cross-encoder models, suffer from poor generalization performance or inefficient inference speed. In this paper, we present CASENT, a seq2seq model designed for ultra-fine entity typing that predicts ultra-fine types with calibrated confidence scores. Our model takes an entity mention as input and employs constrained beam search to generate multiple types autoregressively. 
The raw sequence probabilities associated with the predicted types are then transformed into confidence scores using a novel calibration method. We conduct extensive experiments on the UFET dataset which contains over $10k$ types. Our method outperforms the previous state-of-the-art in terms of F1 score and calibration error, while achieving an inference speedup of over $50$ times. Additionally, we demonstrate the generalization capabilities of our model by evaluating it in zero-shot and few-shot settings on five specialized domain entity typing datasets that are unseen during training. Remarkably, our model outperforms large language models with 10 times more parameters in the zero-shot setting, and when fine-tuned on 50 examples, it significantly outperforms ChatGPT on all datasets.", "keywords": "entity typing;information extraction;probability calibration", "primary_area": "", "supplementary_material": "", "author": "Yanlin Feng;Adithya Pratapa;David R Mortensen", "authorids": "~Yanlin_Feng1;~Adithya_Pratapa1;~David_R_Mortensen1", "gender": ";M;M", "homepage": ";https://adithya7.github.io/;http://www.cs.cmu.edu/~dmortens/", "dblp": ";222/9370;180/5443", "google_scholar": ";BAT6abIAAAAJ;https://scholar.google.com/citations?authuser=1", "or_profile": "~Yanlin_Feng1;~Adithya_Pratapa1;~David_R_Mortensen1", "aff": ";Carnegie Mellon University;Carnegie Mellon University", "aff_domain": ";cmu.edu;cmu.edu", "position": ";PhD student;Systems Scientist", "bibtex": "@inproceedings{\nfeng2023calibrated,\ntitle={Calibrated Seq2seq Models for Efficient and Generalizable Ultra-fine Entity Typing},\nauthor={Yanlin Feng and Adithya Pratapa and David R Mortensen},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=0u3O7Ju21x}\n}", "github": "", "project": "", "reviewers": "NPCS;hY4H;rRPt", "site": "https://openreview.net/forum?id=0u3O7Ju21x", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;4;5", "excitement": "3;2;4", "reproducibility": "2;4;4", "correctness": "3;3;4", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 3.0, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.3333333333333335, "replies_avg": 10, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0002-3927-6851", "linkedin": ";;davidrmortensen/", "aff_unique_index": "0;0", "aff_unique_norm": "Carnegie Mellon University", "aff_unique_dep": "", "aff_unique_url": "https://www.cmu.edu", "aff_unique_abbr": "CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "106xRbVC4k", "title": "Revisiting Entropy Rate Constancy in Text", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "The uniform information density (UID) hypothesis states that humans tend to distribute information roughly evenly across an utterance or discourse. Early evidence in support of the UID hypothesis came from Genzel and Charniak (2002), which proposed an entropy rate constancy principle based on the probability of English text under $n$-gram language models. We re-evaluate the claims of Genzel and Charniak (2002) with neural language models, failing to find clear evidence in support of entropy rate constancy. 
We conduct a range of experiments across datasets, model sizes, and languages and discuss implications for the uniform information density hypothesis and linguistic theories of efficient communication more broadly.", "keywords": "Uniform Information Density;Entropy Rate;Large Language Models;Linguistic Theories", "primary_area": "", "supplementary_material": "", "author": "Vivek Verma;Nicholas Tomlin;Dan Klein", "authorids": "~Vivek_Verma2;~Nicholas_Tomlin1;~Dan_Klein1", "gender": "M;M;", "homepage": "https://vivek.lol;https://people.eecs.berkeley.edu/~nicholas_tomlin/;http://people.eecs.berkeley.edu/~klein/", "dblp": ";;", "google_scholar": "D40otBIAAAAJ;zV5vhUcAAAAJ;", "or_profile": "~Vivek_Verma2;~Nicholas_Tomlin1;~Dan_Klein1", "aff": "University of California, Berkeley;University of California, Berkeley;University of California, Berkeley", "aff_domain": "berkeley.edu;berkeley.edu;berkeley.edu", "position": "Undergrad student;PhD student;Full Professor", "bibtex": "@inproceedings{\nverma2023revisiting,\ntitle={Revisiting Entropy Rate Constancy in Text},\nauthor={Vivek Verma and Nicholas Tomlin and Dan Klein},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=106xRbVC4k}\n}", "github": "", "project": "", "reviewers": "sMtY;vesJ;8v3T", "site": "https://openreview.net/forum?id=106xRbVC4k", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;4;2", "excitement": "4;4;3", "reproducibility": "4;2;4", "correctness": "4;3;3", "rating_avg": 4.0, "confidence_avg": 3.0, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": ";;dan-klein/", "aff_unique_index": "0;0;0", "aff_unique_norm": "University of California, Berkeley", "aff_unique_dep": "", "aff_unique_url": "https://www.berkeley.edu", "aff_unique_abbr": "UC Berkeley", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Berkeley", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "10iYooV68H", "title": "A Training-Free Debiasing Framework with Counterfactual Reasoning for Conversational Emotion Detection", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Unintended dataset biases typically exist in existing Emotion Recognition in Conversations (ERC) datasets, including label bias, where models favor the majority class due to imbalanced training data, as well as the speaker and neutral word bias, where models make unfair predictions because of excessive correlations between specific neutral words or speakers and classes. However, previous studies in ERC generally focus on capturing context-sensitive and speaker-sensitive dependencies, ignoring the unintended dataset biases of data, which hampers the generalization and fairness in ERC. To address this issue, we propose a Training-Free Debiasing framework (TFD) that operates during prediction without additional training. To ensure compatibility with various ERC models, it does not balance data or modify the model structure. Instead, TFD extracts biases from the model by generating counterfactual utterances and contexts and mitigates them using simple yet empirically robust element-wise subtraction operations. 
Extensive experiments on three public datasets demonstrate that TFD effectively improves generalization ability and fairness across different ERC models.", "keywords": "Conversational Emotion Detection;Counterfactual Reasoning;Debiasing", "primary_area": "", "supplementary_material": "", "author": "Geng Tu;Ran Jing;Bin Liang;Min Yang;Kam-Fai Wong;Ruifeng Xu", "authorids": "~Geng_Tu2;~Ran_Jing1;~Bin_Liang6;~Min_Yang6;~Kam-Fai_Wong2;~Ruifeng_Xu1", "gender": "M;M;M;F;M;M", "homepage": ";https://github.com/stddddd;https://binliang-nlp.github.io/;https://minyang.me/;http://www.se.cuhk.edu.hk/~kfwong;http://faculty.hitsz.edu.cn/xuruifeng", "dblp": ";;71/6053-4;02/1640-7;w/KamFaiWong;93/5407-1", "google_scholar": "https://scholar.google.com.hk/citations?user=OvI-eTkAAAAJ;;djpQeLEAAAAJ;_wop6KgAAAAJ;;mObXnNIAAAAJ", "or_profile": "~Geng_Tu2;~Ran_Jing1;~Bin_Liang6;~Min_Yang6;~Kam-Fai_Wong2;~Ruifeng_Xu1", "aff": "Harbin Institute of Technology;Harbin Institute of Technology;The Chinese University of Hong Kong;Shenzhen Institutes of Advanced Technology, Chinese Academy of Sciences, Chinese Academy of Sciences;The Chinese University of Hong Kong;Harbin Institute of Technology", "aff_domain": "hit.edu.cn;hit.edu.cn;cuhk.edu.hk;siat.ac.cn;cuhk.edu.hk;hit.edu.cn", "position": "PhD student;Undergrad student;Postdoc;Associate Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\ntu2023a,\ntitle={A Training-Free Debiasing Framework with Counterfactual Reasoning for Conversational Emotion Detection},\nauthor={Geng Tu and Ran Jing and Bin Liang and Min Yang and Kam-Fai Wong and Ruifeng Xu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=10iYooV68H}\n}", "github": "", "project": "", "reviewers": "p6EJ;LsJC;X5kt", "site": "https://openreview.net/forum?id=10iYooV68H", "pdf_size": 0, "rating": "5;5;5", "confidence": "3;3;3", "excitement": "4;4;4", "reproducibility": "3;3;4", "correctness": "4;3;4", "rating_avg": 5.0, "confidence_avg": 3.0, "excitement_avg": 4.0, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0001-7234-1347;;0000-0002-9427-5659;0000-0002-4009-5679", "linkedin": ";;;;;", "aff_unique_index": "0;0;1;2;1;0", "aff_unique_norm": "Harbin Institute of Technology;Chinese University of Hong Kong;Chinese Academy of Sciences", "aff_unique_dep": ";;Shenzhen Institutes of Advanced Technology", "aff_unique_url": "http://www.hit.edu.cn/;https://www.cuhk.edu.hk;http://www.cas.cn", "aff_unique_abbr": "HIT;CUHK;CAS", "aff_campus_unique_index": "0;0;1;2;1;0", "aff_campus_unique": "Harbin;Hong Kong SAR;Shenzhen", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "14WRhMNq7H", "title": "MolCA: Molecular Graph-Language Modeling with Cross-Modal Projector and Uni-Modal Adapter", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Language Models (LMs) have demonstrated impressive molecule understanding ability on various 1D text-related tasks. However, they inherently lack 2D graph perception \u2014 a critical ability of human professionals in comprehending molecules' topological structures. To bridge this gap, we propose MolCA: Molecular Graph-Language Modeling with Cross-Modal Projector and Uni-Modal Adapter. 
MolCA enables an LM (i.e., Galactica) to understand both text- and graph-based molecular contents via the cross-modal projector. Specifically, the cross-modal projector is implemented as a Q-Former to connect a graph encoder's representation space and an LM's text space. Further, MolCA employs a uni-modal adapter (i.e., LoRA) for the LM's efficient adaptation to downstream tasks. Unlike previous studies that couple an LM with a graph encoder via cross-modal contrastive learning, MolCA retains the LM's ability of open-ended text generation and augments it with 2D graph information. To showcase its effectiveness, we extensively benchmark MolCA on tasks of molecule captioning, IUPAC name prediction, and molecule-text retrieval, on which MolCA significantly outperforms the baselines.", "keywords": "Molecular Language Modeling;Cross-Modal Alignment;Molecule Captioning;Molecule-Text Retrieval", "primary_area": "", "supplementary_material": "", "author": "Zhiyuan Liu;Sihang Li;Yanchen Luo;Hao Fei;Yixin Cao;Kenji Kawaguchi;Xiang Wang;Tat-Seng Chua", "authorids": "~Zhiyuan_Liu5;~Sihang_Li1;~Yanchen_Luo1;~Hao_Fei1;~Yixin_Cao2;~Kenji_Kawaguchi1;~Xiang_Wang6;~Tat-Seng_Chua2", "gender": "M;;M;M;M;;M;", "homepage": "https://acharkq.github.io/;;https://github.com/lyc0930;https://haofei.vip/;https://sites.google.com/view/yixin-homepage;https://ml.comp.nus.edu.sg/#members;https://github.com/xiangwang1223;", "dblp": "53/3245-10;;359/3305;81/3569-1;20/8038-2;;31/2864-10;", "google_scholar": "https://scholar.google.com.sg/citations?user=zF0AH64AAAAJ;;e5SeNbMAAAAJ;YGDX46AAAAAJ;https://scholar.google.co.uk/citations?user=CnhTvdoAAAAJ;aLl3rYoAAAAJ;https://scholar.google.com.sg/citations?user=HdhaQB0AAAAJ;", "or_profile": "~Zhiyuan_Liu5;~Sihang_Li1;~Yanchen_Luo1;~Hao_Fei1;~Yixin_Cao2;~Kenji_Kawaguchi1;~Xiang_Wang6;~Tat-Seng_Chua2", "aff": "National University of Singapore;;University of Science and Technology of China;National University of Singapore;Singapore Management University;National University of Singapore;University of Science and Technology of China;", "aff_domain": "nus.edu.sg;;ustc.edu.cn;nus.edu.sg;smu.edu.sg;nus.edu;ustc.edu.cn;", "position": "PhD student;;PhD student;Postdoc;Assistant Professor;Presidential Young Professor;Full Professor;", "bibtex": "@inproceedings{\nliu2023molca,\ntitle={Mol{CA}: Molecular Graph-Language Modeling with Cross-Modal Projector and Uni-Modal Adapter},\nauthor={Zhiyuan Liu and Sihang Li and Yanchen Luo and Hao Fei and Yixin Cao and Kenji Kawaguchi and Xiang Wang and Tat-Seng Chua},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=14WRhMNq7H}\n}", "github": "", "project": "", "reviewers": "Cn2f;JBDY;YfAf", "site": "https://openreview.net/forum?id=14WRhMNq7H", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;3;3", "excitement": "4;4;4", "reproducibility": "4;4;4", "correctness": "4;4;5", "rating_avg": 5.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 4.0, "reproducibility_avg": 4.0, "correctness_avg": 4.333333333333333, "replies_avg": 11, "authors#_avg": 8, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0009-0009-2637-176X;0000-0003-3026-6347;;;0000-0002-6148-6329;", "linkedin": ";;;;;;;", "aff_unique_index": "0;1;0;2;0;1", "aff_unique_norm": "National University of Singapore;University of Science and Technology of China;Singapore Management University", "aff_unique_dep": ";;", "aff_unique_url": 
"https://www.nus.edu.sg;http://www.ustc.edu.cn;https://www.smu.edu.sg", "aff_unique_abbr": "NUS;USTC;SMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;0;1", "aff_country_unique": "Singapore;China" }, { "id": "16ZOs6YPDT", "title": "Variance Matters: Detecting Semantic Differences without Corpus/Word Alignment", "track": "main", "status": "Long Main", "tldr": "", "abstract": "In this paper, we propose methods for discovering semantic differences in words appearing in two corpora. The key idea is to measure the coverage of meanings of a word in a corpus through the norm of its mean word vector, which is equivalent to examining a kind of variance of the word vector distribution. The proposed methods do not require alignments between words and/or corpora for comparison that previous methods do. All they require are to compute variance (or norms of mean word vectors) for each word type. Nevertheless, they rival the best-performing system in the SemEval-2020 Task 1. In addition, they are (i) robust for the skew in corpus sizes; (ii) capable of detecting semantic differences in infrequent words; and (iii) effective in pinpointing word instances that have a meaning missing in one of the two corpora under comparison. We show these advantages for historical corpora and also for native/non-native English corpora.", "keywords": "Semantic difference;semantic shift;word vectors;variance;concentration parameter", "primary_area": "", "supplementary_material": "", "author": "Ryo Nagata;Hiroya Takamura;Naoki Otani;Yoshifumi Kawasaki", "authorids": "~Ryo_Nagata1;~Hiroya_Takamura1;~Naoki_Otani2;~Yoshifumi_Kawasaki1", "gender": ";M;M;M", "homepage": ";;http://www.tufs.ac.jp/research/researcher/people/otani_naoki.html;https://researchmap.jp/16211665/?lang=en", "dblp": ";75/3612;;184/8540.html", "google_scholar": ";o57RFqgAAAAJ;;OEyLrBYAAAAJ", "or_profile": "~Ryo_Nagata1;~Hiroya_Takamura1;~Naoki_Otani2;~Yoshifumi_Kawasaki1", "aff": ";AIST, National Institute of Advanced Industrial Science and Technology;Tokyo University of Foreign Studies;The University of Tokyo", "aff_domain": ";aist.go.jp;tufs.ac.jp;u-tokyo.ac.jp", "position": ";Researcher;Associate Professor;Associate Professor", "bibtex": "@inproceedings{\nnagata2023variance,\ntitle={Variance Matters: Detecting Semantic Differences without Corpus/Word Alignment},\nauthor={Ryo Nagata and Hiroya Takamura and Naoki Otani and Yoshifumi Kawasaki},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=16ZOs6YPDT}\n}", "github": "", "project": "", "reviewers": "TMju;3y58;Nr3G", "site": "https://openreview.net/forum?id=16ZOs6YPDT", "pdf_size": 0, "rating": "5;5;5", "confidence": "3;3;3", "excitement": "3;4;4", "reproducibility": "3;4;4", "correctness": "3;4;4", "rating_avg": 5.0, "confidence_avg": 3.0, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.6666666666666665, "replies_avg": 9, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-3244-8294;;0000-0002-1100-474X", "linkedin": ";hiroya-takamura-7125b832/;;", "aff_unique_index": "0;1;2", "aff_unique_norm": "National Institute of Advanced Industrial Science and Technology;Tokyo University of Foreign Studies;University of Tokyo", "aff_unique_dep": ";;", "aff_unique_url": "https://www.aist.go.jp;https://www.tufs.ac.jp;https://www.u-tokyo.ac.jp", "aff_unique_abbr": "AIST;TUFS;UTokyo", 
"aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Japan" }, { "id": "18skb5S2Gv", "title": "Nearest Neighbor Machine Translation is Meta-Optimizer on Output Projection Layer", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Nearest Neighbor Machine Translation ($k$NN-MT) has achieved great success in domain adaptation tasks by integrating pre-trained Neural Machine Translation (NMT) models with domain-specific token-level retrieval. However, the reasons underlying its success have not been thoroughly investigated. \nIn this paper, we comprehensively analyze $k$NN-MT through theoretical and empirical studies. Initially, we provide new insights into the working mechanism of $k$NN-MT as an efficient technique to implicitly execute gradient descent on the output projection layer of NMT, indicating that it is a specific case of model fine-tuning. Subsequently, we conduct multi-domain experiments and word-level analysis to examine the differences in performance between $k$NN-MT and entire-model fine-tuning. Our findings suggest that: ($i$) Incorporating $k$NN-MT with adapters yields comparable translation performance to fine-tuning on in-domain test sets, while achieving better performance on out-of-domain test sets; \n($ii$) Fine-tuning significantly outperforms $k$NN-MT on the recall of in-domain low-frequency words, but this gap could be bridged by optimizing the context representations with additional adapter layers.", "keywords": "Nearest Neighbor Machine Translation;meta-optimization;domain adaptation;Neural Machine Translation", "primary_area": "", "supplementary_material": "", "author": "Ruize Gao;Zhirui Zhang;Yichao Du;Lemao Liu;Rui Wang", "authorids": "~Ruize_Gao2;~Zhirui_Zhang1;~Yichao_Du1;~Lemao_Liu3;~Rui_Wang10", "gender": "M;M;M;M;M", "homepage": "https://ruizgao.github.io/;;https://lemaoliu.github.io/homepage/;https://wangruinlp.github.io/;", "dblp": ";202/1838;41/10887.html;w/RuiWang15;271/6727", "google_scholar": ";C8Ylo7sAAAAJ;;oTU0v5IAAAAJ;UC4wSP0AAAAJ", "or_profile": "~Ruize_Gao2;~Zhirui_Zhang1;~lemao_liu1;~Rui_Wang7;~Du_Yichao1", "aff": "Shanghai Jiaotong University;Tencent AI Lab;Tencent;Shanghai Jiaotong University;University of Science and Technology of China", "aff_domain": "sjtu.edu.cn;tencent.com;tencent.com;sjtu.edu.cn;ustc.edu.cn", "position": "MS student;Senior Researcher;Researcher;Associate Professor;PhD student", "bibtex": "@inproceedings{\ngao2023nearest,\ntitle={Nearest Neighbor Machine Translation is Meta-Optimizer on Output Projection Layer},\nauthor={Ruize Gao and Zhirui Zhang and Yichao Du and Lemao Liu and Rui Wang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=18skb5S2Gv}\n}", "github": "", "project": "", "reviewers": "QnkT;4kop;Gf9v", "site": "https://openreview.net/forum?id=18skb5S2Gv", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;3;4", "excitement": "4;3;3", "reproducibility": "3;4;3", "correctness": "4;4;3", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "my-orcid?orcid=0000-0003-4500-2459;;;0000-0001-8007-2503;", "linkedin": ";;;;", "aff_unique_index": "0;1;1;0;2", "aff_unique_norm": "Shanghai Jiao Tong University;Tencent;University of 
Science and Technology of China", "aff_unique_dep": ";Tencent AI Lab;", "aff_unique_url": "https://www.sjtu.edu.cn;https://ai.tencent.com;http://www.ustc.edu.cn", "aff_unique_abbr": "SJTU;Tencent AI Lab;USTC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "19sGqVUxQw", "title": "Inverse Scaling Can Become U-Shaped", "track": "main", "status": "Short Main", "tldr": "", "abstract": "Scaling up language models has been empirically shown to improve performance on a wide range of downstream tasks. However, if we were to observe worse performance as a function of scale (\"inverse scaling\") on certain tasks, this would indicate that scaling can also encourage behaviors that are misaligned with human preferences. The Inverse Scaling Prize (McKenzie et al. 2023) identified eleven such inverse scaling tasks, evaluated on models of up to 280B parameters and up to 500 zettaFLOPs of training compute. This paper takes a closer look at these inverse scaling tasks. In this paper, we evaluate models of up to 540B parameters, trained on five times more compute than those evaluated in the Inverse Scaling Prize. With this increased range of model sizes and compute, only four out of the eleven tasks remain inverse scaling. Six tasks exhibit \"U-shaped scaling\", where performance decreases up to a certain size, and then increases again up to the largest model evaluated (the one remaining task displays positive scaling). In addition, 1-shot examples and chain-of-thought can help mitigate undesirable scaling patterns even further. U-shaped scaling suggests that the inverse scaling trend observed in McKenzie et al. (2023) may not continue to hold for larger models, which we attribute to the presence of distractor tasks that only sufficiently large models can avoid.", "keywords": "inverse scaling;scaling;language models;evaluation", "primary_area": "", "supplementary_material": "", "author": "Jason Wei;Najoung Kim;Yi Tay;Quoc V Le", "authorids": "~Jason_Wei1;~Najoung_Kim1;~Yi_Tay1;~Quoc_V_Le1", "gender": "M;F;M;M", "homepage": "https://jasonwei20.github.io;https://najoungkim.github.io;http://yitay.net;", "dblp": "02/11220.html;194/1249;;29/6166", "google_scholar": ";Uod-_B8AAAAJ;VBclY_cAAAAJ;", "or_profile": "~Jason_Wei1;~Najoung_Kim1;~Yi_Tay1;~Quoc_V_Le1", "aff": "OpenAI;Google;Google;Google", "aff_domain": "openai.com;google.com;google.com;google.com", "position": "Researcher;Researcher;Research Scientist;Scientist", "bibtex": "@inproceedings{\nwei2023inverse,\ntitle={Inverse Scaling Can Become U-Shaped},\nauthor={Jason Wei and Najoung Kim and Yi Tay and Quoc V Le},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=19sGqVUxQw}\n}", "github": "", "project": "", "reviewers": "Fce9;95LF;bS6X", "site": "https://openreview.net/forum?id=19sGqVUxQw", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;4;3", "excitement": "4;4;4", "reproducibility": "3;2;2", "correctness": "3;4;4", "rating_avg": 5.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 4.0, "reproducibility_avg": 2.3333333333333335, "correctness_avg": 3.6666666666666665, "replies_avg": 10, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;", "linkedin": ";;;", "aff_unique_index": "0;1;1;1", "aff_unique_norm": "OpenAI;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://openai.com;https://www.google.com", 
"aff_unique_abbr": "OpenAI;Google", "aff_campus_unique_index": "1;1;1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "19uudhc1s8", "title": "Analyzing Film Adaptation through Narrative Alignment", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Novels are often adapted into feature films, but the differences between the two media usually require dropping sections of the source text from the movie script. Here we study this screen adaptation process by constructing narrative alignments using the Smith-Waterman local alignment algorithm coupled with SBERT embedding distance to quantify text similarity between scenes and book units. We use these alignments to perform an automated analysis of 40 adaptations, revealing insights into the screenwriting process concerning (i) faithfulness of adaptation, (ii) importance of dialog, (iii) preservation of narrative order, and (iv) gender representation issues reflective of the Bechdel test.", "keywords": "Text Alignment;Book Movie Alignment", "primary_area": "", "supplementary_material": "", "author": "Tanzir Pial;Shahreen Salim Aunti;Charuta Pethe;Allen Kim;Steven Skiena", "authorids": "~Tanzir_Pial1;~Shahreen_Salim_Aunti1;~Charuta_Pethe1;~Allen_Kim1;~Steven_Skiena1", "gender": "M;F;;M;F", "homepage": ";https://www3.cs.stonybrook.edu/~cpethe;http://cs.stonybrook.edu/~allekim/;https://www.cs.stonybrook.edu/~skiena;", "dblp": "226/7216;;;s/StevenSkiena.html;", "google_scholar": "YUcK9-MAAAAJ;https://scholar.google.com/citations?hl=en;;https://scholar.google.com/citations?hl=en;", "or_profile": "~Tanzir_Pial1;~Charuta_Pethe1;~Allen_Kim1;~Steven_Skiena1;~Shahreen_Salim1", "aff": "State University of New York at Stony Brook;;;State University of New York at Stony Brook;, State University of New York at Stony Brook", "aff_domain": "stonybrook.edu;;;stonybrook.edu;cs.stonybrook.edu", "position": "PhD student;;;Full Professor;PhD student", "bibtex": "@inproceedings{\npial2023analyzing,\ntitle={Analyzing Film Adaptation through Narrative Alignment},\nauthor={Tanzir Pial and Shahreen Salim Aunti and Charuta Pethe and Allen Kim and Steven Skiena},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=19uudhc1s8}\n}", "github": "", "project": "", "reviewers": "CXES;ybnJ;MAg3", "site": "https://openreview.net/forum?id=19uudhc1s8", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;3;5", "excitement": "4;4;4", "reproducibility": "3;4;4", "correctness": "4;4;4", "rating_avg": 5.0, "confidence_avg": 4.0, "excitement_avg": 4.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;", "linkedin": "tanzir-pial/;;;;shahreen-salim-15a783131", "aff_unique_index": "0;0;0", "aff_unique_norm": "State University of New York at Stony Brook", "aff_unique_dep": "", "aff_unique_url": "https://www.stonybrook.edu", "aff_unique_abbr": "SUNY Stony Brook", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Stony Brook", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "1BMj6opwbj", "title": "From Values to Opinions: Predicting Human Behaviors and Stances Using Value-Injected Large Language Models", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Being able to predict people's opinions on issues and behaviors in 
realistic scenarios can be helpful in various domains, such as politics and marketing. However, conducting large-scale surveys like the European Social Survey to solicit people's opinions on individual issues can incur prohibitive costs. Leveraging prior research showing influence of core human values on individual decisions and actions, we propose to use value-injected large language models (LLM) to predict opinions and behaviors. To this end, we present Value Injection Method (VIM), a collection of two methods---argument generation and question answering---designed to inject targeted value distributions into LLMs via fine-tuning. We then conduct a series of experiments on four tasks to test the effectiveness of VIM and the possibility of using value-injected LLMs to predict opinions and behaviors of people. We find that LLMs value-injected with variations of VIM substantially outperform the baselines. Also, the results suggest that opinions and behaviors can be better predicted using value-injected LLMs than the baseline approaches.", "keywords": "Schwartz Value Theory;Large Language Model;Human Behavior;Personality;Value Injection", "primary_area": "", "supplementary_material": "", "author": "Dongjun Kang;Joonsuk Park;Yohan Jo;JinYeong Bak", "authorids": "~Dongjun_Kang1;~Joonsuk_Park1;~Yohan_Jo1;~JinYeong_Bak2", "gender": "M;M;;M", "homepage": "https://hli.skku.edu/dongjun_kang/;http://www.joonsuk.org;https://yohanjo.github.io/;https://nosyu.kr", "dblp": ";50/9717;40/8877;22/11519", "google_scholar": ";3SPMM3oAAAAJ;xp3LGRQAAAAJ;https://scholar.google.co.kr/citations?user=oYK9Z_IAAAAJ", "or_profile": "~Dongjun_Kang1;~Joonsuk_Park1;~Yohan_Jo1;~JinYeong_Bak2", "aff": "Sung Kyun Kwan University;University of Richmond;Amazon;Sungkyunkwan University", "aff_domain": "skku.edu;richmond.edu;amazon.com;skku.edu", "position": "MS student;Assistant Professor;Applied Scientist;Assistant Professor", "bibtex": "@inproceedings{\nkang2023from,\ntitle={From Values to Opinions: Predicting Human Behaviors and Stances Using Value-Injected Large Language Models},\nauthor={Dongjun Kang and Joonsuk Park and Yohan Jo and JinYeong Bak},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=1BMj6opwbj}\n}", "github": "", "project": "", "reviewers": "uZ69;1Yni;FesA;vyUS", "site": "https://openreview.net/forum?id=1BMj6opwbj", "pdf_size": 0, "rating": "4;4;4;4", "confidence": "4;3;4;4", "excitement": "4;4;3;4", "reproducibility": "4;4;3;4", "correctness": "5;3;3;4", "rating_avg": 4.0, "confidence_avg": 3.75, "excitement_avg": 3.75, "reproducibility_avg": 3.75, "correctness_avg": 3.75, "replies_avg": 13, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-1182-4836;;0000-0002-3212-5241", "linkedin": ";;;jybak/", "aff_unique_index": "0;1;2;0", "aff_unique_norm": "Sungkyunkwan University;University of Richmond;Amazon", "aff_unique_dep": ";;Amazon.com, Inc.", "aff_unique_url": "https://www.skku.edu;https://www.richmond.edu;https://www.amazon.com", "aff_unique_abbr": "SKKU;UR;Amazon", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;0", "aff_country_unique": "South Korea;United States" }, { "id": "1CaBi9kEng", "title": "ScanDL: A Diffusion Model for Generating Synthetic Scanpaths on Texts", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Eye movements in reading play a crucial role in psycholinguistic research studying the 
cognitive mechanisms underlying human language processing. More recently, the tight coupling between eye movements and cognition has also been leveraged for language-related machine learning tasks such as the interpretability, enhancement, and pre-training of language models, as well as the inference of reader- and text-specific properties. However, scarcity of eye movement data and its unavailability at application time poses a major challenge for this line of research. Initially, this problem was tackled by resorting to cognitive models for synthesizing eye movement data. However, for the sole purpose of generating human-like scanpaths, purely data-driven machine-learning-based methods have proven to be more suitable. Following recent advances in adapting diffusion processes to discrete data, we propose ScanDL, a novel discrete sequence-to-sequence diffusion model that generates synthetic scanpaths on texts. By leveraging pre-trained word representations and jointly embedding both the stimulus text and the fixation sequence, our model captures multi-modal interactions between the two inputs. We evaluate ScanDL within- and across-dataset and demonstrate that it significantly outperforms state-of-the-art scanpath generation methods. Finally, we provide an extensive psycholinguistic analysis that underlines the model's ability to exhibit human-like reading behavior. Our implementation is made available at https://github.com/DiLi-Lab/ScanDL.", "keywords": "scanpath generation;eye movements;diffusion models;computational psycholinguistics;deep neural networks;transformer;eye tracking", "primary_area": "", "supplementary_material": "", "author": "Lena Sophia Bolliger;David Robert Reich;Patrick Haller;Deborah Noemie Jakobi;Paul Prasse;Lena Ann J\u00e4ger", "authorids": "~Lena_Sophia_Bolliger1;~David_Robert_Reich1;~Patrick_Haller1;~Deborah_Noemie_Jakobi1;~Paul_Prasse1;~Lena_Ann_J\u00e4ger1", "gender": "F;M;M;;;F", "homepage": "https://www.cl.uzh.ch/en/research-groups/digital-linguistics/people/lab-members/bolliger.html;https://david.reich.ai;https://www.cl.uzh.ch/phaller;https://www.cl.uzh.ch/en/digital-linguistics/people/lab-members/jakobi.html;https://www.uni-potsdam.de/de/cs-ml/staff/phd/prasse;https://www.cl.uzh.ch/en/research-groups/digital-linguistics.html", "dblp": ";321/1783.html;302/4394-1;;116/3028;198/0994.html", "google_scholar": ";Tc-NKJgAAAAJ;-o8WsYQAAAAJ;;https://scholar.google.de/citations?user=qAbXPJQAAAAJ;3vfyy40AAAAJ", "or_profile": "~Lena_Sophia_Bolliger1;~David_Robert_Reich1;~Patrick_Haller1;~Deborah_Noemie_Jakobi1;~Paul_Prasse1;~Lena_Ann_J\u00e4ger1", "aff": "University of Zurich;Universit\u00e4t Potsdam;University of Zurich;University of Zurich;Universit\u00e4t Potsdam;Universit\u00e4t Potsdam", "aff_domain": "uzh.ch;uni-potsdam.de;uzh.ch;uzh.ch;uni-potsdam.de;uni-potsdam.de", "position": "PhD student;PhD student;PhD student;PhD student;Postdoc;Principal Researcher", "bibtex": "@inproceedings{\nbolliger2023scandl,\ntitle={Scan{DL}: A Diffusion Model for Generating Synthetic Scanpaths on Texts},\nauthor={Lena Sophia Bolliger and David Robert Reich and Patrick Haller and Deborah Noemie Jakobi and Paul Prasse and Lena Ann J{\\\"a}ger},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=1CaBi9kEng}\n}", "github": "", "project": "", "reviewers": "iKVr;JBSW;cEYa", "site": "https://openreview.net/forum?id=1CaBi9kEng", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;3;3", "excitement": 
"4;4;3", "reproducibility": "4;4;4", "correctness": "5;3;4", "rating_avg": 4.0, "confidence_avg": 3.0, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.0, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0001-5776-7235;0000-0002-3524-3788;0000-0002-8968-7587;;0000-0003-1842-3645;0000-0001-9018-9713", "linkedin": ";;halp/;deborah-jakobi/;;https://ch.linkedin.com/company/digital-linguistics-uzh", "aff_unique_index": "0;1;0;0;1;1", "aff_unique_norm": "University of Zurich;University of Potsdam", "aff_unique_dep": ";", "aff_unique_url": "https://www.unizh.ch;https://www.uni-potsdam.de", "aff_unique_abbr": "UZH;UP", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;1;1", "aff_country_unique": "Switzerland;Germany" }, { "id": "1IRFq6qdke", "title": "BanglaAbuseMeme: A Dataset for Bengali Abusive Meme Classification", "track": "main", "status": "Long Main", "tldr": "", "abstract": "The dramatic increase in the use of social media platforms for information sharing has also fueled a steep growth in online abuse. A simple yet effective way of abusing individuals or communities is by creating memes, which often integrate an image with a short piece of text layered on top of it. Such harmful elements are in rampant use and are a threat to online safety. Hence it is necessary to develop efficient models to detect and flag abusive memes. The problem becomes more challenging in a low-resource setting (e.g., Bengali memes, i.e., images with Bengali text embedded on it) because of the absence of benchmark datasets on which AI models could be trained. In this paper we bridge this gap by building a Bengali meme dataset. To setup an effective benchmark we implement several baseline models for classifying abusive memes using this dataset. We observe that multimodal models that use both textual and visual information outperform unimodal models. Our best-performing model achieves a macro F1 score of 70.51. 
Finally, we perform a qualitative error analysis of the misclassified memes of the best-performing text-based, image-based and multimodal models.", "keywords": "abusive meme;low-resource language;social media", "primary_area": "", "supplementary_material": "", "author": "Mithun Das;Animesh Mukherjee", "authorids": "~Mithun_Das1;~Animesh_Mukherjee2", "gender": "M;M", "homepage": "https://das-mithun.github.io/;http://cse.iitkgp.ac.in/~animeshm", "dblp": "283/2951;m/AnimeshMukherjee.html", "google_scholar": "tebayusAAAAJ;lf7-deEAAAAJ", "or_profile": "~Mithun_Das1;~Animesh_Mukherjee1", "aff": "Indian Institute of Technology Kharagpur;Indian Institute of Technology Kharagpur", "aff_domain": "iitkgp.ac.in;iitkgp.ac.in", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\ndas2023banglaabusememe,\ntitle={BanglaAbuseMeme: A Dataset for Bengali Abusive Meme Classification},\nauthor={Mithun Das and Animesh Mukherjee},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=1IRFq6qdke}\n}", "github": "", "project": "", "reviewers": "zToY;UVRc;xN2X", "site": "https://openreview.net/forum?id=1IRFq6qdke", "pdf_size": 0, "rating": "5;5;5", "confidence": "5;4;5", "excitement": "4;4;2", "reproducibility": "4;4;5", "correctness": "4;4;3", "rating_avg": 5.0, "confidence_avg": 4.666666666666667, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.333333333333333, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";", "linkedin": ";", "aff_unique_index": "0;0", "aff_unique_norm": "Indian Institute of Technology Kharagpur", "aff_unique_dep": "", "aff_unique_url": "https://www.iitkgp.ac.in", "aff_unique_abbr": "IIT Kharagpur", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Kharagpur", "aff_country_unique_index": "0;0", "aff_country_unique": "India" }, { "id": "1N5Ia3KLX8", "title": "Closed Boundary Learning for Classification Tasks with the Universum Class", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "The Universum class, often known as the *other* class or the *miscellaneous* class, is defined as a collection of samples that do not belong to any class of interest. It is a typical class that exists in many classification-based tasks in NLP, such as relation extraction, named entity recognition, sentiment analysis, etc. The Universum class exhibits very different properties, namely heterogeneity and lack of representativeness in training data; however, existing methods often treat the Universum class equally with the classes of interest, leading to problems such as overfitting, misclassification, and diminished model robustness. In this work, we propose a closed boundary learning method that applies closed decision boundaries to classes of interest and designates the area outside all closed boundaries in the feature space as the space of the Universum class. Specifically, we formulate closed boundaries as arbitrary shapes, propose the inter-class rule-based probability estimation for the Universum class to cater to its unique properties, and propose a boundary learning loss to adjust decision boundaries based on the balance of misclassified samples inside and outside the boundary. 
In adherence to the natural properties of the Universum class, our method enhances both accuracy and robustness of classification models, demonstrated by improvements on six state-of-the-art works across three different tasks. Our code is available at https://github.com/hzzhou01/Closed-Boundary-Learning.", "keywords": "classification tasks;representation learning;the miscellaneous class", "primary_area": "", "supplementary_material": "", "author": "Hanzhang Zhou;Zijian Feng;Kezhi Mao", "authorids": "~Hanzhang_Zhou1;~Zijian_Feng2;~Kezhi_Mao1", "gender": "M;M;M", "homepage": ";;https://dr.ntu.edu.sg/cris/rp/rp00158", "dblp": "295/8180;45/10114;m/KezhiMao", "google_scholar": ";;jCsRJXUAAAAJ", "or_profile": "~Hanzhang_Zhou1;~Zijian_Feng2;~Kezhi_Mao1", "aff": "Nanyang Technological University;Nanyang Technological University;Nanyang Technological University", "aff_domain": "ntu.edu.sg;ntu.edu.sg;ntu.edu.sg", "position": "PhD student;PhD student;Associate Professor", "bibtex": "@inproceedings{\nzhou2023closed,\ntitle={Closed Boundary Learning for Classification Tasks with the Universum Class},\nauthor={Hanzhang Zhou and Zijian Feng and Kezhi Mao},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=1N5Ia3KLX8}\n}", "github": "", "project": "", "reviewers": "RZMD;gG1E;G4Yh", "site": "https://openreview.net/forum?id=1N5Ia3KLX8", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;3;3", "excitement": "4;3;3", "reproducibility": "3;4;3", "correctness": "3;3;3", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.0, "replies_avg": 10, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0003-3758-636X;0000-0003-1311-988X;", "linkedin": ";;", "aff_unique_index": "0;0;0", "aff_unique_norm": "Nanyang Technological University", "aff_unique_dep": "", "aff_unique_url": "https://www.ntu.edu.sg", "aff_unique_abbr": "NTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Singapore" }, { "id": "1PXPP9Gzgc", "title": "BERTwich: Extending BERT\u2019s Capabilities to Model Dialectal and Noisy Text", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Real-world NLP applications often deal with nonstandard text (e.g., dialectal, informal, or misspelled text). However, language models like BERT deteriorate in the face of dialect variation or noise. How do we push BERT\u2019s modeling capabilities to encompass nonstandard text? Fine-tuning helps, but it is designed for specializing a model to a task and does not seem to bring about the deeper, more pervasive changes needed to adapt a model to nonstandard language. In this paper, we introduce the novel idea of sandwiching BERT's encoder stack between additional encoder layers trained to perform masked language modeling on noisy text. 
We find that our approach, paired with recent work on including character-level noise in fine-tuning data, can promote zero-shot transfer to dialectal text, as well as reduce the distance in the embedding space between words and their noisy counterparts.", "keywords": "BERT;language modeling;dialects;noisy text;fine-tuning", "primary_area": "", "supplementary_material": "", "author": "Aarohi Srivastava;David Chiang", "authorids": "~Aarohi_Srivastava1;~David_Chiang1", "gender": "F;M", "homepage": "https://nlp.nd.edu/aarohi/;https://nd.edu/~dchiang", "dblp": "322/1811;https://dblp.org/pers/hd/c/Chiang_0001:David", "google_scholar": "Mv6HFkAAAAAJ;dok0514AAAAJ", "or_profile": "~Aarohi_Srivastava1;~David_Chiang1", "aff": "University of Notre Dame;University of Notre Dame", "aff_domain": "nd.edu;nd.edu", "position": "PhD student;Associate Professor", "bibtex": "@inproceedings{\nsrivastava2023bertwich,\ntitle={{BERT}wich: Extending {BERT}{\\textquoteright}s Capabilities to Model Dialectal and Noisy Text},\nauthor={Aarohi Srivastava and David Chiang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=1PXPP9Gzgc}\n}", "github": "", "project": "", "reviewers": "FYgr;FFsj;x8AR", "site": "https://openreview.net/forum?id=1PXPP9Gzgc", "pdf_size": 0, "rating": "3;3;3", "confidence": "5;3;4", "excitement": "2;3;4", "reproducibility": "3;3;3", "correctness": "2;3;3", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 3.0, "reproducibility_avg": 3.0, "correctness_avg": 2.6666666666666665, "replies_avg": 11, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-0435-4864", "linkedin": "aarohi-srivastava;", "aff_unique_index": "0;0", "aff_unique_norm": "University of Notre Dame", "aff_unique_dep": "", "aff_unique_url": "https://www.nd.edu", "aff_unique_abbr": "Notre Dame", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "1RVUxlrFJZ", "title": "Can Retriever-Augmented Language Models Reason? The Blame Game Between the Retriever and the Language Model", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Augmenting pretrained language models with retrievers has shown promise in effectively solving common NLP problems, such as language modeling and question answering. In this paper, we evaluate the strengths and weaknesses of popular retriever-augmented language models, namely kNN-LM, REALM, DPR + FiD, Contriever + ATLAS, and Contriever + Flan-T5, in reasoning over retrieved statements across different tasks. \nOur findings indicate that the simple similarity metric employed by retrievers is insufficient for retrieving all the necessary statements for reasoning. Additionally, the language models do not exhibit strong reasoning even when provided with only the required statements. \nFurthermore, when combined with imperfect retrievers, the performance of the language models becomes even worse, e.g., Flan-T5's performance drops by 28.6% when retrieving 5 statements using Contriever. While larger language models improve performance, there is still a substantial room for enhancement. Our further analysis indicates that multihop retrieve-and-read is promising for large language models like GPT-3.5, but does not generalize to other language models like Flan-T5-xxl. 
The code is available at https://github.com/McGill-NLP/retriever-lm-reasoning.", "keywords": "retriever-augmented language models;reasoning of language models", "primary_area": "", "supplementary_material": "", "author": "Parishad BehnamGhader;Santiago Miret;Siva Reddy", "authorids": "~Parishad_BehnamGhader1;~Santiago_Miret1;~Siva_Reddy1", "gender": "F;M;M", "homepage": "https://parishadbehnam.github.io;https://www.intel.ai/bio/santiago-miret/;http://sivareddy.in", "dblp": "334/4116;241/5030;64/8153", "google_scholar": "pw9mblYAAAAJ;HLQ_te4AAAAJ;", "or_profile": "~Parishad_BehnamGhader1;~Santiago_Miret1;~Siva_Reddy1", "aff": "McGill University - Mila;Intel;Mila, McGill University", "aff_domain": "mail.mcgill.ca;intel.com;mila.quebec", "position": "PhD student;Researcher;Assistant Professor", "bibtex": "@inproceedings{\nbehnamghader2023can,\ntitle={Can Retriever-Augmented Language Models Reason? The Blame Game Between the Retriever and the Language Model},\nauthor={Parishad BehnamGhader and Santiago Miret and Siva Reddy},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=1RVUxlrFJZ}\n}", "github": "", "project": "", "reviewers": "cXZR;k18u;orzP;WssQ", "site": "https://openreview.net/forum?id=1RVUxlrFJZ", "pdf_size": 0, "rating": "3;3;3;3", "confidence": "4;4;3;4", "excitement": "4;3;3;4", "reproducibility": "4;4;4;4", "correctness": "4;4;4;4", "rating_avg": 3.0, "confidence_avg": 3.75, "excitement_avg": 3.5, "reproducibility_avg": 4.0, "correctness_avg": 4.0, "replies_avg": 14, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-5121-3853;", "linkedin": "https://linkedin.com/in/parishadbehnam;santiago-miret/;", "aff_unique_index": "0;1;0", "aff_unique_norm": "McGill University;Intel", "aff_unique_dep": "Mila;Intel Corporation", "aff_unique_url": "https://www.mcgill.ca;https://www.intel.com", "aff_unique_abbr": "McGill;Intel", "aff_campus_unique_index": "1", "aff_campus_unique": ";Montreal", "aff_country_unique_index": "0;1;0", "aff_country_unique": "Canada;United States" }, { "id": "1Sn1dpNaP3", "title": "Evaluating Parameter-Efficient Finetuning Approaches for Pre-trained Models on the Financial Domain", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "Large-scale language models with millions, billions, or trillions of trainable parameters are becoming increasingly popular. However, they risk becoming rapidly over-parameterized and the adaptation cost of fully fine-tuning them increases significantly. Storing them becomes progressively impractical as it requires keeping a separate copy of all the fine-tuned weights for each task. By freezing all pre-trained weights during fine-tuning, parameter-efficient tuning approaches have become an appealing alternative to traditional fine-tuning. The performance of these approaches has been evaluated on common NLP tasks of the GLUE benchmark and shown to match full fine-tuning performance, however, their impact is less researched in domain-specific fields such as finance. This work compares the performance of a set of financial BERT-like models to their fully fine-tuned counterparts by leveraging different parameter-efficient tuning methods. 
We see that results are comparable to traditional fine-tuning while gaining in time and resource efficiency.", "keywords": "NLP;fine-tuning;parameter efficiency;financial domain", "primary_area": "", "supplementary_material": "", "author": "Isabella Olariu;Cedric Lothritz;Jacques Klein;Tegawend\u00e9 F. Bissyand\u00e9;Siwen Guo;Shohreh Haddadan", "authorids": "~Isabella_Olariu1;~Cedric_Lothritz1;~Jacques_Klein1;~Tegawend\u00e9_F._Bissyand\u00e91;~Siwen_Guo1;~Shohreh_Haddadan1", "gender": "F;M;M;M;;F", "homepage": ";;https://jacquesklein2302.github.io/;https://bissyande.github.io/;;https://shohrehhd.github.io", "dblp": ";280/0086;k/JacquesKlein;00/8006.html;;245/8736.html", "google_scholar": ";H8prvCkAAAAJ;https://scholar.google.fr/citations?user=9E_KKT4AAAAJ;t73Mqm8AAAAJ;;G7BMGQYAAAAJ", "or_profile": "~Isabella_Olariu1;~Cedric_Lothritz1;~Jacques_Klein1;~Tegawend\u00e9_F._Bissyand\u00e91;~Siwen_Guo1;~Shohreh_Haddadan1", "aff": "University of Luxemburg;University of Luxemburg;University of Luxemburg;University of Luxemburg;Zortify S. A.;Zortify", "aff_domain": "uni.lu;uni.lu;uni.lu;uni.lu;zortify.com;zortify.com", "position": "PhD student;PhD student;Full Professor;Associate Professor;Researcher;data scientist", "bibtex": "@inproceedings{\nolariu2023evaluating,\ntitle={Evaluating Parameter-Efficient Finetuning Approaches for Pre-trained Models on the Financial Domain},\nauthor={Isabella Olariu and Cedric Lothritz and Jacques Klein and Tegawend{\\'e} F. Bissyand{\\'e} and Siwen Guo and Shohreh Haddadan},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=1Sn1dpNaP3}\n}", "github": "", "project": "", "reviewers": "JNPW;yiyh;7Xqg;mQ9a", "site": "https://openreview.net/forum?id=1Sn1dpNaP3", "pdf_size": 0, "rating": "2;2;2;2", "confidence": "5;4;5;4", "excitement": "2;3;2;2", "reproducibility": "4;4;3;4", "correctness": "2;2;1;4", "rating_avg": 2.0, "confidence_avg": 4.5, "excitement_avg": 2.25, "reproducibility_avg": 3.75, "correctness_avg": 2.25, "replies_avg": 14, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-5372-7970;0000-0003-4052-475X;0000-0001-7270-9869;;0000-0001-5586-5675", "linkedin": "isabella-olariu/;cedric-lothritz-35a698182/;jacques-klein-188b0b5/;;siwen-guo-9b318a129;https://linkedin.com/in/shohreh-haddadan-45803aa3", "aff_unique_index": "0;0;0;0;1;1", "aff_unique_norm": "University of Luxembourg;Zortify", "aff_unique_dep": ";", "aff_unique_url": "https://wwwen.uniluxembourg.lu;", "aff_unique_abbr": "Uni Lu;Zortify", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;1", "aff_country_unique": "Luxembourg;Unknown;" }, { "id": "1UCopEeGz7", "title": "Rationale-Enhanced Language Models are Better Continual Relation Learners", "track": "main", "status": "Short Main", "tldr": "", "abstract": "Continual relation extraction (CRE) aims to solve the problem of catastrophic forgetting when learning a sequence of newly emerging relations. Recent CRE studies have found that catastrophic forgetting arises from the model's lack of robustness against future analogous relations. To address the issue, we introduce rationale, i.e., the explanations of relation classification results generated by Large Language Models (LLM), into CRE task. Specifically, we design the multi-task rationale tuning strategy to help the model learn current relations robustly. 
We also conduct contrastive rationale replay to further distinguish analogous relations. Experimental results on two standard benchmarks demonstrate that our method outperforms the state-of-the-art CRE models.", "keywords": "continual learning;relation extraction;rationale", "primary_area": "", "supplementary_material": "", "author": "Weimin Xiong;Yifan Song;Peiyi Wang;Sujian Li", "authorids": "~Weimin_Xiong1;~Yifan_Song2;~Peiyi_Wang1;~Sujian_Li1", "gender": "M;M;M;F", "homepage": "https://github.com/WeiminXiong;https://yifan-song793.github.io/;;https://pku-tangent.github.io/", "dblp": "342/9246;;236/6569.html;05/4288", "google_scholar": "UwYq5tgAAAAJ;;K0uQ3ygAAAAJ;https://scholar.google.com.tw/citations?user=RvBDhSwAAAAJ", "or_profile": "~Weimin_Xiong1;~Yifan_Song2;~Peiyi_Wang1;~Sujian_Li1", "aff": "Peking University;Peking University;Peking University;Peking University", "aff_domain": "pku.edu.cn;pku.edu.cn;pku.edu.cn;pku.edu.cn", "position": "Undergrad student;PhD student;PhD student;Associate Professor", "bibtex": "@inproceedings{\nxiong2023rationaleenhanced,\ntitle={Rationale-Enhanced Language Models are Better Continual Relation Learners},\nauthor={Weimin Xiong and Yifan Song and Peiyi Wang and Sujian Li},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=1UCopEeGz7}\n}", "github": "", "project": "", "reviewers": "ergu;HVe5;hR9i", "site": "https://openreview.net/forum?id=1UCopEeGz7", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;3", "excitement": "3;3;3", "reproducibility": "3;4;3", "correctness": "3;4;3", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.0, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;", "linkedin": ";;;", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Peking University", "aff_unique_dep": "", "aff_unique_url": "http://www.pku.edu.cn", "aff_unique_abbr": "Peking U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "1VsVZm4DLg", "title": "All Things Considered: Detecting Partisan Events from News Media with Cross-Article Comparison", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Public opinion is shaped by the information news media provide, and that information in turn may be shaped by the ideological preferences of media outlets. But while much attention has been devoted to media bias via overt ideological language or topic selection, a more unobtrusive way in which the media shape opinion is via the strategic inclusion or omission of $\\textit{partisan events}$ that may $\\textit{support}$ one side or the other. We develop a latent variable-based framework to predict the ideology of news articles by comparing multiple articles on the same story and identifying partisan events whose inclusion or omission reveals ideology. Our experiments first validate the existence of partisan event selection, and then show that article alignment and cross-document comparison detect partisan events and article ideology better than competitive baselines. \nOur results reveal the high-level form of media bias, which is present even among mainstream media with strong norms of objectivity and nonpartisanship. 
Our codebase and dataset are available at https://github.com/launchnlp/ATC.", "keywords": "Partisan event detection;media bias", "primary_area": "", "supplementary_material": "", "author": "Yujian Liu;Xinliang Frederick Zhang;Kaijian Zou;Ruihong Huang;Nicholas Beauchamp;Lu Wang", "authorids": "~Yujian_Liu1;~Xinliang_Frederick_Zhang1;~Kaijian_Zou1;~Ruihong_Huang1;~Nicholas_Beauchamp1;~Lu_Wang9", "gender": "M;M;M;F;M;F", "homepage": "https://yujianll.github.io;https://web.eecs.umich.edu/~xlfzhang/;https://zkjzou.github.io/;https://people.engr.tamu.edu/huangrh/index.html;http://nickbeauchamp.com;https://web.eecs.umich.edu/~wangluxy/", "dblp": "206/8853;277/5381;;42/4811.html;220/2037;49/3800-8", "google_scholar": "rLetNLIAAAAJ;-uGCT5QAAAAJ;q2tM5CYAAAAJ;https://scholar.google.com.tw/citations?user=NU2aHWUAAAAJ;;uczqEdUAAAAJ", "or_profile": "~Yujian_Liu1;~Xinliang_Frederick_Zhang1;~Kaijian_Zou1;~Ruihong_Huang1;~Nicholas_Beauchamp1;~Lu_Wang9", "aff": "University of California, Santa Barbara;Bloomberg;University of Michigan - Ann Arbor;Texas A&M University;Northeastern University;University of Michigan", "aff_domain": "ucsb.edu;bloomberg.net;umich.edu;cse.tamu.edu;northeastern.edu;umich.edu", "position": "PhD student;Intern;PhD student;Associate Professor;Associate Professor;Associate Professor", "bibtex": "@inproceedings{\nliu2023all,\ntitle={All Things Considered: Detecting Partisan Events from News Media with Cross-Article Comparison},\nauthor={Yujian Liu and Xinliang Frederick Zhang and Kaijian Zou and Ruihong Huang and Nicholas Beauchamp and Lu Wang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=1VsVZm4DLg}\n}", "github": "", "project": "", "reviewers": "9YKN;MVtQ;bwkA", "site": "https://openreview.net/forum?id=1VsVZm4DLg", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;4;4", "excitement": "4;4;4", "reproducibility": "4;4;4", "correctness": "4;4;3", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 4.0, "reproducibility_avg": 4.0, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;", "linkedin": ";frederick-x-zhang/?locale=en_US;kaijian-kai-zou-19991107/;;;", "aff_unique_index": "0;1;2;3;4;2", "aff_unique_norm": "University of California, Santa Barbara;Bloomberg;University of Michigan;Texas A&M University;Northeastern University", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.ucsb.edu;https://www.bloomberg.com;https://www.umich.edu;https://www.tamu.edu;https://www.northeastern.edu", "aff_unique_abbr": "UCSB;Bloomberg;UM;TAMU;NEU", "aff_campus_unique_index": "0;2", "aff_campus_unique": "Santa Barbara;;Ann Arbor", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "1WJoJPXwiG", "title": "FinEntity: Entity-level Sentiment Classification for Financial Texts", "track": "main", "status": "Short Main", "tldr": "", "abstract": "In the financial domain, conducting entity-level sentiment analysis is crucial for accurately assessing the sentiment directed toward a specific financial entity. To our knowledge, no publicly available dataset currently exists for this purpose. In this work, we introduce an entity-level sentiment classification dataset, called FinEntity, that annotates financial entity spans and their sentiment (positive, neutral, and negative) in financial news. 
We document the dataset construction process in the paper. Additionally, we benchmark several pre-trained models (BERT, FinBERT, etc.) and ChatGPT on entity-level sentiment classification. In a case study, we demonstrate the practical utility of using FinEntity in monitoring cryptocurrency markets. The data and code of FinEntity are available at https://github.com/yixuantt/FinEntity.", "keywords": "Named Entity Recognition;Sentiment Analysis;Financial NLP", "primary_area": "", "supplementary_material": "", "author": "Yixuan Tang;Yi Yang;Allen H Huang;Andy Tam;Justin Z. Tang", "authorids": "~Yixuan_Tang2;~Yi_Yang7;~Allen_H_Huang1;~Andy_Tam1;~Justin_Z._Tang1", "gender": ";;;M;", "homepage": "https://yixuantt.github.io/;http://yya518.github.io/;http://allenhuang.org;https://www.hkma.gov.hk/eng/;", "dblp": ";;;;", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com.hk/citations?user=Prh_dHkAAAAJ;https://scholar.google.com.hk/citations?user=dPlHYZoAAAAJ;;", "or_profile": "~Yixuan_Tang2;~Yi_Yang7;~Allen_H_Huang1;~Andy_Tam1;~Justin_Z._Tang1", "aff": ";Hong Kong University of Science and Technology;Hong Kong University of Science and Technology;;", "aff_domain": ";ust.hk;ust.hk;;", "position": ";Assistant Professor;Associate Professor;;", "bibtex": "@inproceedings{\ntang2023finentity,\ntitle={FinEntity: Entity-level Sentiment Classification for Financial Texts},\nauthor={Yixuan Tang and Yi Yang and Allen H Huang and Andy Tam and Justin Z. Tang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=1WJoJPXwiG}\n}", "github": "", "project": "", "reviewers": "kPZc;NjN2;R2TV", "site": "https://openreview.net/forum?id=1WJoJPXwiG", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;4", "excitement": "3;2;4", "reproducibility": "3;4;4", "correctness": "3;3;4", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0009-0006-2405-2026;0000-0001-8863-112X;0000-0001-8565-5791;;", "linkedin": "yixuan-tang-2023yixuan;;allenhhuang/;https://linkedin.com/in/tam-kiu-fai-andy-4aa55596;", "aff_unique_index": "0;0", "aff_unique_norm": "Hong Kong University of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.ust.hk", "aff_unique_abbr": "HKUST", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Hong Kong SAR", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "id": "1Xht3SKAoY", "title": "ExpNote: Black-box Large Language Models are better Task Solvers with Experience Notebook", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "Black-box Large Language Models (LLMs) have shown great power in solving various tasks and are considered general problem solvers. However, LLMs still fail in many specific tasks although they understand the task instruction. In this paper, we focus on the problem of boosting the ability of black-box LLMs to solve downstream tasks. We propose ExpNote, an automated framework to help LLMs better adapt to unfamiliar tasks through reflecting and noting experiences from training data and retrieving them from external memory during testing. We evaluate ExpNote on multiple tasks and the experimental results demonstrate that the proposed method significantly improves the performance of black-box LLMs. 
The data and code are available at https://github.com/forangel2014/ExpNote.", "keywords": "large language model;self-reflection;in-context learning", "primary_area": "", "supplementary_material": "", "author": "Wangtao Sun;Xuanqing Yu;Shizhu He;Jun Zhao;Kang Liu", "authorids": "~Wangtao_Sun1;~Xuanqing_Yu1;~Shizhu_He2;~Jun_Zhao4;~Kang_Liu1", "gender": "M;M;M;M;M", "homepage": "https://github.com/forangel2014;https://seabiscuityu.github.io;https://heshizhu.github.io/;http://nlpr-web.ia.ac.cn/cip/english/~junzhao/index.html;http://www.nlpr.ia.ac.cn/cip/~liukang/index.html", "dblp": "360/6158;360/6667;136/8650;https://dblp.uni-trier.de/pid/47/2026-1.html;42/4903.html", "google_scholar": ";;zBPIt3QAAAAJ;https://scholar.google.com.hk/citations?user=HljRttwAAAAJ;DtZCfl0AAAAJ", "or_profile": "~Wangtao_Sun1;~Xuanqing_Yu1;~Shizhu_He2;~Jun_Zhao4;~Kang_Liu1", "aff": "Institute of Automation, Chinese Academy of Sciences;Institute of Automation, Chinese Academy of Sciences;Institute of Automation, Chinese Academy of Sciences;Institute of automation, Chinese academy of science;Institute of Automation, Chinese Academy of Sciences", "aff_domain": "ia.ac.cn;ia.ac.cn;ia.ac.cn;nlpr.ia.ac.cn;ia.ac.cn", "position": "PhD student;PhD student;Associate Researcher;Full Professor;Professor", "bibtex": "@inproceedings{\nsun2023expnote,\ntitle={ExpNote: Black-box Large Language Models are better Task Solvers with Experience Notebook},\nauthor={Wangtao Sun and Xuanqing Yu and Shizhu He and Jun Zhao and Kang Liu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=1Xht3SKAoY}\n}", "github": "", "project": "", "reviewers": "ZAJN;RD22;CVaE;r6eq", "site": "https://openreview.net/forum?id=1Xht3SKAoY", "pdf_size": 0, "rating": "3;3;3;3", "confidence": "5;4;3;4", "excitement": "2;4;3;3", "reproducibility": "3;4;5;4", "correctness": "2;3;2;3", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 3.0, "reproducibility_avg": 4.0, "correctness_avg": 2.5, "replies_avg": 14, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;", "linkedin": ";;;;", "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Chinese Academy of Sciences", "aff_unique_dep": "Institute of Automation", "aff_unique_url": "http://www.ia.cas.cn", "aff_unique_abbr": "CAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "1cKjvlvR7Z", "title": "Test-Time Self-Adaptive Small Language Models for Question Answering", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "Recent instruction-finetuned large language models (LMs) have achieved notable performances in various tasks, such as question-answering (QA). However, despite their ability to memorize a vast amount of general knowledge across diverse tasks, they might be suboptimal on specific tasks due to their limited capacity to transfer and adapt knowledge to target tasks. Moreover, further finetuning LMs with labeled datasets is often infeasible due to their absence, but it is also questionable if we can transfer smaller LMs having limited knowledge only with unlabeled test data. In this work, we show and investigate the capabilities of smaller self-adaptive LMs, only with unlabeled test data. In particular, we first stochastically generate multiple answers, and then ensemble them while filtering out low-quality samples to mitigate noise from inaccurate labels. 
Our proposed self-adaption strategy demonstrates significant performance improvements on benchmark QA datasets with higher robustness across diverse prompts, enabling LMs to stay stable. Code is available at: https://github.com/starsuzi/T-SAS.", "keywords": "Language Models;Question Answering;Test-Time Adaption", "primary_area": "", "supplementary_material": "", "author": "Soyeong Jeong;Jinheon Baek;Sukmin Cho;Sung Ju Hwang;Jong C. Park", "authorids": "~Soyeong_Jeong1;~Jinheon_Baek1;~Sukmin_Cho1;~Sung_Ju_Hwang1;~Jong_C._Park2", "gender": "F;M;M;;M", "homepage": "https://starsuzi.github.io/;https://jinheonbaek.github.io;http://nlpcl.kaist.ac.kr/home/;;http://nlpcl.kaist.ac.kr/prof", "dblp": "164/0452;262/6003;316/9906;;73/5376", "google_scholar": "0wnquCEAAAAJ;U1FHaSUAAAAJ;https://scholar.google.co.kr/citations?user=YuV8kEoAAAAJ;;XP5heVgAAAAJ", "or_profile": "~Soyeong_Jeong1;~Jinheon_Baek1;~Sukmin_Cho1;~Sung_Ju_Hwang1;~Jong_C._Park2", "aff": "Korea Advanced Institute of Science & Technology;Microsoft Research;Korea Advanced Institute of Science & Technology;;Korea Advanced Institute of Science & Technology", "aff_domain": "kaist.ac.kr;microsoft.com;kaist.ac.kr;;kaist.ac.kr", "position": "PhD student;Intern;PhD student;;Full Professor", "bibtex": "@inproceedings{\njeong2023testtime,\ntitle={Test-Time Self-Adaptive Small Language Models for Question Answering},\nauthor={Soyeong Jeong and Jinheon Baek and Sukmin Cho and Sung Ju Hwang and Jong C. Park},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=1cKjvlvR7Z}\n}", "github": "", "project": "", "reviewers": "kW16;FMJN;ruV4", "site": "https://openreview.net/forum?id=1cKjvlvR7Z", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;4;5", "excitement": "3;3;3", "reproducibility": "2;3;4", "correctness": "3;4;4", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.0, "reproducibility_avg": 3.0, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-9367-560X;;;0000-0002-8859-5111", "linkedin": "soyeong-jeong-900155141;jinheon-baek-8100a8144/;;;", "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology;Microsoft", "aff_unique_dep": ";Microsoft Research", "aff_unique_url": "https://www.kaist.ac.kr;https://www.microsoft.com/en-us/research", "aff_unique_abbr": "KAIST;MSR", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "South Korea;United States" }, { "id": "1faXw8rfeq", "title": "Anaphor Assisted Document-Level Relation Extraction", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Document-level relation extraction (DocRE) involves identifying relations between entities distributed in multiple sentences within a document.\nExisting methods focus on building a heterogeneous document graph to model the internal structure of an entity and the external interaction between entities. \nHowever, there are two drawbacks in existing methods. On one hand, anaphor plays an important role in reasoning to identify relations between entities but is ignored by these methods. On the other hand, these methods achieve cross-sentence entity interactions implicitly by utilizing a document or sentences as intermediate nodes. 
Such an approach has difficulties in learning fine-grained interactions between entities across different sentences, resulting in sub-optimal performance. To address these issues, we propose an Anaphor-Assisted (AA) framework for DocRE tasks. Experimental results on the widely-used datasets demonstrate that our model achieves a new state-of-the-art performance.", "keywords": "relation extraction;document level;anaphor;graph", "primary_area": "", "supplementary_material": "", "author": "Chonggang Lu;Richong Zhang;Kai Sun;Jaein Kim;Cunwang Zhang;Yongyi Mao", "authorids": "~Chonggang_Lu1;~Richong_Zhang1;~Kai_Sun6;~Jaein_Kim1;~Cunwang_Zhang2;~Yongyi_Mao2", "gender": "M;M;M;F;M;M", "homepage": "https://github.com/BurgerBurgerBurger;http://act.buaa.edu.cn/zhangrc;;;https://PierreZhancw.github.io;http://www.eecs.uottawa.ca/~yymao", "dblp": ";61/1229;;;;86/2933", "google_scholar": ";https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com.hk/citations?user=buUlnJUAAAAJ;t7GykZ4AAAAJ;;https://scholar.google.ca/citations?user=jM5l70wAAAAJ", "or_profile": "~Chonggang_Lu1;~Richong_Zhang1;~Kai_Sun6;~Jaein_Kim1;~Cunwang_Zhang2;~Yongyi_Mao1", "aff": "Beihang University;Beihang University;Beihang University;Beihang University;;University of Ottawa", "aff_domain": "buaa.edu.cn;buaa.edu.cn;buaa.edu.cn;buaa.edu.cn;;eecs.uottawa.ca", "position": "MS student;Full Professor;PhD student;PhD student;;Full Professor", "bibtex": "@inproceedings{\nlu2023anaphor,\ntitle={Anaphor Assisted Document-Level Relation Extraction},\nauthor={Chonggang Lu and Richong Zhang and Kai Sun and Jaein Kim and Cunwang Zhang and Yongyi Mao},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=1faXw8rfeq}\n}", "github": "", "project": "", "reviewers": "cmU3;VsBi;dWHw", "site": "https://openreview.net/forum?id=1faXw8rfeq", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;1;4", "excitement": "3;4;4", "reproducibility": "4;5;4", "correctness": "3;4;4", "rating_avg": 4.0, "confidence_avg": 3.0, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.333333333333333, "correctness_avg": 3.6666666666666665, "replies_avg": 10, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-1207-0300;;;;0000-0001-5298-5778", "linkedin": ";;;;;", "aff_unique_index": "0;0;0;0;1", "aff_unique_norm": "Beihang University;University of Ottawa", "aff_unique_dep": ";", "aff_unique_url": "http://www.buaa.edu.cn/;https://www.uottawa.ca", "aff_unique_abbr": "BUAA;U Ottawa", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;1", "aff_country_unique": "China;Canada" }, { "id": "1gUUznQgVC", "title": "SAC$^3$: Reliable Hallucination Detection in Black-Box Language Models via Semantic-aware Cross-check Consistency", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Hallucination detection is a critical step toward understanding the trustworthiness of modern language models (LMs). To achieve this goal, we re-examine existing detection approaches based on the self-consistency of LMs and uncover two types of hallucinations resulting from 1) question-level and 2) model-level, which cannot be effectively identified through self-consistency check alone. Building upon this discovery, we propose a novel sampling-based method, i.e., semantic-aware cross-check consistency (SAC$^3$) that expands on the principle of self-consistency checking. 
Our SAC$^3$ approach incorporates additional mechanisms to detect both question-level and model-level hallucinations by leveraging advances including semantically equivalent question perturbation and cross-model response consistency checking. Through extensive and systematic empirical analysis, we demonstrate that SAC$^3$ outperforms the state of the art in detecting both non-factual and factual statements across multiple question-answering and open-domain generation benchmarks.", "keywords": "Hallucination;semantic consistency;blackbox;large language models;confidence", "primary_area": "", "supplementary_material": "", "author": "Jiaxin Zhang;Zhuohang Li;Kamalika Das;Bradley A. Malin;Sricharan Kumar", "authorids": "~Jiaxin_Zhang2;~Zhuohang_Li1;~Kamalika_Das1;~Bradley_Malin1;~Sricharan_Kumar1", "gender": "M;M;;;M", "homepage": "https://jxzhangjhu.github.io/;https://zhuohang.li/;;;http://www-personal.umich.edu/~kksreddy/", "dblp": "32/7698-5.html;;;;26/8762.html", "google_scholar": "LiDm8jEAAAAJ;_FgPQ50AAAAJ;AF6kWHUAAAAJ;;KHtiAOMAAAAJ", "or_profile": "~Jiaxin_Zhang2;~Zhuohang_Li1;~Kamalika_Das1;~Bradley_Malin1;~Sricharan_Kumar1", "aff": "Intuit AI Research;Vanderbilt University;Intuit;;Intuit", "aff_domain": "intuit.com;vanderbilt.edu;intuit.com;;intuit.com", "position": "Researcher;PhD student;Researcher;;Researcher", "bibtex": "@inproceedings{\nzhang2023sac,\ntitle={{SAC}\\${\\textasciicircum}3\\$: Reliable Hallucination Detection in Black-Box Language Models via Semantic-aware Cross-check Consistency},\nauthor={Jiaxin Zhang and Zhuohang Li and Kamalika Das and Bradley A. Malin and Sricharan Kumar},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=1gUUznQgVC}\n}", "github": "", "project": "", "reviewers": "vZxH;eydx;G3kV", "site": "https://openreview.net/forum?id=1gUUznQgVC", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;5;4", "excitement": "4;3;3", "reproducibility": "4;5;4", "correctness": "3;4;3", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.333333333333333, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;", "linkedin": "jiaxin-zhang-1425289b/;;;;", "aff_unique_index": "0;1;2;2", "aff_unique_norm": "Intuit;Vanderbilt University;Intuit Inc.", "aff_unique_dep": "Intuit AI Research;;", "aff_unique_url": "https://intuit.com/;https://www.vanderbilt.edu;https://www.intuit.com/", "aff_unique_abbr": "Intuit;Vanderbilt;Intuit", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "1iQMzgmKeD", "title": "Extrapolating Multilingual Understanding Models as Multilingual Generators", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Multilingual understanding models (or encoder-based), pre-trained via masked language modeling, have achieved promising results on many language understanding tasks (e.g., mBERT). However, these models are not capable of generating high-quality text compared with decoder-based causal language models. Can we transform a pre-trained language understanding model into an effective language generation model? We propose a Semantic-Guided Alignment-then-Denoising (SGA) approach to adapt a multilingual encoder to a multilingual generator with a small number of additional parameters. 
Experiments show that the proposed approach is an effective adaption method, outperforming widely-used initialization-based methods with gains of 9.4 BLEU on machine translation, 8.1 Rouge-L on question generation, and 5.5 METEOR on story generation on XLM-R$_{large}$. On the other hand, we observe that XLM-R is still inferior to mBART in supervised settings despite better results on zero-shot settings, indicating that more exploration is required to make understanding models strong generators. Our code is available at https://github.com/chengzhipanpan/XLMR4MT.", "keywords": "Multilingual Translation;Prompt Tuning;Non-autoregressive Generation", "primary_area": "", "supplementary_material": "", "author": "Bohong Wu;Fei Yuan;hai zhao;Lei Li;Jingjing Xu", "authorids": "~Bohong_Wu1;~Fei_Yuan2;~hai_zhao1;~Lei_Li11;~Jingjing_Xu1", "gender": ";;M;M;F", "homepage": ";;http://bcmi.sjtu.edu.cn/~zhaohai/;https://www.cs.cmu.edu/~leili;", "dblp": ";;25/1145-1.html;13/7007-5.html;25/624", "google_scholar": ";;https://scholar.google.com.tw/citations?user=4dU5KS0AAAAJ;BYXqAlwAAAAJ;", "or_profile": "~Bohong_Wu1;~Fei_Yuan2;~hai_zhao1;~Lei_Li11;~Jingjing_Xu1", "aff": ";;Shanghai Jiaotong University;Computer Science Department, UC Santa Barbara;", "aff_domain": ";;sjtu.edu.cn;cs.ucsb.edu;", "position": ";;Full Professor;Assistant Professor;", "bibtex": "@inproceedings{\nwu2023extrapolating,\ntitle={Extrapolating Multilingual Understanding Models as Multilingual Generators},\nauthor={Bohong Wu and Fei Yuan and hai zhao and Lei Li and Jingjing Xu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=1iQMzgmKeD}\n}", "github": "", "project": "", "reviewers": "Ne93;qScF;5rFu", "site": "https://openreview.net/forum?id=1iQMzgmKeD", "pdf_size": 0, "rating": "4;4;4", "confidence": "2;4;4", "excitement": "4;4;2", "reproducibility": "3;3;4", "correctness": "4;4;3", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0000-0003-3095-9776;", "linkedin": ";;;;", "aff_unique_index": "0;1", "aff_unique_norm": "Shanghai Jiao Tong University;University of California, Santa Barbara", "aff_unique_dep": ";Computer Science Department", "aff_unique_url": "https://www.sjtu.edu.cn;https://www.ucsb.edu", "aff_unique_abbr": "SJTU;UCSB", "aff_campus_unique_index": "1", "aff_campus_unique": ";Santa Barbara", "aff_country_unique_index": "0;1", "aff_country_unique": "China;United States" }, { "id": "1kmIDTfQ4N", "title": "BERT Has More to Offer: BERT Layers Combination Yields Better Sentence Embeddings", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "Obtaining sentence representations from BERT-based models as feature extractors is invaluable as it takes much less time to pre-compute a one-time representation of the data and then use it for the downstream tasks, rather than fine-tune the whole BERT. Most previous works acquire a sentence's representation by passing it to BERT and averaging its last layer. In this paper, we propose that the combination of certain layers of a BERT-based model rested on the data set and model \ncan achieve substantially better results.\nWe empirically show the effectiveness of our method for different BERT-based models on different tasks and data sets. 
Specifically, on seven standard semantic textual similarity data sets, we outperform the baseline BERT \nby improving the Spearman's correlation by up to 25.75\\% and on average 16.32\\% without any further training. We also achieved state-of-the-art results on eight transfer data sets by reducing the relative error by up to 37.41\\% and on average 17.92\\%.", "keywords": "Sentence Embedding;BERT;BERT-LC;Layers Combination", "primary_area": "", "supplementary_material": "", "author": "MohammadSaleh Hosseini;Munawara Saiyara Munia;Latifur Khan", "authorids": "~MohammadSaleh_Hosseini1;~Munawara_Saiyara_Munia1;~Latifur_Khan1", "gender": "M;F;M", "homepage": "https://personal.utdallas.edu/~sxh175430/;;https://www.utdallas.edu/~lkhan/", "dblp": ";272/9663.html;k/LatifurKhan", "google_scholar": ";s2XATxcAAAAJ;https://scholar.google.com.tw/citations?user=7nERaWEAAAAJ", "or_profile": "~MohammadSaleh_Hosseini1;~Munawara_Saiyara_Munia1;~Latifur_Khan1", "aff": "University of Texas at Dallas;University of Texas at Dallas ;University of Texas at Dallas", "aff_domain": "utdallas.edu;cs.utdallas.edu;utdallas.edu", "position": "PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nhosseini2023bert,\ntitle={{BERT} Has More to Offer: {BERT} Layers Combination Yields Better Sentence Embeddings},\nauthor={MohammadSaleh Hosseini and Munawara Saiyara Munia and Latifur Khan},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=1kmIDTfQ4N}\n}", "github": "", "project": "", "reviewers": "ciSn;YvLi;mPVd", "site": "https://openreview.net/forum?id=1kmIDTfQ4N", "pdf_size": 0, "rating": "3;3;3", "confidence": "5;3;3", "excitement": "3;2;4", "reproducibility": "3;4;3", "correctness": "3;2;4", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.0, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.0, "replies_avg": 10, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": ";munawara-saiyara-munia/;", "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Texas at Dallas", "aff_unique_dep": "", "aff_unique_url": "https://www.utdallas.edu", "aff_unique_abbr": "UT Dallas", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Dallas", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "1mGD6ZLTwv", "title": "Assessing Privacy Risks in Language Models: A Case Study on Summarization Tasks", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Large language models have revolutionized the field of NLP by achieving state-of-the-art performance on various tasks. However, there is a concern that these models may disclose information in the training data. In this study, we focus on the summarization task and investigate the membership inference (MI) attack: given a sample and black-box access to a model's API, it is possible to determine if the sample was part of the training data. We exploit text similarity and the model's resistance to document modifications as potential MI signals and evaluate their effectiveness on widely used datasets. Our results demonstrate that summarization models are at risk of exposing data membership, even in cases where the reference summary is not available. 
Furthermore, we discuss several safeguards for training summarization models to protect against MI attacks and discuss the inherent trade-off between privacy and utility.", "keywords": "Summarization;Membership Inference Attack;Privacy;Language Model", "primary_area": "", "supplementary_material": "", "author": "Ruixiang Tang;Gord Lueck;Rodolfo Quispe;Huseyin A Inan;Janardhan Kulkarni;Xia Hu", "authorids": "~Ruixiang_Tang1;~Gord_Lueck1;~Rodolfo_Quispe1;~Huseyin_A_Inan1;~Janardhan_Kulkarni2;~Xia_Hu4", "gender": "M;;M;M;;M", "homepage": "https://www.ruixiangtang.net/;;https://rquispec.github.io/;;;https://cs.rice.edu/~xh37/index.html", "dblp": "239/1928;;;54/1978;41/11141;256/9406.html", "google_scholar": "T575jsoAAAAJ;;-dX0cAIAAAAJ;_fxnybwAAAAJ;BGN4egcAAAAJ;https://scholar.google.com.tw/citations?user=pcCS60IAAAAJ", "or_profile": "~Ruixiang_Tang1;~Gord_Lueck1;~Rodolfo_Quispe1;~Janardhan_Kulkarni2;~Huseyin_Atahan_Inan1;~Xia_Hu2", "aff": "Rice University;Microsoft;Universidade Estadual de Campinas;Microsoft Research, Redmond;Microsoft;Rice University", "aff_domain": "rice.edu;microsoft.com;unicamp.br;microsoft.com;microsoft.com;rice.edu", "position": "PhD student;Principal Researcher;PhD student;Researcher;Researcher;Associate Professor", "bibtex": "@inproceedings{\ntang2023assessing,\ntitle={Assessing Privacy Risks in Language Models: A Case Study on Summarization Tasks},\nauthor={Ruixiang Tang and Gord Lueck and Rodolfo Quispe and Huseyin A Inan and Janardhan Kulkarni and Xia Hu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=1mGD6ZLTwv}\n}", "github": "", "project": "", "reviewers": "9rho;NwCg;Eh5X", "site": "https://openreview.net/forum?id=1mGD6ZLTwv", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;3", "excitement": "4;3;4", "reproducibility": "4;3;3", "correctness": "4;3;3", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0002-1661-3720;;;", "linkedin": "ruixiang-tang-91660717b/;gordl;rodolfoquispec/;;;", "aff_unique_index": "0;1;2;1;1;0", "aff_unique_norm": "Rice University;Microsoft;Universidade Estadual de Campinas", "aff_unique_dep": ";Microsoft Corporation;", "aff_unique_url": "https://www.rice.edu;https://www.microsoft.com;https://www.unicamp.br", "aff_unique_abbr": "Rice;Microsoft;UNICAMP", "aff_campus_unique_index": "1", "aff_campus_unique": ";Redmond", "aff_country_unique_index": "0;0;1;0;0;0", "aff_country_unique": "United States;Brazil" }, { "id": "1pxxAJwBXj", "title": "CorefPrompt: Prompt-based Event Coreference Resolution by Measuring Event Type and Argument Compatibilities", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Event coreference resolution (ECR) aims to group event mentions referring to the same real-world event into clusters. Most previous studies adopt the \"encoding first, then scoring\" framework, making the coreference judgment rely on event encoding. Furthermore, current methods struggle to leverage human-summarized ECR rules, e.g., coreferential events should have the same event type, to guide the model. To address these two issues, we propose a prompt-based approach, CorefPrompt, to transform ECR into a cloze-style MLM (masked language model) task. 
This allows for simultaneous event modeling and coreference discrimination within a single template, with a fully shared context. In addition, we introduce two auxiliary prompt tasks, event-type compatibility and argument compatibility, to explicitly demonstrate the reasoning process of ECR, which helps the model make final predictions. Experimental results show that our method CorefPrompt performs well in a state-of-the-art (SOTA) benchmark.", "keywords": "event coreference;event coreference resolution;prompt", "primary_area": "", "supplementary_material": "", "author": "Sheng Xu;PEIFENG LI;Qiaoming Zhu", "authorids": "~Sheng_Xu9;~PEIFENG_LI2;~Qiaoming_Zhu1", "gender": ";M;M", "homepage": "https://xiaosheng.blog/;http://web.suda.edu.cn/pfli/;https://scst.suda.edu.cn/0f/a2/c11250a528290/page.htm", "dblp": "10/1887-6.html;00/1996.html;28/1279", "google_scholar": "kEcZZPAAAAAJ;NY3GrVIAAAAJ;6BXGJK8AAAAJ", "or_profile": "~Sheng_Xu9;~PEIFENG_LI2;~Qiaoming_Zhu1", "aff": "Soochow University;Soochow University, China;Soochow University", "aff_domain": "suda.edu.cn;suda.edu.cn;suda.edu.cn", "position": "PhD student;Full Professor;Full Professor", "bibtex": "@inproceedings{\nxu2023corefprompt,\ntitle={CorefPrompt: Prompt-based Event Coreference Resolution by Measuring Event Type and Argument Compatibilities},\nauthor={Sheng Xu and PEIFENG LI and Qiaoming Zhu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=1pxxAJwBXj}\n}", "github": "", "project": "", "reviewers": "TxVr;PPJ5;B9w6", "site": "https://openreview.net/forum?id=1pxxAJwBXj", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;3;4", "excitement": "2;4;4", "reproducibility": "2;4;4", "correctness": "2;4;4", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.3333333333333335, "replies_avg": 10, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0003-4850-3128;0000-0002-2708-8976", "linkedin": ";;", "aff_unique_index": "0;0;0", "aff_unique_norm": "Soochow University", "aff_unique_dep": "", "aff_unique_url": "https://www.soochow.edu.cn", "aff_unique_abbr": "Soochow U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "1qJgZUAc8j", "title": "Exploring the Numerical Reasoning Capabilities of Language Models: A Comprehensive Analysis on Tabular Data", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Numerical data plays a crucial role in various real-world domains like finance, economics, and science. Thus, understanding and reasoning with numbers are essential in these fields. Recent benchmarks have assessed the numerical reasoning abilities of language models, revealing their limitations in limited and specific numerical aspects. In this paper, we propose a complete hierarchical taxonomy for numerical reasoning skills, encompassing over ten reasoning types across four levels: representation, number sense, manipulation, and complex reasoning. We conduct a comprehensive evaluation of state-of-the-art models on all reasoning types. To identify challenging reasoning types for different model types, we develop a diverse and extensive set of numerical probes and measure performance shifts. By employing a semi-automated approach, we focus on the tabular Natural Language Inference (TNLI) task as a case study. 
While no single model excels in all reasoning types, FlanT5 (few-/zero-shot) and GPT3.5 (few-shot) demonstrate strong overall numerical reasoning skills compared to other models in our probes.", "keywords": "numerical reasoning;probing language models;numeracy;tables;tabular data", "primary_area": "", "supplementary_material": "", "author": "Mubashara Akhtar;Abhilash Shankarampeta;Vivek Gupta;Arpit Patil;Oana Cocarascu;Elena Simperl", "authorids": "~Mubashara_Akhtar1;~Abhilash_Shankarampeta1;~Vivek_Gupta2;~Arpit_Patil1;~Oana_Cocarascu2;~Elena_Simperl1", "gender": "F;;M;M;;", "homepage": "https://www.mubasharaakhtar.com/;https://abhilashreddys.github.io/;https://vgupta123.github.io;https://arpit2607.github.io;;", "dblp": "324/3336;286/6689;71/5332-1;;185/7576;p/ElenaPaslaruBontasSimperl", "google_scholar": "x8K6TisAAAAJ;4qahycgAAAAJ;https://scholar.google.co.in/citations?user=Bs5H0S4AAAAJ;;https://scholar.google.co.uk/citations?hl=en;", "or_profile": "~Mubashara_Akhtar1;~Abhilash_Shankarampeta1;~Vivek_Gupta2;~Arpit_Patil1;~Oana_Cocarascu2;~Elena_Simperl1", "aff": "King's College London;Meesho;University of Utah, United States;University of Utah;King's College London;King's College London", "aff_domain": "kcl.ac.uk;meesho.com;cs.utah.edu;utah.edu;kcl.ac.uk;kcl.ac.uk", "position": "PhD student;Data Scientist;PhD student;MS student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nakhtar2023exploring,\ntitle={Exploring the Numerical Reasoning Capabilities of Language Models: A Comprehensive Analysis on Tabular Data},\nauthor={Mubashara Akhtar and Abhilash Shankarampeta and Vivek Gupta and Arpit Patil and Oana Cocarascu and Elena Simperl},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=1qJgZUAc8j}\n}", "github": "", "project": "", "reviewers": "CcwK;FvRi;pyHU;9VRp", "site": "https://openreview.net/forum?id=1qJgZUAc8j", "pdf_size": 0, "rating": "3;3;3;3", "confidence": "3;4;3;4", "excitement": "2;4;3;3", "reproducibility": "3;4;2;4", "correctness": "3;4;3;4", "rating_avg": 3.0, "confidence_avg": 3.5, "excitement_avg": 3.0, "reproducibility_avg": 3.25, "correctness_avg": 3.5, "replies_avg": 13, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0001-6711-7789;;0000-0002-5127-2319;;", "linkedin": ";abhilashreddys/;keviv9/;arpit-patil/;;", "aff_unique_index": "0;1;2;2;0;0", "aff_unique_norm": "King's College London;Meesho;University of Utah", "aff_unique_dep": ";;", "aff_unique_url": "https://www.kcl.ac.uk;https://www.meesho.com;https://www.utah.edu", "aff_unique_abbr": "KCL;Meesho;Utah", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2;2;0;0", "aff_country_unique": "United Kingdom;India;United States" }, { "id": "1tZxE1WPKz", "title": "Incorporating Object-Level Visual Context for Multimodal Fine-Grained Entity Typing", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Fine-grained entity typing (FGET) aims to assign appropriate fine-grained types to entity mentions within their context, which is an important foundational task in natural language processing. Previous approaches for FGET only utilized textual context information. However, in the form of short text, the contextual semantic information is often insufficient for FGET. In many real-world scenarios, text is often accompanied by images, and the visual context is valuable for FGET. 
To this end, we firstly propose a new task called multimodal fine-grained entity typing (MFGET). Then we construct a large-scale dataset for multimodal fine-grained entity typing called MFIGER based on FIGER. To fully leverage both textual and visual information, we propose a novel Multimodal Object-Level Visual Context Network (MOVCNet). MOVCNet can capture fine-grained semantic information by detecting objects in images, and effectively merge both textual and visual context. Experimental results demonstrate that our approach achieves superior classification performance compared to previous text-based approaches.", "keywords": "Fine-Grained Entity Typing;Multimodal Learning.", "primary_area": "", "supplementary_material": "", "author": "Ying Zhang;Wenbo Fan;Kehui Song;Yu Zhao;Xuhui Sui;Xiaojie Yuan", "authorids": "~Ying_Zhang7;~Wenbo_Fan1;~Kehui_Song1;~Yu_Zhao14;~Xuhui_Sui1;~Xiaojie_Yuan1", "gender": "F;M;F;F;;", "homepage": "https://dbis.nankai.edu.cn/2023/0322/c12139a506904/page.htm;https://github.com/Web-FAN;;https://scholar.google.com/citations?user=47fMA2QAAAAJ&hl=en;https://www.linkedin.com/in/%E6%97%AD%E8%BE%89-%E9%9A%8B-0305b334b/;https://dbis.nankai.edu.cn/2023/0322/c12139a506919/page.htm", "dblp": "13/6769-15;;197/1051.html;57/2056-43;321/6900.html;79/2280", "google_scholar": ";;;47fMA2QAAAAJ;;", "or_profile": "~Ying_Zhang7;~Wenbo_Fan1;~Kehui_Song1;~Yu_Zhao14;~Xuhui_Sui1;~Xiaojie_Yuan1", "aff": "Nankai University;Nankai University;Nankai University;Nankai University;Nankai University;Nankai University", "aff_domain": "nankai.edu.cn;nankai.edu.cn;nankai.edu.cn;nankai.edu.cn;nankai.edu.cn;nankai.edu.cn", "position": "Full Professor;MS student;Postdoc;MS student;PhD student;Full Professor", "bibtex": "@inproceedings{\nzhang2023incorporating,\ntitle={Incorporating Object-Level Visual Context for Multimodal Fine-Grained Entity Typing},\nauthor={Ying Zhang and Wenbo Fan and Kehui Song and Yu Zhao and Xuhui Sui and Xiaojie Yuan},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=1tZxE1WPKz}\n}", "github": "", "project": "", "reviewers": "JYT1;mhGN;MZdU", "site": "https://openreview.net/forum?id=1tZxE1WPKz", "pdf_size": 0, "rating": "2;2;2", "confidence": "4;3;3", "excitement": "2;4;3", "reproducibility": "2;4;4", "correctness": "2;5;3", "rating_avg": 2.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.0, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0003-4906-5828;;;0000-0002-0326-7152;0000-0001-5386-9912;0000-0002-5876-6856", "linkedin": ";;;;;", "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "Nankai University", "aff_unique_dep": "", "aff_unique_url": "http://www.nankai.edu.cn", "aff_unique_abbr": "NKU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "219K9bcUgC", "title": "Does Listener Gaze in Face-to-Face Interaction Follow the Entropy Rate Constancy Principle: An Empirical Study", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "It is generally assumed that language (written and spoken) follows the entropy rate constancy (ERC) principle, which states that the information density of a text is constant over time. 
Recently, this has also been found for nonverbal gestures used in monologue, but it is still unclear whether the ERC principle also applies to listeners' nonverbal signals. We focus on listeners' gaze behaviour extracted from video-recorded conversations and train a transformer-based neural sequence model to process the gaze data of the dialogues and compute its information density. We also compute the information density of the corresponding speech using a pre-trained language model. Our results show (1) that listeners' gaze behaviour in dialogues roughly follows the ERC principle, as well as (2) a congruence between information density of speech and listeners' gaze behaviour.", "keywords": "entropy rate constancy principle;information density;dialogue;nonverbal behaviour;gaze", "primary_area": "", "supplementary_material": "", "author": "Yu Wang;Hendrik Buschmeier", "authorids": "~Yu_Wang65;~Hendrik_Buschmeier1", "gender": "M;", "homepage": "https://ekvv.uni-bielefeld.de/pers_publ/publ/PersonDetail.jsp?personId=315283146&lang=DE;", "dblp": ";", "google_scholar": "jooa3MMAAAAJ;", "or_profile": "~Yu_Wang65;~Hendrik_Buschmeier1", "aff": "KTH Royal Institute of Technology;", "aff_domain": "kth.se;", "position": "Researcher;", "bibtex": "@inproceedings{\nwang2023does,\ntitle={Does Listener Gaze in Face-to-Face Interaction Follow the Entropy Rate Constancy Principle: An Empirical Study},\nauthor={Yu Wang and Hendrik Buschmeier},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=219K9bcUgC}\n}", "github": "", "project": "", "reviewers": "V3pQ;Ck9q;H3tb", "site": "https://openreview.net/forum?id=219K9bcUgC", "pdf_size": 0, "rating": "3;3;3", "confidence": "2;4;3", "excitement": "3;4;3", "reproducibility": "4;4;4", "correctness": "2;4;3", "rating_avg": 3.0, "confidence_avg": 3.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.0, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";", "linkedin": ";", "aff_unique_index": "0", "aff_unique_norm": "KTH Royal Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.kth.se", "aff_unique_abbr": "KTH", "aff_country_unique_index": "0", "aff_country_unique": "Sweden" }, { "id": "266rF9DyWk", "title": "Automatic Transcription of Handwritten Old Occitan Language", "track": "main", "status": "Long Main", "tldr": "", "abstract": "While existing neural network-based approaches have shown promising results in Handwritten Text Recognition (HTR) for high-resource languages and standardized/machine-written text, their application to low-resource languages often presents challenges, resulting in reduced effectiveness. In this paper, we propose an innovative HTR approach that leverages the Transformer architecture for recognizing handwritten Old Occitan language. Given the limited availability of data, which comprises only word pairs of graphical variants and lemmas, we develop and rely on elaborate data augmentation techniques for both text and image data. Our model combines a custom-trained Swin image encoder with a BERT text decoder, which we pre-train using a large-scale augmented synthetic data set and fine-tune on the small human-labeled data set. 
Experimental results reveal that our approach surpasses the performance of current state-of-the-art models for Old Occitan HTR, including open-source Transformer-based models such as a fine-tuned TrOCR and commercial applications like Google Cloud Vision. To nurture further research and development, we make our models, data sets, and code publicly available.", "keywords": "handwritten text recognition;low-resource languages;transformer;computer vision;natural language processing", "primary_area": "", "supplementary_material": "", "author": "Esteban Garces Arias;Vallari Pai;Matthias Sch\u00f6ffel;Christian Heumann;Matthias A\u00dfenmacher", "authorids": "~Esteban_Garces_Arias1;~Vallari_Pai1;~Matthias_Sch\u00f6ffel1;~Christian_Heumann1;~Matthias_A\u00dfenmacher1", "gender": "M;F;M;M;M", "homepage": "https://www.misoda.statistik.uni-muenchen.de/personen/mitarbeiter/garcesarias/index.html;;https://www.romanistik.uni-muenchen.de/personen/wiss_ma/schoeffel/index.html;https://www.misoda.statistik.uni-muenchen.de/personen/professoren/heumann/index.html;https://www.slds.stat.uni-muenchen.de/people/assenmacher/", "dblp": "352/2933;;355/0484.html;10/8427;256/0948", "google_scholar": "https://scholar.google.com/citations?hl=de;;;https://scholar.google.de/citations?user=H6LdyzoAAAAJ;qmQ-l84AAAAJ", "or_profile": "~Esteban_Garces_Arias1;~Vallari_Pai1;~Matthias_Sch\u00f6ffel1;~Christian_Heumann1;~Matthias_A\u00dfenmacher1", "aff": "Ludwig-Maximilians-Universit\u00e4t M\u00fcnchen;Ludwig-Maximilians-Universit\u00e4t M\u00fcnchen;Ludwig-Maximilians-Universit\u00e4t M\u00fcnchen;Ludwig-Maximilians-Universit\u00e4t M\u00fcnchen;Ludwig-Maximilians-Universit\u00e4t M\u00fcnchen", "aff_domain": "lmu.de;lmu.de;lmu.de;lmu.de;lmu.de", "position": "PhD student;MS student;PhD student;Associate Professor;Postdoc", "bibtex": "@inproceedings{\narias2023automatic,\ntitle={Automatic Transcription of Handwritten Old Occitan Language},\nauthor={Esteban Garces Arias and Vallari Pai and Matthias Sch{\\\"o}ffel and Christian Heumann and Matthias A{\\ss}enmacher},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=266rF9DyWk}\n}", "github": "", "project": "", "reviewers": "6eLD;udXX;GHMV", "site": "https://openreview.net/forum?id=266rF9DyWk", "pdf_size": 0, "rating": "5;5;5", "confidence": "5;5;1", "excitement": "4;4;3", "reproducibility": "5;3;3", "correctness": "4;4;3", "rating_avg": 5.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;0000-0003-2154-5774", "linkedin": ";vallari-pai/;;;m-assenmacher/", "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Ludwig-Maximilians-Universit\u00e4t M\u00fcnchen", "aff_unique_dep": "", "aff_unique_url": "https://www.lmu.de", "aff_unique_abbr": "LMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "Germany" }, { "id": "27HNeESZQF", "title": "PromptARA: Improving Deep Representation in Hybrid Automatic Readability Assessment with Prompt and Orthogonal Projection", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Readability assessment aims to automatically classify texts based on readers' reading levels. 
The hybrid automatic readability assessment (ARA) models using both deep and linguistic features have attracted rising attention in recent years due to their impressive performance. However, deep features are not fully explored due to the scarcity of training data, and the fusion of deep and linguistic features is not very effective in existing hybrid ARA models. In this paper, we propose a novel hybrid ARA model called PromptARA through employing prompts to improve deep feature representations and an orthogonal projection layer to fuse both deep and linguistic features. A series of experiments are conducted over four English and two Chinese corpora to show the effectiveness of the proposed model. Experimental results demonstrate that the proposed model is superior to state-of-the-art models.", "keywords": "Readability assessment; Deep learning; Prompt learning; Orthogonal projection layer; Linguistic feature", "primary_area": "", "supplementary_material": "", "author": "Jinshan Zeng;Xianglong Yu;Xianchao Tong;Wenyan Xiao", "authorids": "~Jinshan_Zeng1;~Xianglong_Yu1;~Xianchao_Tong1;~Wenyan_Xiao2", "gender": "M;;F;M", "homepage": ";https://blog.csdn.net/tongxianchao?spm=1000.2115.3001.5343;;http://mail.jxnu.edu.cn/cgi-bin/frame_html?sid=Gw21PAIzCQCLqkou,2&sign_type=&r=fe2709411ce6548e03e00f7cffbf9b09", "dblp": "57/10341;;;", "google_scholar": "au5gb2EAAAAJ;;;", "or_profile": "~Jinshan_Zeng1;~Xianchao_Tong1;~Wenyan_Xiao2;~Xianglong_Yv1", "aff": "Jiangxi Normal University;Jiangxi Normal University;Jiangxi University of Science and Technology;Jiangxi Normal University", "aff_domain": "jxnu.edu.cn;jxnu.edu.cn;jxust.edu.cn;jxnu.edu.cn", "position": "Full Professor;MS student;Researcher;MS student", "bibtex": "@inproceedings{\nzeng2023promptara,\ntitle={Prompt{ARA}: Improving Deep Representation in Hybrid Automatic Readability Assessment with Prompt and Orthogonal Projection},\nauthor={Jinshan Zeng and Xianglong Yu and Xianchao Tong and Wenyan Xiao},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=27HNeESZQF}\n}", "github": "", "project": "", "reviewers": "otsg;p25R;JPZk;b6jz", "site": "https://openreview.net/forum?id=27HNeESZQF", "pdf_size": 0, "rating": "4;4;4;4", "confidence": "4;4;3;5", "excitement": "4;3;3;3", "reproducibility": "4;3;3;5", "correctness": "3;3;3;4", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.25, "reproducibility_avg": 3.75, "correctness_avg": 3.25, "replies_avg": 14, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0003-1719-3358;;0000-0001-6253-2414;", "linkedin": ";;;", "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Jiangxi Normal University;Jiangxi University of Science and Technology", "aff_unique_dep": ";", "aff_unique_url": "http://www.jxnu.edu.cn;http://www.jxust.edu.cn", "aff_unique_abbr": "JXNU;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "2AF1OrD7Y1", "title": "Rethinking Word-Level Auto-Completion in Computer-Aided Translation", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Word-level auto-completion (WLAC) plays a crucial role in Computer-Assisted Translation. While previous studies have primarily focused on designing complex model architectures, this paper takes a different perspective by rethinking the fundamental question: what kind of words are good auto-completions? 
We introduce a measurable criterion to address this question and discover that existing WLAC models often fail to meet this criterion. Building upon this observation, we propose an effective approach to enhance WLAC performance by promoting adherence to the criterion. Notably, the proposed approach is general and can be applied to various encoder-based architectures. Through extensive experiments, we demonstrate that our approach outperforms the top-performing system submitted to the WLAC shared tasks in WMT2022, while utilizing significantly smaller model sizes.", "keywords": "machine translation;interactive machine translation;computer assisted translation;word level auto completion", "primary_area": "", "supplementary_material": "", "author": "Xingyu Chen;Lemao Liu;Guoping Huang;Zhirui Zhang;Mingming Yang;Shuming Shi;Rui Wang", "authorids": "~Xingyu_Chen5;~Lemao_Liu3;~Guoping_Huang2;~Zhirui_Zhang1;~Mingming_Yang1;~Shuming_Shi1;~Rui_Wang10", "gender": "M;M;M;M;M;M;M", "homepage": "https://speechlab.sjtu.edu.cn/members/xingyu-chen;;;;;https://lemaoliu.github.io/homepage/;https://wangruinlp.github.io/", "dblp": ";165/3047;202/1838;https://dblp.uni-trier.de/pid/29/3866;s/ShumingShi;41/10887.html;w/RuiWang15", "google_scholar": "https://scholar.google.com/citations?hl=en;xSkkA7UAAAAJ;C8Ylo7sAAAAJ;wh6aMMcAAAAJ;Lg31AKMAAAAJ;;oTU0v5IAAAAJ", "or_profile": "~Xingyu_Chen5;~Guoping_Huang2;~Zhirui_Zhang1;~Mingming_Yang1;~Shuming_Shi1;~lemao_liu1;~Rui_Wang7", "aff": "Shanghai Jiaotong University;Tencent AI Lab;Tencent AI Lab;Tencent AI Lab;Tencent AI Lab;Tencent;Shanghai Jiaotong University", "aff_domain": "sjtu.edu.cn;tencent.com;tencent.com;tencent.com;tencent.com;tencent.com;sjtu.edu.cn", "position": "PhD student;Researcher;Senior Researcher;Researcher;Principal Researcher;Researcher;Associate Professor", "bibtex": "@inproceedings{\nchen2023rethinking,\ntitle={Rethinking Word-Level Auto-Completion in Computer-Aided Translation},\nauthor={Xingyu Chen and Lemao Liu and Guoping Huang and Zhirui Zhang and Mingming Yang and Shuming Shi and Rui Wang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=2AF1OrD7Y1}\n}", "github": "", "project": "", "reviewers": "Nd7e;4EMT;1XZn", "site": "https://openreview.net/forum?id=2AF1OrD7Y1", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;3;3", "excitement": "4;4;4", "reproducibility": "4;4;3", "correctness": "4;4;4", "rating_avg": 5.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 4.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0000-0001-9896-3232;;;0000-0001-8007-2503", "linkedin": ";guoping-huang-473708b9/;;;;;", "aff_unique_index": "0;1;1;1;1;1;0", "aff_unique_norm": "Shanghai Jiao Tong University;Tencent", "aff_unique_dep": ";Tencent AI Lab", "aff_unique_url": "https://www.sjtu.edu.cn;https://ai.tencent.com", "aff_unique_abbr": "SJTU;Tencent AI Lab", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "2FDty4mLqP", "title": "Open Information Extraction via Chunks", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Open Information Extraction (OIE) aims to extract relational tuples from open-domain sentences. Existing OIE systems split a sentence into tokens and recognize token spans as tuple relations and arguments. 
We instead propose Sentence as Chunk sequence (SaC) and recognize chunk spans as tuple relations and arguments. We argue that SaC has better properties for OIE than sentence as token sequence, and evaluate four choices of chunks (i.e., CoNLL chunks, OIA simple phrases, noun phrases, and spans from SpanOIE). Also, we propose a simple end-to-end BERT-based model, Chunk-OIE, for sentence chunking and tuple extraction on top of SaC. Chunk-OIE achieves state-of-the-art results on multiple OIE datasets, showing that SaC benefits the OIE task.", "keywords": "Information Extraction;sentence chunking", "primary_area": "", "supplementary_material": "", "author": "Kuicai Dong;Aixin Sun;Jung-jae Kim;Xiaoli Li", "authorids": "~Kuicai_Dong1;~Aixin_Sun1;~Jung-jae_Kim1;~Xiaoli_Li1", "gender": "M;M;;M", "homepage": ";https://personal.ntu.edu.sg/axsun/;;https://personal.ntu.edu.sg/xlli/", "dblp": "292/3629;78/5155;21/4791;l/XiaoliLi.html", "google_scholar": "https://scholar.google.com/citations?hl=en;https://scholar.google.com.sg/citations?user=wyKGVKUAAAAJ;iMKgkrQAAAAJ;E3yQKloAAAAJ", "or_profile": "~Kuicai_Dong1;~Aixin_Sun1;~Jung-jae_Kim1;~Xiaoli_Li1", "aff": "Nanyang Technological University;Nanyang Technological University;A*STAR;A*STAR", "aff_domain": "e.ntu.edu.sg;ntu.edu.sg;a-star.edu.sg;a-star.edu.sg", "position": "PhD student;Associate Professor;Researcher;Principal Researcher", "bibtex": "@inproceedings{\ndong2023open,\ntitle={Open Information Extraction via Chunks},\nauthor={Kuicai Dong and Aixin Sun and Jung-jae Kim and Xiaoli Li},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=2FDty4mLqP}\n}", "github": "", "project": "", "reviewers": "Pzff;zi3N;s459", "site": "https://openreview.net/forum?id=2FDty4mLqP", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;3", "excitement": "3;3;3", "reproducibility": "3;3;2", "correctness": "3;2;3", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.0, "reproducibility_avg": 2.6666666666666665, "correctness_avg": 2.6666666666666665, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0003-0764-4258;;0000-0002-0762-6562", "linkedin": ";aixin-sun-%E5%AD%99%E7%88%B1%E6%AC%A3-43056622/;;li-xiaoli-41027ba/", "aff_unique_index": "0;0;1;1", "aff_unique_norm": "Nanyang Technological University;Agency for Science, Technology and Research", "aff_unique_dep": ";", "aff_unique_url": "https://www.ntu.edu.sg;https://www.a-star.edu.sg", "aff_unique_abbr": "NTU;A*STAR", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Singapore" }, { "id": "2IfYI3dkX7", "title": "RexUIE: A Recursive Method with Explicit Schema Instructor for Universal Information Extraction", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Universal Information Extraction (UIE) is an area of interest due to the challenges posed by varying targets, heterogeneous structures, and demand-specific schemas. 
\nPrevious works have achieved success by unifying a few tasks, such as Named Entity Recognition (NER) and Relation Extraction (RE), while they fall short of being true UIE models particularly when extracting other general schemas such as quadruples and quintuples.\nAdditionally, these models used an implicit structural schema instructor, which could lead to incorrect links between types, hindering the model's generalization and performance in low-resource scenarios. \nIn this paper, we redefine the true UIE with a formal formulation that covers almost all extraction schemas. To the best of our knowledge, we are the first to introduce UIE for any kind of schemas. \nIn addition, we propose RexUIE, which is a Recursive Method with Explicit Schema Instructor for UIE. To avoid interference between different types, we reset the position ids and attention mask matrices. RexUIE shows strong performance under both full-shot and few-shot settings and achieves state-of-the-art results on the tasks of extracting complex schemas.", "keywords": "Universal Information Extraction;Few-Shot Learning", "primary_area": "", "supplementary_material": "", "author": "Chengyuan Liu;Fubang Zhao;Yangyang Kang;Jingyuan Zhang;Xiang Zhou;Changlong Sun;Kun Kuang;Fei Wu", "authorids": "~Chengyuan_Liu1;~Fubang_Zhao3;~Yangyang_Kang1;~Jingyuan_Zhang4;~Xiang_Zhou11;~Changlong_Sun2;~Kun_Kuang1;~Fei_Wu1", "gender": "M;M;M;M;M;M;M;M", "homepage": "https://github.com/liuchengyuan123;;https://github.com/zjy-ucas;https://person.zju.edu.cn/0020355;;http://kunkuang.github.io;https://person.zju.edu.cn/wufei;", "dblp": "175/9334;162/0109;;;https://dblp.uni-trier.de/pers/hd/s/Sun:Changlong;194/4245;84/3254-1;https://dblp.uni-trier.de/pid/249/5765.html", "google_scholar": "https://scholar.google.hk/citations?hl=zh-CN;https://scholar.google.com/citations?hl=zh-CN;pdj-Em0AAAAJ;;https://scholar.google.com/citations?;https://scholar.google.com.hk/citations?user=FOsNiMQAAAAJ;XJLn4MYAAAAJ;", "or_profile": "~Chengyuan_Liu1;~Yangyang_Kang1;~Jingyuan_Zhang4;~Xiang_Zhou11;~Changlong_Sun2;~Kun_Kuang1;~Fei_Wu1;~FUBANG_ZHAO2", "aff": "Zhejiang University;Alibaba Group;Alibaba Group;Zhejiang University;Alibaba Group;Zhejiang University;Zhejiang University;Alibaba Group", "aff_domain": "zju.edu.cn;alibaba.com;alibaba-inc.com;zju.edu.cn;alibaba-inc.com;zju.edu.cn;zju.edu.cn;alibaba-inc.com", "position": "PhD student;Staff Algorithm Engineer;Researcher;Associate Professor;Researcher;Associate Professor;Full Professor;Researcher", "bibtex": "@inproceedings{\nliu2023rexuie,\ntitle={Rex{UIE}: A Recursive Method with Explicit Schema Instructor for Universal Information Extraction},\nauthor={Chengyuan Liu and Fubang Zhao and Yangyang Kang and Jingyuan Zhang and Xiang Zhou and Changlong Sun and Kun Kuang and Fei Wu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=2IfYI3dkX7}\n}", "github": "", "project": "", "reviewers": "SsUQ;zhWZ;zUQ9", "site": "https://openreview.net/forum?id=2IfYI3dkX7", "pdf_size": 0, "rating": "4;4;4", "confidence": "5;3;4", "excitement": "3;4;3", "reproducibility": "5;5;4", "correctness": "4;4;4", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.666666666666667, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 8, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;0009-0000-7528-8131;;", "linkedin": ";;;;;;;", "aff_unique_index": "0;1;1;0;1;0;0;1", 
"aff_unique_norm": "Zhejiang University;Alibaba Group", "aff_unique_dep": ";", "aff_unique_url": "https://www.zju.edu.cn;https://www.alibaba.com", "aff_unique_abbr": "ZJU;Alibaba", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "2KTvN4Edvl", "title": "Guideline Learning for In-Context Information Extraction", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Large language models (LLMs) can perform a new task by merely conditioning on task instructions and a few input-output examples, without optimizing any parameters. This is called In-Context Learning (ICL). In-context Information Extraction (IE) has recently garnered attention in the research community. However, the performance of In-context IE generally lags behind the state-of-the-art supervised expert models. We highlight a key reason for this shortfall: underspecified task description. The limited-length context struggles to thoroughly express the intricate IE task instructions and various edge cases, leading to misalignment in task comprehension with humans. In this paper, we propose a Guideline Learning (GL) framework for In-context IE which reflectively learns and follows guidelines. During the learning phrase, GL automatically synthesizes a set of guidelines based on a few error cases, and during inference, GL retrieves helpful guidelines for better ICL. Moreover, we propose a self-consistency-based active learning method to enhance the efficiency of GL. Experiments on event extraction and relation extraction show that GL can significantly improve the performance of in-context IE.", "keywords": "information extraction;in-context learning;large language models", "primary_area": "", "supplementary_material": "", "author": "Chaoxu Pang;Yixuan Cao;Qiang Ding;Ping Luo", "authorids": "~Chaoxu_Pang1;~Yixuan_Cao1;~Qiang_Ding1;~Ping_Luo1", "gender": "M;M;M;M", "homepage": "https://shine-starburst-4ef.notion.site/Chaoxu-Pang-s-Homepage-8b5ebcb5639140099530b979a9116f70;https://yixuancao.github.io/;https://github.com/DingQiang2018;https://ping-luo.github.io/", "dblp": ";217/4359;;54/4989-1.html", "google_scholar": ";Q5XWFacAAAAJ;;", "or_profile": "~Chaoxu_Pang1;~Yixuan_Cao1;~Qiang_Ding1;~Ping_Luo1", "aff": "Institute of Computing Technology, Chinese Academy of Sciences;Institute of Computing Technology, Chinese Academy of Sciences;Institute of Computing Technology, Chinese Academy of Sciences;Institute of Computing Technology, Chinese Academy of Sciences", "aff_domain": "ict.ac.cn;ict.ac.cn;ict.ac.cn;ict.ac.cn", "position": "PhD student;Associate Professor;PhD student;Associate Professor", "bibtex": "@inproceedings{\npang2023guideline,\ntitle={Guideline Learning for In-Context Information Extraction},\nauthor={Chaoxu Pang and Yixuan Cao and Qiang Ding and Ping Luo},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=2KTvN4Edvl}\n}", "github": "", "project": "", "reviewers": "NFNq;B35m;6ZCo", "site": "https://openreview.net/forum?id=2KTvN4Edvl", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;4", "excitement": "4;4;4", "reproducibility": "3;4;3", "correctness": "4;3;3", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 4.0, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": 
";0000-0002-1721-5927;;", "linkedin": ";;;", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Chinese Academy of Sciences", "aff_unique_dep": "Institute of Computing Technology", "aff_unique_url": "http://www.ict.ac.cn", "aff_unique_abbr": "CAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "2MDPYm3FPl", "title": "Hallucination Detection for Generative Large Language Models by Bayesian Sequential Estimation", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Large Language Models (LLMs) have made remarkable advancements in the field of natural language generation. However, the propensity of LLMs to generate inaccurate or non-factual content, termed \"hallucinations\", remains a significant challenge. Current hallucination detection methods often necessitate the retrieval of great numbers of relevant evidence, thereby increasing response times. We introduce a unique framework that leverages statistical decision theory and Bayesian sequential analysis to optimize the trade-off between costs and benefits during the hallucination detection process. This approach does not require a predetermined number of observations. Instead, the analysis proceeds in a sequential manner, enabling an expeditious decision towards \"belief\" or \"disbelief\" through a stop-or-continue strategy. Extensive experiments reveal that this novel framework surpasses existing methods in both efficiency and precision of hallucination detection. Furthermore, it requires fewer retrieval steps on average, thus decreasing response times.", "keywords": "Hallucination dectection;fact checking;Bayesian sequential estimation;generative large language models", "primary_area": "", "supplementary_material": "", "author": "Xiaohua Wang;Yuliang Yan;Longtao Huang;Xiaoqing Zheng;Xuanjing Huang", "authorids": "~Xiaohua_Wang2;~Yuliang_Yan2;~Longtao_Huang2;~Xiaoqing_Zheng2;~Xuanjing_Huang1", "gender": ";M;M;;F", "homepage": ";https://yuliangyan0807.github.io/;http://people.ucas.edu.cn/~huanglongtao?language=en;;https://xuanjing-huang.github.io/", "dblp": ";;76/10119;;05/6735-1", "google_scholar": ";ZukVBVUAAAAJ;EQDfV9cAAAAJ;;RGsMgZA4H78C", "or_profile": "~Xiaohua_Wang2;~Yuliang_Yan2;~Longtao_Huang2;~Xiaoqing_Zheng2;~Xuanjing_Huang1", "aff": "Fudan University;Fudan University;Alibaba Group;;Fudan University", "aff_domain": "fudan.edu.cn;fudan.edu.cn;alibaba-inc.com;;fudan.edu.cn", "position": "PhD student;MS student;Researcher;;Full Professor", "bibtex": "@inproceedings{\nwang2023hallucination,\ntitle={Hallucination Detection for Generative Large Language Models by Bayesian Sequential Estimation},\nauthor={Xiaohua Wang and Yuliang Yan and Longtao Huang and Xiaoqing Zheng and Xuanjing Huang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=2MDPYm3FPl}\n}", "github": "", "project": "", "reviewers": "o6Kk;rSTF;6dtQ", "site": "https://openreview.net/forum?id=2MDPYm3FPl", "pdf_size": 0, "rating": "5;5;5", "confidence": "3;3;3", "excitement": "3;4;4", "reproducibility": "3;3;4", "correctness": "3;3;4", "rating_avg": 5.0, "confidence_avg": 3.0, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-1124-855X;;;;0000-0001-9197-9426", "linkedin": ";;;;", 
"aff_unique_index": "0;0;1;0", "aff_unique_norm": "Fudan University;Alibaba Group", "aff_unique_dep": ";", "aff_unique_url": "https://www.fudan.edu.cn;https://www.alibaba.com", "aff_unique_abbr": "Fudan;Alibaba", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "2MXXycs2T6", "title": "QADYNAMICS: Training Dynamics-Driven Synthetic QA Diagnostic for Zero-Shot Commonsense Question Answering", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "Zero-shot commonsense Question-Answering (QA) requires models to reason about general situations beyond specific benchmarks. State-of-the-art approaches fine-tune language models on QA pairs constructed from CommonSense Knowledge Bases (CSKBs) to equip the models with more commonsense knowledge in a QA context. However, current QA synthesis protocols may introduce noise from the CSKBs and generate ungrammatical questions and false negative options, which impede the model\u2019s ability to generalize. To address these issues, we propose QADYNAMICS, a training dynamics-driven framework for QA diagnostics and refinement. Our approach analyzes the training dynamics of each QA pair at both the question level and option level, discarding machine-detectable artifacts by removing uninformative QA pairs and mislabeled or false-negative options. Extensive experiments demonstrate the effectiveness of our approach, which outperforms all baselines while using only 33% of the synthetic data, even including LLMs such as ChatGPT. Moreover, expert evaluations confirm that our framework significantly improves the quality of QA synthesis. Our code and model checkpoints are available at https://github.com/HKUST-KnowComp/QaDynamics.", "keywords": "commonsense reasoning;question-answering;training dynamics;zero shot", "primary_area": "", "supplementary_material": "", "author": "Haochen Shi;Weiqi Wang;Tianqing Fang;Baixuan Xu;Wenxuan Ding;Xin Liu;Yangqiu Song", "authorids": "~Haochen_Shi4;~Weiqi_Wang1;~Tianqing_Fang1;~Baixuan_Xu1;~Wenxuan_Ding1;~Xin_Liu9;~Yangqiu_Song1", "gender": "M;M;M;F;M;M;M", "homepage": "https://mighty-weaver.github.io/;http://fangtq.com/;https://tonyxu12138.github.io/;https://wenwen-d.github.io/;https://www.cse.ust.hk/~xliucr/;https://www.cse.ust.hk/~yqsong/;", "dblp": "51/5775-1;283/4921;187/0059.html;36/1339-1;76/1820-39.html;86/2159;", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com.hk/citations?user=Tb3rc34AAAAJ;YhWGUKUAAAAJ;GyHBjwQAAAAJ;https://scholar.google.com.hk/citations?user=WvC4upQAAAAJ;MdQZ-q8AAAAJ;1dteS3wAAAAJ", "or_profile": "~Weiqi_Wang1;~Tianqing_Fang1;~Baixuan_Xu1;~Wenxuan_Ding1;~Xin_Liu9;~Yangqiu_Song1;~Haochen_SHI3", "aff": "Hong Kong University of Science and Technology;Hong Kong University of Science and Technology;Hong Kong University of Science and Technology;Hong Kong University of Science and Technology;Hong Kong University of Science and Technology;Hong Kong University of Science and Technology;Hong Kong University of Science and Technology", "aff_domain": "ust.hk;ust.hk;ust.hk;ust.hk;ust.hk;ust.hk;ust.hk", "position": "PhD student;PhD student;Undergrad student;Undergrad student;PhD student;Associate Professor;Undergrad student", "bibtex": "@inproceedings{\nshi2023qadynamics,\ntitle={{QADYNAMICS}: Training Dynamics-Driven Synthetic {QA} Diagnostic for Zero-Shot Commonsense Question Answering},\nauthor={Haochen Shi and Weiqi Wang and Tianqing Fang and Baixuan Xu and Wenxuan Ding 
and Xin Liu and Yangqiu Song},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=2MXXycs2T6}\n}", "github": "", "project": "", "reviewers": "k4z2;ukoU;3DE3", "site": "https://openreview.net/forum?id=2MXXycs2T6", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;5;4", "excitement": "3;3;3", "reproducibility": "3;4;3", "correctness": "3;3;3", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.0, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-1617-9805;;0000-0001-8175-7598;;0000-0001-9610-9526;0000-0002-7818-6090;", "linkedin": "weiqi-wang-a49b5019a/;;;wenxuan-ding-0b299923b/;xin-liu-179830143;yqsong/;shi-haochen-8b02901bb/", "aff_unique_index": "0;0;0;0;0;0;0", "aff_unique_norm": "Hong Kong University of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.ust.hk", "aff_unique_abbr": "HKUST", "aff_campus_unique_index": "0;0;0;0;0;0;0", "aff_campus_unique": "Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "2MiTZxLFA9", "title": "GRACE: Discriminator-Guided Chain-of-Thought Reasoning", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "In the context of multi-step reasoning, e.g., with chain-of-thought, language models (LMs) can easily assign a high likelihood to incorrect steps. As a result, decoding strategies that optimize for solution likelihood often yield incorrect solutions.\nTo address this issue, we propose Guiding chain-of-thought ReAsoning with a CorrectnEss Discriminator (GRACE), a stepwise decoding approach that steers the decoding process towards producing correct reasoning steps. \nGRACE employs a discriminator trained with a contrastive loss over correct and incorrect steps, which is used during decoding to score next-step candidates based on their correctness. \nImportantly, GRACE only requires sampling from the LM, without the need for LM training or fine-tuning.\nUsing models from FLAN-T5 and LLaMA families, we evaluate GRACE over four math and two symbolic reasoning tasks, where it exhibits substantial performance gains compared to greedy decoding, verifiers, and self-consistency in most settings. When further combined with self-consistency, GRACE outperforms all the baselines by sizeable margins. 
Human and LLM evaluations over GSM8K show that GRACE not only improves the final answer accuracy but also the correctness of the intermediate reasoning.", "keywords": "chain-of-thought;guided decoding;math reasoning;symbolic reasoning;multi-step reasoning;large language models", "primary_area": "", "supplementary_material": "", "author": "Muhammad Khalifa;Lajanugen Logeswaran;Moontae Lee;Honglak Lee;Lu Wang", "authorids": "~Muhammad_Khalifa2;~Lajanugen_Logeswaran1;~Moontae_Lee1;~Honglak_Lee2;~Lu_Wang9", "gender": "M;M;;F;M", "homepage": "https://mukhal.github.io;https://sites.google.com/umich.edu/llajan/;https://moontae.people.uic.edu;https://web.eecs.umich.edu/~wangluxy/;http://web.eecs.umich.edu/~honglak", "dblp": "246/4401;157/3603;132/1761;49/3800-8;58/2562", "google_scholar": "tnmUr30AAAAJ;dcv4kpIAAAAJ;BMvYy9cAAAAJ;uczqEdUAAAAJ;fmSHtE8AAAAJ", "or_profile": "~Muhammad_Khalifa2;~Lajanugen_Logeswaran1;~Moontae_Lee1;~Lu_Wang9;~Honglak_Lee1", "aff": "University of Michigan - Ann Arbor;LG AI Research;University of Illinois, Chicago;University of Michigan;University of Michigan", "aff_domain": "umich.edu;lgresearch.ai;uic.edu;umich.edu;umich.edu", "position": "PhD student;Researcher;Assistant Professor;Associate Professor;Associate Professor", "bibtex": "@inproceedings{\nkhalifa2023grace,\ntitle={{GRACE}: Discriminator-Guided Chain-of-Thought Reasoning},\nauthor={Muhammad Khalifa and Lajanugen Logeswaran and Moontae Lee and Honglak Lee and Lu Wang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=2MiTZxLFA9}\n}", "github": "", "project": "", "reviewers": "GMbU;2yfe;BkE7", "site": "https://openreview.net/forum?id=2MiTZxLFA9", "pdf_size": 0, "rating": "4;4;4", "confidence": "5;3;4", "excitement": "3;4;3", "reproducibility": "5;4;4", "correctness": "4;4;3", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.333333333333333, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0001-5542-3463;;", "linkedin": "muhammaad-khalifa-9a467b100/;;moontae-lee-975248123/;;", "aff_unique_index": "0;1;2;0;0", "aff_unique_norm": "University of Michigan;LG;University of Illinois at Chicago", "aff_unique_dep": ";LG AI Research;", "aff_unique_url": "https://www.umich.edu;https://www.lgaires.com;https://www.uic.edu", "aff_unique_abbr": "UM;LG AI;UIC", "aff_campus_unique_index": "0;2", "aff_campus_unique": "Ann Arbor;;Chicago", "aff_country_unique_index": "0;1;0;0;0", "aff_country_unique": "United States;South Korea" }, { "id": "2O39az85g6", "title": "Exploring Context-Aware Evaluation Metrics for Machine Translation", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "Previous studies on machine translation evaluation mostly focused on the quality of individual sentences, while overlooking the important role of contextual information. Although WMT Metrics Shared Tasks have introduced context content into the human annotations of translation evaluation since 2019, the relevant metrics and methods still did not take advantage of the corresponding context. In this paper, we propose a context-aware machine translation evaluation metric called Cont-COMET, built upon the effective COMET framework. 
Our approach simultaneously considers the preceding and subsequent contexts of the sentence to be evaluated and trains our metric to be aligned with the setting during human annotation. We also introduce a content selection method to extract and utilize the most relevant information. The experiments and evaluation of Cont-COMET on the official test framework from WMT show improvements in both system-level and segment-level assessments.", "keywords": "Machine Translation;Automatic Evaluation Metric;Context Awareness", "primary_area": "", "supplementary_material": "", "author": "Xinyu Hu;Xunjian Yin;Xiaojun Wan", "authorids": "~Xinyu_Hu1;~Xunjian_Yin1;~Xiaojun_Wan1", "gender": ";;M", "homepage": ";https://xunjianyin.github.io/;https://wanxiaojun.github.io", "dblp": ";320/5519;07/1521", "google_scholar": ";PociQ5EAAAAJ;lTTeBdkAAAAJ", "or_profile": "~Xinyu_Hu1;~Xunjian_Yin1;~Xiaojun_Wan1", "aff": ";Peking University;Peking University", "aff_domain": ";pku.edu.cn;pku.edu.cn", "position": ";MS student;Full Professor", "bibtex": "@inproceedings{\nhu2023exploring,\ntitle={Exploring Context-Aware Evaluation Metrics for Machine Translation},\nauthor={Xinyu Hu and Xunjian Yin and Xiaojun Wan},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=2O39az85g6}\n}", "github": "", "project": "", "reviewers": "7yHH;KJ38;844M;C6zw", "site": "https://openreview.net/forum?id=2O39az85g6", "pdf_size": 0, "rating": "3;3;3;3", "confidence": "4;4;3;4", "excitement": "2;3;4;3", "reproducibility": "3;3;4;4", "correctness": "3;4;3;3", "rating_avg": 3.0, "confidence_avg": 3.75, "excitement_avg": 3.0, "reproducibility_avg": 3.5, "correctness_avg": 3.25, "replies_avg": 14, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": ";;", "aff_unique_index": "0;0", "aff_unique_norm": "Peking University", "aff_unique_dep": "", "aff_unique_url": "http://www.pku.edu.cn", "aff_unique_abbr": "Peking U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "id": "2Rdfdri2oT", "title": "Making Large Language Models Better Data Creators", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Although large language models (LLMs) have advanced the state-of-the-art in NLP significantly, deploying them for downstream applications is still challenging due to cost, responsiveness, control, or concerns around privacy and security.\nAs such, trainable models are still the preferred option in some cases.\nHowever, these models still require human-labeled data for optimal performance, which is expensive and time-consuming to obtain.\nIn order to address this issue, several techniques to reduce human effort involve labeling or generating data using LLMs.\nAlthough these methods are effective for certain applications, in practice they encounter difficulties in real-world scenarios.\nLabeling data requires careful data selection, while generating data necessitates task-specific prompt engineering.\nIn this paper, we propose a unified data creation pipeline that requires only a single formatting example, and which is applicable to a broad range of tasks, including traditionally problematic ones with semantically devoid label spaces.\nIn our experiments we demonstrate that instruction-following LLMs are highly cost-effective data creators, and that models trained with these data exhibit performance better than those trained with 
human-labeled data (by up to 17.5\\%) on out-of-distribution evaluation, while maintaining comparable performance on in-distribution tasks. These results have important implications for the robustness of NLP systems deployed in the real world.", "keywords": "Large Language Model;Data Creation", "primary_area": "", "supplementary_material": "", "author": "Dong-Ho Lee;Jay Pujara;Mohit Sewak;Ryen W White;Sujay Kumar Jauhar", "authorids": "~Dong-Ho_Lee1;~Jay_Pujara1;~Mohit_Sewak1;~Ryen_W_White1;~Sujay_Kumar_Jauhar2", "gender": "M;;;;M", "homepage": "https://danny-lee.info;https://www.jaypujara.org;https://scholar.google.co.in/citations?user=9kreV1oAAAAJ&hl=en;;https://www.microsoft.com/en-us/research/people/sjauhar/", "dblp": ";65/10103;224/4600.html;w/RyenWWhite;136/8739.html", "google_scholar": "oei2TXwAAAAJ;yvdSr4AAAAAJ;https://scholar.google.co.in/citations?user=9kreV1oAAAAJ;;V93soS4AAAAJ", "or_profile": "~Dong-Ho_Lee1;~Jay_Pujara1;~Mohit_Sewak1;~Ryen_W_White1;~Sujay_Kumar_Jauhar2", "aff": "Snap Inc.;University of Southern California;Microsoft;Microsoft;Microsoft Research", "aff_domain": "snapchat.com;usc.edu;microsoft.com;microsoft.com;microsoft.com", "position": "Intern;Assistant Professor;Principal Researcher;Research Manager;Researcher", "bibtex": "@inproceedings{\nlee2023making,\ntitle={Making Large Language Models Better Data Creators},\nauthor={Dong-Ho Lee and Jay Pujara and Mohit Sewak and Ryen W White and Sujay Kumar Jauhar},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=2Rdfdri2oT}\n}", "github": "", "project": "", "reviewers": "zQ9a;6qx1;47zu", "site": "https://openreview.net/forum?id=2Rdfdri2oT", "pdf_size": 0, "rating": "5;5;5", "confidence": "3;4;4", "excitement": "4;1;4", "reproducibility": "4;3;3", "correctness": "4;2;4", "rating_avg": 5.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.0, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.3333333333333335, "replies_avg": 10, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0001-6921-1744;0000-0001-8375-5713;;", "linkedin": ";pujara;mohitsewak/;;sujay-kumar-jauhar-99579131", "aff_unique_index": "0;1;2;2;2", "aff_unique_norm": "Snap Inc.;University of Southern California;Microsoft", "aff_unique_dep": ";;Microsoft Corporation", "aff_unique_url": "https://www.snapinc.com;https://www.usc.edu;https://www.microsoft.com", "aff_unique_abbr": "Snap;USC;Microsoft", "aff_campus_unique_index": "1", "aff_campus_unique": ";Los Angeles", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "2TtN6DqjWa", "title": "Learning Interpretable Style Embeddings via Prompting LLMs", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Style representation learning builds content-independent representations of author style in text. To date, no large dataset of texts with stylometric annotations on a wide range of style dimensions has been compiled, perhaps because the linguistic expertise to perform such annotation would be prohibitively expensive. Therefore, current style representation approaches make use of unsupervised neural methods to disentangle style from content to create style vectors. These approaches, however, result in uninterpretable representations, complicating their usage in downstream applications like authorship attribution where auditing and explainability are critical. 
In this work, we use prompting to perform stylometry on a large number of texts to generate a synthetic stylometry dataset. We use this synthetic data to then train human-interpretable style representations we call LISA embeddings. We release our synthetic dataset (StyleGenome) and our interpretable style embedding model (LISA) as resources.", "keywords": "style;stylometry;representation learning;embeddings;vectors;interpretability;prompting;llm", "primary_area": "", "supplementary_material": "", "author": "Ajay Patel;Delip Rao;Ansh Kothary;Kathleen McKeown;Chris Callison-Burch", "authorids": "~Ajay_Patel2;~Delip_Rao1;~Ansh_Kothary1;~Kathleen_McKeown1;~Chris_Callison-Burch1", "gender": "M;M;;F;M", "homepage": "https://ajayp.app;https://deliprao.com;;http://www.cs.columbia.edu/~kathy/;https://www.cis.upenn.edu/~ccb/", "dblp": "96/5051;;;m/KathleenMcKeown;", "google_scholar": "mkeU33IAAAAJ;u-T21zUAAAAJ;;https://scholar.google.com.tw/citations?user=ujDhg2sAAAAJ;nv-MV58AAAAJ", "or_profile": "~Ajay_Patel2;~Delip_Rao1;~Ansh_Kothary1;~Kathleen_McKeown1;~Chris_Callison-Burch1", "aff": "University of Pennsylvania;;Columbia University;Columbia University;Allen Institute for Artificial Intelligence", "aff_domain": "cis.upenn.edu;;columbia.edu;columbia.edu;allenai.org", "position": "PhD student;;MS student;Full Professor;Researcher", "bibtex": "@inproceedings{\npatel2023learning,\ntitle={Learning Interpretable Style Embeddings via Prompting {LLM}s},\nauthor={Ajay Patel and Delip Rao and Ansh Kothary and Kathleen McKeown and Chris Callison-Burch},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=2TtN6DqjWa}\n}", "github": "", "project": "", "reviewers": "cLTM;UhN1;9v8q", "site": "https://openreview.net/forum?id=2TtN6DqjWa", "pdf_size": 0, "rating": "3;3;3", "confidence": "5;3;4", "excitement": "3;3;3", "reproducibility": "4;3;4", "correctness": "3;3;3", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 3.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;", "linkedin": ";;;;chris-callison-burch-40bb87b7/", "aff_unique_index": "0;1;1;2", "aff_unique_norm": "University of Pennsylvania;Columbia University;Allen Institute for Artificial Intelligence", "aff_unique_dep": ";;", "aff_unique_url": "https://www.upenn.edu;https://www.columbia.edu;https://allenai.org", "aff_unique_abbr": "UPenn;Columbia;AI2", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "2U9hDBaOCn", "title": "Specialist or Generalist? Instruction Tuning for Specific NLP Tasks", "track": "main", "status": "Long Main", "tldr": "", "abstract": "The potential of large language models (LLMs) to simultaneously perform a wide range of natural language processing (NLP) tasks has been the subject of extensive research. Although instruction tuning has proven to be a data-efficient method for transforming LLMs into such generalist models, their performance still lags behind specialist models trained exclusively for specific tasks. In this paper, we investigate whether incorporating broad-coverage generalist instruction tuning can contribute to building a specialist model. We hypothesize that its efficacy depends on task specificity and skill requirements. 
Our experiments assess four target tasks with distinct coverage levels, revealing that integrating generalist instruction tuning consistently enhances model performance when the task coverage is broad. The effect is particularly pronounced when the amount of task-specific training data is limited. Further investigation into three target tasks focusing on different capabilities demonstrates that generalist instruction tuning improves understanding and reasoning abilities. However, for tasks requiring factual knowledge, generalist data containing hallucinatory information may negatively affect the model\u2019s performance. Overall, our work provides a systematic guide for developing specialist models with general instruction tuning.", "keywords": "LLMs;Instruction Tuning;Data Efficiency", "primary_area": "", "supplementary_material": "", "author": "Chufan Shi;Yixuan Su;Cheng Yang;Yujiu Yang;Deng Cai", "authorids": "~Chufan_Shi1;~Yixuan_Su1;~Cheng_Yang7;~Yujiu_Yang2;~Deng_Cai1", "gender": "M;M;;M;M", "homepage": ";https://yxuansu.github.io/;;https://sites.google.com/view/iigroup-thu;https://jcyk.github.io/", "dblp": "342/5731;262/3282.html;;30/3847;c/DCai-2", "google_scholar": "BYWnPHYAAAAJ;VuVuWEoAAAAJ;;4gH3sxsAAAAJ;KpbRLYcAAAAJ", "or_profile": "~Chufan_Shi1;~Yixuan_Su1;~Cheng_Yang7;~Yujiu_Yang2;~Deng_Cai1", "aff": "Tsinghua University;University of Cambridge;;Tsinghua University;Tencent AI Lab", "aff_domain": "tsinghua.edu.cn;cam.ac.uk;;tsinghua.edu.cn;tencent.com", "position": "MS student;PhD student;;Associate Professor;Research Scientist", "bibtex": "@inproceedings{\nshi2023specialist,\ntitle={Specialist or Generalist? Instruction Tuning for Specific {NLP} Tasks},\nauthor={Chufan Shi and Yixuan Su and Cheng Yang and Yujiu Yang and Deng Cai},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=2U9hDBaOCn}\n}", "github": "", "project": "", "reviewers": "JSaf;b21v;ReUf", "site": "https://openreview.net/forum?id=2U9hDBaOCn", "pdf_size": 0, "rating": "5;5;5", "confidence": "2;4;4", "excitement": "4;4;4", "reproducibility": "3;3;4", "correctness": "3;3;4", "rating_avg": 5.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 4.0, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0009-0005-7889-5187;0000-0002-1472-7791;;0000-0002-6427-1024;", "linkedin": ";;;;", "aff_unique_index": "0;1;0;2", "aff_unique_norm": "Tsinghua University;University of Cambridge;Tencent", "aff_unique_dep": ";;Tencent AI Lab", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.cam.ac.uk;https://ai.tencent.com", "aff_unique_abbr": "THU;Cambridge;Tencent AI Lab", "aff_campus_unique_index": "1", "aff_campus_unique": ";Cambridge", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "China;United Kingdom" }, { "id": "2UJvVc8gnP", "title": "Masked Path Modeling for Vision-and-Language Navigation", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Vision-and-language navigation (VLN) agents are trained to navigate in real-world environments based on natural language instructions. A major challenge in VLN is the limited available training data, which hinders the models' ability to generalize effectively. 
Previous approaches have attempted to alleviate this issue by using external tools to generate pseudo-labeled data or integrating web-scaled image-text pairs during training. However, these methods often rely on automatically-generated or out-of-domain data, leading to challenges such as suboptimal data quality and domain mismatch. In this paper, we introduce a masked path modeling (MPM) objective. MPM pretrains an agent using self-collected data for subsequent navigation tasks, eliminating the need for external tools. Specifically, our method allows the agent to explore navigation environments and record the paths it traverses alongside the corresponding agent actions. Subsequently, we train the agent on this collected data to reconstruct the original action sequence when given a randomly masked subsequence of the original path. This approach enables the agent to accumulate a diverse and substantial dataset, facilitating the connection between visual observations of paths and the agent's actions, which is the foundation of the VLN task. Importantly, the collected data are in-domain, and the training process avoids synthetic data with uncertain quality, addressing previous issues. We conduct experiments on various VLN datasets and demonstrate the applications of MPM across different levels of instruction complexity. Our results exhibit significant improvements in success rates, with enhancements of 1.3\\%, 1.1\\%, and 1.2\\% on the val-unseen split of the Room-to-Room, Room-for-Room, and Room-across-Room datasets, respectively. Additionally, we underscore the adaptability of MPM as well as the potential for additional improvements when the agent is allowed to explore unseen environments prior to testing.", "keywords": "Vision-and-Language Navigation;Masked Data Modeling", "primary_area": "", "supplementary_material": "", "author": "Zi-Yi Dou;Feng Gao;Nanyun Peng", "authorids": "~Zi-Yi_Dou1;~Feng_Gao2;~Nanyun_Peng1", "gender": ";M;F", "homepage": "https://zdou0830.github.io/;https://fen9.github.io/;https://violetpeng.github.io/", "dblp": "205/8985;10/2674-13;117/4036", "google_scholar": "RWogNsEAAAAJ;amaLnocAAAAJ;XxRXvX0AAAAJ", "or_profile": "~Zi-Yi_Dou1;~Feng_Gao2;~Nanyun_Peng1", "aff": "University of California, Los Angeles;Amazon;University of California, Los Angeles", "aff_domain": "ucla.edu;amazon.com;ucla.edu", "position": "PhD student;Researcher;Assistant Professor", "bibtex": "@inproceedings{\ndou2023masked,\ntitle={Masked Path Modeling for Vision-and-Language Navigation},\nauthor={Zi-Yi Dou and Feng Gao and Nanyun Peng},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=2UJvVc8gnP}\n}", "github": "", "project": "", "reviewers": "otNW;Sdnd;rViw", "site": "https://openreview.net/forum?id=2UJvVc8gnP", "pdf_size": 0, "rating": "4;4;4", "confidence": "5;4;4", "excitement": "3;3;3", "reproducibility": "4;4;5", "correctness": "3;3;3", "rating_avg": 4.0, "confidence_avg": 4.333333333333333, "excitement_avg": 3.0, "reproducibility_avg": 4.333333333333333, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0003-1515-1357;", "linkedin": ";;", "aff_unique_index": "0;1;0", "aff_unique_norm": "University of California, Los Angeles;Amazon", "aff_unique_dep": ";Amazon.com, Inc.", "aff_unique_url": "https://www.ucla.edu;https://www.amazon.com", "aff_unique_abbr": "UCLA;Amazon", "aff_campus_unique_index": "0;0", 
"aff_campus_unique": "Los Angeles;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "2WZ4Wp1OSo", "title": "Building Multi-domain Dialog State Trackers from Single-domain Dialogs", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Existing multi-domain dialog state tracking (DST) models are developed based on multi-domain dialogs, which require significant manual effort to define domain relations and collect data. This process can be challenging and expensive, particularly when numerous domains are involved. In this paper, we propose a divide-and-conquer (DAC) DST paradigm and a multi-domain dialog synthesis framework, which makes building multi-domain DST models from single-domain dialogs possible. The DAC paradigm segments a multi-domain dialog into multiple single-domain dialogs for DST, which makes models generalize better on dialogs involving unseen domain combinations. The multi-domain dialog synthesis framework merges several potentially related single-domain dialogs into one multi-domain dialog and modifies the dialog to simulate domain relations. The synthesized dialogs can help DST models capture the value transfer between domains. Experiments with three representative DST models on two datasets demonstrate the effectiveness of our proposed DAC paradigm and data synthesis framework.", "keywords": "dialog state tracking;multi-domain dialog;conversational query rewrite", "primary_area": "", "supplementary_material": "", "author": "Qi Zhu;Zheng Zhang;Xiaoyan Zhu;Minlie Huang", "authorids": "~Qi_Zhu8;~Zheng_Zhang12;~Xiaoyan_Zhu1;~Minlie_Huang1", "gender": "M;M;F;M", "homepage": ";;;http://coai.cs.tsinghua.edu.cn/hml", "dblp": "66/5923-7;181/2621-20.html;50/1222-1;", "google_scholar": "sNpTNo8AAAAJ;S2bil1cAAAAJ;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com/citations?hl=zh-CN", "or_profile": "~Qi_Zhu8;~Zheng_Zhang12;~Xiaoyan_Zhu1;~Minlie_Huang1", "aff": "Department of Computer Science and Technology, Tsinghua University;Tsinghua University;Department of Computer Science and Technology, Tsinghua University;Tsinghua University", "aff_domain": "tsinghua.edu.cn;tsinghua.edu.cn;cs.tsinghua.edu.cn;tsinghua.edu.cn", "position": "PhD student;Postdoc;Full Professor;Full Professor", "bibtex": "@inproceedings{\nzhu2023building,\ntitle={Building Multi-domain Dialog State Trackers from Single-domain Dialogs},\nauthor={Qi Zhu and Zheng Zhang and Xiaoyan Zhu and Minlie Huang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=2WZ4Wp1OSo}\n}", "github": "", "project": "", "reviewers": "9qPF;DJWY;NSN3", "site": "https://openreview.net/forum?id=2WZ4Wp1OSo", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;3", "excitement": "3;3;4", "reproducibility": "4;4;4", "correctness": "3;4;3", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.0, "correctness_avg": 3.3333333333333335, "replies_avg": 10, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;", "linkedin": ";;;", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Tsinghua University", "aff_unique_dep": "Department of Computer Science and Technology", "aff_unique_url": "https://www.tsinghua.edu.cn", "aff_unique_abbr": "THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "2X5RXTOsLU", 
"title": "Dialect Transfer for Swiss German Speech Translation", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "This paper investigates the challenges in building Swiss German speech translation systems, specifically focusing on the impact of dialect diversity and differences between Swiss German and Standard German. Swiss German is a spoken language with no formal writing system, it comprises many diverse dialects and is a low-resource language with only around 5 million speakers. The study is guided by two key research questions: how does the inclusion and exclusion of dialects during the training of speech translation models for Swiss German impact the performance on specific dialects, and how do the differences between Swiss German and Standard German impact the performance of the systems? We show that dialect diversity and linguistic differences pose significant challenges to Swiss German speech translation, which is in line with linguistic hypotheses derived from empirical investigations.", "keywords": "speech to text;speech translation;swiss german;low resource", "primary_area": "", "supplementary_material": "", "author": "Claudio Paonessa;Yanick Schraner;Jan Milan Deriu;Manuela H\u00fcrlimann;Manfred Vogel;Mark Cieliebak", "authorids": "~Claudio_Paonessa1;~Yanick_Schraner1;~Jan_Milan_Deriu1;~Manuela_H\u00fcrlimann1;~Manfred_Vogel1;~Mark_Cieliebak1", "gender": "M;M;M;F;M;M", "homepage": ";;https://www.zhaw.ch/de/ueber-uns/person/deri/;https://www.zhaw.ch/en/about-us/person/hueu/;https://www.fhnw.ch/de/personen/manfred-vogel;https://www.zhaw.ch/en/about-us/person/ciel/", "dblp": "347/9860;260/6789;172/0961.html;167/4861;;34/1478.html", "google_scholar": "P_AWNqEAAAAJ;8EVlA9kAAAAJ;PvHXh9wAAAAJ;https://scholar.google.ch/citations?user=SDD3aJ8AAAAJ;;yT-vIQMAAAAJ", "or_profile": "~Claudio_Paonessa1;~Yanick_Schraner1;~Jan_Milan_Deriu1;~Manuela_H\u00fcrlimann1;~Manfred_Vogel1;~Mark_Cieliebak1", "aff": "FHNW - Fachhochschule Nordwestschweiz;FHNW - Fachhochschule Nordwestschweiz;ZHAW - Z\u00fcrcher Hochschule f\u00fcr Angewandte Wissenschaften;ZHAW - Z\u00fcrcher Hochschule f\u00fcr Angewandte Wissenschaften;FHNW - Fachhochschule Nordwestschweiz;Zurich University of Applied Sciences ZHAW", "aff_domain": "fhnw.ch;fhnw.ch;zhaw.ch;zhaw.ch;fhnw.ch;zhaw.ch", "position": "Researcher;Researcher;Researcher;Researcher;Full Professor;Full Professor", "bibtex": "@inproceedings{\npaonessa2023dialect,\ntitle={Dialect Transfer for Swiss German Speech Translation},\nauthor={Claudio Paonessa and Yanick Schraner and Jan Milan Deriu and Manuela H{\\\"u}rlimann and Manfred Vogel and Mark Cieliebak},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=2X5RXTOsLU}\n}", "github": "", "project": "", "reviewers": "LM7U;xjs8;qYnP", "site": "https://openreview.net/forum?id=2X5RXTOsLU", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;4", "excitement": "4;3;4", "reproducibility": "3;3;4", "correctness": "4;3;5", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0002-8405-1344;0009-0008-5185-6876;;", "linkedin": "claudio-paonessa-328983231/;;jan-deriu-444092104/;manuela-h%C3%BCrlimann-a16410127/;;mark-cieliebak-8988a234/", "aff_unique_index": "0;0;1;1;0;2", "aff_unique_norm": "Fachhochschule 
Nordwestschweiz;Z\u00fcrcher Hochschule f\u00fcr Angewandte Wissenschaften;Zurich University of Applied Sciences", "aff_unique_dep": ";;", "aff_unique_url": "https://www.fhnw.ch;https://www.zhaw.ch;https://www.zhawk.ch", "aff_unique_abbr": "FHNW;ZHAW;ZHAW", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "Switzerland" }, { "id": "2XDbDwNlTn", "title": "FACTIFY3M: A benchmark for multimodal fact verification with explainability through 5W Question-Answering", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Combating disinformation is one of the burning societal crises - about 67% of the American population believes that disinformation produces a lot of uncertainty, and 10% of them knowingly propagate disinformation. Evidence shows that disinformation can manipulate democratic processes and public opinion, causing disruption in the share market, panic and anxiety in society, and even death during crises. Therefore, disinformation should be identified promptly and, if possible, mitigated. With approximately 3.2 billion images and 720,000 hours of video shared online daily on social media platforms, scalable detection of multimodal disinformation requires efficient fact verification. Despite progress in automatic text-based fact verification (e.g., FEVER, LIAR), the research community lacks substantial effort in multimodal fact verification. To address this gap, we introduce FACTIFY 3M, a dataset of 3 million samples that pushes the boundaries of the domain of fact verification via a multimodal fake news dataset, in addition to offering explainability through the concept of 5W question-answering. Salient features of the dataset include: (i) textual claims, (ii) ChatGPT-generated paraphrased claims, (iii) associated images, (iv) stable diffusion-generated additional images (i.e., visual paraphrases), (v) pixel-level image heatmap to foster image-text explainability of the claim, (vi) 5W QA pairs, and (vii) adversarial fake news stories.", "keywords": "Multimodality;Fact Verification;Disinformation;Explainability", "primary_area": "", "supplementary_material": "", "author": "Megha Chakraborty;Khushbu Pahwa;Anku Rani;Shreyas Chatterjee;Dwip Dalal;Harshit Dave;Ritvik G;Preethi Gurumurthy;Adarsh Ashok Mahor;Samahriti Mukherjee;Aditya Pakala;Ishan Paul;Janvita Reddy;Arghya Sarkar;Kinjal Sensharma;Aman Chadha;Amit P. 
Sheth;Amitava Das", "authorids": "~Megha_Chakraborty1;~Khushbu_Pahwa1;~Anku_Rani2;~Shreyas_Chatterjee1;~Dwip_Dalal1;~Harshit_Dave1;~Ritvik_G1;~Preethi_Gurumurthy1;~Adarsh_Ashok_Mahor1;~Samahriti_Mukherjee1;~Aditya_Pakala1;~Ishan_Paul1;~Janvita_Reddy1;~Arghya_Sarkar1;~Kinjal_Sensharma1;~Aman_Chadha1;~Amit_P._Sheth1;~Amitava_Das3", "gender": "F;F;F;M;M;M;M;F;M;F;M;M;F;M;M;M;M;M", "homepage": ";;https://sites.google.com/view/ankurani/;;http://dwipddalal.me/;;https://ritvikg.com;;;;;;;;;https://aman.ai;http://aiisc.ai/amit;https://amitavadas.com/", "dblp": ";299/8490;294/5245;;344/3845;;;;;;;;;;;55/10360;s/AmitPSheth;", "google_scholar": "Jqq0mHoAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=en;8ZRL-ekAAAAJ;https://scholar.google.ca/citations?user=zrGsmv8AAAAJ;;Qoo3fZIAAAAJ;;;;;;;;;gPGQuBQAAAAJ;https://scholar.google.com/citations?hl=en;", "or_profile": "~Megha_Chakraborty1;~Khushbu_Pahwa1;~Anku_Rani2;~Shreyas_Chatterjee1;~Dwip_Dalal1;~Harshit_Dave1;~Ritvik_G1;~Preethi_Gurumurthy1;~Adarsh_Ashok_Mahor1;~Samahriti_Mukherjee1;~Aditya_Pakala1;~Ishan_Paul1;~Janvita_Reddy1;~Arghya_Sarkar1;~Kinjal_Sensharma1;~Aman_Chadha1;~Amit_P._Sheth1;~Amitava_Das3", "aff": "University of South Carolina, Columbia;University of California, Los Angeles;university of south carolina;Indian Institute of Technology Jammu;Indian Institute of Technology, Gandhinagar;University of South Carolina;National Institute of Technology Andhra Pradesh;Indian Institute of Information Technology, Sri City;Sardar Vallabhbhai National Institute of Technology;Indian Statistical Institute;Indian Institute of Technology, Delhi;Indian Statistical Institute ;Sardar Vallabhbhai National Institute of Technology;Indian Statistical Institute, Kolkata;University of Alberta;Amazon Web Services;University of South Carolina;University of South Carolina", "aff_domain": "uofsc.edu;ucla.edu;mailbox.sc.edu;iitjammu.ac.in;iitgn.ac.in;sc.edu;nitandhra.ac.in;iiits.in;svnit.ac.in;isical.ac.in;iitd.ac.in;isical.ac.in;med.svnit.ac.in;isical.ac.in;ualberta.ca;amazon.com;sc.edu;uofsc.edu", "position": "PhD student;MS student;Researcher;Undergrad student;Undergrad student;Intern;Undergrad student;Undergrad student;Undergrad student;Undergrad student;Intern;Undergrad student;Undergrad student;Undergrad student;Intern;GenAI Science Manager;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nchakraborty2023factifym,\ntitle={{FACTIFY}3M: A benchmark for multimodal fact verification with explainability through 5W Question-Answering},\nauthor={Megha Chakraborty and Khushbu Pahwa and Anku Rani and Shreyas Chatterjee and Dwip Dalal and Harshit Dave and Ritvik G and Preethi Gurumurthy and Adarsh Ashok Mahor and Samahriti Mukherjee and Aditya Pakala and Ishan Paul and Janvita Reddy and Arghya Sarkar and Kinjal Sensharma and Aman Chadha and Amit P. 
Sheth and Amitava Das},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=2XDbDwNlTn}\n}", "github": "", "project": "", "reviewers": "woHT;c4E1;Nv99", "site": "https://openreview.net/forum?id=2XDbDwNlTn", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;4;3", "excitement": "4;4;4", "reproducibility": "3;3;3", "correctness": "4;3;4", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 4.0, "reproducibility_avg": 3.0, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 18, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;0009-0007-4063-1076;0000-0003-4232-5009;0009-0004-7472-4690;0000-0002-5898-3447;;;;;;;;0000-0001-6621-9003;0000-0002-0021-5293;", "linkedin": "megha-chakraborty-9a324b165/;khushbupahwa;anku-rani/;shreyas-chatterjee-7b174418b/;dwip-dalal-a7a440190/;harshit-dave/;ritvikg/;preethigurumurthy/;adarsh-mahor-a4a8691a7/;samahriti-mukherjee-a7229923b/;aditya-pakala-8160a61a0/;ishan-paul-21476620a;janvita/;arghya-sarkar-369464236/;kinjal-sensharma-2597081bb/;https://linkedin.aman.ai/;amitsheth/;", "aff_unique_index": "0;1;0;2;3;0;4;5;6;7;8;7;6;7;9;10;0;0", "aff_unique_norm": "University of South Carolina;University of California, Los Angeles;Indian Institute of Technology Jammu;Indian Institute of Technology Gandhinagar;National Institute of Technology, Andhra Pradesh;Indian Institute of Information Technology;Sardar Vallabhbhai National Institute of Technology;Indian Statistical Institute;Indian Institute of Technology Delhi;University of Alberta;Amazon", "aff_unique_dep": ";;;;;;;;;;Amazon Web Services", "aff_unique_url": "https://www.sc.edu;https://www.ucla.edu;https://www.iitjammu.ac.in;https://www.iitgn.ac.in;https://www.nitap.ac.in;https://www.iiitsricity.ac.in;https://www.svnit.ac.in;https://www.isical.ac.in;https://www.iitdelhi.ac.in;https://www.ualberta.ca;https://aws.amazon.com", "aff_unique_abbr": "USC;UCLA;IIT Jammu;IITGN;NIT Andhra Pradesh;IIIT;SVNIT;ISI;IIT Delhi;UAlberta;AWS", "aff_campus_unique_index": "0;1;3;4;5;6;7", "aff_campus_unique": "Columbia;Los Angeles;;Jammu;Gandhinagar;Sri City;Delhi;Kolkata", "aff_country_unique_index": "0;0;0;1;1;0;1;1;1;1;1;1;1;1;2;0;0;0", "aff_country_unique": "United States;India;Canada" }, { "id": "2YEY9SPVEA", "title": "Task-Adaptive Tokenization: Enhancing Long-Form Text Generation Efficacy in Mental Health and Beyond", "track": "main", "status": "Long Main", "tldr": "", "abstract": "We propose task-adaptive tokenization\\footnote{Our work will be publicly available upon acceptance.} as a way to adapt the generation pipeline to the specifics of a downstream task and enhance long-form generation in mental health. Inspired by insights from cognitive science, our task-adaptive tokenizer samples variable segmentations from multiple outcomes, with sampling probabilities optimized based on task-specific data. We introduce a strategy for building a specialized vocabulary and introduce a vocabulary merging protocol that allows for the integration of task-specific tokens into the pre-trained model's tokenization step. \nThrough extensive experiments on psychological question-answering tasks in both Chinese and English, we find that our task-adaptive tokenization approach brings a significant improvement in generation performance while using up to 60\\% fewer tokens. 
\nPreliminary experiments point to promising results when using our tokenization approach with very large language models.", "keywords": "task-adaptive tokenization;text generation;long-form generation;text segmentation", "primary_area": "", "supplementary_material": "", "author": "Siyang Liu;Naihao Deng;Sahand Sabour;Yilin Jia;Minlie Huang;Rada Mihalcea", "authorids": "~Siyang_Liu1;~Naihao_Deng1;~Sahand_Sabour1;~Yilin_Jia1;~Minlie_Huang1;~Rada_Mihalcea1", "gender": "F;M;M;M;M;F", "homepage": ";https://dnaihao.github.io;https://sahandfer.github.io/;;http://coai.cs.tsinghua.edu.cn/hml;https://web.eecs.umich.edu/~mihalcea/", "dblp": "81/4071-3;303/0640;294/4827;;;m/RadaMihalcea", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;3_qUtH4AAAAJ;EaOudNsAAAAJ;;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com.tw/citations?user=UetM7FgAAAAJ", "or_profile": "~Siyang_Liu1;~Naihao_Deng1;~Sahand_Sabour1;~Yilin_Jia1;~Minlie_Huang1;~Rada_Mihalcea1", "aff": "University of Michigan - Ann Arbor;University of Michigan - Ann Arbor;Tsinghua University;;Tsinghua University;University of Michigan", "aff_domain": "umich.edu;umich.edu;tsinghua.edu.cn;;tsinghua.edu.cn;umich.edu", "position": "PhD student;PhD student;PhD student;;Full Professor;Full Professor", "bibtex": "@inproceedings{\nliu2023taskadaptive,\ntitle={Task-Adaptive Tokenization: Enhancing Long-Form Text Generation Efficacy in Mental Health and Beyond},\nauthor={Siyang Liu and Naihao Deng and Sahand Sabour and Yilin Jia and Minlie Huang and Rada Mihalcea},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=2YEY9SPVEA}\n}", "github": "", "project": "", "reviewers": "xPLS;iXkS;m6qr", "site": "https://openreview.net/forum?id=2YEY9SPVEA", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;2;4", "excitement": "4;4;3", "reproducibility": "5;3;4", "correctness": "4;3;4", "rating_avg": 4.0, "confidence_avg": 3.0, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.0, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0009-0009-0377-8508;0000-0003-0294-2897;;;;0000-0002-0767-6703", "linkedin": ";naihao-deng/;sahandsabour/;yilin-jia-1277a1250/;;", "aff_unique_index": "0;0;1;1;0", "aff_unique_norm": "University of Michigan;Tsinghua University", "aff_unique_dep": ";", "aff_unique_url": "https://www.umich.edu;https://www.tsinghua.edu.cn", "aff_unique_abbr": "UM;THU", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Ann Arbor;", "aff_country_unique_index": "0;0;1;1;0", "aff_country_unique": "United States;China" }, { "id": "2anfut5geh", "title": "Challenges in Context-Aware Neural Machine Translation", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Context-aware neural machine translation, a paradigm that involves leveraging information beyond sentence-level context to resolve inter-sentential discourse dependencies and improve document-level translation quality, has given rise to a number of recent techniques. \nHowever, despite well-reasoned intuitions, most context-aware translation models show only modest improvements over sentence-level systems. In this work, we investigate and present several core challenges that impede progress within the field, relating to discourse phenomena, context usage, model architectures, and document-level evaluation. 
\nTo address these problems, we propose a more realistic setting for document-level translation, called paragraph-to-paragraph (PARA2PARA) translation, and collect a new dataset of Chinese-English novels to promote future research.", "keywords": "neural machine translation;document-level neural machine translation;context-aware neural machine translation", "primary_area": "", "supplementary_material": "", "author": "Linghao Jin;Jacqueline He;Jonathan May;Xuezhe Ma", "authorids": "~Linghao_Jin1;~Jacqueline_He1;~Jonathan_May1;~Xuezhe_Ma1", "gender": "F;F;M;M", "homepage": ";http://jacqueline-he.github.io;http://jonmay.net;https://xuezhemax.github.io/", "dblp": ";319/3146.html;00/4758;127/0230", "google_scholar": "LQ9aOHMAAAAJ;;tmK5EPEAAAAJ;6_MQLIcAAAAJ", "or_profile": "~Linghao_Jin1;~Jacqueline_He1;~Jonathan_May1;~Xuezhe_Ma1", "aff": "University of Southern California;University of Washington;USC/ISI;USC/ISI", "aff_domain": "usc.edu;uw.edu;isi.edu;isi.edu", "position": "PhD student;PhD student;Research Scientist;Assistant Professor", "bibtex": "@inproceedings{\njin2023challenges,\ntitle={Challenges in Context-Aware Neural Machine Translation},\nauthor={Linghao Jin and Jacqueline He and Jonathan May and Xuezhe Ma},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=2anfut5geh}\n}", "github": "", "project": "", "reviewers": "dPV2;LPga;bWHY", "site": "https://openreview.net/forum?id=2anfut5geh", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;3;4", "excitement": "3;3;3", "reproducibility": "4;4;4", "correctness": "3;4;4", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.0, "reproducibility_avg": 4.0, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0002-5284-477X;", "linkedin": ";;jonmayjonmay/;xuezhe-ma-b5354731", "aff_unique_index": "0;1;0;0", "aff_unique_norm": "University of Southern California;University of Washington", "aff_unique_dep": ";", "aff_unique_url": "https://www.usc.edu;https://www.washington.edu", "aff_unique_abbr": "USC;UW", "aff_campus_unique_index": "0;2;2", "aff_campus_unique": "Los Angeles;;ISI", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "2b7aSGxb6M", "title": "MSCFFN: A New FFN with Multi-Space Cross to Accelerate Transformer", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "Transformer models have achieved impressive success in various natural language processing tasks. But it is also limited used in some areas and the heavy computation complexity is one of the main limitations. Many model structures have been proposed to reduce the computation complexity and some are really effective. The previous research can be divided into two categories. One is to use more effective training and inference strategies and the other is focused on how to replace the standard self-attention mechanism with linear attention method. Differently, we revisit the design in Transformer and find that the feed forward network (FFN) is also computationally expensive, especially when the hidden dimension is large. In this paper, we propose a new FFN structure, named MSCFFN, which splits the large matrix space to several small space to reduce the computation complexity and uses the Multi-Space Cross method to ensure the accurate result. 
To the best of our knowledge, this is the first attempt to redesign the FFN to accelerate Transformers. We experimentally validate the effectiveness of the proposed method on the Long-Range Arena benchmark. The results show that MSCFFN achieves a faster speed with similar or even better accuracy.", "keywords": "New FFN structure;MSCFFN;Multi-Space Cross method;Accelerate Transformers", "primary_area": "", "supplementary_material": "", "author": "Tang Dongge;Qing Yang", "authorids": "~Tang_Dongge1;~Qing_Yang11", "gender": "M;M", "homepage": "https://github.com/bancheng;https://www.duxiaoman.com/index", "dblp": ";47/3749", "google_scholar": ";", "or_profile": "~Tang_Dongge1;~Qing_Yang11", "aff": "duxiaoman;Du Xiaoman Technology(BeiJing)", "aff_domain": "duxiaoman.com;duxiaoman.com", "position": "Researcher;Principal Researcher", "bibtex": "@inproceedings{\ndongge2023mscffn,\ntitle={{MSCFFN}: A New {FFN} with Multi-Space Cross to Accelerate Transformer},\nauthor={Tang Dongge and Qing Yang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=2b7aSGxb6M}\n}", "github": "", "project": "", "reviewers": "i4EW;iZ74;wC4i", "site": "https://openreview.net/forum?id=2b7aSGxb6M", "pdf_size": 0, "rating": "2;2;2", "confidence": "3;3;4", "excitement": "3;2;4", "reproducibility": "4;3;3", "correctness": "2;2;3", "rating_avg": 2.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.0, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 2.3333333333333335, "replies_avg": 11, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";", "linkedin": ";", "aff_unique_index": "0;1", "aff_unique_norm": "Duxiaoman;Du Xiaoman Technology", "aff_unique_dep": ";", "aff_unique_url": ";", "aff_unique_abbr": ";", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "1", "aff_country_unique": ";China" }, { "id": "2bBIY12n43", "title": "A State-Vector Framework for Dataset Effects", "track": "main", "status": "Long Main", "tldr": "", "abstract": "The impressive success of recent deep neural network (DNN)-based systems is significantly influenced by the high-quality datasets used in training. However, the effects of the datasets, especially how they interact with each other, remain underexplored. We propose a state-vector framework to enable rigorous studies in this direction. This framework uses idealized probing test results as the bases of a vector space. This framework allows us to quantify the effects of both standalone and interacting datasets. We show that the significant effects of some commonly-used language understanding datasets are characteristic and are concentrated on a few linguistic dimensions. Additionally, we observe some ``spill-over'' effects: the datasets could impact the models along dimensions that may seem unrelated to the intended tasks. 
Our state-vector framework paves the way for a systematic understanding of the dataset effects, a crucial component in responsible and robust model development.", "keywords": "data influence;probing;fine-tuning;multi-task learning;datasets", "primary_area": "", "supplementary_material": "", "author": "Esmat Sahak;Zining Zhu;Frank Rudzicz", "authorids": "~Esmat_Sahak1;~Zining_Zhu1;~Frank_Rudzicz2", "gender": "M;;M", "homepage": ";http://ziningzhu.github.io;http://www.cs.toronto.edu/~frank", "dblp": ";188/5709;36/6505", "google_scholar": ";https://scholar.google.ca/citations?user=Xr_hCJMAAAAJ;https://scholar.google.ca/citations?user=elXOB1sAAAAJ", "or_profile": "~Esmat_Sahak1;~Zining_Zhu1;~Frank_Rudzicz2", "aff": "University of Toronto;University of Toronto;Dalhousie University", "aff_domain": "utoronto.ca;toronto.edu;dal.ca", "position": "Undergrad student;PhD student;Associate Professor", "bibtex": "@inproceedings{\nsahak2023a,\ntitle={A State-Vector Framework for Dataset Effects},\nauthor={Esmat Sahak and Zining Zhu and Frank Rudzicz},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=2bBIY12n43}\n}", "github": "", "project": "", "reviewers": "6EYi;9zjj;PRSZ", "site": "https://openreview.net/forum?id=2bBIY12n43", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;2;4", "excitement": "4;3;4", "reproducibility": "3;3;4", "correctness": "4;3;5", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0002-1139-3423", "linkedin": "esmat-sahak-777293181/;zining-zhu/;", "aff_unique_index": "0;0;1", "aff_unique_norm": "University of Toronto;Dalhousie University", "aff_unique_dep": ";", "aff_unique_url": "https://www.utoronto.ca;https://www.dal.ca", "aff_unique_abbr": "U of T;Dal", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Canada" }, { "id": "2c3u5YDUUy", "title": "MTGER: Multi-view Temporal Graph Enhanced Temporal Reasoning over Time-Involved Document", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "The facts and time in the document are intricately intertwined, making temporal reasoning over documents challenging. Previous work models time implicitly, making it difficult to handle such complex relationships. To address this issue, we propose MTGER, a novel Multi-view Temporal Graph Enhanced Reasoning framework for temporal reasoning over time-involved documents. Concretely, MTGER explicitly models the temporal relationships among facts by multi-view temporal graphs. On the one hand, the heterogeneous temporal graphs explicitly model the temporal and discourse relationships among facts; on the other hand, the multi-view mechanism captures both time-focused and fact-focused information, allowing the two views to complement each other through adaptive fusion. To further improve the implicit reasoning capability of the model, we design a self-supervised time-comparing objective. Extensive experimental results demonstrate the effectiveness of our method on the TimeQA and SituatedQA datasets. 
Furthermore, MTGER gives more consistent answers under question perturbations.", "keywords": "Question Answering;Temporal Reasoning;Natural Language Processing", "primary_area": "", "supplementary_material": "", "author": "Zheng Chu;Zekun Wang;Jiafeng Liang;Ming Liu;Bing Qin", "authorids": "~Zheng_Chu1;~Zekun_Wang1;~Jiafeng_Liang1;~Ming_Liu6;~Bing_Qin2", "gender": "M;;M;M;", "homepage": ";;;http://homepage.hit.edu.cn/liuming1981;http://ir.hit.edu.cn/~qinb", "dblp": ";;;20/2039-4.html;86/5934.html", "google_scholar": "NRD-_8kAAAAJ;;https://scholar.google.com.hk/citations?user=_AHDC4gAAAAJ;VJtmTREAAAAJ;LKnCub0AAAAJ", "or_profile": "~Zheng_Chu1;~Zekun_Wang1;~Jiafeng_Liang1;~Ming_Liu6;~Bing_Qin2", "aff": "Harbin Institute of Technology;;Harbin Institute of Technology;Harbin Institute of Technology;Harbin Institute of Technology", "aff_domain": "hit.edu.cn;;hit.edu.cn;hit.edu.cn;hit.edu.cn", "position": "PhD student;;PhD student;Professor;Full Professor", "bibtex": "@inproceedings{\nchu2023mtger,\ntitle={{MTGER}: Multi-view Temporal Graph Enhanced Temporal Reasoning over Time-Involved Document},\nauthor={Zheng Chu and Zekun Wang and Jiafeng Liang and Ming Liu and Bing Qin},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=2c3u5YDUUy}\n}", "github": "", "project": "", "reviewers": "DJLB;ZXhY;dd6k", "site": "https://openreview.net/forum?id=2c3u5YDUUy", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;3;3", "excitement": "3;4;3", "reproducibility": "3;3;4", "correctness": "3;3;3", "rating_avg": 3.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0002-5747-6187;;0000-0002-2543-5604", "linkedin": ";;;;", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Harbin Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "http://www.hit.edu.cn/", "aff_unique_abbr": "HIT", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Harbin", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "2hYi3mXxqf", "title": "T-Projection: High Quality Annotation Projection for Sequence Labeling Tasks", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "In the absence of readily available labeled data for a given sequence labeling task and language, annotation projection has been proposed as one of the possible strategies to automatically generate annotated data. Annotation projection has often been formulated as the task of transporting, on parallel corpora, the labels pertaining to a given span in the source language into its corresponding span in the target language. In this paper we present T-Projection, a novel approach for annotation projection that leverages large pretrained text2text language models and state-of-the-art machine translation technology. T-Projection decomposes the label projection task into two subtasks: (i) A candidate generation step, in which a set of projection candidates using a multilingual T5 model is generated and, (ii) a candidate selection step, in which the generated candidates are ranked based on translation probabilities. We conducted experiments on intrinsic and extrinsic tasks in 5 Indo-European and 8 low-resource African languages. 
We demonstrate that T-Projection outperforms previous annotation projection methods by a wide margin. We believe that T-Projection can help to automatically alleviate the lack of high-quality training data for sequence labeling tasks. Code and data are publicly available.", "keywords": "annotation projection;low-resource;sequence labeling;text2text language models;machine translation;automatic data generation", "primary_area": "", "supplementary_material": "", "author": "Iker Garc\u00eda-Ferrero;Rodrigo Agerri;German Rigau", "authorids": "~Iker_Garc\u00eda-Ferrero1;~Rodrigo_Agerri1;~German_Rigau2", "gender": "M;M;M", "homepage": "https://ikergarcia1996.github.io/Iker-Garcia-Ferrero/;https://ragerri.github.io/;https://adimen.si.ehu.es/~rigau/", "dblp": "305/9880;57/5047;66/1456.html", "google_scholar": "https://scholar.google.es/citations?user=yoOzj1MAAAAJ;https://scholar.google.es/citations?user=1SJh0a8AAAAJ;3RHckhYAAAAJ", "or_profile": "~Iker_Garc\u00eda-Ferrero1;~Rodrigo_Agerri1;~German_Rigau2", "aff": "University of Pennsylvania;University of the Basque Country;Universidad del Pa\u00eds Vasco", "aff_domain": "upenn.edu;ehu.eus;ehu.eus", "position": "PhD student;Principal Researcher;Associate Professor", "bibtex": "@inproceedings{\ngarc{\\'\\i}a-ferrero2023tprojection,\ntitle={T-Projection: High Quality Annotation Projection for Sequence Labeling Tasks},\nauthor={Iker Garc{\\'\\i}a-Ferrero and Rodrigo Agerri and German Rigau},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=2hYi3mXxqf}\n}", "github": "", "project": "", "reviewers": "nRSb;GnKV;LiTa", "site": "https://openreview.net/forum?id=2hYi3mXxqf", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;4", "excitement": "4;3;3", "reproducibility": "4;5;5", "correctness": "3;4;3", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.666666666666667, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0001-9612-7134;0000-0002-7303-7598;0000-0003-1119-0930", "linkedin": "iker-garc%C3%ADa-ferrero-75343b172/;rodrigo-agerri-0678616;german-rigau-a4ba3a173/", "aff_unique_index": "0;1;2", "aff_unique_norm": "University of Pennsylvania;University of the Basque Country;Universidad del Pa\u00eds Vasco", "aff_unique_dep": ";;", "aff_unique_url": "https://www.upenn.edu;https://www.ehu.eus/en;https://www.ehu.eus/en", "aff_unique_abbr": "UPenn;UPV/EHU;UPV/EHU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1", "aff_country_unique": "United States;Spain" }, { "id": "2jibzAXJzH", "title": "T5Score: Discriminative Fine-tuning of Generative Evaluation Metrics", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Modern embedding-based metrics for evaluation of generated text generally fall into one of two paradigms: discriminative metrics that are trained to directly predict which outputs are of higher quality according to supervised human annotations, and generative metrics that are trained to evaluate text based on the probabilities of a generative model. Both have their advantages; discriminative metrics are able to directly optimize for the problem of distinguishing between good and bad outputs, while generative metrics can be trained using abundant raw text. 
In this paper, we present a framework that combines the best of both worlds, using both supervised and unsupervised signals from whatever data we have available. We operationalize this idea by training T5Score, a metric that uses these training signals with mT5 as backbone. We perform an extensive empirical comparison with other existing metrics on 5 datasets, 19 languages and 280 systems, demonstrating the utility of our method. Experimental results show that: T5Score achieves the best performance on all datasets against existing top-scoring metrics at the segment level.", "keywords": "text generation;evaluation", "primary_area": "", "supplementary_material": "", "author": "Yiwei Qin;Weizhe Yuan;Graham Neubig;Pengfei Liu", "authorids": "~Yiwei_Qin1;~Weizhe_Yuan1;~Graham_Neubig1;~Pengfei_Liu1", "gender": "F;F;M;M", "homepage": ";http://yyy-apple.github.io/;http://phontron.com;http://pfliu.com/", "dblp": "143/0512.html;207/1964;03/8155;34/3381-3", "google_scholar": ";2k5j4eMAAAAJ;wlosgkoAAAAJ;oIz_CYEAAAAJ", "or_profile": "~Yiwei_Qin1;~Weizhe_Yuan1;~Graham_Neubig1;~Pengfei_Liu1", "aff": ";New York University;Carnegie Mellon University;Carnegie Mellon University", "aff_domain": ";nyu.edu;cmu.edu;cmu.edu", "position": ";PhD student;Associate Professor;Postdoc", "bibtex": "@inproceedings{\nqin2023tscore,\ntitle={T5Score: Discriminative Fine-tuning of Generative Evaluation Metrics},\nauthor={Yiwei Qin and Weizhe Yuan and Graham Neubig and Pengfei Liu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=2jibzAXJzH}\n}", "github": "", "project": "", "reviewers": "SSiu;NgGN;zxyM", "site": "https://openreview.net/forum?id=2jibzAXJzH", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;4", "excitement": "2;4;4", "reproducibility": "4;5;5", "correctness": "3;3;4", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.666666666666667, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;", "linkedin": "yiwei-qin-b0b57a22b/;weizhey/;;", "aff_unique_index": "0;1;1", "aff_unique_norm": "New York University;Carnegie Mellon University", "aff_unique_dep": ";", "aff_unique_url": "https://www.nyu.edu;https://www.cmu.edu", "aff_unique_abbr": "NYU;CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "2kSufHoYEi", "title": "NORMSAGE: Multi-Lingual Multi-Cultural Norm Discovery from Conversations On-the-Fly", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Knowledge of norms is needed to understand and reason about acceptable behavior in human communication and interactions across sociocultural scenarios. Most computational research on norms has focused on a single culture, and manually built datasets, from non-conversational settings. We address these limitations by proposing a new framework, NormSage, to automatically extract \n culture-specific norms from multi-lingual conversations. NormSage uses GPT-3 prompting to 1) extract candidate norms directly from conversations and 2) provide explainable self-verification to ensure correctness and relevance. Comprehensive empirical results show the promise of our approach to extract high-quality culture-aware norms from multi-lingual conversations (English and Chinese), across several quality metrics. 
Further, our relevance verification can be extended to assess the adherence and violation of any norm with respect to a conversation on-the-fly, along with textual explanation. NormSage achieves an AUC of 94.6\\% in this grounding setup, with generated explanations matching human-written quality.", "keywords": "social norms;discovery;grounding", "primary_area": "", "supplementary_material": "", "author": "Yi Fung;Tuhin Chakrabarty;Hao Guo;Owen Rambow;Smaranda Muresan;Heng Ji", "authorids": "~Yi_Fung1;~Tuhin_Chakrabarty2;~Hao_Guo5;~Owen_Rambow3;~Smaranda_Muresan3;~Heng_Ji3", "gender": "F;M;M;M;;F", "homepage": "https://mayrfung.github.io;https://tuhinjubcse.github.io/;https://github.com/h-guo18;http://owenrambow.com;http://www.cs.columbia.edu/~smara/;http://blender.cs.illinois.edu/hengji.html", "dblp": "223/2782-1.html;227/2812;;55/1330;44/70;", "google_scholar": "eUae2K0AAAAJ;HCmFuo8AAAAJ;;https://scholar.google.com/scholar?hl=en;Esbx2VcAAAAJ;z7GCqT4AAAAJ", "or_profile": "~Yi_Fung1;~Tuhin_Chakrabarty2;~Hao_Guo5;~Owen_Rambow3;~Smaranda_Muresan3;~Heng_Ji3", "aff": "University of Illinois, Urbana Champaign;Columbia University;Tsinghua University;Stony Brook University;Columbia University;University of Illinois, Urbana-Champaign", "aff_domain": "illinois.edu;columbia.edu;mails.tsinghua.edu.cn;stonybrook.edu;columbia.edu;uiuc.edu", "position": "PhD student;PhD student;Undergrad student;Full Professor;Principal Researcher;Full Professor", "bibtex": "@inproceedings{\nfung2023normsage,\ntitle={{NORMSAGE}: Multi-Lingual Multi-Cultural Norm Discovery from Conversations On-the-Fly},\nauthor={Yi Fung and Tuhin Chakrabarty and Hao Guo and Owen Rambow and Smaranda Muresan and Heng Ji},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=2kSufHoYEi}\n}", "github": "", "project": "", "reviewers": "sQGj;HuvK;6CHs", "site": "https://openreview.net/forum?id=2kSufHoYEi", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;4;5", "excitement": "4;3;3", "reproducibility": "3;3;4", "correctness": "3;3;3", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;", "linkedin": ";;;;;", "aff_unique_index": "0;1;2;3;1;4", "aff_unique_norm": "University of Illinois Urbana-Champaign;Columbia University;Tsinghua University;Stony Brook University;University of Illinois", "aff_unique_dep": ";;;;", "aff_unique_url": "https://illinois.edu;https://www.columbia.edu;https://www.tsinghua.edu.cn;https://www.stonybrook.edu;https://illinois.edu", "aff_unique_abbr": "UIUC;Columbia;THU;SBU;UIUC", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Urbana-Champaign;", "aff_country_unique_index": "0;0;1;0;0;0", "aff_country_unique": "United States;China" }, { "id": "2lI1pVL6aj", "title": "CRAB: Assessing the Strength of Causal Relationships Between Real-world Events", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Understanding narratives requires reasoning about the cause-and-effect relationships between events mentioned in the text. While existing foundation models yield impressive results in many NLP tasks requiring reasoning, it is unclear whether they understand the complexity of the underlying network of causal relationships of events in narratives. 
In this work, we present CRAB, a new Causal Reasoning Assessment Benchmark designed to evaluate causal understanding of events in real-world narratives. CRAB contains fine-grained, contextual causality annotations for ~2.7K pairs of real-world events that describe various newsworthy event timelines (e.g., the acquisition of Twitter by Elon Musk). Using CRAB, we measure the performance of several large language models, demonstrating that most systems achieve poor performance on the task. Motivated by classical causal principles, we also analyze the causal structures of groups of events in CRAB, and find that models perform worse on causal reasoning when events are derived from complex causal structures compared to simple linear causal chains. We make our dataset and code available to the research community.", "keywords": "causal reasoning;benchmark;causal score;event causality", "primary_area": "", "supplementary_material": "", "author": "Angelika Romanou;Syrielle Montariol;Debjit Paul;Leo Laugier;Karl Aberer;Antoine Bosselut", "authorids": "~Angelika_Romanou1;~Syrielle_Montariol1;~Debjit_Paul2;~Leo_Laugier1;~Karl_Aberer1;~Antoine_Bosselut1", "gender": "F;F;M;M;;M", "homepage": ";https://smontariol.github.io/;https://debjitpaul.github.io/;https://leolaugier.github.io/;https://people.epfl.ch/karl.aberer;https://atcbosselut.github.io/", "dblp": ";245/2618;238/1467.html;230/4603;a/KarlAberer;184/3742", "google_scholar": "VXzt8WYAAAAJ;oM63nTMAAAAJ;https://scholar.google.de/citations?user=jJ8MjZMAAAAJ;b7SjqRwAAAAJ;;XD9hkJwAAAAJ", "or_profile": "~Angelika_Romanou1;~Syrielle_Montariol1;~Debjit_Paul2;~Leo_Laugier1;~Karl_Aberer1;~Antoine_Bosselut1", "aff": "Swiss Federal Institute of Technology Lausanne;EPFL - EPF Lausanne;EPFL - EPF Lausanne;EPFL - EPF Lausanne;School of Computer and Communication Sciences, EPFL - EPF Lausanne;Swiss Federal Institute of Technology Lausanne", "aff_domain": "epfl.ch;epfl.ch;epfl.ch;epfl.ch;ic.epfl.ch;epfl.ch", "position": "PhD student;Postdoc;Postdoc;Postdoc;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nromanou2023crab,\ntitle={{CRAB}: Assessing the Strength of Causal Relationships Between Real-world Events},\nauthor={Angelika Romanou and Syrielle Montariol and Debjit Paul and Leo Laugier and Karl Aberer and Antoine Bosselut},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=2lI1pVL6aj}\n}", "github": "", "project": "", "reviewers": "GU2u;g6sm;d7EN", "site": "https://openreview.net/forum?id=2lI1pVL6aj", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;3", "excitement": "3;3;4", "reproducibility": "3;4;5", "correctness": "4;4;4", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.0, "correctness_avg": 4.0, "replies_avg": 10, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0009-0004-4029-6616;0000-0002-3737-3092;;", "linkedin": ";;debjit-paul/;leolaugier/;;", "aff_unique_index": "0;1;1;1;1;0", "aff_unique_norm": "Swiss Federal Institute of Technology Lausanne;EPFL", "aff_unique_dep": ";", "aff_unique_url": "https://www.epfl.ch;https://www.epfl.ch", "aff_unique_abbr": "EPFL;EPFL", "aff_campus_unique_index": "0;0;0;0;0;0", "aff_campus_unique": "Lausanne", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "Switzerland" }, { "id": "2mxzS2Xv2e", "title": "A Causal View of Entity Bias in (Large) Language Models", "track": "main", 
"status": "Long Findings", "tldr": "", "abstract": "Entity bias widely affects pretrained (large) language models, causing them to rely on (biased) parametric knowledge to make unfaithful predictions. Although causality-inspired methods have shown great potential to mitigate entity bias, it is hard to precisely estimate the parameters of underlying causal models in practice. The rise of black-box LLMs also makes the situation even worse, because of their inaccessible parameters and uncalibrated logits. To address these problems, we propose a specific structured causal model (SCM) whose parameters are comparatively easier to estimate. Building upon this SCM, we propose causal intervention techniques to mitigate entity bias for both white-box and black-box settings. The proposed causal intervention perturbs the original entity with neighboring entities. This intervention reduces specific biasing information pertaining to the original entity while still preserving sufficient semantic information from similar entities. Under the white-box setting, our training-time intervention improves OOD performance of PLMs on relation extraction (RE) and machine reading comprehension (MRC) by 5.7 points and by 9.1 points, respectively. Under the black-box setting, our in-context intervention effectively reduces the entity-based knowledge conflicts of GPT-3.5, achieving up to 20.5 points of improvement of exact match accuracy on MRC and up to 17.6 points of reduction in memorization ratio on RE.", "keywords": "entity bias;knowledge conflicts;causal analysis;large language models", "primary_area": "", "supplementary_material": "", "author": "Fei Wang;Wenjie Mo;Yiwei Wang;Wenxuan Zhou;Muhao Chen", "authorids": "~Fei_Wang12;~Wenjie_Mo2;~Yiwei_Wang2;~Wenxuan_Zhou2;~Muhao_Chen1", "gender": "M;M;M;M;M", "homepage": "https://feiwang96.github.io/;;;https://wzhouad.github.io/;https://muhaochen.github.io/", "dblp": "52/3194-60;;50/5889-1;;173/2608", "google_scholar": "N1O2KT8AAAAJ;;https://scholar.google.com.hk/citations?user=Sh9QvBkAAAAJ;https://scholar.google.com/citations?hl=en;k79yEZkAAAAJ", "or_profile": "~Fei_Wang12;~Wenjie_Mo2;~Yiwei_Wang2;~Wenxuan_Zhou2;~Muhao_Chen1", "aff": "University of Southern California;University of Southern California;National University of Singapore;University of Southern California;University of Southern California", "aff_domain": "usc.edu;usc.edu;u.nus.edu;usc.edu;usc.edu", "position": "PhD student;Undergrad student;PhD student;PhD student;Assistant Research Professor", "bibtex": "@inproceedings{\nwang2023a,\ntitle={A Causal View of Entity Bias in (Large) Language Models},\nauthor={Fei Wang and Wenjie Mo and Yiwei Wang and Wenxuan Zhou and Muhao Chen},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=2mxzS2Xv2e}\n}", "github": "", "project": "", "reviewers": "zF98;r268;wJyF", "site": "https://openreview.net/forum?id=2mxzS2Xv2e", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;3;2", "excitement": "3;3;4", "reproducibility": "3;3;4", "correctness": "3;3;4", "rating_avg": 3.0, "confidence_avg": 2.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;0000-0003-0118-3147", "linkedin": ";jacky-mo-b5827a213/;;;", "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "University of Southern 
California;National University of Singapore", "aff_unique_dep": ";", "aff_unique_url": "https://www.usc.edu;https://www.nus.edu.sg", "aff_unique_abbr": "USC;NUS", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Los Angeles;", "aff_country_unique_index": "0;0;1;0;0", "aff_country_unique": "United States;Singapore" }, { "id": "2prcotJejU", "title": "Prompting with Pseudo-Code Instructions", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Prompting with natural language instructions has recently emerged as a popular method of harnessing the capabilities of large language models (LLM). Given the inherent ambiguity present in natural language, it is intuitive to consider the possible advantages of prompting with less ambiguous prompt styles, like pseudo-code.\n\nIn this paper, we explore if prompting via pseudo-code instructions helps improve the performance of pre-trained language models. We manually create a dataset of pseudo-code prompts for 132 different tasks spanning classification, QA, and generative language tasks, sourced from the Super-NaturalInstructions dataset. Using these prompts along with their counterparts in natural language, we study their performance on two LLM families - BLOOM, CodeGen. Our experiments show that using pseudo-code instructions leads to better results, with an average increase (absolute) of 7-16 points in F1 scores for classification tasks and an improvement (relative) of 12-38% in aggregate ROUGE-L scores across all tasks. We include detailed ablation studies which indicate that code comments, docstrings, and the structural clues encoded in pseudo-code all contribute towards the improvement in performance.\n\nTo the best of our knowledge, our work is the first to demonstrate how pseudo-code prompts can be helpful in improving the performance of pre-trained LMs.", "keywords": "Instruction Finetuning;Pseudo-Code Instructions;Large Language Model", "primary_area": "", "supplementary_material": "", "author": "Mayank Mishra;Prince Kumar;Riyaz Ahmad Bhat;Rudra Murthy;Danish Contractor;Srikanth G. Tamilselvam", "authorids": "~Mayank_Mishra1;~Prince_Kumar1;~Riyaz_Ahmad_Bhat1;~Rudra_Murthy1;~Danish_Contractor2;~Srikanth_G._Tamilselvam1", "gender": "M;M;M;M;M;", "homepage": "https://mayank31398.github.io/;;https://sites.google.com/site/riyazahbhat/;http://murthyrudra.github.io;https://researcher.watson.ibm.com/researcher/view.php?person=in-srikanth.tamilselvam;", "dblp": ";;146/3952;216/7282;138/3209;93/9012", "google_scholar": "YsbtW6cAAAAJ;8qDwuyEAAAAJ;BGC4b-sAAAAJ;5bjj_9cAAAAJ;https://scholar.google.co.in/citations?user=cFd7pr8AAAAJ;https://scholar.google.co.uk/citations?hl=en", "or_profile": "~Mayank_Mishra1;~Prince_Kumar1;~Riyaz_Ahmad_Bhat1;~Rudra_Murthy1;~Srikanth_G._Tamilselvam1;~Danish_Contractor1", "aff": "International Business Machines;International Business Machines;International Business Machines;IBM India Pvt Ltd;International Business Machines;International Business Machines", "aff_domain": "ibm.com;ibm.com;ibm.com;in.ibm.com;ibm.com;ibm.com", "position": "Researcher;Researcher;Researcher;Researcher;Researcher;Principal Researcher", "bibtex": "@inproceedings{\nmishra2023prompting,\ntitle={Prompting with Pseudo-Code Instructions},\nauthor={Mayank Mishra and Prince Kumar and Riyaz Ahmad Bhat and Rudra Murthy and Danish Contractor and Srikanth G. 
Tamilselvam},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=2prcotJejU}\n}", "github": "", "project": "", "reviewers": "FjEd;wLJ1;ZMgi;45A7", "site": "https://openreview.net/forum?id=2prcotJejU", "pdf_size": 0, "rating": "5;5;5;5", "confidence": "4;4;4;3", "excitement": "4;4;4;4", "reproducibility": "4;4;4;5", "correctness": "3;4;4;3", "rating_avg": 5.0, "confidence_avg": 3.75, "excitement_avg": 4.0, "reproducibility_avg": 4.25, "correctness_avg": 3.5, "replies_avg": 13, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0000-0002-6236-1931;;", "linkedin": "mayank31398;prince-k-u-m-a-r/;riyaz-a-bhat-51828423/;;srikanth-tamilselvam-913a2ab/;", "aff_unique_index": "0;0;0;1;0;0", "aff_unique_norm": "International Business Machines Corporation;IBM", "aff_unique_dep": ";IBM India Pvt Ltd", "aff_unique_url": "https://www.ibm.com;https://www.ibm.com/in-en", "aff_unique_abbr": "IBM;IBM India", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;0;0", "aff_country_unique": "United States;India" }, { "id": "2qKRa94sow", "title": "Connecting degree and polarity: An artificial language learning study", "track": "main", "status": "Long Main", "tldr": "", "abstract": "We investigate a new linguistic generalisation in pre-trained language models (taking BERT Devlin et al. 2019 as a case study). We focus on degree modifiers (expressions like slightly, very, rather, extremely) and test the hypothesis that the degree expressed by a modifier (low, medium or high degree) is related to the modifier\u2019s sensitivity to sentence polarity (whether it shows preference for affirmative or negative sentences or neither). To probe this connection, we apply the Artificial Language Learning experimental paradigm from psycholinguistics to a neural language model. 
Our experimental results suggest that BERT generalizes in line with existing linguistic observations that relate degree semantics to polarity sensitivity, including the main one: low degree semantics is associated with preference towards positive polarity.", "keywords": "semantics;degree;polarity;artificial language learning", "primary_area": "", "supplementary_material": "", "author": "Lisa Bylinina;Alexey Tikhonov;Ekaterina Garmash", "authorids": "~Lisa_Bylinina1;~Alexey_Tikhonov1;~Ekaterina_Garmash1", "gender": ";;F", "homepage": ";;", "dblp": ";;153/9512", "google_scholar": ";;", "or_profile": "~Lisa_Bylinina1;~Alexey_Tikhonov1;~Ekaterina_Garmash1", "aff": ";;Spotify", "aff_domain": ";;spotify.com", "position": ";;Researcher", "bibtex": "@inproceedings{\nbylinina2023connecting,\ntitle={Connecting degree and polarity: An artificial language learning study},\nauthor={Lisa Bylinina and Alexey Tikhonov and Ekaterina Garmash},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=2qKRa94sow}\n}", "github": "", "project": "", "reviewers": "DQ6w;tJwV;zJs2", "site": "https://openreview.net/forum?id=2qKRa94sow", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;5", "excitement": "4;3;4", "reproducibility": "4;2;3", "correctness": "4;3;4", "rating_avg": 4.0, "confidence_avg": 4.333333333333333, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.0, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": ";;ekaterina-garmash-96670130", "aff_unique_index": "0", "aff_unique_norm": "Spotify", "aff_unique_dep": "", "aff_unique_url": "https://www.spotify.com", "aff_unique_abbr": "Spotify", "aff_country_unique_index": "0", "aff_country_unique": "Sweden" }, { "id": "2wFVkTDGOZ", "title": "Emptying the Ocean with a Spoon: Should We Edit Models?", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "We call into question the recently popularized method of direct model editing as a means of correcting factual errors in LLM generations. We contrast model editing with three similar but distinct approaches that pursue better defined objectives: (1) retrieval-based architectures, which decouple factual memory from inference and linguistic capabilities embodied in LLMs; (2) concept erasure methods, which aim at preventing systemic bias in generated text; and (3) attribution methods, which aim at grounding generations into identified textual sources. We argue that direct model editing cannot be trusted as a systematic remedy for the disadvantages inherent to LLMs, and while it has proven potential in improving model explainability, it opens risks by reinforcing the notion that models can be trusted for factuality. 
We call for cautious promotion and application of model editing as part of the LLM deployment process, and for responsibly limiting the use cases of LLMs to those not relying on editing as a critical component.", "keywords": "Model Editing;LLMs;Factual Knowledge;Continual Learning;Knowledge Representation;Opinion Paper", "primary_area": "", "supplementary_material": "", "author": "Yuval Pinter;Michael Elhadad", "authorids": "~Yuval_Pinter1;~Michael_Elhadad1", "gender": "M;M", "homepage": "http://www.yuvalpinter.com;https://www.cs.bgu.ac.il/~elhadad/", "dblp": "153/5384;69/1744", "google_scholar": "aYAcXccAAAAJ;https://scholar.google.com.tw/citations?user=Is0pLz0AAAAJ", "or_profile": "~Yuval_Pinter1;~Michael_Elhadad1", "aff": "Amazon Science;Ben Gurion University of the Negev", "aff_domain": "amazon.com;bgu.ac.il", "position": "Visiting Academic;Full Professor", "bibtex": "@inproceedings{\npinter2023emptying,\ntitle={Emptying the Ocean with a Spoon: Should We Edit Models?},\nauthor={Yuval Pinter and Michael Elhadad},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=2wFVkTDGOZ}\n}", "github": "", "project": "", "reviewers": "HGZK;UmZH;Y4br;8HBp", "site": "https://openreview.net/forum?id=2wFVkTDGOZ", "pdf_size": 0, "rating": "4;4;4;4", "confidence": "4;4;4;5", "excitement": "2;2;3;4", "reproducibility": "", "correctness": "2;2;3;4", "rating_avg": 4.0, "confidence_avg": 4.25, "excitement_avg": 2.75, "reproducibility_avg": 0, "correctness_avg": 2.75, "replies_avg": 13, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0003-3174-1621;0000-0002-5629-2351", "linkedin": "yuvalpinter;", "aff_unique_index": "0;1", "aff_unique_norm": "Amazon;Ben Gurion University of the Negev", "aff_unique_dep": "Amazon Science;", "aff_unique_url": "https://www.amazon.science;https://www.bgu.ac.il", "aff_unique_abbr": "Amazon Science;BGU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "United States;Israel" }, { "id": "2z4s0W375H", "title": "Tuna: Instruction Tuning using Feedback from Large Language Models", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Instruction tuning of open-source large language models (LLMs) like LLaMA, using direct outputs from more powerful LLMs such as Instruct-GPT and GPT-4, has proven to be a cost-effective way to align model behaviors with human preferences.\nHowever, the instruction-tuned model has only seen one response per instruction, lacking the knowledge of potentially better responses.\nIn this paper, we propose finetuning an instruction-tuned LLM using our novel probabilistic ranking and contextual ranking approaches to increase the likelihood of generating better responses.\nProbabilistic ranking enables the instruction-tuned model to inherit the relative rankings of high-quality and low-quality responses from the teacher LLM.\nOn the other hand, learning with contextual ranking allows the model to refine its own response distribution using the contextual understanding ability of stronger LLMs.\nFurthermore, we apply probabilistic ranking and contextual ranking sequentially to the instruction-tuned LLM.\nThe resulting model, which we call Tuna, consistently improves the performance on Super Natural Instructions (119 test tasks), LMentry (25 test tasks), Vicuna QA, and can even obtain better results than several strong reinforcement learning baselines. 
Our code and data are available at https://github.com/microsoft/LMOps.", "keywords": "Large Language Models; Instruction Tuning", "primary_area": "", "supplementary_material": "", "author": "Haoran Li;Yiran Liu;Xingxing Zhang;Wei Lu;Furu Wei", "authorids": "~Haoran_Li4;~Yiran_Liu1;~Xingxing_Zhang1;~Wei_Lu10;~Furu_Wei1", "gender": "M;M;M;M;M", "homepage": "https://statnlp-research.github.io/;https://xingxingzhang.github.io/;https://www.microsoft.com/en-us/research/people/fuwei/;https://istd.sutd.edu.sg/people/faculty/lu-wei;", "dblp": ";59/9985-2.html;72/5870;98/6613-11.html;", "google_scholar": "X5QwHqwAAAAJ;5yX53usAAAAJ;G-V1VpwAAAAJ;n41KN9AAAAAJ;", "or_profile": "~Haoran_Li4;~Xingxing_Zhang1;~Furu_Wei1;~Wei_Lu9;~Liu_Yiran2", "aff": "Singapore University of Technology and Design;Microsoft Research Asia;Microsoft Research;Singapore University of Technology and Design;Tsinghua University", "aff_domain": "sutd.edu.sg;microsoft.com;microsoft.com;sutd.edu.sg;tsinghua.edu.cn", "position": "PhD student;Researcher;Distinguished Scientist;Associate Professor;PhD student", "bibtex": "@inproceedings{\nli2023tuna,\ntitle={Tuna: Instruction Tuning using Feedback from Large Language Models},\nauthor={Haoran Li and Yiran Liu and Xingxing Zhang and Wei Lu and Furu Wei},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=2z4s0W375H}\n}", "github": "", "project": "", "reviewers": "ANTd;eUe6;Eb1g", "site": "https://openreview.net/forum?id=2z4s0W375H", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;5;4", "excitement": "3;3;4", "reproducibility": "2;4;4", "correctness": "3;3;4", "rating_avg": 3.0, "confidence_avg": 4.333333333333333, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.3333333333333335, "replies_avg": 10, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0000-0003-0827-0382;0000-0003-3450-7881", "linkedin": ";;;wei-lu-59aa9615/;", "aff_unique_index": "0;1;1;0;2", "aff_unique_norm": "Singapore University of Technology and Design;Microsoft;Tsinghua University", "aff_unique_dep": ";Research;", "aff_unique_url": "https://www.sutd.edu.sg;https://www.microsoft.com/en-us/research/group/asia;https://www.tsinghua.edu.cn", "aff_unique_abbr": "SUTD;MSR Asia;THU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Asia", "aff_country_unique_index": "0;1;2;0;1", "aff_country_unique": "Singapore;China;United States" }, { "id": "2z9o8bMQNd", "title": "Conversation Understanding using Relational Temporal Graph Neural Networks with Auxiliary Cross-Modality Interaction", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Emotion recognition is a crucial task for human conversation understanding. It becomes more challenging with the notion of multimodal data, e.g., language, voice, and facial expressions. As a typical solution, the global- and the local context information are exploited to predict the emotional label for every single sentence, i.e., utterance, in the dialogue. Specifically, the global representation could be captured via modeling of cross-modal interactions at the conversation level. The local one is often inferred using the temporal information of speakers or emotional shifts, which neglects vital factors at the utterance level. Additionally, most existing approaches take fused features of multiple modalities in a unified input without leveraging modality-specific representations. 
Motivated by these problems, we propose the Relational Temporal Graph Neural Network with Auxiliary Cross-Modality Interaction (CORECT), a novel neural network framework that effectively captures conversation-level cross-modality interactions and utterance-level temporal dependencies in a modality-specific manner for conversation understanding. Extensive experiments demonstrate the effectiveness of CORECT via its state-of-the-art results on the IEMOCAP and CMU-MOSEI datasets for the multimodal ERC task.", "keywords": "Multimodal Emotion Recognition;Relational Temporal GNNs;Conversation Understanding;Pairwise Cross Modality", "primary_area": "", "supplementary_material": "", "author": "Cam Van Thi Nguyen;Tuan Anh Mai;Son Le The;Dang Hai Kieu;Duc-Trong Le", "authorids": "~Cam_Van_Thi_Nguyen1;~Tuan_Anh_Mai1;~Son_Le_The1;~Dang_Hai_Kieu1;~Duc-Trong_Le1", "gender": "F;M;;M;M", "homepage": ";;;;", "dblp": "238/1332;;;;155/5277", "google_scholar": "https://scholar.google.com/citations?hl=en;;;https://scholar.google.com.au/citations?user=_25LJb4AAAAJ;https://scholar.google.com.vn/citations?user=irb6x1cAAAAJ", "or_profile": "~Cam_Van_Thi_Nguyen1;~Tuan_Anh_Mai1;~Son_Le_The1;~Dang_Hai_Kieu1;~Duc-Trong_Le1", "aff": "University of Engineering and Technology, Vietnam National University;Vietnam National University Hanoi;VNU University of Engineering and Technology;VinUniversity;Vietnam National University, Hanoi", "aff_domain": "vnu.edu.vn;vnu.edu.vn;uet.vnu.edu.vn;vinuni.edu.vn;vnu.edu.vn", "position": "PhD student;Undergrad student;Undergrad student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nnguyen2023conversation,\ntitle={Conversation Understanding using Relational Temporal Graph Neural Networks with Auxiliary Cross-Modality Interaction},\nauthor={Cam Van Thi Nguyen and Tuan Anh Mai and Son Le The and Dang Hai Kieu and Duc-Trong Le},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=2z9o8bMQNd}\n}", "github": "", "project": "", "reviewers": "Xeu3;YYyo;bUo5", "site": "https://openreview.net/forum?id=2z9o8bMQNd", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;4;4", "excitement": "3;4;4", "reproducibility": "3;4;4", "correctness": "3;4;4", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.6666666666666665, "replies_avg": 10, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0009-0001-9675-2105;;;0000-0001-6743-088X;", "linkedin": ";tu\u1ea5n-mai-anh-358206226/;leson502/;;", "aff_unique_index": "0;1;2;3;4", "aff_unique_norm": "University of Engineering and Technology;Vietnam National University, Hanoi;VNU University of Engineering and Technology;VinUniversity;Vietnam National University", "aff_unique_dep": ";;;;", "aff_unique_url": "https://uet.vnu.edu.vn;https://www.vnu.edu.vn;https://uet.vnu.edu.vn;https://vinuni.edu.vn;https://www.vnu.edu.vn", "aff_unique_abbr": "UET;VNU;VNU UET;VinUni;VNU", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Hanoi", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "Vietnam" }, { "id": "30kbnyD9hF", "title": "Exchange-of-Thought: Enhancing Large Language Model Capabilities through Cross-Model Communication", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Large Language Models (LLMs) have recently made significant strides in complex reasoning tasks through the 
Chain-of-Thought technique. Despite this progress, their reasoning is often constrained by their intrinsic understanding, lacking external insights. To address this, we propose Exchange-of-Thought (EoT), a novel framework that enables cross-model communication during problem-solving. Drawing inspiration from network topology, EoT integrates four unique communication paradigms: Memory, Report, Relay, and Debate. This paper delves into the communication dynamics and volume associated with each paradigm. To counterbalance the risks of incorrect reasoning chains, we implement a robust confidence evaluation mechanism within these communications. Our experiments across diverse complex reasoning tasks demonstrate that EoT significantly surpasses established baselines, underscoring the value of external insights in enhancing LLM performance. Furthermore, we show that EoT achieves these superior results in a cost-effective manner, marking a promising advancement for efficient and collaborative AI problem-solving.", "keywords": "Large Language Models; Model Communication; Chain-of-Thought", "primary_area": "", "supplementary_material": "", "author": "Zhangyue Yin;Qiushi Sun;Cheng Chang;Qipeng Guo;Junqi Dai;Xuanjing Huang;Xipeng Qiu", "authorids": "~Zhangyue_Yin1;~Qiushi_Sun1;~Cheng_Chang4;~Qipeng_Guo1;~Junqi_Dai1;~Xuanjing_Huang1;~Xipeng_Qiu1", "gender": "M;M;M;M;;F;M", "homepage": "https://yinzhangyue.github.io/;https://qiushisun.github.io/;https://github.com/MCplayerFromPRC;;;https://xuanjing-huang.github.io/;https://xpqiu.github.io/", "dblp": "314/5418;247/8469;;172/1046;;05/6735-1;69/1395", "google_scholar": "9gRQqSkAAAAJ;QgMkYFAAAAAJ;;k3mPGKgAAAAJ;;RGsMgZA4H78C;Pq4Yp_kAAAAJ", "or_profile": "~Zhangyue_Yin1;~Qiushi_Sun1;~Cheng_Chang4;~Qipeng_Guo1;~Junqi_Dai1;~Xuanjing_Huang1;~Xipeng_Qiu1", "aff": "Fudan University;Institute of infocomm research, A*STAR;Fudan University;Amazon;;Fudan University;Fudan University", "aff_domain": "fudan.edu.cn;i2r.a-star.edu.sg;fudan.edu.cn;amazon.com;;fudan.edu.cn;fudan.edu.cn", "position": "PhD student;Intern;MS student;Researcher;;Full Professor;Full Professor", "bibtex": "@inproceedings{\nyin2023exchangeofthought,\ntitle={Exchange-of-Thought: Enhancing Large Language Model Capabilities through Cross-Model Communication},\nauthor={Zhangyue Yin and Qiushi Sun and Cheng Chang and Qipeng Guo and Junqi Dai and Xuanjing Huang and Xipeng Qiu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=30kbnyD9hF}\n}", "github": "", "project": "", "reviewers": "u6cP;HLJV;UcTV", "site": "https://openreview.net/forum?id=30kbnyD9hF", "pdf_size": 0, "rating": "4;4;4", "confidence": "2;4;4", "excitement": "4;3;4", "reproducibility": "3;4;4", "correctness": "4;3;4", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-5207-818X;;;;0000-0001-9197-9426;0000-0001-7163-5247", "linkedin": "zhangyue-yin-083286288/;qiushi-sun/;;;;;", "aff_unique_index": "0;1;0;2;0;0", "aff_unique_norm": "Fudan University;Institute of Infocomm Research;Amazon", "aff_unique_dep": ";;Amazon.com, Inc.", "aff_unique_url": "https://www.fudan.edu.cn;https://www.i2r.a-star.edu.sg;https://www.amazon.com", "aff_unique_abbr": "Fudan;I2R;Amazon", "aff_campus_unique_index": "", 
"aff_campus_unique": "", "aff_country_unique_index": "0;1;0;2;0;0", "aff_country_unique": "China;Singapore;United States" }, { "id": "33aJCNQV1C", "title": "A linear time approximation of Wasserstein distance with word embedding selection", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Wasserstein distance, which can be computed by solving the optimal transport problem, is a powerful method for measuring the dissimilarity between documents. In the NLP community, it is referred to as word mover's distance (WMD). One of the key challenges of Wasserstein distance is its computational cost since it needs cubic time. Although the Sinkhorn algorithm is a powerful tool to speed up to compute the Wasserstein distance, it still requires square time. Recently, a linear time approximation of the Wasserstein distance including the sliced Wasserstein and the tree-Wasserstein distance (TWD) has been proposed. However, a linear time approximation method suffers when the dimensionality of word vectors is high. In this study, we propose a method to combine feature selection and tree approximation of Wasserstein distance to handle high-dimensional problems. More specifically, we use multiple word embeddings and automatically select useful word embeddings in a tree approximation of Wasserstein distance. To this end, we approximate Wasserstein distance for each word vector by tree approximation technique, and select the discriminative (i.e., large Wasserstein distance) word embeddings by solving an entropic regularized maximization problem. Through our experiments on document classification, our proposed method achieved high performance.", "keywords": "optimal transport;group feature selection;document classification;word embedding", "primary_area": "", "supplementary_material": "", "author": "Sho Otao;Makoto Yamada", "authorids": "~Sho_Otao1;~Makoto_Yamada3", "gender": "M;M", "homepage": "https://shou9.github.io/;https://groups.oist.jp/mlds", "dblp": ";56/4937", "google_scholar": ";1cKNu1gAAAAJ", "or_profile": "~Sho_Otao1;~Makoto_Yamada3", "aff": "Kyoto University;Kyoto University", "aff_domain": "kyoto-u.ac.jp;kyoto-u.ac.jp", "position": "MS student;Associate Professor", "bibtex": "@inproceedings{\notao2023a,\ntitle={A linear time approximation of Wasserstein distance with word embedding selection},\nauthor={Sho Otao and Makoto Yamada},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=33aJCNQV1C}\n}", "github": "", "project": "", "reviewers": "2jWw;yRiE;ybsA", "site": "https://openreview.net/forum?id=33aJCNQV1C", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;4;2", "excitement": "4;3;4", "reproducibility": "3;4;4", "correctness": "3;3;3", "rating_avg": 4.0, "confidence_avg": 3.0, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";", "linkedin": ";", "aff_unique_index": "0;0", "aff_unique_norm": "Kyoto University", "aff_unique_dep": "", "aff_unique_url": "https://www.kyoto-u.ac.jp", "aff_unique_abbr": "Kyoto U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Japan" }, { "id": "377w7agYKC", "title": "CoRec: An Easy Approach for Coordination Recognition", "track": "main", "status": "Short Main", "tldr": "", "abstract": "In this paper, we observe and address the challenges 
of the coordination recognition task. Most existing methods rely on syntactic parsers to identify the coordinators in a sentence and detect the coordination boundaries. However, state-of-the-art syntactic parsers are slow and suffer from errors, especially for long and complicated sentences. To better solve the problems, we propose a pipeline model COordination RECognizer (CoRec). It consists of two components: coordinator identifier and conjunct boundary detector. The experimental results on datasets from various domains demonstrate the effectiveness and efficiency of the proposed method. Further experiments show that CoRec positively impacts downstream tasks, improving the yield of state-of-the-art Open IE models.", "keywords": "coordination recognition;shallow parsing", "primary_area": "", "supplementary_material": "", "author": "Qing Wang;Haojie Jia;Wenfei Song;Qi Li", "authorids": "~Qing_Wang15;~Haojie_Jia2;~Wenfei_Song1;~Qi_Li14", "gender": "F;M;F;M", "homepage": "https://www.cs.iastate.edu/people/qing-wang;https://www.cs.iastate.edu/wsong;https://sites.google.com/iastate.edu/qili/;https://yinxiangshi.github.io/", "dblp": ";;181/2688-12;", "google_scholar": "jY7bx4gAAAAJ;;Gvld0foAAAAJ;", "or_profile": "~Qing_Wang15;~Wenfei_Song1;~Qi_Li14;~haojie_jia1", "aff": "Iowa State University;Iowa State University;Iowa State University;Iowa State University", "aff_domain": "iastate.edu;iastate.edu;iastate.edu;iastate.edu", "position": "PhD student;MS student;Assistant Professor;Undergrad student", "bibtex": "@inproceedings{\nwang2023corec,\ntitle={CoRec: An Easy Approach for Coordination Recognition},\nauthor={Qing Wang and Haojie Jia and Wenfei Song and Qi Li},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=377w7agYKC}\n}", "github": "", "project": "", "reviewers": "12p3;r3Vf;4paW", "site": "https://openreview.net/forum?id=377w7agYKC", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;3;3", "excitement": "3;4;3", "reproducibility": "3;5;4", "correctness": "4;5;4", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.0, "correctness_avg": 4.333333333333333, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0002-3136-2157;", "linkedin": ";;;haojie-jia-486b2220b/", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Iowa State University", "aff_unique_dep": "", "aff_unique_url": "https://www.iastate.edu", "aff_unique_abbr": "ISU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "38k1q1yyCe", "title": "Crossing the Threshold: Idiomatic Machine Translation through Retrieval Augmentation and Loss Weighting", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Idioms are common in everyday language, but often pose a challenge to translators because their meanings do not follow from the meanings of their parts. Despite significant advances, machine translation systems still struggle to translate idiomatic expressions. We provide a simple characterization of idiomatic translation and related issues. This allows us to conduct a synthetic experiment revealing a tipping point at which transformer-based machine translation models correctly default to idiomatic translations. 
To expand multilingual resources, we compile a dataset of ~4k natural sentences containing idiomatic expressions in French, Finnish, and Japanese. To improve translation of natural idioms, we introduce two straightforward yet effective techniques: the strategic upweighting of training loss on potentially idiomatic sentences, and using retrieval-augmented models. This not only improves the accuracy of a strong pretrained MT model on idiomatic sentences by up to 13\\% in absolute accuracy, but also holds potential benefits for non-idiomatic sentences.", "keywords": "machine translation;idioms;multi-word expressions;retrieval-based machine translation", "primary_area": "", "supplementary_material": "", "author": "Emmy Liu;Aditi Chaudhary;Graham Neubig", "authorids": "~Emmy_Liu1;~Aditi_Chaudhary1;~Graham_Neubig1", "gender": "F;;M", "homepage": "https://nightingal3.github.io/;;http://phontron.com", "dblp": "249/6997;225/7684;03/8155", "google_scholar": ";iNuUxiwAAAAJ;wlosgkoAAAAJ", "or_profile": "~Emmy_Liu1;~Aditi_Chaudhary1;~Graham_Neubig1", "aff": "School of Computer Science, Carnegie Mellon University;Google;Carnegie Mellon University", "aff_domain": "cs.cmu.edu;google.com;cmu.edu", "position": "PhD student;Researcher;Associate Professor", "bibtex": "@inproceedings{\nliu2023crossing,\ntitle={Crossing the Threshold: Idiomatic Machine Translation through Retrieval Augmentation and Loss Weighting},\nauthor={Emmy Liu and Aditi Chaudhary and Graham Neubig},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=38k1q1yyCe}\n}", "github": "", "project": "", "reviewers": "23HH;dV8h;ZCwa;3Wyz", "site": "https://openreview.net/forum?id=38k1q1yyCe", "pdf_size": 0, "rating": "4;4;4;4", "confidence": "4;4;4;3", "excitement": "3;2;4;4", "reproducibility": "3;4;4;5", "correctness": "4;2;4;3", "rating_avg": 4.0, "confidence_avg": 3.75, "excitement_avg": 3.25, "reproducibility_avg": 4.0, "correctness_avg": 3.25, "replies_avg": 14, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": ";;", "aff_unique_index": "0;1;0", "aff_unique_norm": "Carnegie Mellon University;Google", "aff_unique_dep": "School of Computer Science;Google", "aff_unique_url": "https://www.cmu.edu;https://www.google.com", "aff_unique_abbr": "CMU;Google", "aff_campus_unique_index": "0;1", "aff_campus_unique": "Pittsburgh;Mountain View;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "3AxESAk0Re", "title": "STAIR: Learning Sparse Text and Image Representation in Grounded Tokens", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Image and text retrieval is one of the foundational tasks in the vision and language domain with multiple real-world applications. State-of-the-art contrastive approaches, e.g. CLIP, ALIGN, represent images and texts as dense embeddings and calculate the similarity in the dense embedding space as the matching score. On the other hand, sparse semantic features like bag-of-words models are more interpretable, but believed to suffer from inferior accuracy than dense representations. In this work, we show that it is possible to build a sparse semantic representation that is as powerful as, or even better than, dense presentations. We extend the CLIP model and build a sparse text and image representation (STAIR), where the image and text are mapped to a sparse token space. 
Each token in the space is a (sub-)word in the vocabulary, which is not only interpretable but also easy to integrate with existing information retrieval systems. STAIR model significantly outperforms a CLIP model with +$4.9\\%$ and +$4.3\\%$ absolute Recall@1 improvement on COCO-5k text$\\rightarrow$image and image$\\rightarrow$text retrieval respectively. It also achieved better performance on both of ImageNet zero-shot and linear probing compared to CLIP.", "keywords": "Image text retrieval;sparse embedding;interpretability", "primary_area": "", "supplementary_material": "", "author": "Chen Chen;Bowen Zhang;Liangliang Cao;Jiguang Shen;Tom Gunter;Albin Madappally Jose;Alexander T Toshev;Yantao Zheng;Jonathon Shlens;Ruoming Pang;Yinfei Yang", "authorids": "~Chen_Chen38;~Bowen_Zhang2;~Liangliang_Cao1;~Jiguang_Shen1;~Tom_Gunter1;~Albin_Madappally_Jose1;~Alexander_T_Toshev1;~Yantao_Zheng1;~Jonathon_Shlens1;~Ruoming_Pang2;~Yinfei_Yang1", "gender": ";M;M;M;M;M;;M;;;", "homepage": "https://github.com/alex8937;https://zbwglory.github.io;http://llcao.net;;;;;;;;", "dblp": ";85/7433-2;95/6915;;56/3385;338/9642;;32/4440;;;117/4082", "google_scholar": ";nI3cKV8AAAAJ;S-hBSfIAAAAJ;D-dGY1QAAAAJ;091Onx0AAAAJ;za_orHUAAAAJ;;;;1fsmwB8AAAAJ;kvDbu90AAAAJ", "or_profile": "~Chen_Chen38;~Bowen_Zhang2;~Liangliang_Cao1;~Jiguang_Shen1;~Tom_Gunter1;~Albin_Madappally_Jose1;~Alexander_T_Toshev1;~Yantao_Zheng1;~Jonathon_Shlens1;~Ruoming_Pang2;~Yinfei_Yang1", "aff": "Apple;Apple;Apple;;Apple;Apple;;Apple;;Apple;Apple", "aff_domain": "apple.com;apple.com;apple.com;;apple.com;apple.com;;apple.com;;apple.com;apple.com", "position": "Researcher;Research Scientist;Principal Researcher;;Researcher;Researcher;;Researcher;;Researcher;Researcher", "bibtex": "@inproceedings{\nchen2023stair,\ntitle={{STAIR}: Learning Sparse Text and Image Representation in Grounded Tokens},\nauthor={Chen Chen and Bowen Zhang and Liangliang Cao and Jiguang Shen and Tom Gunter and Albin Madappally Jose and Alexander T Toshev and Yantao Zheng and Jonathon Shlens and Ruoming Pang and Yinfei Yang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=3AxESAk0Re}\n}", "github": "", "project": "", "reviewers": "9dcF;GqY9;gpBi", "site": "https://openreview.net/forum?id=3AxESAk0Re", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;4;5", "excitement": "4;4;4", "reproducibility": "3;4;3", "correctness": "4;3;4", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 4.0, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 11, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;;;;;;", "linkedin": "chen-8937/;;liangliangcao/;;;albin-m-jose;;;;;", "aff_unique_index": "0;0;0;0;0;0;0;0", "aff_unique_norm": "Apple", "aff_unique_dep": "Apple Inc.", "aff_unique_url": "https://www.apple.com", "aff_unique_abbr": "Apple", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "3CIQIYNGlp", "title": "Exploring the Impact of Model Scaling on Parameter-Efficient Tuning", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Parameter-efficient tuning (PET) methods can effectively drive extremely large pre-trained language models (PLMs) by training only minimal parameters. Different PET methods utilize different manually designed tunable modules. 
In small PLMs, there are usually noticeable performance differences among PET methods. Nevertheless, as the model scale increases, the performance differences become marginal. Hence, we hypothesize that model scaling mitigates the impact of design differences on PET methods. To investigate this hypothesis, we introduce a more flexible PET method called Arbitrary PET (APET) method. The APET method is compatible with a tunable module, which consists of any number of parameters distributed in arbitrary positions. Then, we utilize it and conduct experiments on $11$ NLP tasks across $3$ representative PLMs. Our investigations reveal that model scaling (1) mitigates the effects of the positions of tunable parameters on performance, and (2) enables tuning methods to achieve performance comparable to full-parameter fine-tuning by optimizing fewer tunable parameters. Intriguingly, we also observe that tuning methods optimize the similar number of tunable parameters to exceed random guess performance on different tasks. We collectively discuss this phenomenon and the two aforementioned findings from an optimization perspective to understand the underlying mechanisms. These conclusions enhance our understanding of the impact of model scaling on PET and assist in designing more effective and efficient PET methods for PLMs of different scales. The source code can be obtained from this GitHub repository: \\url{https://github.com/yushengsu-thu/PET_Scaling}.", "keywords": "Parameter-efficient fine-tuning;Pre-trained language model", "primary_area": "", "supplementary_material": "", "author": "Yusheng Su;Chi-Min Chan;Jiali Cheng;Yujia Qin;Yankai Lin;Shengding Hu;Zonghan Yang;Ning Ding;Xingzhi Sun;Guotong Xie;Zhiyuan Liu;Maosong Sun", "authorids": "~Yusheng_Su1;~Chi-Min_Chan1;~Jiali_Cheng1;~Yujia_Qin1;~Yankai_Lin1;~Shengding_Hu2;~Zonghan_Yang1;~Ning_Ding5;~Xingzhi_Sun1;~Guotong_Xie4;~Zhiyuan_Liu1;~Maosong_Sun1", "gender": "M;;;M;M;;M;M;M;;M;M", "homepage": "https://yushengsu-thu.github.io/;;;https://yujia-qin.github.io/;https://linyankai.github.io/;;https://minicheshire.github.io/;https://www.stingning.cn/;;;http://nlp.csai.tsinghua.edu.cn/~lzy;https://www.cs.tsinghua.edu.cn/csen/info/1312/4394.htm", "dblp": "260/5558;;;126/2333;161/0001.html;;222/7860;;49/2892;;53/3245-1;95/3291-1", "google_scholar": "xwy6Va4AAAAJ;;;;https://scholar.google.com.hk/citations?user=j8K1FqEAAAAJ;;rt9HOIUAAAAJ;uZXQuYAAAAAJ;;;dT0v5u0AAAAJ;https://scholar.google.com.tw/citations?user=zIgT0HMAAAAJ", "or_profile": "~Yusheng_Su1;~Chi-Min_Chan1;~Jiali_Cheng1;~Yujia_Qin1;~Yankai_Lin1;~Shengding_Hu2;~Zonghan_Yang1;~Ning_Ding5;~Xingzhi_Sun1;~Guotong_Xie4;~Zhiyuan_Liu1;~Maosong_Sun1", "aff": "Tsinghua University;;;Tsinghua University;Renmin University of China;;Department of Computer Science and Technology, Tsinghua University;Tsinghua University;;;Tsinghua University;Tsinghua University", "aff_domain": "tsinghua.edu.cn;;;tsinghua.edu.cn;ruc.edu.cn;;cs.tsinghua.edu.cn;tsinghua.edu.cn;;;tsinghua.edu.cn;tsinghua.edu.cn", "position": "PhD student;;;PhD student;Assistant Professor;;PhD student;PhD student;;;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nsu2023exploring,\ntitle={Exploring the Impact of Model Scaling on Parameter-Efficient Tuning},\nauthor={Yusheng Su and Chi-Min Chan and Jiali Cheng and Yujia Qin and Yankai Lin and Shengding Hu and Zonghan Yang and Ning Ding and Xingzhi Sun and Guotong Xie and Zhiyuan Liu and Maosong Sun},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language 
Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=3CIQIYNGlp}\n}", "github": "", "project": "", "reviewers": "rQAh;fvk4;8CW5", "site": "https://openreview.net/forum?id=3CIQIYNGlp", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;4;4", "excitement": "2;3;4", "reproducibility": "4;4;4", "correctness": "3;4;3", "rating_avg": 5.0, "confidence_avg": 4.0, "excitement_avg": 3.0, "reproducibility_avg": 4.0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 12, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0001-9509-9573;;;;0000-0002-9182-8158;;;;0000-0002-6519-0197;;0000-0002-7709-2543;", "linkedin": ";;;yujia-qin-672595181/;;;;;;;;", "aff_unique_index": "0;0;1;0;0;0;0", "aff_unique_norm": "Tsinghua University;Renmin University of China", "aff_unique_dep": ";", "aff_unique_url": "https://www.tsinghua.edu.cn;http://www.ruc.edu.cn", "aff_unique_abbr": "THU;RUC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "3EcjsgPq74", "title": "Towards A Unified View of Sparse Feed-Forward Network in Pretraining Large Language Model", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Large and sparse feed-forward layers (S-FFN) such as Mixture-of-Experts (MoE) have proven effective in scaling up Transformers model size for pretraining large language models. By only activating part of the FFN parameters conditioning on input, S-FFN improves generalization performance while keeping training and inference costs (in FLOPs) fixed. In this work, we analyzed two major design choices of S-FFN: the memory block (a.k.a. expert) size and the memory block selection method under a general conceptual framework of sparse neural memory. Using this unified framework, we compare several S-FFN architectures for language modeling and provide insights into their relative efficacy and efficiency. 
We found a simpler selection method \u2014 Avg-K that selects blocks through their mean aggregated hidden states, achieving lower perplexity in language model pretraining compared to existing MoE architectures including Switch Transformer (Fedus et al., 2021) and HashLayer (Roller et al., 2021).", "keywords": "Mixture-of-Experts;Efficiency;Transformer;Architecture;Pretraining;LLM", "primary_area": "", "supplementary_material": "", "author": "Zeyu Liu;Tim Dettmers;Xi Victoria Lin;Veselin Stoyanov;Xian Li", "authorids": "~Zeyu_Liu1;~Tim_Dettmers2;~Xi_Victoria_Lin1;~Veselin_Stoyanov2;~Xian_Li1", "gender": "M;M;F;M;", "homepage": "https://leo-liuzy.github.io;https://timdettmers.com/;http://victorialin.net;;", "dblp": "https://dblp.uni-trier.de/pid/276/6870;172/1045;215/5264;275/3258;82/1763-3.html", "google_scholar": "TiQTvyYAAAAJ;lHI3w5kAAAAJ;gYUOJwMAAAAJ;xdfWqboAAAAJ;v_sIgawAAAAJ", "or_profile": "~Zeyu_Liu1;~Tim_Dettmers2;~Xi_Victoria_Lin1;~Veselin_Stoyanov2;~Xian_Li1", "aff": "University of Washington, Seattle;University of Washington;Meta;Meta Facebook;Facebook AI", "aff_domain": "uw.edu;cs.washington.edu;fb.com;fb.com;fb.com", "position": "MS student;PhD student;Research Scientist;Research Scientist;Principal Researcher", "bibtex": "@inproceedings{\nliu2023towards,\ntitle={Towards A Unified View of Sparse Feed-Forward Network in Pretraining Large Language Model},\nauthor={Zeyu Liu and Tim Dettmers and Xi Victoria Lin and Veselin Stoyanov and Xian Li},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=3EcjsgPq74}\n}", "github": "", "project": "", "reviewers": "J9vn;mxPW;mMBN;M6uy", "site": "https://openreview.net/forum?id=3EcjsgPq74", "pdf_size": 0, "rating": "4;4;4;4", "confidence": "2;3;3;3", "excitement": "4;4;4;3", "reproducibility": "4;4;4;3", "correctness": "3;4;4;3", "rating_avg": 4.0, "confidence_avg": 2.75, "excitement_avg": 3.75, "reproducibility_avg": 3.75, "correctness_avg": 3.5, "replies_avg": 14, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;", "linkedin": ";;xivictorialin/;;", "aff_unique_index": "0;0;1;1;1", "aff_unique_norm": "University of Washington;Meta", "aff_unique_dep": ";Meta Platforms, Inc.", "aff_unique_url": "https://www.washington.edu;https://meta.com", "aff_unique_abbr": "UW;Meta", "aff_campus_unique_index": "0", "aff_campus_unique": "Seattle;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "3ErwybEDgt", "title": "DiQAD: A Benchmark Dataset for Open-domain Dialogue Quality Assessment", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Dialogue assessment plays a critical role in the development of open-domain dialogue systems.\nExisting work is incapable of providing an end-to-end and human-epistemic assessment dataset; it only provides sub-metrics like coherence, or the dialogues are conducted between annotators, far from real user settings. 
\nIn this paper, we release a large-scale dialogue quality assessment dataset (DiQAD) for automatically assessing open-domain dialogue quality.\nSpecifically, we (1) establish the assessment criteria based on the dimensions conforming to human judgements on dialogue qualities, and (2) annotate large-scale dialogues conducted between real users based on these annotation criteria, which results in around 100,000 dialogues.\nWe conduct several experiments and report the performances of the baselines as the benchmark on DiQAD.\nThe dataset is openly accessible at \\url{https://github.com/yukunZhao/Dataset_Dialogue_quality_evaluation}.", "keywords": "Dataset;Dialogue Evaluation Dataset;Dialogue Quality Assessment;Dialogue Quality Evaluation;Dialogue Quality Benchmark;Dialogue Evaluation", "primary_area": "", "supplementary_material": "", "author": "Yukun Zhao;Lingyong Yan;Weiwei Sun;Chong Meng;Shuaiqiang Wang;Zhicong Cheng;Zhaochun Ren;Dawei Yin", "authorids": "~Yukun_Zhao1;~Lingyong_Yan1;~Weiwei_Sun9;~Chong_Meng2;~Shuaiqiang_Wang2;~Zhicong_Cheng1;~Zhaochun_Ren1;~Dawei_Yin1", "gender": ";M;;;M;M;M;M", "homepage": ";https://yanlingyong.net;https://sunnweiwei.github.io/;;http://wangshuaiqiang.net/;;https://renzhaochun.github.io/;https://www.yindawei.com/", "dblp": ";254/8048;;;16/1524;88/8024;58/10440;", "google_scholar": ";NksMJFcAAAAJ;hdUZbxgAAAAJ;;https://scholar.google.com.hk/citations?user=8SbYYcIAAAAJ;;fPcIPt0AAAAJ;GuQ9bpAAAAAJ", "or_profile": "~Yukun_Zhao1;~Lingyong_Yan1;~Weiwei_Sun9;~Chong_Meng2;~Shuaiqiang_Wang2;~Zhicong_Cheng1;~Zhaochun_Ren1;~Dawei_Yin1", "aff": ";Baidu Inc.;Shandong University;;Baidu Inc.;Baidu;Shandong University;Baidu", "aff_domain": ";baidu.com;sdu.edu.cn;;baidu.com;baidu.com;sdu.edu.cn;baidu.com", "position": ";Search Scientist;MS student;;Principal Researcher;Principal Researcher;Full Professor;Principal Researcher", "bibtex": "@inproceedings{\nzhao2023diqad,\ntitle={Di{QAD}: A Benchmark Dataset for Open-domain Dialogue Quality Assessment},\nauthor={Yukun Zhao and Lingyong Yan and Weiwei Sun and Chong Meng and Shuaiqiang Wang and Zhicong Cheng and Zhaochun Ren and Dawei Yin},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=3ErwybEDgt}\n}", "github": "", "project": "", "reviewers": "NmNL;uFZJ;JK85", "site": "https://openreview.net/forum?id=3ErwybEDgt", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;3;4", "excitement": "3;3;4", "reproducibility": "4;3;2", "correctness": "4;3;4", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.0, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 8, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;0000-0002-9212-1947;;0000-0002-9076-6565;0000-0002-0684-6205", "linkedin": ";;;;;;zhaochun-ren-460491296/?locale=nl_NL;dwyin/", "aff_unique_index": "0;1;0;0;1;0", "aff_unique_norm": "Baidu;Shandong University", "aff_unique_dep": "Baidu Inc.;", "aff_unique_url": "https://www.baidu.com;http://www.sdu.edu.cn", "aff_unique_abbr": "Baidu;SDU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "3F1qEXWKFE", "title": "PIVOINE: Instruction Tuning for Open-world Entity Profiling", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "This work considers the problem of Open-world Entity Profiling, a sub-domain of Open-world 
Information Extraction (Open-world IE). Unlike the conventional closed-world IE, Open-world IE is considered a more general situation where entities and relations could be beyond a predefined ontology. We seek to develop a large language model (LLM) that can perform Open-world Entity Profiling with instruction tuning to extract desirable entity profiles characterized by (possibly fine-grained) natural language instructions. In particular, we construct INSTRUCTOPENWIKI, a substantial instruction-tuning dataset for Open-world Entity Profiling enriched with a comprehensive corpus, extensive annotations, and diverse instructions. We finetune pretrained BLOOM models on INSTRUCTOPENWIKI and obtain PIVOINE, an LLM for Open-world Entity Profiling with strong instruction-following capabilities. Our experiments demonstrate that PIVOINE significantly outperforms traditional methods and ChatGPT-based baselines, displaying impressive generalization capabilities on both unseen instructions and out-of-ontology cases. Consequently, PIVOINE emerges as a promising solution to tackle the open-world challenge of entity profiling.", "keywords": "Large Language Model;Information Extraction;Instruction Tuning", "primary_area": "", "supplementary_material": "", "author": "Keming Lu;Xiaoman Pan;Kaiqiang Song;Hongming Zhang;Dong Yu;Jianshu Chen", "authorids": "~Keming_Lu1;~Xiaoman_Pan2;~Kaiqiang_Song2;~Hongming_Zhang2;~Dong_Yu2;~Jianshu_Chen1", "gender": "M;M;M;M;M;M", "homepage": ";http://i2u.world/kqsong/;http://www.cse.ust.hk/~hzhangal/;https://sites.google.com/view/dongyu888/;https://chenjianshu.github.io/;https://panx27.github.io/homepage/", "dblp": "65/6898.html;;;71/4598-1;11/3124;148/9210", "google_scholar": "WuD2op4AAAAJ;PHoJwakAAAAJ;i5ETuuQAAAAJ;tMY31_gAAAAJ;jQeFWdoAAAAJ;tRPF03IAAAAJ", "or_profile": "~Keming_Lu1;~Kaiqiang_Song2;~Hongming_Zhang2;~Dong_Yu2;~Jianshu_Chen1;~Xiaoman_Pan1", "aff": "Alibaba Group;Tencent AI Lab;Tencent AI Lab Seattle;Tencent AI Lab;Tencent AI Lab;Tencent AI Lab", "aff_domain": "alibaba-inc.com;tencent.com;tencent.com;tencent.com;tencent.com;tencent.com", "position": "Researcher;Senior Researcher;Researcher;Distinguished Scientist;Principal Researcher;Researcher", "bibtex": "@inproceedings{\nlu2023pivoine,\ntitle={{PIVOINE}: Instruction Tuning for Open-world Entity Profiling},\nauthor={Keming Lu and Xiaoman Pan and Kaiqiang Song and Hongming Zhang and Dong Yu and Jianshu Chen},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=3F1qEXWKFE}\n}", "github": "", "project": "", "reviewers": "VgsG;YbHk;FP8C;CAmu", "site": "https://openreview.net/forum?id=3F1qEXWKFE", "pdf_size": 0, "rating": "3;3;3;3", "confidence": "4;5;3;5", "excitement": "4;3;3;3", "reproducibility": "4;4;3;4", "correctness": "3;3;3;2", "rating_avg": 3.0, "confidence_avg": 4.25, "excitement_avg": 3.25, "reproducibility_avg": 3.75, "correctness_avg": 2.75, "replies_avg": 14, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0000-0003-0520-6844;;", "linkedin": ";;;dongyu/;;", "aff_unique_index": "0;1;1;1;1;1", "aff_unique_norm": "Alibaba Group;Tencent", "aff_unique_dep": ";Tencent AI Lab", "aff_unique_url": "https://www.alibaba.com;https://ai.tencent.com", "aff_unique_abbr": "Alibaba;Tencent AI Lab", "aff_campus_unique_index": "1", "aff_campus_unique": ";Seattle", "aff_country_unique_index": "0;0;1;0;0;0", "aff_country_unique": "China;United States" }, { "id": "3FNrGv5MKb", "title": 
"$k$NN-LM Does Not Improve Open-ended Text Generation", "track": "main", "status": "Long Main", "tldr": "", "abstract": "In this paper, we study the generation quality of interpolation-based retrieval-augmented language models (LMs). These methods, best exemplified by the $k$NN-LM, interpolate the LM's predicted distribution of the next word with a distribution formed from the most relevant retrievals for a given prefix. While the $k$NN-LM and related methods yield impressive decreases in perplexity, we discover that they do not exhibit corresponding improvements in open-ended generation quality, as measured by both automatic evaluation metrics (e.g., MAUVE) and human evaluations. Digging deeper, we find that interpolating with a retrieval distribution actually increases perplexity compared to a baseline LM for the majority of tokens in the WikiText-103 test set, even though the overall perplexity is lower due to a smaller number of tokens for which perplexity dramatically decreases after interpolation. However, when decoding a long sequence at inference time, significant improvements on this smaller subset of tokens are washed out by slightly worse predictions on most tokens. Furthermore, we discover that the entropy of the retrieval distribution increases faster than that of the base LM as the generated sequence becomes longer, which indicates that retrieval is less reliable when using model-generated text as queries (i.e., is subject to exposure bias). We hope that our analysis spurs future work on improved decoding algorithms and interpolation strategies for retrieval-augmented language models.", "keywords": "retrieval-augmented language model;text generation;text generation evaluation;kNN-LM", "primary_area": "", "supplementary_material": "", "author": "Shufan Wang;Yixiao Song;Andrew Drozdov;Aparna Garimella;Varun Manjunatha;Mohit Iyyer", "authorids": "~Shufan_Wang1;~Yixiao_Song1;~Andrew_Drozdov1;~Aparna_Garimella1;~Varun_Manjunatha1;~Mohit_Iyyer1", "gender": "M;F;M;F;M;M", "homepage": "https://people.cs.umass.edu/~shufanwang/;https://yixiao-song.github.io;http://mrdrozdov.github.io;https://research.adobe.com/person/aparna-garimella/;https://research.adobe.com/person/varun-manjunatha/;http://cs.umass.edu/~miyyer", "dblp": "192/1552;331/5829;200/8508;183/5034.html;https://dblp.org/pers/m/Manjunatha:Varun.html;148/9178", "google_scholar": ";4OgciqMAAAAJ;glt2HXQAAAAJ;Q4PJyXIAAAAJ;nO-We6sAAAAJ;rBVA5tcAAAAJ", "or_profile": "~Shufan_Wang1;~Yixiao_Song1;~Andrew_Drozdov1;~Aparna_Garimella1;~Varun_Manjunatha1;~Mohit_Iyyer1", "aff": "University of Massachusetts, Amherst;University of Massachusetts at Amherst;Department of Computer Science, University of Massachusetts, Amherst;Adobe Research;Adobe Systems;University of Massachusetts Amherst", "aff_domain": "umass.edu;umass.edu;cs.umass.edu;adobe.com;adobe.com;cs.umass.edu", "position": "PhD student;PhD student;PhD student;Researcher;Research Scientist;Assistant Professor", "bibtex": "@inproceedings{\nwang2023knnlm,\ntitle={\\$k\\${NN}-{LM} Does Not Improve Open-ended Text Generation},\nauthor={Shufan Wang and Yixiao Song and Andrew Drozdov and Aparna Garimella and Varun Manjunatha and Mohit Iyyer},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=3FNrGv5MKb}\n}", "github": "", "project": "", "reviewers": "FiTG;qeVJ;ofKA", "site": "https://openreview.net/forum?id=3FNrGv5MKb", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;4;4", "excitement": "4;4;4", 
"reproducibility": "5;4;4", "correctness": "4;4;4", "rating_avg": 5.0, "confidence_avg": 4.0, "excitement_avg": 4.0, "reproducibility_avg": 4.333333333333333, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0002-1025-5715;;;", "linkedin": ";songyixiao/;;aparna-garimella-639738110/;;", "aff_unique_index": "0;0;0;1;1;0", "aff_unique_norm": "University of Massachusetts Amherst;Adobe", "aff_unique_dep": ";Adobe Research", "aff_unique_url": "https://www.umass.edu;https://research.adobe.com", "aff_unique_abbr": "UMass Amherst;Adobe", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Amherst;", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "3I1A9xAI8S", "title": "Dr ChatGPT tell me what I want to hear: How different prompts impact health answer correctness", "track": "main", "status": "Long Main", "tldr": "", "abstract": "This paper investigates the significant impact different prompts have on the behaviour of ChatGPT when used for health information seeking. As people more and more depend on generative large language models (LLMs) like ChatGPT, it is critical to understand model behaviour under different conditions, especially for domains where incorrect answers can have serious consequences such as health. Using the TREC Misinformation dataset, we empirically evaluate ChatGPT to show not just its effectiveness but reveal that knowledge passed in the prompt can bias the model to the detriment of answer correctness. We show this occurs both for retrieve-then-generate pipelines and based on how a user phrases their question as well as the question type. This work has important implications for the development of more robust and transparent question-answering systems based on generative large language models. 
Prompts, raw result files and manual analysis are made publicly available at \\url{https://github.com/ielab/drchatgpt-health_prompting}.", "keywords": "ChatGPT;LLM;Health Misinformation;Prompt Knowledge;Consumer Health", "primary_area": "", "supplementary_material": "", "author": "Bevan Koopman;Guido Zuccon", "authorids": "~Bevan_Koopman1;~Guido_Zuccon1", "gender": ";", "homepage": "http://koopman.id.au;http://ielab.io/people/guido-zuccon.html", "dblp": "96/9899;22/6562", "google_scholar": ";aEVHhC8AAAAJ", "or_profile": "~Bevan_Koopman1;~Guido_Zuccon1", "aff": "University of Queensland;University of Queensland", "aff_domain": "uq.edu.au;uq.edu.au", "position": "Associate Professor;Associate Professor", "bibtex": "@inproceedings{\nkoopman2023dr,\ntitle={Dr Chat{GPT} tell me what I want to hear: How different prompts impact health answer correctness},\nauthor={Bevan Koopman and Guido Zuccon},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=3I1A9xAI8S}\n}", "github": "", "project": "", "reviewers": "96ee;XcW1;o8M9", "site": "https://openreview.net/forum?id=3I1A9xAI8S", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;5", "excitement": "4;2;4", "reproducibility": "5;4;5", "correctness": "4;3;4", "rating_avg": 4.0, "confidence_avg": 4.333333333333333, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.666666666666667, "correctness_avg": 3.6666666666666665, "replies_avg": 10, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0003-0271-5563", "linkedin": ";", "aff_unique_index": "0;0", "aff_unique_norm": "University of Queensland", "aff_unique_dep": "", "aff_unique_url": "https://www.uq.edu.au", "aff_unique_abbr": "UQ", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Australia" }, { "id": "3JBKnkUACW", "title": "Sub-network Discovery and Soft-masking for Continual Learning of Mixed Tasks", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Continual learning (CL) has two main objectives: preventing catastrophic forgetting (CF) and encouraging knowledge transfer (KT). The existing literature mainly focused on overcoming CF. Some work has also been done on KT when the tasks are similar. To our knowledge, only one method has been proposed to learn a sequence of mixed tasks. However, these techniques still suffer from CF and/or limited KT. This paper proposes a new CL method to achieve both. It overcomes CF by isolating the knowledge of each task via discovering a sub-network for it. A soft-masking mechanism is also proposed to preserve the previous knowledge and to enable the new task to leverage the past knowledge to achieve KT. 
Experiments using classification, generation, information extraction, and their mixture (i.e., heterogeneous tasks) show that the proposed method consistently outperforms strong baselines.", "keywords": "Continual Learning;Heterogeneous Tasks;Sub-network Discovery", "primary_area": "", "supplementary_material": "", "author": "Zixuan Ke;Bing Liu;Wenhan Xiong;Asli Celikyilmaz;Haoran Li", "authorids": "~Zixuan_Ke1;~Bing_Liu1;~Wenhan_Xiong1;~Asli_Celikyilmaz1;~Haoran_Li8", "gender": "M;M;M;F;F", "homepage": "https://vincent950129.github.io/;https://www.cs.uic.edu/~liub/;https://xwhan.github.io;https://asli.us;", "dblp": "196/3817;l/BingLiu1.html;203/8542;15/3724;50/10038", "google_scholar": "SZ4sFNEAAAAJ;Kt1bjZoAAAAJ;;https://scholar.google.com/citations?hl=en;l7xXEU0AAAAJ", "or_profile": "~Zixuan_Ke1;~Bing_Liu1;~Wenhan_Xiong1;~Asli_Celikyilmaz1;~Haoran_Li8", "aff": "University of Illinois, Chicago;University of Illinois at Chicago;Meta Facebook;FAIR ;Facebook AI", "aff_domain": "uic.edu;uic.edu;fb.com;meta.com;facebook.com", "position": "PhD student;Full Professor;Researcher;Principal Researcher;Researcher", "bibtex": "@inproceedings{\nke2023subnetwork,\ntitle={Sub-network Discovery and Soft-masking for Continual Learning of Mixed Tasks},\nauthor={Zixuan Ke and Bing Liu and Wenhan Xiong and Asli Celikyilmaz and Haoran Li},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=3JBKnkUACW}\n}", "github": "", "project": "", "reviewers": "jmje;Naqz;dnuC", "site": "https://openreview.net/forum?id=3JBKnkUACW", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;2", "excitement": "2;3;4", "reproducibility": "4;3;4", "correctness": "3;4;4", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.6666666666666665, "replies_avg": 10, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;", "linkedin": ";;;aslicelikyilmaz/;", "aff_unique_index": "0;0;1;1;1", "aff_unique_norm": "University of Illinois at Chicago;Meta", "aff_unique_dep": ";Meta Platforms, Inc.", "aff_unique_url": "https://www.uic.edu;https://meta.com", "aff_unique_abbr": "UIC;Meta", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Chicago;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "3JP1Jsng4G", "title": "mReFinED: An Efficient End-to-End Multilingual Entity Linking System", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "End-to-end multilingual entity linking (MEL) is concerned with identifying multilingual entity mentions and their corresponding entity IDs in a knowledge base. Existing works assumed that entity mentions were given and skipped the entity mention detection step due to a lack of high-quality multilingual training corpora. To overcome this limitation, we propose mReFinED, the first end-to-end multilingual entity linking. Additionally, we propose a bootstrapping mention detection framework that enhances the quality of training corpora. 
Our experimental results demonstrated that mReFinED outperformed the best existing work in the end-to-end MEL task while being 44 times faster.", "keywords": "entity linking;multilingual;end-to-end", "primary_area": "", "supplementary_material": "", "author": "Peerat Limkonchotiwat;Weiwei Cheng;Christos Christodoulopoulos;Amir Saffari;Jens Lehmann", "authorids": "~Peerat_Limkonchotiwat1;~Weiwei_Cheng4;~Christos_Christodoulopoulos1;~Amir_Saffari1;~Jens_Lehmann3", "gender": "M;M;M;M;M", "homepage": "https://mrpeerat.github.io/;;http://christos-c.com/;http://www.ymer.org/amir/;http://jens-lehmann.org", "dblp": "278/8244;;82/1906;71/310;71/4882.html", "google_scholar": "T-rvPZ4AAAAJ;;oZORQtwAAAAJ;https://scholar.google.co.uk/citations?user=QJX4mnQAAAAJ;https://scholar.google.de/citations?user=sEaQ5rgAAAAJ", "or_profile": "~Peerat_Limkonchotiwat1;~Weiwei_Cheng4;~Christos_Christodoulopoulos1;~Amir_Saffari1;~Jens_Lehmann3", "aff": "Amazon;Amazon;Amazon;Amazon;Fraunhofer IAIS", "aff_domain": "amazon.co.uk;amazon.com;amazon.co.uk;amazon.com;iais.fraunhofer.de", "position": "Intern;Researcher;Researcher;Principal Researcher;Lead Scientist", "bibtex": "@inproceedings{\nlimkonchotiwat2023mrefined,\ntitle={mReFin{ED}: An Efficient End-to-End Multilingual Entity Linking System},\nauthor={Peerat Limkonchotiwat and Weiwei Cheng and Christos Christodoulopoulos and Amir Saffari and Jens Lehmann},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=3JP1Jsng4G}\n}", "github": "", "project": "", "reviewers": "9qjt;4Pgx;N6KL", "site": "https://openreview.net/forum?id=3JP1Jsng4G", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;4;4", "excitement": "4;3;3", "reproducibility": "4;4;4", "correctness": "4;3;3", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0001-7708-0051;;0000-0001-9108-4278", "linkedin": "peerat-limkonchotiwat/;;christos-christodoulopoulos-376b9831/;amirsaffari/;jenslehmann82/", "aff_unique_index": "0;0;0;0;1", "aff_unique_norm": "Amazon;Fraunhofer Institute for Applied Information Technology", "aff_unique_dep": "Amazon.com, Inc.;", "aff_unique_url": "https://www.amazon.com;https://www.iais.fraunhofer.de/", "aff_unique_abbr": "Amazon;Fraunhofer IAIS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;1", "aff_country_unique": "United States;Germany" }, { "id": "3LIUMrCKrv", "title": "It Ain't Over: A Multi-aspect Diverse Math Word Problem Dataset", "track": "main", "status": "Long Main", "tldr": "", "abstract": "The math word problem (MWP) is a complex task that requires natural language understanding and logical reasoning to extract key knowledge from natural language narratives. Previous studies have provided various MWP datasets but lack diversity in problem types, lexical usage patterns, languages, and annotations for intermediate solutions. To address these limitations, we introduce a new MWP dataset, named DMath (Diverse Math Word Problems), offering a wide range of diversity in problem types, lexical usage patterns, languages, and intermediate solutions. The problems are available in English and Korean and include an expression tree and Python code as intermediate solutions. 
Through extensive experiments, we demonstrate that the DMath dataset provides a new opportunity to evaluate the capability of large language models, i.e., GPT-4 only achieves about 75\\% accuracy on the DMath dataset.", "keywords": "Math Word Problem;Arithmetic Reasoning;Natural Language Processing;Dataset;Large Language Model", "primary_area": "", "supplementary_material": "", "author": "Jiwoo Kim;Youngbin Kim;Ilwoong Baek;JinYeong Bak;Jongwuk Lee", "authorids": "~Jiwoo_Kim2;~Youngbin_Kim2;~Ilwoong_Baek1;~JinYeong_Bak2;~Jongwuk_Lee1", "gender": ";M;M;M;M", "homepage": "https://diallab.github.io/;https://dial.skku.edu/;https://dial.skku.edu/members/ilwoong_baek;https://nosyu.kr;", "dblp": ";;362/7832;22/11519;04/3445", "google_scholar": ";;;https://scholar.google.co.kr/citations?user=oYK9Z_IAAAAJ;", "or_profile": "~Jiwoo_Kim2;~Youngbin_Kim2;~Ilwoong_Baek1;~JinYeong_Bak2;~Jongwuk_Lee1", "aff": "SungKyunKwan University College of Computing and Informatics;Sung Kyun Kwan University;Sung Kyun Kwan University;Sungkyunkwan University;Sungkyunkwan University", "aff_domain": "cs.skku.edu;skku.edu;skku.edu;skku.edu;skku.edu", "position": "MS student;Undergrad student;Undergrad student;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nkim2023it,\ntitle={It Ain't Over: A Multi-aspect Diverse Math Word Problem Dataset},\nauthor={Jiwoo Kim and Youngbin Kim and Ilwoong Baek and JinYeong Bak and Jongwuk Lee},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=3LIUMrCKrv}\n}", "github": "", "project": "", "reviewers": "uqoi;jf9x;ZcUJ", "site": "https://openreview.net/forum?id=3LIUMrCKrv", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;4;4", "excitement": "4;3;4", "reproducibility": "3;3;4", "correctness": "4;4;4", "rating_avg": 5.0, "confidence_avg": 4.0, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 4.0, "replies_avg": 10, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0000-0002-3212-5241;", "linkedin": ";;;jybak/;", "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Sungkyunkwan University", "aff_unique_dep": "College of Computing and Informatics", "aff_unique_url": "https://www.skku.edu", "aff_unique_abbr": "SKKU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "South Korea" }, { "id": "3LdaPmAnji", "title": "EDeR: Towards Understanding Dependency Relations Between Events", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Relation extraction is a crucial task in natural language processing (NLP) and information retrieval (IR). Previous work on event relation extraction mainly focuses on hierarchical, temporal and causal relations. Such relationships consider two events to be independent in terms of syntax and semantics, but they fail to recognize the interdependence between events. To bridge this gap, we introduce a human-annotated Event Dependency Relation dataset (EDeR). The annotation is done on a sample of documents from the OntoNotes dataset, which has the additional benefit that it integrates with existing, orthogonal, annotations of this dataset. We investigate baseline approaches for EDeR's event dependency relation prediction. 
We show that recognizing such event dependency relations can further benefit critical NLP tasks, including semantic role labelling and co-reference resolution.", "keywords": "dataset;event dependency relation", "primary_area": "", "supplementary_material": "", "author": "Ruiqi Li;Patrik Haslum;Leyang Cui", "authorids": "~Ruiqi_Li5;~Patrik_Haslum1;~Leyang_Cui1", "gender": "F;M;M", "homepage": ";https://cecs.anu.edu.au/people/patrik-haslum;https://github.com/Nealcly", "dblp": ";39/6592;247/6181", "google_scholar": "Af1b3n8AAAAJ;https://scholar.google.com.tw/citations?user=XqbCCJIAAAAJ;6YVwZgkAAAAJ", "or_profile": "~Ruiqi_Li5;~Patrik_Haslum1;~Leyang_Cui1", "aff": "Australian National University;Australian National University;Tencent AI Lab", "aff_domain": "anu.edu.au;anu.edu.au;tencent.com", "position": "PhD student;Associate Professor;Researcher", "bibtex": "@inproceedings{\nli2023eder,\ntitle={{ED}eR: Towards Understanding Dependency Relations Between Events},\nauthor={Ruiqi Li and Patrik Haslum and Leyang Cui},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=3LdaPmAnji}\n}", "github": "", "project": "", "reviewers": "R3vw;9FEu;HYy2", "site": "https://openreview.net/forum?id=3LdaPmAnji", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;4;3", "excitement": "4;3;4", "reproducibility": "4;5;3", "correctness": "4;4;4", "rating_avg": 5.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.0, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": "richie-li-18a990117/;;", "aff_unique_index": "0;0;1", "aff_unique_norm": "Australian National University;Tencent", "aff_unique_dep": ";Tencent AI Lab", "aff_unique_url": "https://www.anu.edu.au;https://ai.tencent.com", "aff_unique_abbr": "ANU;Tencent AI Lab", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1", "aff_country_unique": "Australia;China" }, { "id": "3MEV3aIDDq", "title": "Can Pre-trained Vision and Language Models Answer Visual Information-Seeking Questions?", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Pre-trained vision and language\nmodels have demonstrated state-of-the-art capabilities over existing tasks involving images and texts, including visual question answering. However, it remains unclear whether these models possess the capability to answer questions that are not only querying visual content but knowledge-intensive and information-seeking. In this study, we introduce InfoSeek, a visual question answering dataset tailored for information-seeking questions that cannot be answered with only common sense knowledge. \nUsing InfoSeek, we analyze various pre-trained visual question answering models and gain insights into their characteristics. 
Our findings reveal that state-of-the-art pre-trained multi-modal models (e.g., PaLI-X, BLIP2, InstructBLIP) face challenges in answering visual information-seeking questions, but fine-tuning on the InfoSeek dataset elicits models to use fine-grained knowledge that was learned during pre-training.\nFurthermore, we show that accurate visual entity recognition can be used to improve performance on InfoSeek by retrieving relevant documents, showing a significant space for improvement.", "keywords": "visual question answering;vision and language;pre-trained multimodal model", "primary_area": "", "supplementary_material": "", "author": "Yang Chen;Hexiang Hu;Yi Luan;Haitian Sun;Soravit Changpinyo;Alan Ritter;Ming-Wei Chang", "authorids": "~Yang_Chen10;~Hexiang_Hu1;~Yi_Luan1;~Haitian_Sun2;~Soravit_Changpinyo1;~Alan_Ritter1;~Ming-Wei_Chang3", "gender": ";;F;M;M;M;", "homepage": "https://edchengg.github.io/;;;;https://schangpi.github.io/;http://aritter.github.io/;", "dblp": "48/4792-13;;125/7491;185/6000;139/1319;47/3133;", "google_scholar": "o-oBMWEAAAAJ;;0i5Ys-4AAAAJ;o7-PJu8AAAAJ;2TWx9x0AAAAJ;https://scholar.google.com/citations?hl=en;", "or_profile": "~Yang_Chen10;~Hexiang_Hu1;~Yi_Luan1;~Haitian_Sun2;~Soravit_Changpinyo1;~Alan_Ritter1;~Ming-Wei_Chang3", "aff": "Georgia Institute of Technology;;Google;School of Computer Science, Carnegie Mellon University;Google;Georgia Institute of Technology;", "aff_domain": "gatech.edu;;google.com;cs.cmu.edu;google.com;gatech.edu;", "position": "PhD student;;Research Scientist;PhD student;Researcher;Associate Professor;", "bibtex": "@inproceedings{\nchen2023can,\ntitle={Can Pre-trained Vision and Language Models Answer Visual Information-Seeking Questions?},\nauthor={Yang Chen and Hexiang Hu and Yi Luan and Haitian Sun and Soravit Changpinyo and Alan Ritter and Ming-Wei Chang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=3MEV3aIDDq}\n}", "github": "", "project": "", "reviewers": "Vbdw;fHav;Kp26", "site": "https://openreview.net/forum?id=3MEV3aIDDq", "pdf_size": 0, "rating": "5;5;5", "confidence": "2;3;5", "excitement": "3;3;3", "reproducibility": "2;3;3", "correctness": "4;3;4", "rating_avg": 5.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.0, "reproducibility_avg": 2.6666666666666665, "correctness_avg": 3.6666666666666665, "replies_avg": 10, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;0000-0002-4013-1190;;", "linkedin": ";;;;soravit-changpinyo-b6a35944;;", "aff_unique_index": "0;1;2;1;0", "aff_unique_norm": "Georgia Institute of Technology;Google;Carnegie Mellon University", "aff_unique_dep": ";Google;School of Computer Science", "aff_unique_url": "https://www.gatech.edu;https://www.google.com;https://www.cmu.edu", "aff_unique_abbr": "Georgia Tech;Google;CMU", "aff_campus_unique_index": "1;2;1", "aff_campus_unique": ";Mountain View;Pittsburgh", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "3Nq9KRcvx5", "title": "DiNeR: A Large Realistic Dataset for Evaluating Compositional Generalization", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Most of the existing compositional generalization datasets are synthetically-generated, resulting in a lack of natural language variation. 
While there have been recent attempts to introduce non-synthetic datasets for compositional generalization, they suffer from either limited data scale or a lack of diversity in the forms of combinations. To better investigate compositional generalization with more linguistic phenomena and compositional diversity, we propose the DIsh NamE Recognition (DiNeR) task and create a large realistic Chinese dataset. Given a recipe instruction, models are required to recognize the dish name composed of diverse combinations of food, actions, and flavors. Our dataset consists of 3,811 dishes and 228,114 recipes, and involves plenty of linguistic phenomena such as anaphora, omission and ambiguity. We provide two strong baselines based on T5 and large language models (LLMs). This work contributes a challenging task, baseline methods to tackle the task, and insights into compositional generalization in the context of dish name recognition.", "keywords": "compositional generalization;dish name recognition;large realistic dataset;language model evaluation", "primary_area": "", "supplementary_material": "", "author": "Chengang Hu;Xiao Liu;Yansong Feng", "authorids": "~Chengang_Hu1;~Xiao_Liu19;~Yansong_Feng1", "gender": "F;M;M", "homepage": "https://xxxiaol.github.io/;https://yansongfeng.github.io/;", "dblp": "82/1364-32;25/2643-2.html;", "google_scholar": "c3bdW2IAAAAJ;https://scholar.google.com.tw/citations?user=67qAw_wAAAAJ;zoMklrcAAAAJ", "or_profile": "~Xiao_Liu19;~Yansong_Feng1;~Hu_ChenGang1", "aff": "University of California, Los Angeles;Peking University;Peking University", "aff_domain": "ucla.edu;pku.edu.cn;pku.edu.cn", "position": "Researcher;Associate Professor;MS student", "bibtex": "@inproceedings{\nhu2023diner,\ntitle={DiNeR: A Large Realistic Dataset for Evaluating Compositional Generalization},\nauthor={Chengang Hu and Xiao Liu and Yansong Feng},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=3Nq9KRcvx5}\n}", "github": "", "project": "", "reviewers": "A6aj;m8Dw;DC41", "site": "https://openreview.net/forum?id=3Nq9KRcvx5", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;4", "excitement": "3;4;3", "reproducibility": "3;4;3", "correctness": "4;4;4", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": ";;", "aff_unique_index": "0;1;1", "aff_unique_norm": "University of California, Los Angeles;Peking University", "aff_unique_dep": ";", "aff_unique_url": "https://www.ucla.edu;http://www.pku.edu.cn", "aff_unique_abbr": "UCLA;Peking U", "aff_campus_unique_index": "0", "aff_campus_unique": "Los Angeles;", "aff_country_unique_index": "0;1;1", "aff_country_unique": "United States;China" }, { "id": "3OvLxe9n9S", "title": "Dialogue Act-Aided Backchannel Prediction Using Multi-Task Learning", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "Produced in the form of small injections such as \"Yeah!\" or \"Uh-Huh\" by listeners in a conversation, supportive verbal feedback (i.e., backchanneling) is essential for natural dialogue. Highlighting its tight relation to speaker intent and utterance type, we propose a multi-task learning approach that learns textual representations for the task of backchannel prediction in tandem with dialogue act classification. 
We demonstrate the effectiveness of our approach by improving the prediction of specific backchannels like \"Yeah\" or \"Really?\" by up to 2.0\\% in F1. Additionally, whereas previous models relied on well-established methods to extract audio features, we further pre-train the audio encoder in a self-supervised fashion using voice activity projection. This leads to additional gains of 1.4\\% in weighted F1.", "keywords": "backchannel prediction;multi-task learning;dialogue act;pre-trained audio encoder;voice activity projection", "primary_area": "", "supplementary_material": "", "author": "Wencke Liermann;Yo-Han Park;Yong-Seok Choi;Kong Joo Lee", "authorids": "~Wencke_Liermann1;~Yo-Han_Park1;~Yong-Seok_Choi1;~Kong_Joo_Lee1", "gender": "F;M;;F", "homepage": ";;https://github.com/yseokchoi;", "dblp": "313/1305;;;", "google_scholar": "IMwPVowAAAAJ;;;RfeStkAAAAAJ", "or_profile": "~Wencke_Liermann1;~Yo-Han_Park1;~Yong-Seok_Choi1;~Kong_Joo_Lee1", "aff": "Chungnam National University;Chungnam National University;Chungnam National University;Chungnam National University", "aff_domain": "cnu.ac.kr;cnu.ac.kr;cnu.ac.kr;cnu.ac.kr", "position": "MS student;MS student;PhD student;Full Professor", "bibtex": "@inproceedings{\nliermann2023dialogue,\ntitle={Dialogue Act-Aided Backchannel Prediction Using Multi-Task Learning},\nauthor={Wencke Liermann and Yo-Han Park and Yong-Seok Choi and Kong Joo Lee},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=3OvLxe9n9S}\n}", "github": "", "project": "", "reviewers": "ww4h;fpPn;kE2m", "site": "https://openreview.net/forum?id=3OvLxe9n9S", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;2;4", "excitement": "3;3;3", "reproducibility": "4;3;4", "correctness": "3;4;3", "rating_avg": 3.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-5023-5604;0000-0002-7889-8004;0000-0003-0025-4230", "linkedin": "wencke-lm/;;;", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Chungnam National University", "aff_unique_dep": "", "aff_unique_url": "http://www.cnu.ac.kr", "aff_unique_abbr": "CNU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "South Korea" }, { "id": "3Q6LON8y2I", "title": "Is ChatGPT Good at Search? Investigating Large Language Models as Re-Ranking Agents", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Large Language Models (LLMs) have demonstrated remarkable zero-shot generalization across various language-related tasks, including search engines. \nHowever, existing work utilizes the generative ability of LLMs for Information Retrieval (IR) rather than direct passage ranking.\nThe discrepancy between the pre-training objectives of LLMs and the ranking objective poses another challenge.\nIn this paper, we first investigate generative LLMs such as ChatGPT and GPT-4 for relevance ranking in IR. Surprisingly, our experiments reveal that properly instructed LLMs can deliver competitive, even superior results to state-of-the-art supervised methods on popular IR benchmarks. \nFurthermore, to address concerns about data contamination of LLMs, we collect a new test set called NovelEval, based on the latest knowledge and aiming to verify the model's ability to rank unknown knowledge. 
\nFinally, to improve efficiency in real-world applications, we delve into the potential for distilling the ranking capabilities of ChatGPT into small specialized models using a permutation distillation scheme.\nOur evaluation results show that a distilled 440M model outperforms a 3B supervised model on the BEIR benchmark.\nThe code to reproduce our results is available at www.github.com/sunnweiwei/RankGPT.", "keywords": "Passage Re-ranking;Information Retrieval;Large Language Models", "primary_area": "", "supplementary_material": "", "author": "Weiwei Sun;Lingyong Yan;Xinyu Ma;Shuaiqiang Wang;Pengjie Ren;Zhumin Chen;Dawei Yin;Zhaochun Ren", "authorids": "~Weiwei_Sun9;~Lingyong_Yan1;~Xinyu_Ma4;~Shuaiqiang_Wang2;~Pengjie_Ren1;~Zhumin_Chen1;~Dawei_Yin1;~Zhaochun_Ren1", "gender": ";M;M;M;;;M;M", "homepage": "https://sunnweiwei.github.io/;https://yanlingyong.net;https://albert-ma.github.io/;http://wangshuaiqiang.net/;;https://ir.sdu.edu.cn/~zhuminchen/~zhuminchen_en.htm;https://www.yindawei.com/;https://renzhaochun.github.io/", "dblp": ";254/8048;;16/1524;;88/1081;;58/10440", "google_scholar": "hdUZbxgAAAAJ;NksMJFcAAAAJ;DXYzAIkAAAAJ;https://scholar.google.com.hk/citations?user=8SbYYcIAAAAJ;;;GuQ9bpAAAAAJ;fPcIPt0AAAAJ", "or_profile": "~Weiwei_Sun9;~Lingyong_Yan1;~Xinyu_Ma4;~Shuaiqiang_Wang2;~Pengjie_Ren1;~Zhumin_Chen1;~Dawei_Yin1;~Zhaochun_Ren1", "aff": "Shandong University;Baidu Inc.;Institute of Computing Technology, Chinese Academy of Sciences, ;Baidu Inc.;;Shandong University;Baidu;Shandong University", "aff_domain": "sdu.edu.cn;baidu.com;ict.ac.cn;baidu.com;;sdu.edu.cn;baidu.com;sdu.edu.cn", "position": "MS student;Search Scientist;PhD student;Principal Researcher;;Full Professor;Principal Researcher;Full Professor", "bibtex": "@inproceedings{\nsun2023is,\ntitle={Is Chat{GPT} Good at Search? 
Investigating Large Language Models as Re-Ranking Agents},\nauthor={Weiwei Sun and Lingyong Yan and Xinyu Ma and Shuaiqiang Wang and Pengjie Ren and Zhumin Chen and Dawei Yin and Zhaochun Ren},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=3Q6LON8y2I}\n}", "github": "", "project": "", "reviewers": "KPjQ;V7x6;Xw3x", "site": "https://openreview.net/forum?id=3Q6LON8y2I", "pdf_size": 0, "rating": "5;5;5", "confidence": "3;4;4", "excitement": "4;3;4", "reproducibility": "4;4;4", "correctness": "3;4;4", "rating_avg": 5.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.0, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 8, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0002-5511-9370;0000-0002-9212-1947;;0000-0003-4592-4074;0000-0002-0684-6205;0000-0002-9076-6565", "linkedin": ";;;;;;dwyin/;zhaochun-ren-460491296/?locale=nl_NL", "aff_unique_index": "0;1;2;1;0;1;0", "aff_unique_norm": "Shandong University;Baidu;Chinese Academy of Sciences", "aff_unique_dep": ";Baidu Inc.;Institute of Computing Technology", "aff_unique_url": "http://www.sdu.edu.cn;https://www.baidu.com;http://www.ict.ac.cn", "aff_unique_abbr": "SDU;Baidu;CAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "3QibSyz6Qt", "title": "NarrativeXL: a Large-scale Dataset for Long-Term Memory Models", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "We propose a new large-scale (nearly a million questions) ultra-long-context (more than 50,000 words average document length) reading comprehension dataset. Using GPT 3.5, we summarized each scene in 1,500 hand-curated fiction books from Project Gutenberg, which resulted in approximately 150 scene-level summaries per book. After that, we created a number of reading comprehension questions based on these summaries, including three types of multiple-choice scene recognition questions, as well as free-form narrative reconstruction questions. With 990,595 total questions, our dataset is an order of magnitude larger than the closest alternatives. Crucially, most questions have a known ``retention demand'', indicating how long-term of a memory is needed to answer them, which should aid long-term memory performance evaluation. We validate our data in four small-scale experiments: one with human labelers, and three with existing language models. We show that our questions 1) adequately represent the source material 2) can be used to diagnose a model's memory capacity 3) are not trivial for modern language models even when the memory demand does not exceed those models' context lengths. 
Lastly, we provide our code which can be used to further expand the dataset with minimal human labor.", "keywords": "NLP;long-term memory;long term memory;reading comprehension", "primary_area": "", "supplementary_material": "", "author": "Arsenii Kirillovich Moskvichev;Ky-Vinh Mai", "authorids": "~Arsenii_Kirillovich_Moskvichev1;~Ky-Vinh_Mai1", "gender": "M;M", "homepage": "http://r-seny.com;", "dblp": "249/7049;", "google_scholar": "OuglBUgAAAAJ;", "or_profile": "~Arsenii_Kirillovich_Moskvichev1;~Ky-Vinh_Mai1", "aff": "Santa Fe Institute;University of California, Irvine", "aff_domain": "santafe.edu;uci.edu", "position": "Postdoc;Undergrad student", "bibtex": "@inproceedings{\nmoskvichev2023narrativexl,\ntitle={Narrative{XL}: a Large-scale Dataset for Long-Term Memory Models},\nauthor={Arsenii Kirillovich Moskvichev and Ky-Vinh Mai},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=3QibSyz6Qt}\n}", "github": "", "project": "", "reviewers": "ms7Q;wT87;3cPB;x1G6", "site": "https://openreview.net/forum?id=3QibSyz6Qt", "pdf_size": 0, "rating": "3;3;3;3", "confidence": "5;3;3;3", "excitement": "3;4;3;3", "reproducibility": "5;4;3;3", "correctness": "3;4;3;3", "rating_avg": 3.0, "confidence_avg": 3.5, "excitement_avg": 3.25, "reproducibility_avg": 3.75, "correctness_avg": 3.25, "replies_avg": 14, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";", "linkedin": "arseny-moskvichev-601a809a;https://www.linkedin.com/", "aff_unique_index": "0;1", "aff_unique_norm": "Santa Fe Institute;University of California, Irvine", "aff_unique_dep": ";", "aff_unique_url": "https://www.santafe.edu;https://www.uci.edu", "aff_unique_abbr": "SFI;UCI", "aff_campus_unique_index": "1", "aff_campus_unique": ";Irvine", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "3QzTzulZwY", "title": "IMTLab: An Open-Source Platform for Building, Evaluating, and Diagnosing Interactive Machine Translation Systems", "track": "main", "status": "Long Main", "tldr": "", "abstract": "We present IMTLab, an open-source end-to-end interactive machine translation (IMT) system platform that enables researchers to quickly build IMT systems with state-of-the-art models, perform an end-to-end evaluation, and diagnose the weakness of systems.\nIMTLab treats the whole interactive translation process as a task-oriented dialogue with a human-in-the-loop setting, in which human interventions can be explicitly incorporated to produce high-quality, error-free translations.\nTo this end, a general communication interface is designed to support the flexible IMT architectures and user policies.\nBased on the proposed design, we construct a simulated and real interactive environment to achieve end-to-end evaluation and leverage the framework to systematically evaluate previous IMT systems.\nOur simulated and manual experiments show that the prefix-constrained decoding approach still gains the lowest editing cost in the end-to-end evaluation, while BiTIIMT achieves comparable editing cost with a better interactive experience.", "keywords": "Interactive Machine Translation; Neural Machine Translation; Lexical-constrained Translation", "primary_area": "", "supplementary_material": "", "author": "Xu Huang;Zhirui Zhang;Ruize Gao;Yichao Du;Lemao Liu;Guoping Huang;Shuming Shi;Jiajun Chen;Shujian Huang", "authorids": 
"~Xu_Huang1;~Zhirui_Zhang1;~Ruize_Gao2;~Yichao_Du1;~Lemao_Liu3;~Guoping_Huang2;~Shuming_Shi1;~Jiajun_Chen1;~Shujian_Huang1", "gender": "M;M;M;M;M;M;M;M;M", "homepage": ";;https://ruizgao.github.io/;;;https://cs.nju.edu.cn/chenjiajun/index_en.htm;http://nlp.nju.edu.cn/huangsj/;https://lemaoliu.github.io/homepage/;", "dblp": ";202/1838;;165/3047;s/ShumingShi;;57/8451;41/10887.html;271/6727", "google_scholar": "ueD6CkkAAAAJ;C8Ylo7sAAAAJ;;xSkkA7UAAAAJ;Lg31AKMAAAAJ;https://scholar.google.com.tw/citations?user=WIF7VaoAAAAJ;HF3-E9kAAAAJ;;UC4wSP0AAAAJ", "or_profile": "~Xu_Huang1;~Zhirui_Zhang1;~Ruize_Gao2;~Guoping_Huang2;~Shuming_Shi1;~Jiajun_Chen1;~Shujian_Huang1;~lemao_liu1;~Du_Yichao1", "aff": "Nanjing University;Tencent AI Lab;Shanghai Jiaotong University;Tencent AI Lab;Tencent AI Lab;Nanjing University;Nanjing University;Tencent;University of Science and Technology of China", "aff_domain": "nju.edu.cn;tencent.com;sjtu.edu.cn;tencent.com;tencent.com;nju.edu.cn;nju.edu.cn;tencent.com;ustc.edu.cn", "position": "MS student;Senior Researcher;MS student;Researcher;Principal Researcher;Full Professor;Associate Professor;Researcher;PhD student", "bibtex": "@inproceedings{\nhuang2023imtlab,\ntitle={{IMTL}ab: An Open-Source Platform for Building, Evaluating, and Diagnosing Interactive Machine Translation Systems},\nauthor={Xu Huang and Zhirui Zhang and Ruize Gao and Yichao Du and Lemao Liu and Guoping Huang and Shuming Shi and Jiajun Chen and Shujian Huang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=3QzTzulZwY}\n}", "github": "", "project": "", "reviewers": "UJug;HeBz;nTMj", "site": "https://openreview.net/forum?id=3QzTzulZwY", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;5;2", "excitement": "4;4;4", "reproducibility": "3;4;4", "correctness": "4;4;4", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 4.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 9, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;my-orcid?orcid=0000-0003-4500-2459;;;;;;", "linkedin": ";;;guoping-huang-473708b9/;;;;;", "aff_unique_index": "0;1;2;1;1;0;0;1;3", "aff_unique_norm": "Nanjing University;Tencent;Shanghai Jiao Tong University;University of Science and Technology of China", "aff_unique_dep": ";Tencent AI Lab;;", "aff_unique_url": "https://www.nju.edu.cn;https://ai.tencent.com;https://www.sjtu.edu.cn;http://www.ustc.edu.cn", "aff_unique_abbr": "Nanjing U;Tencent AI Lab;SJTU;USTC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "3RS2T9EPjI", "title": "In-Image Neural Machine Translation with Segmented Pixel Sequence-to-Sequence Model", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "In-Image Machine Translation (IIMT) aims to convert images containing texts from one language to another. Traditional approaches for this task are cascade methods, which utilize optical character recognition (OCR) followed by neural machine translation (NMT) and text rendering. However, the cascade methods suffer from compounding errors of OCR and NMT, leading to a decrease in translation quality. In this paper, we propose an end-to-end model instead of the OCR, NMT and text rendering pipeline. Our neural architecture adopts encoder-decoder paradigm with segmented pixel sequences as inputs and outputs. 
Through end-to-end training, our model yields improvements across various dimensions: (i) it achieves higher translation quality by avoiding error propagation, (ii) it demonstrates robustness for out-of-domain data, and (iii) it displays insensitivity to incomplete words. To validate the effectiveness of our method and to support future research, we construct our dataset containing 4M pairs of De-En images and train our end-to-end model. The experimental results show that our approach outperforms both the cascade method and the current end-to-end model.", "keywords": "In-Image Machine Translation;Neural Machine Translation", "primary_area": "", "supplementary_material": "", "author": "Yanzhi Tian;Xiang Li;Zeming Liu;Yuhang Guo;Bin Wang", "authorids": "~Yanzhi_Tian1;~Xiang_Li30;~Zeming_Liu1;~Yuhang_Guo1;~Bin_Wang13", "gender": "M;M;;;M", "homepage": "https://yanzhitian.github.io/;;;;", "dblp": "351/5744;;;74/10083-1;13/1898-4", "google_scholar": ";DMfYmIEAAAAJ;;;tDajnHEAAAAJ", "or_profile": "~Yanzhi_Tian1;~Xiang_Li30;~Zeming_Liu1;~Yuhang_Guo1;~Bin_Wang13", "aff": "Beijing Institute of Technology;Xiaomi AI Lab;;Beijing Institute of Technology;AI Lab, Xiaomi Inc.", "aff_domain": "bit.edu.cn;xiaomi.com;;bit.edu.cn;xiaomi.com", "position": "Undergrad student;Researcher;;Lecturer;Principal Researcher", "bibtex": "@inproceedings{\ntian2023inimage,\ntitle={In-Image Neural Machine Translation with Segmented Pixel Sequence-to-Sequence Model},\nauthor={Yanzhi Tian and Xiang Li and Zeming Liu and Yuhang Guo and Bin Wang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=3RS2T9EPjI}\n}", "github": "", "project": "", "reviewers": "9Cpu;6iiA;ApGL", "site": "https://openreview.net/forum?id=3RS2T9EPjI", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;3;4", "excitement": "4;3;2", "reproducibility": "4;3;4", "correctness": "4;4;2", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;", "linkedin": ";;;;", "aff_unique_index": "0;1;0;2", "aff_unique_norm": "Beijing Institute of Technology;Xiaomi Corporation;Xiaomi Inc.", "aff_unique_dep": ";Xiaomi AI Lab;AI Lab", "aff_unique_url": "http://www.bit.edu.cn/;https://www.xiaomi.com;https://www.xiaomi.com", "aff_unique_abbr": "BIT;Xiaomi;Xiaomi", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "3RTpKMVg0P", "title": "Privacy Implications of Retrieval-Based Language Models", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Retrieval-based language models (LMs) have demonstrated improved interpretability, factuality, and adaptability compared to their parametric counterparts by incorporating retrieved text from external datastores. While it is well known that parametric models are prone to leaking private data, it remains unclear how the addition of a retrieval datastore impacts model privacy. In this work, we present the first study of privacy risks in retrieval-based LMs, particularly $k$NN-LMs. Our goal is to explore the optimal design and training procedure in domains where privacy is of concern, aiming to strike a balance between utility and privacy. 
Crucially, we find that $k$NN-LMs are more susceptible to leaking private information from their private datastore than parametric models. We further explore mitigations of privacy risks: When privacy information is targeted and readily detected in the text, we find that a simple sanitization step would eliminate the risks while decoupling query and key encoders achieves an even better utility-privacy trade-off. Otherwise, we consider strategies of mixing public and private data in both datastore and encoder training. While these methods offer modest improvements, they leave considerable room for future work. Together, our findings provide insights for practitioners to better understand and mitigate privacy risks in retrieval-based LMs.", "keywords": "Privacy;Retrieval-based language models", "primary_area": "", "supplementary_material": "", "author": "Yangsibo Huang;Samyak Gupta;Zexuan Zhong;Kai Li;Danqi Chen", "authorids": "~Yangsibo_Huang2;~Samyak_Gupta1;~Zexuan_Zhong1;~Kai_Li8;~Danqi_Chen1", "gender": "F;M;M;M;F", "homepage": "https://hazelsuko07.github.io/yangsibo/;https://samkg.github.io;https://www.cs.princeton.edu/~zzhong/;https://www.cs.princeton.edu/~li/;https://www.cs.princeton.edu/~danqic/", "dblp": ";305/0404;218/7257;l/KaiLi1.html;87/7949", "google_scholar": "NMPUDa0AAAAJ;;;9MSpWOUAAAAJ;sVR8ktkAAAAJ", "or_profile": "~Yangsibo_Huang2;~Samyak_Gupta1;~Zexuan_Zhong1;~Kai_Li8;~Danqi_Chen1", "aff": "Princeton University;Princeton University;Princeton University;Princeton University;Princeton University", "aff_domain": "princeton.edu;princeton.edu;princeton.edu;princeton.edu;cs.princeton.edu", "position": "PhD student;PhD student;PhD student;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nhuang2023privacy,\ntitle={Privacy Implications of Retrieval-Based Language Models},\nauthor={Yangsibo Huang and Samyak Gupta and Zexuan Zhong and Kai Li and Danqi Chen},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=3RTpKMVg0P}\n}", "github": "", "project": "", "reviewers": "KGUM;EAGn;nKXz", "site": "https://openreview.net/forum?id=3RTpKMVg0P", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;1;4", "excitement": "3;4;3", "reproducibility": "3;3;4", "correctness": "4;4;3", "rating_avg": 4.0, "confidence_avg": 2.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;", "linkedin": ";;;;", "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Princeton University", "aff_unique_dep": "", "aff_unique_url": "https://www.princeton.edu", "aff_unique_abbr": "Princeton", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "3Uu4rZ6hLI", "title": "Unveiling the Essence of Poetry: Introducing a Comprehensive Dataset and Benchmark for Poem Summarization", "track": "main", "status": "Short Main", "tldr": "", "abstract": "While research in natural language processing has progressed significantly in creative language generation, the question of whether language models can interpret the intended meaning of creative language largely remains unanswered. 
Poetry as a creative art form has existed for generations, and summarization of such content requires deciphering the figurative patterns to find out the actual intent and message of the poet. This task can provide researchers with an opportunity to evaluate the creative language interpretation capacity of the language models. Unlike typical text, summarization of poems is a challenging task as poems carry a deeper meaning, which can be easily lost if only the literal meaning is considered. That being said, we propose a new task in the field of natural language understanding called 'Poem Summarization'. As a starting point, we propose the first-ever dataset for this task, named 'PoemSum', consisting of 3011 samples of poetry and its corresponding summarized interpretation in the English language. We have benchmarked the performance of different state-of-the-art summarization models and provided observations on their limitations. The dataset and all relevant code used in this work have been made publicly available.", "keywords": "Poem summarization;Creative language summarization;Creative language interpretation;Natural language understanding;Automatic text summarization;Language models", "primary_area": "", "supplementary_material": "", "author": "Ridwan Mahbub;Ifrad Towhid Khan;Samiha Shafiq Anuva;Md Shihab Shahriar;Md Tahmid Rahman Laskar;Sabbir Ahmed", "authorids": "~Ridwan_Mahbub1;~Ifrad_Towhid_Khan1;~Samiha_Shafiq_Anuva1;~Md_Shihab_Shahriar1;~Md_Tahmid_Rahman_Laskar2;~Sabbir_Ahmed1", "gender": "M;M;F;M;M;M", "homepage": "https://ridwan230.github.io/;;;;https://sites.google.com/view/tahmedge/home;https://ggck43.github.io/", "dblp": "362/7903;;;;250/6292;", "google_scholar": "https://scholar.google.ca/citations?user=LKMc1l8AAAAJ;;udWuH_YAAAAJ;;qpnsWPoAAAAJ;l_Dk4ZoAAAAJ", "or_profile": "~Ridwan_Mahbub1;~Ifrad_Towhid_Khan1;~Samiha_Shafiq_Anuva1;~Md_Shihab_Shahriar1;~Md_Tahmid_Rahman_Laskar2;~Sabbir_Ahmed1", "aff": "Islamic University of Technology;Islamic University of Technology;Islamic University of Technology;Islamic University of Technology;Dialpad Inc. 
;Islamic University of Technology", "aff_domain": "iutoic-dhaka.edu;iutoic-dhaka.edu;iutoic-dhaka.edu;iut-dhaka.edu;dialpad.com;iutoic-dhaka.edu", "position": "Undergrad student;Undergrad student;Undergrad student;Lecturer;Applied Scientist;PhD student", "bibtex": "@inproceedings{\nmahbub2023unveiling,\ntitle={Unveiling the Essence of Poetry: Introducing a Comprehensive Dataset and Benchmark for Poem Summarization},\nauthor={Ridwan Mahbub and Ifrad Towhid Khan and Samiha Shafiq Anuva and Md Shihab Shahriar and Md Tahmid Rahman Laskar and Sabbir Ahmed},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=3Uu4rZ6hLI}\n}", "github": "", "project": "", "reviewers": "dv38;r8W8;6WjZ", "site": "https://openreview.net/forum?id=3Uu4rZ6hLI", "pdf_size": 0, "rating": "4;4;4", "confidence": "5;4;4", "excitement": "2;4;3", "reproducibility": "4;4;2", "correctness": "3;4;4", "rating_avg": 4.0, "confidence_avg": 4.333333333333333, "excitement_avg": 3.0, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0009-0007-3644-6230;;;;;0000-0001-5928-4886", "linkedin": "ridwan-mahbub/;ifrad-khan/;samiha-shafiq-anuva-73036820b/;md-shihab-shahriar;tahmedge/;sabbirahmediut/", "aff_unique_index": "0;0;0;0;1;0", "aff_unique_norm": "Islamic University of Technology;Dialpad Inc.", "aff_unique_dep": ";", "aff_unique_url": "https://www.iut-dhaka.edu.bd;https://www.dialpad.com", "aff_unique_abbr": "IUT;Dialpad", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;1;0", "aff_country_unique": "Bangladesh;United States" }, { "id": "3XDDWCu8CF", "title": "A Simple Baseline for Knowledge-Based Visual Question Answering", "track": "main", "status": "Short Main", "tldr": "", "abstract": "This paper is on the problem of Knowledge-Based Visual Question Answering (KB-VQA).\nRecent works have emphasized the significance of incorporating both explicit (through external databases) and implicit (through LLMs) knowledge to answer questions requiring external knowledge effectively. A common limitation of such approaches is that they consist of relatively complicated pipelines and often heavily rely on accessing GPT-3 API. Our main contribution in this paper is to propose a much simpler and readily reproducible pipeline which, in a nutshell, is based on efficient in-context\nlearning by prompting LLaMA (1 and 2) using question-informative captions as contextual information. Contrary to recent approaches, our\nmethod is training-free, does not require access to external databases or APIs, and yet achieves state-of-the-art accuracy on the OK-VQA and A-OK-VQA datasets. Finally, we perform several ablation studies to understand important aspects of our method. 
Our code is publicly available at https://github.com/alexandrosXe/ASimple-Baseline-For-Knowledge-Based-VQA", "keywords": "Knowledge-based Visual Question Answering (KB-VQA);Commonsense Reasoning;Few-shot learning", "primary_area": "", "supplementary_material": "", "author": "ALEXANDROS XENOS;Themos Stafylakis;Ioannis Patras;Georgios Tzimiropoulos", "authorids": "~ALEXANDROS_XENOS1;~Themos_Stafylakis1;~Ioannis_Patras2;~Georgios_Tzimiropoulos1", "gender": "M;M;M;M", "homepage": ";;http://www.eecs.qmul.ac.uk/~ioannisp/;https://ytzimiro.github.io/", "dblp": "305/6777;18/1739;18/1556;03/3273", "google_scholar": "ucSeBMgAAAAJ;kdEnoP0AAAAJ;https://scholar.google.com.tw/citations?user=OBYLxRkAAAAJ;https://scholar.google.co.uk/citations?user=D4JkWxf-8fwC", "or_profile": "~ALEXANDROS_XENOS1;~Themos_Stafylakis1;~Ioannis_Patras2;~Georgios_Tzimiropoulos1", "aff": "Queen Mary University of London;Omilia;Queen Mary, University of London;Queen Mary University London", "aff_domain": "qmul.ac.uk;omilia.com;qmul.ac.uk;qmul.ac.uk", "position": "PhD student;Principal Researcher;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nxenos2023a,\ntitle={A Simple Baseline for Knowledge-Based Visual Question Answering},\nauthor={ALEXANDROS XENOS and Themos Stafylakis and Ioannis Patras and Georgios Tzimiropoulos},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=3XDDWCu8CF}\n}", "github": "", "project": "", "reviewers": "z51R;3YRF;P6LA", "site": "https://openreview.net/forum?id=3XDDWCu8CF", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;3;2", "excitement": "4;3;3", "reproducibility": "4;4;4", "correctness": "4;4;3", "rating_avg": 4.0, "confidence_avg": 3.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.0, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0003-3913-4738;", "linkedin": "alexandros-xenos-51a4311a4/;themos-stafylakis-18a87016/;ioannis-patras-1053767/;", "aff_unique_index": "0;1;2;0", "aff_unique_norm": "Queen Mary University of London;Omilia;Queen Mary, University of London", "aff_unique_dep": ";;", "aff_unique_url": "https://www.qmul.ac.uk;;https://www.qmul.ac.uk", "aff_unique_abbr": "QMUL;;QMUL", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "London;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United Kingdom;" }, { "id": "3aF1Rv3dHG", "title": "One-Model-Connects-All: A Unified Graph Pre-Training Model for Online Community Modeling", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Online community is composed of communities, users, and user-generated textual content, with rich information that can help us solve social problems. Previous research hasn't fully utilized these three components and the relationship among them. What's more, they can't adapt to a wide range of downstream tasks. To solve these problems, we focus on a framework that simultaneously considers communities, users, and texts. And it can easily connect with a variety of downstream tasks related to social media. Specifically, we use a ternary heterogeneous graph to model online communities. Text reconstruction and edge generation are used to learn structural and semantic knowledge among communities, users, and texts. 
By leveraging this pre-trained model, we achieve promising results across multiple downstream tasks, such as violation detection, sentiment analysis, and community recommendation. Our exploration will improve online community modeling.", "keywords": "community;pre-train;graph", "primary_area": "", "supplementary_material": "", "author": "Ruoxue Ma;Jiarong Xu;Xinnong Zhang;Haozhe Zhang;Zuyu Zhao;Qi Zhang;Xuanjing Huang;zhongyu wei", "authorids": "~Ruoxue_Ma1;~Jiarong_Xu2;~Xinnong_Zhang1;~Haozhe_Zhang2;~Zuyu_Zhao2;~Qi_Zhang8;~Xuanjing_Huang1;~zhongyu_wei1", "gender": "F;F;M;M;M;M;F;M", "homepage": "http://fudan-disc.com/people;https://galina0217.github.io/;https://lishi905.github.io/;https://haozhestat.github.io/;https://ya20586665.icoc.vc/;http://qizhang.info;https://xuanjing-huang.github.io/;http://www.sdspeople.fudan.edu.cn/zywei/", "dblp": ";;362/8488.html;;;52/323-1;05/6735-1;31/10489", "google_scholar": ";;;xv0IjskAAAAJ;;XfqR3yYAAAAJ;RGsMgZA4H78C;AjLDxxgAAAAJ", "or_profile": "~Ruoxue_Ma1;~Jiarong_Xu2;~Xinnong_Zhang1;~Haozhe_Zhang2;~Zuyu_Zhao2;~Qi_Zhang8;~Xuanjing_Huang1;~zhongyu_wei1", "aff": "Fudan University;Fudan University;Wuhan University;Huawei Technologies Ltd.;;Fudan University;Fudan University;Fudan University", "aff_domain": "fudan.edu.cn;fudan.edu.cn;whu.edu.cn;huawei.com;;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn", "position": "MS student;Assistant Professor;Undergrad student;Researcher;;Full Professor;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nma2023onemodelconnectsall,\ntitle={One-Model-Connects-All: A Unified Graph Pre-Training Model for Online Community Modeling},\nauthor={Ruoxue Ma and Jiarong Xu and Xinnong Zhang and Haozhe Zhang and Zuyu Zhao and Qi Zhang and Xuanjing Huang and zhongyu wei},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=3aF1Rv3dHG}\n}", "github": "", "project": "", "reviewers": "uL1E;equG;2SWw;bxaU", "site": "https://openreview.net/forum?id=3aF1Rv3dHG", "pdf_size": 0, "rating": "3;3;3;3", "confidence": "4;4;4;4", "excitement": "3;3;2;3", "reproducibility": "3;2;3;4", "correctness": "3;3;2;3", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 2.75, "reproducibility_avg": 3.0, "correctness_avg": 2.75, "replies_avg": 14, "authors#_avg": 8, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0003-2973-1889;;0000-0002-7771-4808;;;0000-0001-9197-9426;", "linkedin": ";;;;;;;", "aff_unique_index": "0;0;1;2;0;0;0", "aff_unique_norm": "Fudan University;Wuhan University;Huawei", "aff_unique_dep": ";;Huawei Technologies", "aff_unique_url": "https://www.fudan.edu.cn;http://www.whu.edu.cn/;https://www.huawei.com", "aff_unique_abbr": "Fudan;WHU;Huawei", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "3dNeNpmyiO", "title": "Learning to Describe for Predicting Zero-shot Drug-Drug Interactions", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Adverse drug-drug interactions (DDIs) can compromise the effectiveness of concurrent drug administration, \nposing a significant challenge in healthcare. \nAs the development of new drugs continues, \nthe potential for unknown adverse effects resulting from DDIs becomes a growing concern. 
\nTraditional computational methods for DDI prediction \nmay fail to capture interactions for new drugs\ndue to the lack of knowledge.\nIn this paper,\nwe introduce a new problem setup as zero-shot DDI prediction\nthat deals with the case of new drugs.\nLeveraging textual information from online databases like DrugBank and PubChem, \nwe propose an innovative approach TextDDI\nwith a language model-based DDI predictor\nand a reinforcement learning~(RL)-based information selector,\nenabling the selection of concise and pertinent text for accurate DDI prediction on new drugs.\nEmpirical results show the benefits of the proposed approach\non several settings including zero-shot and few-shot DDI prediction,\nand the selected texts are semantically relevant. Our code and data are available at https://github.com/zhufq00/DDIs-Prediction.", "keywords": "Reinforcement Learning;Prompt Learning;Drug-Drug Interaction", "primary_area": "", "supplementary_material": "", "author": "Fangqi Zhu;Yongqi Zhang;Lei Chen;Bing Qin;Ruifeng Xu", "authorids": "~Fangqi_Zhu1;~Yongqi_Zhang2;~Lei_Chen7;~Bing_Qin2;~Ruifeng_Xu1", "gender": "M;M;M;;M", "homepage": "https://fangqi-zhu.github.io/;https://yzhangee.github.io/;http://www.cs.ust.hk/~leichen/;http://ir.hit.edu.cn/~qinb;http://faculty.hitsz.edu.cn/xuruifeng", "dblp": "155/6286;;c/LeiChen0002;86/5934.html;93/5407-1", "google_scholar": "https://scholar.google.com.tw/citations?hl=zh-CN;https://scholar.google.com.hk/citations?user=nVk-7EAAAAAJ;gtglwgYAAAAJ;LKnCub0AAAAJ;mObXnNIAAAAJ", "or_profile": "~Fangqi_Zhu1;~Yongqi_Zhang2;~Lei_Chen7;~Bing_Qin2;~Ruifeng_Xu1", "aff": "Harbin Institute of Technology;4Paradigm. Inc;Hong Kong University of Science and Technology;Harbin Institute of Technology;Harbin Institute of Technology", "aff_domain": "hit.edu.cn;4paradigm.com;hkust.edu;hit.edu.cn;hit.edu.cn", "position": "MS student;Researcher;Full Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nzhu2023learning,\ntitle={Learning to Describe for Predicting Zero-shot Drug-Drug Interactions},\nauthor={Fangqi Zhu and Yongqi Zhang and Lei Chen and Bing Qin and Ruifeng Xu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=3dNeNpmyiO}\n}", "github": "", "project": "", "reviewers": "mJfs;Na97;yZ3w", "site": "https://openreview.net/forum?id=3dNeNpmyiO", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;4", "excitement": "4;4;4", "reproducibility": "4;4;5", "correctness": "3;4;3", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 4.0, "reproducibility_avg": 4.333333333333333, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0009-0006-2875-8069;0000-0003-2085-7418;0000-0002-8257-5806;0000-0002-2543-5604;0000-0002-4009-5679", "linkedin": ";;;;", "aff_unique_index": "0;1;2;0;0", "aff_unique_norm": "Harbin Institute of Technology;4Paradigm;Hong Kong University of Science and Technology", "aff_unique_dep": ";;", "aff_unique_url": "http://www.hit.edu.cn/;https://www.4paradigm.com/;https://www.ust.hk", "aff_unique_abbr": "HIT;;HKUST", "aff_campus_unique_index": "0;2;0;0", "aff_campus_unique": "Harbin;;Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "3e8rcsIO7H", "title": "Dense Retrieval as Indirect Supervision for Large-space Decision Making", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Many 
discriminative natural language understanding (NLU) tasks have large label spaces. Learning such a process of large-space decision making is particularly challenging due to the lack of training instances per label and the difficulty of selection among many fine-grained labels. Inspired by dense retrieval methods for passage finding in open-domain QA, we propose a reformulation of large-space discriminative NLU tasks as a learning-to-retrieve task, leading to a novel solution named Dense Decision Retrieval (DDR). Instead of predicting fine-grained decisions as logits, DDR adopts a dual-encoder architecture that learns to predict by retrieving from a decision thesaurus. This approach not only leverages rich indirect supervision signals from easy-to-consume learning resources for dense retrieval, but also leads to enhanced prediction generalizability with a semantically meaningful representation of the large decision space. When evaluated on tasks with decision spaces ranging from hundreds to hundred-thousand scales, DDR outperforms strong baselines greatly, by 27.54% in P@1 on two extreme multi-label classification tasks, 1.17% in F1 score on ultra-fine entity typing, and 1.26% in accuracy on three few-shot intent classification tasks on average.", "keywords": "large decision spaces;dense retrieval;extreme multi-label classification;ultra-fine entity typing;few-shot intent classification", "primary_area": "", "supplementary_material": "", "author": "Nan Xu;Fei Wang;Mingtao Dong;Muhao Chen", "authorids": "~Nan_Xu2;~Fei_Wang12;~Mingtao_Dong1;~Muhao_Chen1", "gender": "F;M;M;M", "homepage": "https://sites.google.com/site/xunannancy;https://feiwang96.github.io/;;https://muhaochen.github.io/", "dblp": ";52/3194-60;;173/2608", "google_scholar": "https://scholar.google.co.uk/citations?hl=en;N1O2KT8AAAAJ;;k79yEZkAAAAJ", "or_profile": "~Nan_Xu2;~Fei_Wang12;~Mingtao_Dong1;~Muhao_Chen1", "aff": "University of Southern California;University of Southern California;University of Southern California;University of Southern California", "aff_domain": "usc.edu;usc.edu;usc.edu;usc.edu", "position": "PhD student;PhD student;Undergrad student;Assistant Research Professor", "bibtex": "@inproceedings{\nxu2023dense,\ntitle={Dense Retrieval as Indirect Supervision for Large-space Decision Making},\nauthor={Nan Xu and Fei Wang and Mingtao Dong and Muhao Chen},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=3e8rcsIO7H}\n}", "github": "", "project": "", "reviewers": "FPkP;E5JK;YGUm", "site": "https://openreview.net/forum?id=3e8rcsIO7H", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;3", "excitement": "4;3;3", "reproducibility": "4;4;4", "correctness": "3;3;3", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.0, "correctness_avg": 3.0, "replies_avg": 10, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0000-0003-0118-3147", "linkedin": "https://linkedin.com/in/nan-xu-b52777125;;https://www.linkedin.com/me?trk=p_mwlite_feed_updates-secondary_nav;", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Southern California", "aff_unique_dep": "", "aff_unique_url": "https://www.usc.edu", "aff_unique_abbr": "USC", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Los Angeles", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "3gdG9upo7e", "title": "Generative 
Table Pre-training Empowers Models for Tabular Prediction", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Recently, the topic of table pre-training has attracted considerable research interest. However, how to employ table pre-training to boost the performance of tabular prediction remains an open challenge. In this paper, we propose TapTap, the first attempt that leverages table pre-training to empower models for tabular prediction. After pre-training on a large corpus of real-world tabular data, TapTap can generate high-quality synthetic tables to support various applications on tabular data, including privacy protection, low resource regime, missing value imputation, and imbalanced classification. Extensive experiments on $12$ datasets demonstrate that TapTap outperforms a total of $16$ baselines in different scenarios. Meanwhile, it can be easily combined with various backbone models, including LightGBM, Multilayer Perceptron (MLP) and Transformer. Moreover, with the aid of table pre-training, models trained using synthetic data generated by TapTap can even compete with models using the original dataset on half of the experimental datasets, marking a milestone in the development of synthetic tabular data generation. The code and datasets are available at https://github.com/ZhangTP1996/TapTap.", "keywords": "tabular prediction;generative table pre-training", "primary_area": "", "supplementary_material": "", "author": "Tianping Zhang;Shaowen Wang;Shuicheng YAN;Li Jian;Qian Liu", "authorids": "~Tianping_Zhang1;~Shaowen_Wang3;~Shuicheng_YAN3;~Jian_Li2;~Qian_Liu2", "gender": "M;M;M;M;M", "homepage": "https://yanshuicheng.ai/;http://iiis.tsinghua.edu.cn/~jianli;http://siviltaram.github.io/;https://github.com/Outsider565/;https://scholar.google.com/citations?user=D_AJuY0AAAAJ", "dblp": "y/ShuichengYan;33/5448-15;;;", "google_scholar": "https://scholar.google.com.hk/citations?user=DNuiPHwAAAAJ;zX7i1EkAAAAJ;bcbeUo0AAAAJ;TtCmtjAAAAAJ;D_AJuY0AAAAJ", "or_profile": "~Shuicheng_YAN3;~Jian_Li2;~Qian_Liu2;~Shaowen_Wang4;~\u5929\u5e73_\u5f201", "aff": "sea Group;Tsinghua University;Sea AI Lab;Fudan University;Tsinghua University", "aff_domain": "sea.com;tsinghua.edu.cn;sea.com;fudan.edu.cn;tsinghua.edu.cn", "position": "Researcher;Associate Professor;Researcher;Undergrad student;PhD student", "bibtex": "@inproceedings{\nzhang2023generative,\ntitle={Generative Table Pre-training Empowers Models for Tabular Prediction},\nauthor={Tianping Zhang and Shaowen Wang and Shuicheng YAN and Li Jian and Qian Liu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=3gdG9upo7e}\n}", "github": "", "project": "", "reviewers": "2Qac;YvRN;PTdH", "site": "https://openreview.net/forum?id=3gdG9upo7e", "pdf_size": 0, "rating": "3;3;3", "confidence": "5;4;4", "excitement": "4;4;4", "reproducibility": "3;5;5", "correctness": "3;4;4", "rating_avg": 3.0, "confidence_avg": 4.333333333333333, "excitement_avg": 4.0, "reproducibility_avg": 4.333333333333333, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;", "linkedin": ";;;shaowen-wang-65606b20a/;", "aff_unique_index": "0;1;2;3;1", "aff_unique_norm": "Sea Group;Tsinghua University;Sea AI Lab;Fudan University", "aff_unique_dep": ";;;", "aff_unique_url": ";https://www.tsinghua.edu.cn;;https://www.fudan.edu.cn", "aff_unique_abbr": ";THU;;Fudan", "aff_campus_unique_index": "", 
"aff_campus_unique": "", "aff_country_unique_index": "1;1;1", "aff_country_unique": ";China" }, { "id": "3k5GFJEGem", "title": "ParroT: Translating during Chat using Large Language Models tuned with Human Translation and Feedback", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Large language models (LLMs) like ChatGPT have exhibited remarkable abilities on a wide range of natural language processing (NLP) tasks, including various machine translation abilities accomplished during chat. However, these models are only accessible through restricted APIs, which creates barriers to new research and advancements in the field. Therefore, we propose ParroT, a framework to enhance and regulate the translation abilities during chat based on open-source LLMs (e.g., LLaMA), human-written translation and feedback data. Specifically, ParroT reformulates translation data into the instruction-following style, and introduces a \u201cHint\u201d field for incorporating extra requirements to regulate the translation process. Accordingly, we propose three instruction types for finetuning ParroT models, including translation instruction, contrastive instruction, and error-guided instruction. Experiments on Flores subsets and WMT22 test sets suggest that translation instruction improves the translation performance of vanilla LLMs significantly while error-guided instruction can lead to further improvement, which demonstrates the importance of learning from low-quality translations annotated by humans. We also demonstrate the potential of automatic evaluation tools in providing quality information of translations, when constructing error-guided instructions for directions that lack human annotation data. Please refer to our Github project for more implementation details: https://github.com/wxjiao/ParroT.", "keywords": "machine translation;instruction tuning;LoRA;human feedback;LLaMA;BLOOMZ;ChatGPT;GPT-4", "primary_area": "", "supplementary_material": "", "author": "Wenxiang Jiao;Jen-tse Huang;Wenxuan Wang;Zhiwei He;Tian Liang;Xing Wang;Shuming Shi;Zhaopeng Tu", "authorids": "~Wenxiang_Jiao1;~Jen-tse_Huang1;~Wenxuan_Wang2;~Zhiwei_He1;~Tian_Liang4;~Xing_Wang1;~Shuming_Shi1;~Zhaopeng_Tu1", "gender": "M;M;;M;M;M;M;M", "homepage": "https://wxjiao.github.io/;https://penguinnnnn.github.io/;;https://zwhe99.github.io/;https://skytliang.github.io/;http://xingwang4nlp.com/;;http://www.zptu.net", "dblp": "239/4883;317/7026;203/1536-1;52/6077-2;;02/3674-7;s/ShumingShi;71/9281", "google_scholar": "CvtODukAAAAJ;XBzDTAQAAAAJ;4v5x0bUAAAAJ;https://scholar.google.com/citations?hl=en;Z6bZcT4AAAAJ;6AqRKa0AAAAJ;Lg31AKMAAAAJ;IvE2zRgAAAAJ", "or_profile": "~Wenxiang_Jiao1;~Jen-tse_Huang1;~Wenxuan_Wang2;~Zhiwei_He1;~Tian_Liang4;~Xing_Wang1;~Shuming_Shi1;~Zhaopeng_Tu1", "aff": "Tencent AI Lab;The Chinese University of Hong Kong;The Chinese University of Hong Kong;Shanghai Jiaotong University;Graduate School at Shenzhen,Tsinghua University;Tencent AI Lab;Tencent AI Lab;Tencent AI Lab", "aff_domain": "tencent.com;cuhk.edu.hk;cuhk.edu.hk;sjtu.edu.cn;tsinghua.edu.cn;tencent.com;tencent.com;tencent.com", "position": "Researcher;PhD student;PhD student;PhD student;MS student;Researcher;Principal Researcher;Principal Researcher", "bibtex": "@inproceedings{\njiao2023parrot,\ntitle={ParroT: Translating during Chat using Large Language Models tuned with Human Translation and Feedback},\nauthor={Wenxiang Jiao and Jen-tse Huang and Wenxuan Wang and Zhiwei He and Tian Liang and Xing Wang and Shuming Shi and Zhaopeng 
Tu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=3k5GFJEGem}\n}", "github": "", "project": "", "reviewers": "AfZp;jRLd;k66w", "site": "https://openreview.net/forum?id=3k5GFJEGem", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;4;3", "excitement": "4;3;1", "reproducibility": "4;4;3", "correctness": "4;4;2", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 2.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 8, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0003-3446-0083;;0000-0002-4807-0062;;0000-0002-0737-9653;;", "linkedin": ";jen-tse-huang-08a169200/;;;;;;tuzhaopeng", "aff_unique_index": "0;1;1;2;3;0;0;0", "aff_unique_norm": "Tencent;Chinese University of Hong Kong;Shanghai Jiao Tong University;Tsinghua University", "aff_unique_dep": "Tencent AI Lab;;;Graduate School", "aff_unique_url": "https://ai.tencent.com;https://www.cuhk.edu.hk;https://www.sjtu.edu.cn;https://www.tsinghua.edu.cn", "aff_unique_abbr": "Tencent AI Lab;CUHK;SJTU;THU", "aff_campus_unique_index": "1;1;2", "aff_campus_unique": ";Hong Kong SAR;Shenzhen", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "3l9zUuFo9m", "title": "Cache me if you Can: an Online Cost-aware Teacher-Student framework to Reduce the Calls to Large Language Models", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "Prompting Large Language Models (LLMs) performs impressively in zero- and few-shot settings. Hence, small and medium-sized enterprises (SMEs) that cannot afford the cost of creating large task-specific training datasets, but also the cost of pretraining their own LLMs, are increasingly turning to third-party services that allow them to prompt LLMs. However, such services currently require a payment per call, which becomes a significant operating expense (OpEx). Furthermore, customer inputs are often very similar over time, hence SMEs end-up prompting LLMs with very similar instances. We propose a framework that allows reducing the calls to LLMs by caching previous LLM responses and using them to train a local inexpensive model on the SME side. The framework includes criteria for deciding when to trust the local model or call the LLM, and a methodology to tune the criteria and measure the tradeoff between performance and cost. For experimental purposes, we instantiate our framework with two LLMs, GPT-3.5 or GPT-4, and two inexpensive students, a $k$-NN classifier or a Multi-Layer Perceptron, using two common business tasks, intent recognition and sentiment analysis. 
\nExperimental results indicate that significant OpEx savings can be obtained with only slightly lower performance.", "keywords": "Online Learning;Cost-Efficient;LLMs Applications;Caching;Teacher-Student", "primary_area": "", "supplementary_material": "", "author": "Ilias Stogiannidis;Stavros Vassos;Prodromos Malakasiotis;Ion Androutsopoulos", "authorids": "~Ilias_Stogiannidis1;~Stavros_Vassos1;~Prodromos_Malakasiotis1;~Ion_Androutsopoulos1", "gender": "M;M;M;M", "homepage": ";http://pages.cs.aueb.gr/~rulller/;http://www.aueb.gr/users/ion/;https://stogiannidis.github.io/", "dblp": "07/2215;16/5137;87/6723;", "google_scholar": "https://scholar.google.com/citations?hl=en;https://scholar.google.gr/citations?user=36n9818AAAAJ;https://scholar.google.com.tw/citations?user=4UJm5EQAAAAJ;h9Pxv-kAAAAJ", "or_profile": "~Stavros_Vassos1;~Prodromos_Malakasiotis1;~Ion_Androutsopoulos1;~Ilias_Marios_Stogiannidis1", "aff": "Helvia.ai;Workable;Athens University of Economics and Business;Athens University of Economics and Business", "aff_domain": "helvia.ai;workable.com;aueb.gr;aueb.gr", "position": "Principal Researcher;Principal Machine Learning Engineer;Faculty;MS student", "bibtex": "@inproceedings{\nstogiannidis2023cache,\ntitle={Cache me if you Can: an Online Cost-aware Teacher-Student framework to Reduce the Calls to Large Language Models},\nauthor={Ilias Stogiannidis and Stavros Vassos and Prodromos Malakasiotis and Ion Androutsopoulos},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=3l9zUuFo9m}\n}", "github": "", "project": "", "reviewers": "4DeH;7Vyf;5m6Q", "site": "https://openreview.net/forum?id=3l9zUuFo9m", "pdf_size": 0, "rating": "4;4;4", "confidence": "5;3;2", "excitement": "4;3;2", "reproducibility": "1;4;2", "correctness": "4;4;3", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.0, "reproducibility_avg": 2.3333333333333335, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0009-0008-0055-5598;0009-0000-2969-0509;0009-0005-5803-1138", "linkedin": "stavrosv/;rulller/;ion-androutsopoulos-477b6b3a/;stogiannidis/", "aff_unique_index": "0;1;2;2", "aff_unique_norm": "helvia.ai;Workable;Athens University of Economics and Business", "aff_unique_dep": ";;", "aff_unique_url": "https://helvia.ai;https://www.workable.com;https://www.aueb.gr", "aff_unique_abbr": "Helvia.ai;;AUEB", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Athens", "aff_country_unique_index": "0;0;1;1", "aff_country_unique": "United States;Greece" }, { "id": "3pvdo2yHXq", "title": "Speech-enriched Memory for Inference-time Adaptation of ASR Models to Word Dictionaries", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Despite the impressive performance of ASR models on mainstream benchmarks, their performance on rare words is unsatisfactory. In enterprise settings, often a focused list of entities (such as locations, names, etc) are available which can be used to adapt the model to the terminology of specific domains. In this paper, we present a novel inference algorithm that improves the prediction of state-of-the-art ASR models using nearest-neighbor-based matching on an inference-time word list. 
We consider both the Transducer architecture that is useful in the streaming setting, and state-of-the-art encoder-decoder models such as Whisper.\n\nIn our approach, a list of rare entities is indexed in a memory by synthesizing speech for each entry, and then storing the internal acoustic and language model states obtained from the best possible alignment on the ASR model. The memory is organized as a trie which we harness to perform a stateful lookup during inference. A key property of our extension is that we prevent spurious matches by restricting to only word-level matches. In our experiments on publicly available datasets and private benchmarks, we show that our method is effective in significantly improving rare word recognition.", "keywords": "ASR Adaptation;Inference-time adaptation;Contextual Biasing", "primary_area": "", "supplementary_material": "", "author": "Ashish Mittal;Sunita Sarawagi;Preethi Jyothi;George Saon;Gakuto Kurata", "authorids": "~Ashish_Mittal1;~Sunita_Sarawagi1;~Preethi_Jyothi2;~George_Saon1;~Gakuto_Kurata1", "gender": "M;F;F;;", "homepage": "https://researcher.watson.ibm.com/researcher/view.php?person=in-arakeshk;https://www.cse.iitb.ac.in/~sunita/;http://www.cse.iitb.ac.in/~pjyothi;;", "dblp": "184/1441;s/SunitaSarawagi;01/9014;52/6787;20/4496", "google_scholar": "https://scholar.google.co.in/citations?user=4LMwouUAAAAJ;https://scholar.google.com.tw/citations?user=Hg4HmTAAAAAJ;https://scholar.google.co.in/citations?user=QN_uhu8AAAAJ;;", "or_profile": "~Ashish_Mittal1;~Sunita_Sarawagi1;~Preethi_Jyothi2;~George_Saon1;~Gakuto_Kurata1", "aff": "IBM Research;IIT Bombay;Indian Institute of Technology Bombay;;International Business Machines", "aff_domain": "ibm.com;iitb.ac.in;iitb.ac.in;;ibm.com", "position": "Researcher;Full Professor;Associate Professor;;Manager of Research and Senior Technical Staff Member", "bibtex": "@inproceedings{\nmittal2023speechenriched,\ntitle={Speech-enriched Memory for Inference-time Adaptation of {ASR} Models to Word Dictionaries},\nauthor={Ashish Mittal and Sunita Sarawagi and Preethi Jyothi and George Saon and Gakuto Kurata},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=3pvdo2yHXq}\n}", "github": "", "project": "", "reviewers": "emcA;Kmjd;KQzm;wrWb", "site": "https://openreview.net/forum?id=3pvdo2yHXq", "pdf_size": 0, "rating": "5;5;5;5", "confidence": "4;3;3;3", "excitement": "4;4;4;4", "reproducibility": "3;4;3;4", "correctness": "4;4;4;3", "rating_avg": 5.0, "confidence_avg": 3.25, "excitement_avg": 4.0, "reproducibility_avg": 3.5, "correctness_avg": 3.75, "replies_avg": 14, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;", "linkedin": ";;;;", "aff_unique_index": "0;1;1;2", "aff_unique_norm": "IBM;Indian Institute of Technology Bombay;International Business Machines Corporation", "aff_unique_dep": "IBM Research;;", "aff_unique_url": "https://www.ibm.com/research;https://www.iitb.ac.in;https://www.ibm.com", "aff_unique_abbr": "IBM;IITB;IBM", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Mumbai;Bombay", "aff_country_unique_index": "0;1;1;0", "aff_country_unique": "United States;India" }, { "id": "3qF5MqUl3Y", "title": "R2H: Building Multimodal Navigation Helpers that Respond to Help Requests", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Intelligent navigation-helper agents are critical as they can navigate users in unknown areas through environmental awareness 
and conversational ability, serving as potential accessibility tools for individuals with disabilities. In this work, we first introduce a novel benchmark, Respond to Help Requests (R2H), to promote the development of multi-modal navigation helpers capable of responding to requests for help, utilizing existing dialog-based embodied datasets. R2H mainly includes two tasks: (1) Respond to Dialog History (RDH), which assesses the helper agent's ability to generate informative responses based on a given dialog history, and (2) Respond during Interaction (RdI), which evaluates the effectiveness and efficiency of the response during consistent cooperation with a task performer. Furthermore, we explore two approaches to construct the navigation-helper agent, including fine-tuning a novel task-oriented multi-modal response generation model that can see and respond, named SeeRee, and employing a multi-modal large language model in a zero-shot manner. Analysis of the task and method was conducted based on both automatic benchmarking and human evaluations.", "keywords": "Multimodal Understanding;Embodied AI;Vision-and-language Navigation", "primary_area": "", "supplementary_material": "", "author": "Yue Fan;Jing Gu;Kaizhi Zheng;Xin Eric Wang", "authorids": "~Yue_Fan3;~Jing_Gu2;~Kaizhi_Zheng1;~Xin_Eric_Wang2", "gender": "M;M;M;M", "homepage": "http://www.yfan.site;https://g-jing.github.io/;https://kzzheng.github.io/;https://eric-xw.github.io", "dblp": ";;;10/5630-61", "google_scholar": ";B3YeB3YAAAAJ;jLa6wpUAAAAJ;YjqluE0AAAAJ", "or_profile": "~Yue_Fan3;~Jing_Gu2;~Kaizhi_Zheng1;~Xin_Eric_Wang2", "aff": "University of California, Santa Cruz;University of California, Santa Cruz;University of California, Santa Cruz;University of California, Santa Cruz", "aff_domain": "ucsc.edu;ucsc.edu;ucsc.edu;ucsc.edu", "position": "PhD student;PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nfan2023rh,\ntitle={R2H: Building Multimodal Navigation Helpers that Respond to Help Requests},\nauthor={Yue Fan and Jing Gu and Kaizhi Zheng and Xin Eric Wang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=3qF5MqUl3Y}\n}", "github": "", "project": "", "reviewers": "6i43;DxYF;yJ8D", "site": "https://openreview.net/forum?id=3qF5MqUl3Y", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;3;3", "excitement": "3;4;4", "reproducibility": "4;4;3", "correctness": "3;3;3", "rating_avg": 5.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0003-3534-3487;0000-0003-2605-5504", "linkedin": ";;;", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of California, Santa Cruz", "aff_unique_dep": "", "aff_unique_url": "https://www.ucsc.edu", "aff_unique_abbr": "UCSC", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Santa Cruz", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "3qG4r6FGWD", "title": "Aligning Predictive Uncertainty with Clarification Questions in Grounded Dialog", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Asking for clarification is fundamental to effective collaboration. \nAn interactive artificial agent must know when to ask a human instructor for more information in order to ascertain their goals. 
\nPrevious work bases the timing of questions on supervised models learned from interactions between humans. \nInstead of a supervised classification task, we wish to ground the need for questions in the acting agent's predictive uncertainty. In this work, we investigate if ambiguous linguistic instructions can be aligned with uncertainty in neural models. We train an agent using the T5 encoder-decoder architecture to solve the Minecraft Collaborative Building Task and identify uncertainty metrics that achieve better distributional separation between clear and ambiguous instructions. We further show that well-calibrated prediction probabilities benefit the detection of ambiguous instructions. Lastly, we provide a novel empirical analysis on the relationship between uncertainty and dialog history length and highlight an important property that poses a difficulty for detection.", "keywords": "predictive uncertainty;calibration;grounded dialog;clarification question;instruction following;collaborative dialog", "primary_area": "", "supplementary_material": "", "author": "Kata Naszadi;Putra Manggala;Christof Monz", "authorids": "~Kata_Naszadi1;~Putra_Manggala1;~Christof_Monz1", "gender": "F;;M", "homepage": "https://ltl.science.uva.nl/;;https://staff.fnwi.uva.nl/c.monz/", "dblp": ";;m/ChristofMonz", "google_scholar": ";ikZj2E8AAAAJ;0r3PWLQAAAAJ", "or_profile": "~Kata_Naszadi1;~Putra_Manggala1;~Christof_Monz1", "aff": "University of Amsterdam;University of Amsterdam;University of Amsterdam, University of Amsterdam", "aff_domain": "uva.nl;uva.nl;ivi.uva.nl", "position": "PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nnaszadi2023aligning,\ntitle={Aligning Predictive Uncertainty with Clarification Questions in Grounded Dialog},\nauthor={Kata Naszadi and Putra Manggala and Christof Monz},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=3qG4r6FGWD}\n}", "github": "", "project": "", "reviewers": "tDZx;pdGz;d5nS;zqDm", "site": "https://openreview.net/forum?id=3qG4r6FGWD", "pdf_size": 0, "rating": "3;3;3;3", "confidence": "3;4;3;2", "excitement": "4;2;2;4", "reproducibility": "3;4;4;3", "correctness": "4;3;3;4", "rating_avg": 3.0, "confidence_avg": 3.0, "excitement_avg": 3.0, "reproducibility_avg": 3.5, "correctness_avg": 3.5, "replies_avg": 14, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": ";;", "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Amsterdam", "aff_unique_dep": "", "aff_unique_url": "https://www.uva.nl", "aff_unique_abbr": "UvA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Netherlands" }, { "id": "3u3kXSeVvR", "title": "Viewing Knowledge Transfer in Multilingual Machine Translation Through a Representational Lens", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "We argue that translation quality alone is not a sufficient metric for measuring knowledge transfer in multilingual neural machine translation. To support this claim, we introduce Representational Transfer Potential (RTP), which measures representational similarities between languages. We show that RTP can measure both positive and negative transfer (interference), and find that RTP is strongly correlated with changes in translation quality, indicating that transfer \\textit{does} occur. 
Furthermore, we investigate data and language characteristics that are relevant for transfer, and find that multi-parallel overlap is an important yet under-explored feature. Based on this, we develop a novel training scheme, which uses an auxiliary similarity loss that encourages representations to be more invariant across languages by taking advantage of multi-parallel data. We show that our method yields increased translation quality for low- and mid-resource languages across multiple data and model setups.\\footnote{We will release our code upon acceptance.}", "keywords": "multilingual machine translation;cross-lingual knowledge transfer", "primary_area": "", "supplementary_material": "", "author": "David Stap;Vlad Niculae;Christof Monz", "authorids": "~David_Stap1;~Vlad_Niculae2;~Christof_Monz1", "gender": "M;M;M", "homepage": "https://davidstap.github.io;https://vene.ro;https://staff.fnwi.uva.nl/c.monz/", "dblp": ";40/10489;m/ChristofMonz", "google_scholar": "u7c1llgAAAAJ;7_3UAgQAAAAJ;0r3PWLQAAAAJ", "or_profile": "~David_Stap1;~Vlad_Niculae2;~Christof_Monz1", "aff": "University of Amsterdam;University of Amsterdam;University of Amsterdam, University of Amsterdam", "aff_domain": "uva.nl;uva.nl;ivi.uva.nl", "position": "PhD student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nstap2023viewing,\ntitle={Viewing Knowledge Transfer in Multilingual Machine Translation Through a Representational Lens},\nauthor={David Stap and Vlad Niculae and Christof Monz},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=3u3kXSeVvR}\n}", "github": "", "project": "", "reviewers": "4uHs;kGrk;sd5X", "site": "https://openreview.net/forum?id=3u3kXSeVvR", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;4;3", "excitement": "3;2;3", "reproducibility": "3;4;4", "correctness": "3;2;3", "rating_avg": 3.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 2.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 2.6666666666666665, "replies_avg": 10, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": ";;", "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Amsterdam", "aff_unique_dep": "", "aff_unique_url": "https://www.uva.nl", "aff_unique_abbr": "UvA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Netherlands" }, { "id": "3ymHqvobHJ", "title": "Cross-lingual Open-Retrieval Question Answering for African Languages", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "African languages have far less in-language content available digitally, making it challenging for question answering systems to satisfy the information needs of users. Cross-lingual open-retrieval question answering (XOR QA) systems -- those that retrieve answer content from other languages while serving people in their native language---offer a means of filling this gap. To this end, we create Our Dataset, the first cross-lingual QA dataset with a focus on African languages. Our Dataset includes 12,000+ XOR QA examples across 10 African languages. While previous datasets have focused primarily on languages where cross-lingual QA augments coverage from the target language, Our Dataset focuses on languages where cross-lingual answer content is the only high-coverage source of answer content. 
Because of this, we argue that African languages are one of the most important and realistic use cases for XOR QA. Our experiments demonstrate the poor performance of automatic translation and multilingual retrieval methods. Overall, Our Dataset proves challenging for state-of-the-art QA models. We hope that the dataset enables the development of more equitable QA technology.", "keywords": "African Languages;Question Answering;Information Retrieval;Low-resource Languages", "primary_area": "", "supplementary_material": "", "author": "Odunayo Ogundepo;Tajuddeen Gwadabe;Clara E. Rivera;Jonathan H. Clark;Sebastian Ruder;David Ifeoluwa Adelani;Bonaventure F. P. Dossou;Abdou Aziz DIOP;Claytone Sikasote;Gilles HACHEME;Happy Buzaaba;Ignatius Ezeani;Rooweither Mabuya;Salomey Osei;Chris Chinenye Emezue;Albert Kahira;Shamsuddeen Hassan Muhammad;Akintunde Oladipo;Abraham Toluwase Owodunni;Atnafu Lambebo Tonja;Iyanuoluwa Shode;Akari Asai;Anuoluwapo Aremu;Ayodele Awokoya;Bernard Opoku;Chiamaka Ijeoma Chukwuneke;Christine Mwase;Clemencia Siro;Stephen Arthur;Tunde Oluwaseyi Ajayi;Verrah Akinyi Otiende;Andre Niyongabo Rubungo;Boyd Sinkala;Daniel Ajisafe;Emeka Felix Onwuegbuzia;Falalu Ibrahim Lawan;Ibrahim Said Ahmad;Jesujoba Oluwadara Alabi;CHINEDU EMMANUEL MBONU;Mofetoluwa Adeyemi;Mofya Phiri;Orevaoghene Ahia;Ruqayya Nasir Iro;Sonia Adhiambo", "authorids": "~Odunayo_Ogundepo1;~Tajuddeen_Gwadabe1;~Clara_E._Rivera1;~Jonathan_H._Clark1;~Sebastian_Ruder2;~David_Ifeoluwa_Adelani1;~Bonaventure_F._P._Dossou1;~Abdou_Aziz_DIOP1;~Claytone_Sikasote1;~Gilles_HACHEME1;~Happy_Buzaaba1;~Ignatius_Ezeani1;~Rooweither_Mabuya1;~Salomey_Osei1;~Chris_Chinenye_Emezue1;~Albert_Kahira1;~Shamsuddeen_Hassan_Muhammad1;~Akintunde_Oladipo1;~Abraham_Toluwase_Owodunni1;~Atnafu_Lambebo_Tonja1;~Iyanuoluwa_Shode1;~Akari_Asai2;~Anuoluwapo_Aremu1;~Ayodele_Awokoya1;~Bernard_Opoku1;~Chiamaka_Ijeoma_Chukwuneke1;~Christine_Mwase1;~Clemencia_Siro1;~Stephen_Arthur1;~Tunde_Oluwaseyi_Ajayi1;~Verrah_Akinyi_Otiende1;~Andre_Niyongabo_Rubungo1;~Boyd_Sinkala1;~Daniel_Ajisafe1;~Emeka_Felix_Onwuegbuzia1;~Falalu_Ibrahim_Lawan1;~Ibrahim_Said_Ahmad1;~Jesujoba_Oluwadara_Alabi1;~CHINEDU_EMMANUEL_MBONU1;~Mofetoluwa_Adeyemi1;~Mofya_Phiri2;~Orevaoghene_Ahia1;~Ruqayya_Nasir_Iro1;~Sonia_Adhiambo1", "gender": "M;M;;M;;M;M;M;M;M;M;M;F;F;M;M;;M;M;M;F;F;M;F;M;F;;;M;M;F;M;M;M;M;M;M;M;;F;M;;F;", "homepage": ";;https://scholar.google.co.uk/citations?user=K7JKtEkAAAAJ&hl=en;;;https://dadelani.github.io/;https://twitter.com/bonadossou;https://abdouaziz.github.io/;https://csikasote.github.io/;https://www.gilleshacheme.com/;https://buzaabah.github.io/;https://www.lancaster.ac.uk/scc/about-us/people/ignatius-ezeani;;;https://twitter.com/ChrisEmezue;https://albertkahira.com;;https://theyorubayesian.github.io/;;http://atnafuatx.github.io/;;https://akariasai.github.io/;https://aremuanuoluwapo.com;;;https://profile.unizik.edu.ng/nau3138;;;;https://tundeajayi.github.io/;;https://andrews2017.github.io/;;https://danielajisafe.github.io/;;;https://isahmadbbr.github.io;https://ajesujoba.github.io/;;;;;;", "dblp": ";;;02/786;;230/6973;261/9506;;;;247/1379;185/1311;;;261/9858;234/3885;;341/4148;;312/3167;;;;288/0155;;;;;;318/3166;;276/0370;;;;;https://dblp.uni-trier.de/pid/270/7405;255/4820;;276/0211;;;;", "google_scholar": 
"https://scholar.google.com/citations?hl=en;Fu1ru8YAAAAJ;https://scholar.google.co.uk/citations?user=K7JKtEkAAAAJ;WfWxwlIAAAAJ;;https://scholar.google.ca/citations?user=W9sTkS0AAAAJ;2J581k0AAAAJ;;https://scholar.google.com/citations?hl=en;yMMUBIUAAAAJ;emV9Mr0AAAAJ;flSvPiMAAAAJ;https://scholar.google.com/schhp?hl=en;32M1HMsAAAAJ;PBHOsekAAAAJ;https://scholar.google.es/citations?user=L4orVv4AAAAJ;;QXGZ_yQAAAAJ;yW-2hooAAAAJ;https://scholar.google.com.mx/citations?user=rubyApkAAAAJ;estoP6EAAAAJ;gqB4u_wAAAAJ;POKZOc4AAAAJ;_E5iEVEAAAAJ;f5tqjbkAAAAJ;zyhK-EIAAAAJ;;;;G-AVO3sAAAAJ;4mVpU1UAAAAJ;5qnTWQEAAAAJ;;https://scholar.google.ca/citations?user=jYHy-c0AAAAJ;;IeAUfwYAAAAJ;QA4Aib4AAAAJ;vhBQlm8AAAAJ;https://scholar.google.com/citations?authuser=1;nqj3mJYAAAAJ;;;;", "or_profile": "~Odunayo_Ogundepo1;~Tajuddeen_Gwadabe1;~Clara_E._Rivera1;~Jonathan_H._Clark1;~Sebastian_Ruder2;~David_Ifeoluwa_Adelani1;~Bonaventure_F._P._Dossou1;~Abdou_Aziz_DIOP1;~Claytone_Sikasote1;~Gilles_HACHEME1;~Happy_Buzaaba1;~Ignatius_Ezeani1;~Rooweither_Mabuya1;~Salomey_Osei1;~Chris_Chinenye_Emezue1;~Albert_Kahira1;~Shamsuddeen_Hassan_Muhammad1;~Akintunde_Oladipo1;~Abraham_Toluwase_Owodunni1;~Atnafu_Lambebo_Tonja1;~Iyanuoluwa_Shode1;~Akari_Asai2;~Anuoluwapo_Aremu1;~Ayodele_Awokoya1;~Bernard_Opoku1;~Chiamaka_Ijeoma_Chukwuneke1;~Christine_Mwase1;~Clemencia_Siro1;~Stephen_Arthur1;~Tunde_Oluwaseyi_Ajayi1;~Verrah_Akinyi_Otiende1;~Andre_Niyongabo_Rubungo1;~Boyd_Sinkala1;~Daniel_Ajisafe1;~Emeka_Felix_Onwuegbuzia1;~Falalu_Ibrahim_Lawan1;~Ibrahim_Said_Ahmad1;~Jesujoba_Oluwadara_Alabi1;~CHINEDU_EMMANUEL_MBONU1;~Mofetoluwa_Adeyemi1;~Mofya_Phiri2;~Orevaoghene_Ahia1;~Ruqayya_Nasir_Iro1;~Sonia_Adhiambo1", "aff": "University of Waterloo;Masakhane Research Foundation;Research, Google;Google DeepMind;;University College London, University of London;McGill University, McGill University;;;Masakhane NLP;RIKEN;Lancaster University;North-West University;Universidad de Deusto;;Julich Supercomputing Center;;University of Waterloo;;Instituto Polit\u00e9cnico Nacional;Bloomberg;Paul G. Allen School of Computer Science & Engineering, University of Washington;;University of Ibadan;Accra Institute of Technology;Lancaster University;;;;University of Galway, Ireland;Tom Mboya University;Princeton University;University of Zambia;University of British Columbia;University of Ibadan;Kaduna State University;Bayero University Kano;Universit\u00e4t des Saarlandes;Nnamdi Azikiwe University;University of Waterloo;University of Zambia;;National Open University of Nigeria;", "aff_domain": "cs.uwaterloo.ca;masakhane.io;research.google.com;google.com;;ucl.ac.uk;mail.mcgill.ca;;;masakhane.io;riken.jp;lancaster.ac.uk;nwu.ac.za;deusto.es;;fzj-juelich.de;;uwaterloo.ca;;ipn.mx;bloomberg.net;cs.washington.edu;;ui.edu.ng;ait.edu.gh;lancaster.ac.uk;;;;universityofgalway.ie;tmu.ac.ke;princeton.edu;unza.zm;cs.ubc.ca;ui.edu.ng;kasuportal.net;buk.edu.ng;uni-saarland.de;unizik.edu.ng;uwaterloo.ca;unza.zm;;nou.edu.ng;", "position": "MS student;Researcher;Researcher;Researcher;;Postdoc;PhD student;;;Researcher;Postdoc;Researcher;Researcher;PhD student;;Researcher;;MS student;;PhD student;Researcher;PhD student;;PhD student;Instructor;PhD student;;;;PhD student;Lecturer;PhD student;MS student;MS student;PhD student;Lecturer;Lecturer;Undergrad student;Lecturer;MS student;Lecturer;;MS student;", "bibtex": "@inproceedings{\nogundepo2023crosslingual,\ntitle={Cross-lingual Open-Retrieval Question Answering for African Languages},\nauthor={Odunayo Ogundepo and Tajuddeen Gwadabe and Clara E. 
Rivera and Jonathan H. Clark and Sebastian Ruder and David Ifeoluwa Adelani and Bonaventure F. P. Dossou and Abdou Aziz DIOP and Claytone Sikasote and Gilles HACHEME and Happy Buzaaba and Ignatius Ezeani and Rooweither Mabuya and Salomey Osei and Chris Chinenye Emezue and Albert Kahira and Shamsuddeen Hassan Muhammad and Akintunde Oladipo and Abraham Toluwase Owodunni and Atnafu Lambebo Tonja and Iyanuoluwa Shode and Akari Asai and Anuoluwapo Aremu and Ayodele Awokoya and Bernard Opoku and Chiamaka Ijeoma Chukwuneke and Christine Mwase and Clemencia Siro and Stephen Arthur and Tunde Oluwaseyi Ajayi and Verrah Akinyi Otiende and Andre Niyongabo Rubungo and Boyd Sinkala and Daniel Ajisafe and Emeka Felix Onwuegbuzia and Falalu Ibrahim Lawan and Ibrahim Said Ahmad and Jesujoba Oluwadara Alabi and CHINEDU EMMANUEL MBONU and Mofetoluwa Adeyemi and Mofya Phiri and Orevaoghene Ahia and Ruqayya Nasir Iro and Sonia Adhiambo},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=3ymHqvobHJ}\n}", "github": "", "project": "", "reviewers": "aGgm;Fe2K;u1CQ", "site": "https://openreview.net/forum?id=3ymHqvobHJ", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;4;5", "excitement": "3;4;4", "reproducibility": "3;4;4", "correctness": "3;4;4", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 44, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0009-0008-2036-2777;;0000-0002-0193-2083;;;;;;0000-0001-8286-9997;0000-0001-6658-975X;my-orcid?orcid=0000-0003-1900-3124;0000-0002-3533-6829;0000-0002-1138-0577;;0009-0000-2630-8167;0000-0002-2561-256X;0000-0002-3501-5136;;;;;0000-0003-3104-9897;0000-0002-2966-9416;;;;0000-0002-1860-609X;0000-0001-6147-3547;;;;;0000-0003-3310-0326;0000-0001-9514-1807;0000-0001-8843-5460;;;;;;", "linkedin": "ogundepo-odunayo-b69191111/;;;;;david-adelani-7557b337/;bonaventuredossou/;;claytone-sikasote-93109b167?lipi=urn%3Ali%3Apage%3Ad_flagship3_profile_view_base_contact_details%3BXrO8EFSERJKNoCy4yaSMMw%3D%3D;gilles-q-hacheme-a0956ab7/;;ignatiusezeani/;;salomey-osei-4b08a5b8/;chris-emezue-4878471a9/;albertkahira/;;olasakins/;abraham-owodunni;atnafu-lambebo-6b21a5184;iyanuoluwashode/;;;ayodele-awokoya-b9317265/;bernard-kwabena-opoku-965653b1/;chiamaka-ijeoma-chukwuneke-nee-oli-517b2313/;;;stephen-arthur-315b0712b/;tunde-ajayi-978130b3/;verrahotiende/;andre-niyongabo-rubungo-851370168/;boyd-sinkala-889942237/;;emeka-onwuegbuzia-5121b140/;https://linkedin.com/in/falalu-ibrahim-003283114;ibrahim-said-ahmad-42419775/;alabi-jesujoba-oluwadara;;;mofya-phiri-1a878a22/;;ruqayya-nasir-iro-04468959/;", "aff_unique_index": "0;1;2;2;3;4;5;6;7;8;9;10;0;11;12;13;14;15;7;16;17;18;19;20;14;21;22;23;24;0;19;25", "aff_unique_norm": "University of Waterloo;Masakhane Research Foundation;Google;University College London;McGill University;Masakhane;RIKEN;Lancaster University;North-West University;Universidad de Deusto;Julich Supercomputing Center;Instituto Polit\u00e9cnico Nacional;Bloomberg;University of Washington;University of Ibadan;Accra Institute of Technology;National University of Ireland, Galway;Tom Mboya University;Princeton University;University of Zambia;University of British Columbia;Kaduna State University;Bayero University;Universit\u00e4t des Saarlandes;Nnamdi Azikiwe University;National Open University of Nigeria", "aff_unique_dep": 
";;Google Research;;;NLP;;;;;;;;Paul G. Allen School of Computer Science & Engineering;;;;;;;;;;;;", "aff_unique_url": "https://uwaterloo.ca;https://www.masakhane.io;https://research.google;https://www.ucl.ac.uk;https://www.mcgill.ca;;https://www.riken.jp;https://www.lancaster.ac.uk;https://www.nwu.ac.za;https://www.deusto.es;https://www.fz-juelich.de/ibm/jsc;https://www.ipn.mx;https://www.bloomberg.com;https://www.washington.edu;https://www.ui.edu.ng;https://www.aitech.edu.gh;https://www.nuigalway.ie;http://www.tmu.ac.ke;https://www.princeton.edu;https://www.unza.zm;https://www.ubc.ca;https://www.kasu.edu.ng;https://www.buk.edu.ng;https://www.uni-saarland.de;https://www.nau.edu.ng;https://www.nou.edu.ng", "aff_unique_abbr": "UW;;Google;UCL;McGill;;RIKEN;Lancaster;NWU;Deusto;JSC;IPN;Bloomberg;UW;UI;AIT;NUI Galway;TMU;Princeton;UNZA;UBC;KASU;Bayero;UDS;NAU;NOUN", "aff_campus_unique_index": "1;2;3", "aff_campus_unique": ";Mountain View;Seattle;Kano", "aff_country_unique_index": "0;1;2;3;3;0;1;4;3;1;5;6;0;7;2;2;8;9;3;10;11;2;12;0;8;8;8;6;8;0;12;8", "aff_country_unique": "Canada;South Africa;United States;United Kingdom;Japan;Spain;Germany;Mexico;Nigeria;Ghana;Ireland;Kenya;Zambia" }, { "id": "40NCUv4I2R", "title": "Enhancing Text-to-SQL Capabilities of Large Language Models: A Study on Prompt Design Strategies", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "In-context learning (ICL) has emerged as a new approach to various natural language processing tasks, utilizing large language models (LLMs) to make predictions based on context that has been supplemented with a few examples or task-specific instructions. In this paper, we aim to extend this method to question answering tasks that utilize structured knowledge sources, and improve Text-to-SQL systems by exploring various prompt design strategies for employing LLMs. We conduct a systematic investigation into different demonstration selection methods and optimal instruction formats for prompting LLMs in the Text-to-SQL task. Our approach involves leveraging the syntactic structure of an example's SQL query to retrieve demonstrations, and we demonstrate that pursuing both diversity and similarity in demonstration selection leads to enhanced performance. Furthermore, we show that LLMs benefit from database-related knowledge augmentations. Our most effective strategy outperforms the state-of-the-art system by 2.5 points (Execution Accuracy) and the best fine-tuned system by 5.1 points on the Spider dataset. 
These results highlight the effectiveness of our approach in adapting LLMs to the Text-to-SQL task, and we present an analysis of the factors contributing to the success of our strategy.", "keywords": "Text-to-SQL;Few-shot;Zero-shot;In-context Learning;Large Language Models", "primary_area": "", "supplementary_material": "", "author": "Linyong Nan;Yilun Zhao;Weijin Zou;Narutatsu Ri;Jaesung Tae;Ellen Zhang;Arman Cohan;Dragomir Radev", "authorids": "~Linyong_Nan1;~Yilun_Zhao1;~Weijin_Zou1;~Narutatsu_Ri1;~Jaesung_Tae1;~Ellen_Zhang1;~Arman_Cohan1;~Dragomir_Radev2", "gender": "M;;F;M;;F;M;", "homepage": "https://linyongnan.github.io/;https://yilunzhao.github.io/;;https://narutatsuri.github.io/;;;http://www.armancohan.com;", "dblp": ";271/8391;321/1052;;;;160/1727;", "google_scholar": "b-HaNvYAAAAJ;https://scholar.google.com/citations?hl=zh-CN;;Pp2YYKcAAAAJ;;https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=en;", "or_profile": "~Linyong_Nan1;~Yilun_Zhao1;~Weijin_Zou1;~Narutatsu_Ri1;~Jaesung_Tae1;~Ellen_Zhang1;~Arman_Cohan1;~Dragomir_Radev2", "aff": "Yale University;Yale University;Yale University;Columbia University;;Yale University;Allen Institute for Artificial Intelligence;", "aff_domain": "yale.edu;yale.edu;yale.edu;columbia.edu;;yale.edu;allenai.org;", "position": "PhD student;PhD student;MS student;Undergrad student;;Undergrad student;Research Scientist;", "bibtex": "@inproceedings{\nnan2023enhancing,\ntitle={Enhancing Text-to-{SQL} Capabilities of Large Language Models: A Study on Prompt Design Strategies},\nauthor={Linyong Nan and Yilun Zhao and Weijin Zou and Narutatsu Ri and Jaesung Tae and Ellen Zhang and Arman Cohan and Dragomir Radev},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=40NCUv4I2R}\n}", "github": "", "project": "", "reviewers": "rZ1M;FoCC;ZrTD", "site": "https://openreview.net/forum?id=40NCUv4I2R", "pdf_size": 0, "rating": "3;3;3", "confidence": "5;4;4", "excitement": "4;3;2", "reproducibility": "3;4;3", "correctness": "4;3;3", "rating_avg": 3.0, "confidence_avg": 4.333333333333333, "excitement_avg": 3.0, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 8, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;;;", "linkedin": ";;weijin-vivian-zou;;;;;", "aff_unique_index": "0;0;0;1;0;2", "aff_unique_norm": "Yale University;Columbia University;Allen Institute for Artificial Intelligence", "aff_unique_dep": ";;", "aff_unique_url": "https://www.yale.edu;https://www.columbia.edu;https://allenai.org", "aff_unique_abbr": "Yale;Columbia;AI2", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "41vXNjZbIn", "title": "Improving Input-label Mapping with Demonstration Replay for In-context Learning", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "In-context learning (ICL) is an emerging capability of large autoregressive language models where a few input-label demonstrations are appended to the input to enhance the model's understanding of downstream NLP tasks, without directly adjusting the model parameters. The effectiveness of ICL can be attributed to the strong language modeling capabilities of large language models (LLMs), which enable them to learn the mapping between input and labels based on in-context demonstrations. 
Despite achieving promising results, the causal nature of language modeling in ICL restricts the attention to be backward only, i.e., a token only attends to its previous tokens, failing to capture the full input-label information and limiting the model's performance. In this paper, we propose a novel ICL method called Repeated Demonstration with Sliding Causal Attention, (RdSca). Specifically, we duplicate later demonstrations and concatenate them to the front, allowing the model to `observe' the later information even under the causal restriction. Besides, we introduce sliding causal attention, which customizes causal attention to avoid information leakage. Experimental results show that our method significantly improves the input-label mapping in ICL demonstrations. We also conduct an in-depth analysis of how to customize the causal attention without training, which has been an unexplored area in previous research.", "keywords": "in-context learning;causal language modeling", "primary_area": "", "supplementary_material": "", "author": "Zhuocheng Gong;Jiahao Liu;Qifan Wang;Jingang Wang;Xunliang Cai;Dongyan Zhao;Rui Yan", "authorids": "~Zhuocheng_Gong1;~Jiahao_Liu6;~Qifan_Wang2;~Jingang_Wang1;~Xunliang_Cai1;~Dongyan_Zhao1;~Rui_Yan2", "gender": "M;M;M;M;M;M;M", "homepage": "https://github.com/gzhch/;https://hit-computer.github.io/;https://wqfcr.github.io/;https://sites.google.com/site/bitwjg/;https://maimai.cn/contact/share/card?u=fudmdwckxlwi;https://gsai.ruc.edu.cn/english/ruiyan;https://www.wict.pku.edu.cn/zhaodongyan/en/", "dblp": "320/5689;;33/8610;59/7807;;19/2405-1;63/1870", "google_scholar": ";https://scholar.google.com.hk/citations?user=IvImF70AAAAJ;LrSyLosAAAAJ;janU39IAAAAJ;;eLw6g-UAAAAJ;lhR8-68AAAAJ", "or_profile": "~Zhuocheng_Gong1;~Jiahao_Liu6;~Qifan_Wang2;~Jingang_Wang1;~Xunliang_Cai1;~Rui_Yan2;~Dongyan_Zhao2", "aff": "Peking University;Meituan;Meta AI;Meituan;Meituan;Renmin University of China;Peking University", "aff_domain": "pku.edu.cn;meituan.com;fb.com;meituan.com;meituan.com;ruc.edu.cn;pku.edu.cn", "position": "PhD student;Researcher;Principal Researcher;Researcher;Principal Researcher;Associate Professor;Full Professor", "bibtex": "@inproceedings{\ngong2023improving,\ntitle={Improving Input-label Mapping with Demonstration Replay for In-context Learning},\nauthor={Zhuocheng Gong and Jiahao Liu and Qifan Wang and Jingang Wang and Xunliang Cai and Dongyan Zhao and Rui Yan},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=41vXNjZbIn}\n}", "github": "", "project": "", "reviewers": "6837;Zcd4;evij;n9jP", "site": "https://openreview.net/forum?id=41vXNjZbIn", "pdf_size": 0, "rating": "3;3;3;3", "confidence": "4;5;4;3", "excitement": "4;3;3;4", "reproducibility": "4;4;4;4", "correctness": "4;3;3;4", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 3.5, "reproducibility_avg": 4.0, "correctness_avg": 3.5, "replies_avg": 13, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0002-7570-5756;;;0000-0002-3356-6823;", "linkedin": ";;;;;;", "aff_unique_index": "0;1;2;1;1;3;0", "aff_unique_norm": "Peking University;Meituan;Meta;Renmin University of China", "aff_unique_dep": ";;Meta AI;", "aff_unique_url": "http://www.pku.edu.cn;https://www.meituan.com;https://meta.com;http://www.ruc.edu.cn", "aff_unique_abbr": "Peking U;Meituan;Meta;RUC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0;0;0", 
"aff_country_unique": "China;United States" }, { "id": "4272bEn4Q0", "title": "Large Language Models are Complex Table Parsers", "track": "main", "status": "Long Main", "tldr": "", "abstract": "With the Generative Pre-trained Transformer 3.5 (GPT-3.5) exhibiting remarkable reasoning and comprehension abilities in Natural Language Processing (NLP), most Question Answering (QA) research has primarily centered around general QA tasks based on GPT, neglecting the specific challenges posed by Complex Table QA. In this paper, we propose to incorporate GPT-3.5 to address such challenges, in which complex tables are reconstructed into tuples and specific prompt designs are employed for dialogues. Specifically, we encode each cell's hierarchical structure, position information, and content as a tuple. By enhancing the prompt template with an explanatory description of the meaning of each tuple and the logical reasoning process of the task, we effectively improve the hierarchical structure awareness capability of GPT-3.5 to better parse the complex tables. Extensive experiments and results on Complex Table QA datasets, i.e., the open-domain dataset HiTAB and the aviation domain dataset AIT-QA show that our approach significantly outperforms previous work on both datasets, leading to state-of-the-art (SOTA) performance.", "keywords": "Complex Table QA;GPT-3.5;Large language models", "primary_area": "", "supplementary_material": "", "author": "Bowen Zhao;Changkai Ji;Yuejie Zhang;Wen He;Yingwen Wang;Qing Wang;Rui Feng;Xiaobo Zhang", "authorids": "~Bowen_Zhao4;~Changkai_Ji1;~Yuejie_Zhang2;~Wen_He2;~Yingwen_Wang1;~Qing_Wang23;~Rui_Feng2;~Xiaobo_Zhang1", "gender": ";F;F;F;F;F;;F", "homepage": ";https://github.com/wuxi-dixi;http://www.cs.fudan.edu.cn/?page_id=5518;https://scholar.google.ca/citations?user=KHJMbHwAAAAJ&hl=en;;https://user.qzone.qq.com/2797022015;;https://baike.baidu.com/item/%E5%BC%A0%E6%99%93%E6%B3%A2/61140240?fr=aladdin", "dblp": ";;09/5786;;;;;", "google_scholar": ";https://scholar.google.com/citations?hl=zh-CN;;;;;;https://scholar.google.ca/citations?user=gUdjVUkAAAAJ", "or_profile": "~Bowen_Zhao4;~Changkai_Ji1;~Yuejie_Zhang2;~Wen_He2;~Yingwen_Wang1;~Qing_Wang23;~Rui_Feng2;~Xiaobo_Zhang1", "aff": ";Fudan University;Fudan University;Fudan University;;Fudan University;;Fudan University", "aff_domain": ";fudan.edu.cn;fudan.edu.cn;fudan.edu;;fudan.edu.cn;;fudan.edu.cn", "position": ";PhD student;Full Professor;PhD student;;MS student;;Full Professor", "bibtex": "@inproceedings{\nzhao2023large,\ntitle={Large Language Models are Complex Table Parsers},\nauthor={Bowen Zhao and Changkai Ji and Yuejie Zhang and Wen He and Yingwen Wang and Qing Wang and Rui Feng and Xiaobo Zhang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=4272bEn4Q0}\n}", "github": "", "project": "", "reviewers": "Didr;n81y;bgck", "site": "https://openreview.net/forum?id=4272bEn4Q0", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;3;4", "excitement": "3;4;3", "reproducibility": "4;2;4", "correctness": "3;3;4", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.3333333333333335, "replies_avg": 10, "authors#_avg": 8, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0001-7993-7223;0000-0003-0440-8516;0000-0002-0155-5046;0009-0004-6252-2565;;0000-0002-8645-5414", "linkedin": ";;;;;;;", 
"aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Fudan University", "aff_unique_dep": "", "aff_unique_url": "https://www.fudan.edu.cn", "aff_unique_abbr": "Fudan", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "42Cc5s71zl", "title": "D$^2$TV: Dual Knowledge Distillation and Target-oriented Vision Modeling for Many-to-Many Multimodal Summarization", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Many-to-many multimodal summarization (M$^3$S) task aims to generate summaries in any language with document inputs in any language and the corresponding image sequence, which essentially comprises of multimodal monolingual summarization (MMS) and multimodal cross-lingual summarization (MXLS) tasks. Although much work has been devoted to either MMS or MXLS, little research pays attention to the M$^3$S task. Besides, existing studies mainly focus on 1) utilizing MMS to enhance MXLS via knowledge distillation without considering the performance of MMS or 2) improving MMS models by filtering summary-unrelated visual features with implicit learning or explicitly complex training objectives. In this paper, we first introduce a general and practical task, \\emph{i.e.}, M$^3$S. Further, we propose a dual knowledge distillation and target-oriented vision modeling framework for the M$^3$S task. Specifically, the dual knowledge distillation method guarantees that the knowledge of MMS and MXLS can be transferred to each other and thus mutually prompt both of them. To offer target-oriented visual features, a simple yet effective target-oriented contrastive objective is designed and responsible for discarding needless visual information. Extensive experiments on the many-to-many setting show the effectiveness of the proposed approach. 
Additionally, we contribute a many-to-many multimodal summarization ({\\fontfamily{lmtt}\\selectfont M$^3$Sum}) dataset with 44 languages to facilitate future research.", "keywords": "Multimodal Summarization; Cross-lingual Summarization; Many-to-many Multimodal Summarization", "primary_area": "", "supplementary_material": "", "author": "Yunlong Liang;Fandong Meng;Jiaan Wang;Jinan Xu;Yufeng Chen;Jie Zhou", "authorids": "~Yunlong_Liang1;~Fandong_Meng3;~Jiaan_Wang1;~Jinan_Xu1;~Yufeng_Chen1;~Jie_Zhou8", "gender": "M;M;M;F;M;M", "homepage": ";http://fandongmeng.github.io/;https://wangjiaan.cn/;;;http://faculty.bjtu.edu.cn/8300/", "dblp": "177/5130.html;117/4056.html;296/2112;64/5715;00/5012-16;67/3124", "google_scholar": "P5iDDGIAAAAJ;sA8U4S0AAAAJ;5S8h7qAAAAAJ;;https://scholar.google.com.hk/citations?user=OijxQCMAAAAJ;wMuW0W4AAAAJ", "or_profile": "~Yunlong_Liang1;~Fandong_Meng3;~Jiaan_Wang1;~Yufeng_Chen1;~Jie_Zhou8;~Xu_Jinan1", "aff": "Beijing Jiaotong University;WeChat AI, Tencent Inc.;Soochow University;Beijing jiaotong univercity;WeChat AI, Tencent Inc.;Beijing Jiaotong University", "aff_domain": "bjtu.edu.cn;tencent.com;suda.edu.cn;bjtu.edu.cn;tencent.com;bjtu.edu.cn", "position": "PhD student;Principal Researcher;MS student;Assistant Professor;Principal Researcher;Full Professor", "bibtex": "@inproceedings{\nliang2023dtv,\ntitle={D\\${\\textasciicircum}2\\${TV}: Dual Knowledge Distillation and Target-oriented Vision Modeling for Many-to-Many Multimodal Summarization},\nauthor={Yunlong Liang and Fandong Meng and Jiaan Wang and Jinan Xu and Yufeng Chen and Jie Zhou},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=42Cc5s71zl}\n}", "github": "", "project": "", "reviewers": "KUUr;CEcV;UzrF;16rq", "site": "https://openreview.net/forum?id=42Cc5s71zl", "pdf_size": 0, "rating": "4;4;4;4", "confidence": "2;3;4;4", "excitement": "3;3;4;3", "reproducibility": "3;4;4;4", "correctness": "3;2;4;3", "rating_avg": 4.0, "confidence_avg": 3.25, "excitement_avg": 3.25, "reproducibility_avg": 3.75, "correctness_avg": 3.0, "replies_avg": 14, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0003-2311-7642;0000-0002-8158-2377;0000-0002-2587-7648;;0000-0002-5899-5165;", "linkedin": ";;;;;jinan-xu-3544b137/", "aff_unique_index": "0;1;2;0;1;0", "aff_unique_norm": "Beijing Jiao Tong University;Tencent;Soochow University", "aff_unique_dep": ";WeChat AI;", "aff_unique_url": "http://www.njtu.edu.cn/en;https://www.tencent.com;https://www.soochow.edu.cn", "aff_unique_abbr": "BJTU;Tencent;Soochow U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "42LIoV0C1h", "title": "Qualitative Code Suggestion: A Human-Centric Approach to Qualitative Coding", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Qualitative coding is a content analysis method in which researchers read through a text corpus and assign descriptive labels or qualitative codes to passages. It is an arduous and manual process which human-computer interaction (HCI) studies have shown could greatly benefit from NLP techniques to assist qualitative coders. Yet, previous attempts at leveraging language technologies have set up qualitative coding as a fully automatable classification problem. 
In this work, we take a more assistive approach by defining the task of qualitative code suggestion (QCS) in which a ranked list of previously assigned qualitative codes is suggested from an identified passage. In addition to being user-motivated, QCS integrates previously ignored properties of qualitative coding such as the sequence in which passages are annotated, the importance of rare codes and the differences in annotation styles between coders. We investigate the QCS task by releasing the first publicly available qualitative coding dataset, CVDQuoding, consisting of interviews conducted with women at risk of cardiovascular disease. In addition, we conduct a human evaluation which shows that our systems consistently make relevant code suggestions.", "keywords": "qualitative coding;human-centric", "primary_area": "", "supplementary_material": "", "author": "Cesare Spinoso-Di Piano;Samira Abbasgholizadeh Rahimi;Jackie CK Cheung", "authorids": "~Cesare_Spinoso-Di_Piano1;~Samira_Abbasgholizadeh_Rahimi1;~Jackie_CK_Cheung1", "gender": "M;F;M", "homepage": "https://cesare-spinoso.github.io/;https://rahimislab.ca/;http://cs.mcgill.ca/~jcheung/", "dblp": ";;00/9012", "google_scholar": ";;https://scholar.google.com.tw/citations?user=Um-wmYQAAAAJ", "or_profile": "~Cesare_Spinoso-Di_Piano1;~Samira_Abbasgholizadeh_Rahimi1;~Jackie_CK_Cheung1", "aff": ", McGill University;McGill University;Microsoft", "aff_domain": "cs.mcgill.ca;mcgill.ca;microsoft.com", "position": "MS student;Assistant Professor;Consulting Researcher", "bibtex": "@inproceedings{\npiano2023qualitative,\ntitle={Qualitative Code Suggestion: A Human-Centric Approach to Qualitative Coding},\nauthor={Cesare Spinoso-Di Piano and Samira Abbasgholizadeh Rahimi and Jackie CK Cheung},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=42LIoV0C1h}\n}", "github": "", "project": "", "reviewers": "ULRd;WMQc;ZTy2", "site": "https://openreview.net/forum?id=42LIoV0C1h", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;3", "excitement": "4;4;4", "reproducibility": "4;4;5", "correctness": "4;4;4", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 4.0, "reproducibility_avg": 4.333333333333333, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": ";samira-a-rahimi-ph-d-36769541/;", "aff_unique_index": "0;0;1", "aff_unique_norm": "McGill University;Microsoft", "aff_unique_dep": ";Microsoft Corporation", "aff_unique_url": "https://www.mcgill.ca;https://www.microsoft.com", "aff_unique_abbr": "McGill;Microsoft", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1", "aff_country_unique": "Canada;United States" }, { "id": "43SOcneD8W", "title": "Chain-of-Thought Tuning: Masked Language Models can also Think Step By Step in Natural Language Understanding", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Chain-of-Thought (CoT) is a technique that guides Large Language Models (LLMs) to decompose complex tasks into multi-step reasoning through intermediate steps in natural language form. Briefly, CoT enables LLMs to think step by step. However, although many Natural Language Understanding (NLU) tasks also require thinking step by step, LLMs perform less well than small-scale Masked Language Models (MLMs). 
To migrate CoT from LLMs to MLMs, we propose Chain-of-Thought Tuning (CoTT), a two-step reasoning framework based on prompt tuning, to implement step-by-step thinking for MLMs on NLU tasks. From the perspective of CoT, CoTT's two-step framework enables MLMs to implement task decomposition; CoTT's prompt tuning allows intermediate steps to be used in natural language form. Thereby, the success of CoT can be extended to NLU tasks through MLMs. To verify the effectiveness of CoTT, we conduct experiments on two NLU tasks: hierarchical classification and relation extraction, and the results show that CoTT outperforms baselines and achieves state-of-the-art performance.", "keywords": "masked language models;multi-step reasoning;Chain-of-Thought;natural language understanding", "primary_area": "", "supplementary_material": "", "author": "Caoyun Fan;Jidong Tian;Yitian Li;Wenqing Chen;Hao HE;Yaohui Jin", "authorids": "~Caoyun_Fan2;~Jidong_Tian1;~Yitian_Li2;~Wenqing_Chen1;~Hao_HE4;~Yaohui_Jin2", "gender": "M;M;M;M;M;M", "homepage": ";;https://chenwq95.github.io/;;http://front.sjtu.edu.cn/~jinyh/;", "dblp": "230/4307.html;;31/2740;18/813-7.html;27/7040;https://dblp.uni-trier.de/pid/293/7040", "google_scholar": "0iq39EUAAAAJ;;aJs9eskAAAAJ;;H_7_oVcAAAAJ;https://scholar.google.com.hk/citations?user=KFLS66EAAAAJ", "or_profile": "~Jidong_Tian1;~Yitian_Li2;~Wenqing_Chen1;~Hao_HE4;~Yaohui_Jin2;~caoyun_fan1", "aff": "Shanghai Jiaotong University;Shanghai Jiao Tong University, Tsinghua University;SUN YAT-SEN UNIVERSITY;Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University", "aff_domain": "sjtu.edu.cn;sjtu.edu.cn;sysu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn", "position": "PhD student;PhD student;Assistant Professor;Associate Professor;Full Professor;PhD student", "bibtex": "@inproceedings{\nfan2023chainofthought,\ntitle={Chain-of-Thought Tuning: Masked Language Models can also Think Step By Step in Natural Language Understanding},\nauthor={Caoyun Fan and Jidong Tian and Yitian Li and Wenqing Chen and Hao HE and Yaohui Jin},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=43SOcneD8W}\n}", "github": "", "project": "", "reviewers": "bKM7;AVcC;xtiA;VXXs", "site": "https://openreview.net/forum?id=43SOcneD8W", "pdf_size": 0, "rating": "4;4;4;4", "confidence": "2;3;3;4", "excitement": "3;4;4;3", "reproducibility": "4;4;3;4", "correctness": "4;3;3;4", "rating_avg": 4.0, "confidence_avg": 3.0, "excitement_avg": 3.5, "reproducibility_avg": 3.75, "correctness_avg": 3.5, "replies_avg": 14, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0001-7799-6449;;0000-0002-4851-7012;0000-0001-6158-6277;", "linkedin": ";;;;yaohui-jin-bab58511/;", "aff_unique_index": "0;0;1;0;0;0", "aff_unique_norm": "Shanghai Jiao Tong University;Sun Yat-sen University", "aff_unique_dep": ";", "aff_unique_url": "https://www.sjtu.edu.cn;http://www.sysu.edu.cn", "aff_unique_abbr": "SJTU;SYSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "45uZxlMLol", "title": "Annotation Sensitivity: Training Data Collection Methods Affect Model Performance", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "When training data are collected from human annotators, the design of the annotation instrument, the instructions given to annotators, the characteristics of the annotators, and 
their interactions can impact training data. This study demonstrates that design choices made when creating an annotation instrument also impact the models trained on the resulting annotations. \n\nWe introduce the term annotation sensitivity to refer to the impact of annotation data collection methods on the annotations themselves and on downstream model performance and predictions.\n\nWe collect annotations of hate speech and offensive language in five experimental conditions of an annotation instrument, randomly assigning annotators to conditions. We then fine-tune BERT models on each of the five resulting datasets and evaluate model performance on a holdout portion of each condition. We find considerable differences between the conditions for 1) the share of hate speech/offensive language annotations, 2) model performance, 3) model predictions, and 4) model learning curves.\n\nOur results emphasize the crucial role played by the annotation instrument which has received little attention in the machine learning literature. We call for additional research into how and why the instrument impacts the annotations to inform the development of best practices in instrument design.", "keywords": "Annotation sensitivity;human annotation;annotation instrument;task structure effects", "primary_area": "", "supplementary_material": "", "author": "Christoph Kern;Stephanie Eckman;Jacob Beck;Rob Chew;Bolei Ma;Frauke Kreuter", "authorids": "~Christoph_Kern1;~Stephanie_Eckman1;~Jacob_Beck2;~Rob_Chew1;~Bolei_Ma1;~Frauke_Kreuter1", "gender": ";;;;;F", "homepage": ";;;https://www.rti.org/expert/robert-chew;;https://umd.edu/", "dblp": ";;;;352/4693;240/9345.html", "google_scholar": ";;;qdYrbFwAAAAJ;9KdJOfAAAAAJ;iD8Vb4MAAAAJ", "or_profile": "~Christoph_Kern1;~Stephanie_Eckman1;~Jacob_Beck2;~Rob_Chew1;~Bolei_Ma1;~Frauke_Kreuter1", "aff": "University of Maryland, Baltimore;;Ludwig-Maximilians-Universit\u00e4t M\u00fcnchen;RTI International;Ludwig-Maximilians-Universit\u00e4t M\u00fcnchen;University of Maryland", "aff_domain": "umaryland.edu;;lmu.de;rti.org;lmu.de;umd.edu", "position": "Assistant Professor;;PhD student;Researcher;MS student;Full Professor", "bibtex": "@inproceedings{\nkern2023annotation,\ntitle={Annotation Sensitivity: Training Data Collection Methods Affect Model Performance},\nauthor={Christoph Kern and Stephanie Eckman and Jacob Beck and Rob Chew and Bolei Ma and Frauke Kreuter},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=45uZxlMLol}\n}", "github": "", "project": "", "reviewers": "W7XF;JnQu;gqvR", "site": "https://openreview.net/forum?id=45uZxlMLol", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;5;4", "excitement": "4;4;2", "reproducibility": "4;5;5", "correctness": "4;3;2", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.666666666666667, "correctness_avg": 3.0, "replies_avg": 10, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0001-7363-4299;;;;;", "linkedin": ";;jacob-beck-8b51a5193/;;;", "aff_unique_index": "0;1;2;1;0", "aff_unique_norm": "University of Maryland;Ludwig-Maximilians-Universit\u00e4t M\u00fcnchen;RTI International", "aff_unique_dep": ";;", "aff_unique_url": "https://www.umaryland.edu;https://www.lmu.de;https://www.rti.org", "aff_unique_abbr": "UMD;LMU;RTI", "aff_campus_unique_index": "0", "aff_campus_unique": "Baltimore;", "aff_country_unique_index": "0;1;0;1;0", 
"aff_country_unique": "United States;Germany" }, { "id": "46WcPRhRwG", "title": "Evaluating and Enhancing the Robustness of Code Pre-trained Models through Structure-Aware Adversarial Samples Generation", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Code pre-trained models (CodePTMs) have significantly advanced the field of neural code intelligence. Despite their capabilities, \nthese models are susceptible to adversarial attacks that subtly modify the model inputs, resulting in incorrect outputs or predictions. Previous methods of robustness evaluation for CodePTMs primarily stem from a textual perspective, without explicitly taking into account the structure of the code. Furthermore, prior studies fail to encompass a broad enough spectrum of tasks and models. In this paper, we propose a set of novel robustness evaluation methods based on the intrinsic structure of the code. Specifically, we first launch adversarial attacks on crucial identifier tokens and sub-tree structures to explore the impact of imperceptible perturbation. Then, we perform global restructuring of the code using different traversal methods for abstract syntax trees, aiming to explore the model's sensitivity to input samples with equivalent information. Moreover, for each scenario, we employ adversarial training methods to explore the possibility of restoring the performance of perturbed models. For both code understanding and generation, our proposed method has demonstrated its effectiveness across a wide range of models and tasks, thereby allowing us to make one step forward in our understanding of the inner mechanisms of CodePTMs.", "keywords": "Neural Code Intelligence;Pre-trained Language Models;Adversarial Attack", "primary_area": "", "supplementary_material": "", "author": "Nuo Chen;Qiushi Sun;Jianing Wang;Ming Gao;Xiaoli Li;Xiang Li", "authorids": "~Nuo_Chen4;~Qiushi_Sun1;~Jianing_Wang4;~Ming_Gao1;~Xiaoli_Li1;~Xiang_Li24", "gender": ";M;M;M;M;M", "homepage": "https://nuojohnchen.github.io/;https://qiushisun.github.io/;http://dase.ecnu.edu.cn/mgao/;https://personal.ntu.edu.sg/xlli/;https://lixiang3776.github.io;https://github.com/wjn1996", "dblp": "135/5622-2.html;247/8469;71/4173-1;l/XiaoliLi.html;40/1491-67.html;", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;QgMkYFAAAAAJ;https://scholar.google.com/citations?hl=zh-CN;E3yQKloAAAAJ;JnxxNtsAAAAJ;ccaimI8AAAAJ", "or_profile": "~Nuo_Chen4;~Qiushi_Sun1;~Ming_Gao1;~Xiaoli_Li1;~Xiang_Li24;~Jia-ning_Wang1", "aff": "The Chinese University of Hong Kong, Shenzhen;Institute of infocomm research, A*STAR;East China Normal University;A*STAR;East China Normal University;East China Normal University", "aff_domain": "cuhk.edu.cn;i2r.a-star.edu.sg;ecnu.edu.cn;a-star.edu.sg;ecnu.edu.cn;ecnu.edu.cn", "position": "Researcher;Intern;Full Professor;Principal Researcher;Full Professor;PhD student", "bibtex": "@inproceedings{\nchen2023evaluating,\ntitle={Evaluating and Enhancing the Robustness of Code Pre-trained Models through Structure-Aware Adversarial Samples Generation},\nauthor={Nuo Chen and Qiushi Sun and Jianing Wang and Ming Gao and Xiaoli Li and Xiang Li},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=46WcPRhRwG}\n}", "github": "", "project": "", "reviewers": "xnBH;BNiS;H95H", "site": "https://openreview.net/forum?id=46WcPRhRwG", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;3;3", "excitement": "3;4;4", "reproducibility": "4;3;3", 
"correctness": "2;3;3", "rating_avg": 3.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 2.6666666666666665, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0001-6563-1215;0000-0002-5207-818X;0000-0002-5603-2680;0000-0002-0762-6562;0009-0003-0142-2483;0000-0001-6006-053X", "linkedin": ";qiushi-sun/;;li-xiaoli-41027ba/;;", "aff_unique_index": "0;1;2;3;2;2", "aff_unique_norm": "Chinese University of Hong Kong;Institute of Infocomm Research;East China Normal University;Agency for Science, Technology and Research", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.cuhk.edu.cn;https://www.i2r.a-star.edu.sg;http://www.ecnu.edu.cn;https://www.a-star.edu.sg", "aff_unique_abbr": "CUHK;I2R;ECNU;A*STAR", "aff_campus_unique_index": "0", "aff_campus_unique": "Shenzhen;", "aff_country_unique_index": "0;1;0;1;0;0", "aff_country_unique": "China;Singapore" }, { "id": "49HfhYU9S6", "title": "Assessing Step-by-Step Reasoning against Lexical Negation: A Case Study on Syllogism", "track": "main", "status": "Short Main", "tldr": "", "abstract": "Large language models (LLMs) take advantage of step-by-step reasoning instructions, e.g., chain-of-thought (CoT) prompting. \nBuilding on this, their ability to perform CoT-style reasoning robustly is of interest from a probing perspective. \nIn this study, we inspect the step-by-step reasoning ability of LLMs with a focus on negation, which is a core linguistic phenomenon that is difficult to process. \nIn particular, we introduce several controlled settings (e.g., reasoning in case of fictional entities) to evaluate the logical reasoning abilities of the models. We observed that dozens of modern LLMs were not robust against lexical negation (e.g., plausible$\\rightarrow$implausible) when performing CoT-style reasoning, and the results highlight unique limitations in each LLM family.", "keywords": "Negation;Prompting;Reasoning;Language model;Model Analysis;Probing;Chain-of-thought", "primary_area": "", "supplementary_material": "", "author": "Mengyu Ye;Tatsuki Kuribayashi;Jun Suzuki;Goro Kobayashi;Hiroaki Funayama", "authorids": "~Mengyu_Ye2;~Tatsuki_Kuribayashi1;~Jun_Suzuki1;~Goro_Kobayashi1;~Hiroaki_Funayama1", "gender": ";M;M;M;M", "homepage": ";https://kuribayashi4.github.io/;https://www.nlp.ecei.tohoku.ac.jp/~jun/;https://sites.google.com/view/goro-kobayashi;https://hiro819.github.io/", "dblp": ";228/5787;78/6923;;268/2319", "google_scholar": ";https://scholar.google.co.jp/citations?user=-bqmkaAAAAAJ;https://scholar.google.co.jp/citations?user=XO5CrIsAAAAJ;AT-ybe0AAAAJ;https://scholar.google.com/citations?hl=en", "or_profile": "~Mengyu_Ye2;~Tatsuki_Kuribayashi1;~Jun_Suzuki1;~Goro_Kobayashi1;~Hiroaki_Funayama1", "aff": ";Tohoku University;Tohoku University;Tohoku University;Tohoku University", "aff_domain": ";tohoku.ac.jp;tohoku.ac.jp;tohoku.ac.jp;tohoku.ac.jp", "position": ";PhD student;Full Professor;PhD student;PhD student", "bibtex": "@inproceedings{\nye2023assessing,\ntitle={Assessing Step-by-Step Reasoning against Lexical Negation: A Case Study on Syllogism},\nauthor={Mengyu Ye and Tatsuki Kuribayashi and Jun Suzuki and Goro Kobayashi and Hiroaki Funayama},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=49HfhYU9S6}\n}", "github": "", "project": "", "reviewers": "RddC;a7Sc;YrJj", "site": 
"https://openreview.net/forum?id=49HfhYU9S6", "pdf_size": 0, "rating": "5;5;5", "confidence": "3;3;4", "excitement": "4;4;4", "reproducibility": "5;4;5", "correctness": "4;5;4", "rating_avg": 5.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 4.0, "reproducibility_avg": 4.666666666666667, "correctness_avg": 4.333333333333333, "replies_avg": 10, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0003-2108-1340;;", "linkedin": ";;;;", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Tohoku University", "aff_unique_dep": "", "aff_unique_url": "https://www.tohoku.ac.jp", "aff_unique_abbr": "Tohoku U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Japan" }, { "id": "4AcHxGE6M4", "title": "CP-BCS: Binary Code Summarization Guided by Control Flow Graph and Pseudo Code", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Automatically generating function summaries for binaries is an extremely valuable but challenging task, since it involves translating the execution behavior and semantics of the low-level language (assembly code) into human-readable natural language. However, most current works on understanding assembly code are oriented towards generating function names, which involve numerous abbreviations that make them still confusing. To bridge this gap, we focus on generating complete summaries for binary functions, especially for stripped binary (no symbol table and debug information in reality). To fully exploit the semantics of assembly code, we present a control flow graph and pseudo code guided binary code summarization framework called CP-BCS. CP-BCS utilizes a bidirectional instruction-level control flow graph and pseudo code that incorporates expert knowledge to learn the comprehensive binary function execution behavior and logic semantics. We evaluate CP-BCS on 3 different binary optimization levels (O1, O2, and O3) for 3 different computer architectures (X86, X64, and ARM). 
The evaluation results demonstrate CP-BCS is superior and significantly improves the efficiency of reverse engineering.", "keywords": "binary code summarization", "primary_area": "", "supplementary_material": "", "author": "Tong Ye;Lingfei Wu;Tengfei Ma;Xuhong Zhang;Yangkai Du;Peiyu Liu;Shouling Ji;Wenhai Wang", "authorids": "~Tong_Ye1;~Lingfei_Wu1;~Tengfei_Ma1;~Xuhong_Zhang1;~Yangkai_Du1;~Peiyu_Liu2;~Shouling_Ji1;~Wenhai_Wang3", "gender": "M;;M;M;M;M;M;M", "homepage": "https://tongye98.github.io/;https://sites.google.com/view/teddy-lfwu/;https://sites.google.com/site/matf0123/;https://person.zju.edu.cn/zhangxuhong;;https://nesa.zju.edu.cn/webpage/people.html;https://nesa.zju.edu.cn/;https://person.zju.edu.cn/wangweihai", "dblp": ";27/9060;94/9023-1;139/6932-2.html;260/9581;85/670-3.html;07/8388;", "google_scholar": "Qobw5GsAAAAJ;https://scholar.google.com/citations?hl=en;9OvNakkAAAAJ;bWLpm3sAAAAJ;2dx1fzQAAAAJ;;https://scholar.google.com.vn/citations?hl=en;", "or_profile": "~Tong_Ye1;~Lingfei_Wu1;~Tengfei_Ma1;~Xuhong_Zhang1;~Yangkai_Du1;~Peiyu_Liu2;~Shouling_Ji1;~Wenhai_Wang3", "aff": "Zhejiang University;Pinterest;International Business Machines;Zhejiang University;Zhejiang University;Zhejiang University;Zhejiang University;Zhejiang University", "aff_domain": "zju.edu.cn;pinterest.com;ibm.com;zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn", "position": "PhD student;Engineering Manager;Researcher;Full Professor;MS student;Postdoc;Full Professor;Full Professor", "bibtex": "@inproceedings{\nye2023cpbcs,\ntitle={{CP}-{BCS}: Binary Code Summarization Guided by Control Flow Graph and Pseudo Code},\nauthor={Tong Ye and Lingfei Wu and Tengfei Ma and Xuhong Zhang and Yangkai Du and Peiyu Liu and Shouling Ji and Wenhai Wang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=4AcHxGE6M4}\n}", "github": "", "project": "", "reviewers": "bpsr;9guV;hYNs", "site": "https://openreview.net/forum?id=4AcHxGE6M4", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;4;4", "excitement": "2;3;4", "reproducibility": "4;3;3", "correctness": "4;3;3", "rating_avg": 5.0, "confidence_avg": 4.0, "excitement_avg": 3.0, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.3333333333333335, "replies_avg": 10, "authors#_avg": 8, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0002-1086-529X;;;0000-0001-7793-7633;0000-0003-4268-372X;", "linkedin": ";;;xuhongzhang/;;;;", "aff_unique_index": "0;1;2;0;0;0;0;0", "aff_unique_norm": "Zhejiang University;Pinterest;International Business Machines Corporation", "aff_unique_dep": ";;", "aff_unique_url": "https://www.zju.edu.cn;https://www.pinterest.com;https://www.ibm.com", "aff_unique_abbr": "ZJU;Pinterest;IBM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;0;0;0;0;0", "aff_country_unique": "China;United States" }, { "id": "4AiERjB5JD", "title": "Prefix-Tuning Based Unsupervised Text Style Transfer", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Unsupervised text style transfer aims at training a generative model that can alter the style of the input sentence while preserving its content without using any parallel data. \nIn this paper, we employ powerful pre-trained large language models and present a new prefix-tuning-based method for unsupervised text style transfer. 
\nWe construct three different kinds of prefixes, i.e., shared prefix, style prefix, and content prefix, to encode task-specific information, target style, and the content information of the input sentence, respectively.\nCompared to embeddings used by previous works, the proposed prefixes can provide richer information for the model.\nFurthermore, we adopt a recursive way of using language models in the process of style transfer.\nThis strategy provides a more effective way for the interactions between the input sentence and GPT-2, helps the model construct more informative prefixes, and thus, helps improve the performance.\nEvaluations on the well-known datasets show that our method outperforms the state-of-the-art baselines. Results, analysis of ablation studies, and subjective evaluations from humans are also provided for a deeper understanding of the proposed method.", "keywords": "Unsupervised text style transfer;Prefix-Tuning", "primary_area": "", "supplementary_material": "", "author": "Huiyu Mai;Wenhao Jiang;Zhi-Hong Deng", "authorids": "~Huiyu_Mai1;~Wenhao_Jiang1;~Zhi-Hong_Deng1", "gender": "M;M;M", "homepage": ";https://cswhjiang.github.io/;http://www.cis.pku.edu.cn/jzyg/szdw/dzh.htm", "dblp": "302/7540.html;;161/4814-1", "google_scholar": ";rAlT64IAAAAJ;https://scholar.google.com.tw/citations?user=tRoAxlsAAAAJ", "or_profile": "~Huiyu_Mai1;~Wenhao_Jiang1;~Zhi-Hong_Deng1", "aff": "Peking University;Tencent AI Lab;Peking University", "aff_domain": "pku.edu.cn;tencent.com;pku.edu.cn", "position": "MS student;Senior Research Scientist;Full Professor", "bibtex": "@inproceedings{\nmai2023prefixtuning,\ntitle={Prefix-Tuning Based Unsupervised Text Style Transfer},\nauthor={Huiyu Mai and Wenhao Jiang and Zhi-Hong Deng},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=4AiERjB5JD}\n}", "github": "", "project": "", "reviewers": "cYMW;p4xo;2nkN", "site": "https://openreview.net/forum?id=4AiERjB5JD", "pdf_size": 0, "rating": "3;3;3", "confidence": "5;3;3", "excitement": "2;4;3", "reproducibility": "4;3;3", "correctness": "3;4;3", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.0, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-6625-3858;;0000-0002-0263-8142", "linkedin": ";;", "aff_unique_index": "0;1;0", "aff_unique_norm": "Peking University;Tencent", "aff_unique_dep": ";Tencent AI Lab", "aff_unique_url": "http://www.pku.edu.cn;https://ai.tencent.com", "aff_unique_abbr": "Peking U;Tencent AI Lab", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "4EXbwN9Ezw", "title": "A Boundary Offset Prediction Network for Named Entity Recognition", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Named entity recognition (NER) is a fundamental task in natural language processing that aims to identify and classify named entities in text. However, span-based methods for NER typically assign entity types to text spans, resulting in an imbalanced sample space and neglecting the connections between non-entity and entity spans. To address these issues, we propose a novel approach for NER, named the Boundary Offset Prediction Network (BOPN), which predicts the boundary offsets between candidate spans and their nearest entity spans. 
By leveraging the guiding semantics of boundary offsets, BOPN establishes connections between non-entity and entity spans, enabling non-entity spans to function as additional positive samples for entity detection. Furthermore, our method integrates entity type and span representations to generate type-aware boundary offsets instead of using entity types as detection targets. We conduct experiments on eight widely-used NER datasets, and the results demonstrate that our proposed BOPN outperforms previous state-of-the-art methods.", "keywords": "named entity recognition;span-based methods;boundary connections;boundary offset prediction network;type-related boundary offsets", "primary_area": "", "supplementary_material": "", "author": "Minghao Tang;Yongquan He;Yongxiu Xu;Hongbo Xu;Wenyuan Zhang;Yang Lin", "authorids": "~Minghao_Tang1;~Yongquan_He1;~Yongxiu_Xu1;~Hongbo_Xu3;~Wenyuan_Zhang2;~Yang_Lin4", "gender": "M;M;F;M;;M", "homepage": ";;;https://people.ucas.ac.cn/~xuhongbo;;", "dblp": "235/0717;276/5095.html;294/1202;https://dblp.org/search?q=Hongbo+Xu+Tingwen+Liu;;", "google_scholar": "Cx8GTawAAAAJ;https://scholar.google.com.hk/citations?user=NMvcXrYAAAAJ;https://scholar.google.ca/citations?hl=zh-CN;;;zZDoBQoAAAAJ", "or_profile": "~Minghao_Tang1;~Yongquan_He1;~Yongxiu_Xu1;~Hongbo_Xu3;~Wenyuan_Zhang2;~Yang_Lin4", "aff": "Institute of Information Engineering,Chinese Academy of Sciences;Meituan;Institute of Information Engineering, Chinese Academy of Sciences;Institute of Information Engineering;;", "aff_domain": "iie.ac.cn;meituan.com;iie.edu.cn;iie.ac.cn;;", "position": "PhD student;Researcher;Assistant Professor;Full Professor;;", "bibtex": "@inproceedings{\ntang2023a,\ntitle={A Boundary Offset Prediction Network for Named Entity Recognition},\nauthor={Minghao Tang and Yongquan He and Yongxiu Xu and Hongbo Xu and Wenyuan Zhang and Yang Lin},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=4EXbwN9Ezw}\n}", "github": "", "project": "", "reviewers": "TfVb;no4M;GY7H", "site": "https://openreview.net/forum?id=4EXbwN9Ezw", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;3", "excitement": "2;4;3", "reproducibility": "3;4;4", "correctness": "3;4;3", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0003-2926-3907;0000-0002-3079-8530;;0000-0002-0258-7840;;", "linkedin": ";;;;;", "aff_unique_index": "0;1;0;2", "aff_unique_norm": "Chinese Academy of Sciences;Meituan;Institute of Information Engineering", "aff_unique_dep": "Institute of Information Engineering;;", "aff_unique_url": "http://www.cas.cn;https://www.meituan.com;", "aff_unique_abbr": "CAS;Meituan;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China;" }, { "id": "4FDx4KMZnu", "title": "Mixture of Soft Prompts for Controllable Data Generation", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Large language models (LLMs) effectively generate fluent text when the target output follows natural language patterns. However, structured prediction tasks confine the output format to a limited ontology, causing even very large models to struggle since they were never trained with such restrictions in mind. 
The difficulty of using LLMs for direct prediction is exacerbated in few-shot learning scenarios, which commonly arise due to domain shift and resource limitations. We flip the problem on its head by leveraging the LLM as a tool for data augmentation rather than direct prediction. Our proposed Mixture of Soft Prompts (MSP) serves as a parameter-efficient procedure for generating multi-attribute data in a controlled manner. Denoising mechanisms are further applied to improve the quality of synthesized data. Automatic metrics show our method is capable of producing diverse and natural text, while preserving label semantics. Moreover, MSP achieves state-of-the-art results on three benchmarks when compared against strong baselines. Our method offers an alternate data-centric approach for applying LLMs to complex prediction tasks.", "keywords": "data augmentation;parameter efficient training;few-shot learning;structured prediction", "primary_area": "", "supplementary_material": "", "author": "Derek Chen;Celine Lee;Yunan Lu;Domenic Rosati;Zhou Yu", "authorids": "~Derek_Chen1;~Celine_Lee1;~Yunan_Lu2;~Domenic_Rosati2;~Zhou_Yu1", "gender": "M;F;F;;F", "homepage": "https://www.morethanoneturn.com;https://celine-lee.github.io;;https://domenicrosati.github.io/;http://www.cs.columbia.edu/~zhouyu/", "dblp": "225/7737;259/9736;;204/8129;83/3205", "google_scholar": "Xmv0998AAAAJ;F27S1VcAAAAJ;S0vGr-gAAAAJ;80aJAKYAAAAJ;https://scholar.google.com.tw/citations?user=jee2Dy0AAAAJ", "or_profile": "~Derek_Chen1;~Celine_Lee1;~Yunan_Lu2;~Domenic_Rosati2;~Zhou_Yu1", "aff": "Columbia University;Cornell University;;scite.ai;Columbia University", "aff_domain": "columbia.edu;cornell.edu;;scite.ai;columbia.edu", "position": "Researcher;PhD student;;Principal Researcher;Assistant Professor", "bibtex": "@inproceedings{\nchen2023mixture,\ntitle={Mixture of Soft Prompts for Controllable Data Generation},\nauthor={Derek Chen and Celine Lee and Yunan Lu and Domenic Rosati and Zhou Yu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=4FDx4KMZnu}\n}", "github": "", "project": "", "reviewers": "rBGC;uw25;4Rzr", "site": "https://openreview.net/forum?id=4FDx4KMZnu", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;4", "excitement": "4;3;3", "reproducibility": "4;4;4", "correctness": "4;4;4", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.0, "correctness_avg": 4.0, "replies_avg": 10, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-8436-5124;;0000-0003-2666-7615;", "linkedin": "derekchen14/;;;https://linkedin.com/in/cinemod-itasor;", "aff_unique_index": "0;1;2;0", "aff_unique_norm": "Columbia University;Cornell University;scite.ai", "aff_unique_dep": ";;", "aff_unique_url": "https://www.columbia.edu;https://www.cornell.edu;https://www.scite.ai", "aff_unique_abbr": "Columbia;Cornell;scite.ai", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "4Ggw1DsgRQ", "title": "Enhancing Computation Efficiency in Large Language Models through Weight and Activation Quantization", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Large Language Models (LLMs) are proficient in natural language processing tasks, but their deployment is often restricted by extensive parameter sizes and computational demands. 
This paper focuses on post-training quantization (PTQ) in LLMs, specifically 4-bit weight and 8-bit activation (W4A8) quantization, to enhance computational efficiency\u2014a topic less explored compared to weight-only quantization. We present two innovative techniques: activation-quantization-aware scaling (AQAS) and sequence-length-aware calibration (SLAC) to enhance PTQ by considering the combined effects on weights and activations and aligning calibration sequence lengths to target tasks. Moreover, we introduce dINT, a hybrid data format combining integer and denormal representations, to address the underflow issue in W4A8 quantization, where small values are rounded to zero. Through rigorous evaluations of LLMs, including OPT and LLaMA, we demonstrate that our techniques significantly boost task accuracies to levels comparable with full-precision models. By developing arithmetic units compatible with dINT, we further confirm that our methods yield a 2$\\times$ hardware efficiency improvement compared to 8-bit integer MAC unit.", "keywords": "Post-training quantization;LLM;Transformers;Numerical format", "primary_area": "", "supplementary_material": "", "author": "Janghwan Lee;Minsoo Kim;Seungcheol Baek;Seok Joong Hwang;Wonyong Sung;Jungwook Choi", "authorids": "~Janghwan_Lee2;~Minsoo_Kim2;~Seungcheol_Baek1;~Seok_Joong_Hwang1;~Wonyong_Sung1;~Jungwook_Choi1", "gender": "M;M;M;M;;M", "homepage": "https://superdocker.github.io;https://marsjacobs.github.io;;;;", "dblp": "27/10012;;;;22/1975;97/4140", "google_scholar": "_SJm7y4AAAAJ;https://scholar.google.co.kr/citations?hl=ko;https://scholar.google.com/citations?hl=en;NzS_Io0AAAAJ;https://scholar.google.co.kr/citations?user=1IfNFz4AAAAJ;YPT98zwAAAAJ", "or_profile": "~Janghwan_Lee2;~Minsoo_Kim2;~Seungcheol_Baek1;~Seok_Joong_Hwang1;~Wonyong_Sung1;~Jungwook_Choi1", "aff": "Hanyang University;Hanyang University;SAPEON;SAPEON Korea;Seoul National University;Hanyang University", "aff_domain": "hanyang.ac.kr;hanyang.ac.kr;sapeon.com;sapeon.com;snu.ac.kr;hanyang.ac.kr", "position": "PhD student;PhD student;Principal Researcher;Principal Researcher;Emeritus;Assistant Professor", "bibtex": "@inproceedings{\nlee2023enhancing,\ntitle={Enhancing Computation Efficiency in Large Language Models through Weight and Activation Quantization},\nauthor={Janghwan Lee and Minsoo Kim and Seungcheol Baek and Seok Joong Hwang and Wonyong Sung and Jungwook Choi},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=4Ggw1DsgRQ}\n}", "github": "", "project": "", "reviewers": "XAcF;MJRd;WZ3Z", "site": "https://openreview.net/forum?id=4Ggw1DsgRQ", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;3;4", "excitement": "3;3;4", "reproducibility": "3;3;4", "correctness": "3;4;4", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;0000-0001-8801-210X;", "linkedin": ";minsoo-kim-37268a1b0/;seungcheol-baek-11b80b1b;seok-joong-hwang-236847b1/;;jungwook-choi-5854996b/", "aff_unique_index": "0;0;1;1;2;0", "aff_unique_norm": "Hanyang University;SAPEON;Seoul National University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.hanyang.ac.kr;https://www.sapeon.com;https://www.snu.ac.kr", "aff_unique_abbr": "HYU;;SNU", "aff_campus_unique_index": "", 
"aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "South Korea" }, { "id": "4GmujJSuq0", "title": "What to Read in a Contract? Party-Specific Summarization of Legal Obligations, Entitlements, and Prohibitions", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Reviewing and comprehending key obligations, entitlements, and prohibitions in legal contracts can be a tedious task due to their length and domain-specificity. Furthermore, the key rights and duties requiring review vary for each contracting party. In this work, we propose a new task of \\textit{party-specific} extractive summarization for legal contracts to facilitate faster reviewing and improved comprehension of rights and duties. To facilitate this, we curate a dataset comprising of party-specific pairwise importance comparisons annotated by legal experts, covering ~293K sentence pairs that include obligations, entitlements, and prohibitions extracted from lease agreements. Using this dataset, we train a pairwise importance ranker and propose a pipeline-based extractive summarization system that generates a party-specific contract summary. We establish the need for incorporating domain-specific notions of importance during summarization by comparing our system against various baselines using both automatic and human evaluation methods.", "keywords": "Legal NLP;Rights and Obligation Extraction;Summarization;Importance Ranking", "primary_area": "", "supplementary_material": "", "author": "Abhilasha Sancheti;Aparna Garimella;Balaji Vasan Srinivasan;Rachel Rudinger", "authorids": "~Abhilasha_Sancheti1;~Aparna_Garimella1;~Balaji_Vasan_Srinivasan2;~Rachel_Rudinger1", "gender": "F;F;M;F", "homepage": "https://abhilashasancheti.github.io/;https://research.adobe.com/person/aparna-garimella/;https://research.adobe.com/person/balaji-vasan-srinivasan/;https://rudinger.github.io/", "dblp": "210/2594;183/5034.html;31/4182.html;136/8740", "google_scholar": "B0Cx2_kAAAAJ;Q4PJyXIAAAAJ;https://scholar.google.co.in/citations?user=2c_x00gAAAAJ;QKCHaHUAAAAJ", "or_profile": "~Abhilasha_Sancheti1;~Aparna_Garimella1;~Balaji_Vasan_Srinivasan2;~Rachel_Rudinger1", "aff": "University of Maryland, College Park;Adobe Research;Adobe Research;University of Maryland, College Park", "aff_domain": "umd.edu;adobe.com;adobe.com;umd.edu", "position": "PhD student;Researcher;Principal Researcher;Assistant Professor", "bibtex": "@inproceedings{\nsancheti2023what,\ntitle={What to Read in a Contract? 
Party-Specific Summarization of Legal Obligations, Entitlements, and Prohibitions},\nauthor={Abhilasha Sancheti and Aparna Garimella and Balaji Vasan Srinivasan and Rachel Rudinger},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=4GmujJSuq0}\n}", "github": "", "project": "", "reviewers": "EMMa;CFoT;at5K", "site": "https://openreview.net/forum?id=4GmujJSuq0", "pdf_size": 0, "rating": "5;5;5", "confidence": "5;2;3", "excitement": "3;4;4", "reproducibility": "5;2;4", "correctness": "3;4;4", "rating_avg": 5.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0002-4426-7407;", "linkedin": ";aparna-garimella-639738110/;balajivasan/?originalSubdomain=in;", "aff_unique_index": "0;1;1;0", "aff_unique_norm": "University of Maryland;Adobe", "aff_unique_dep": ";Adobe Research", "aff_unique_url": "https://www/umd.edu;https://research.adobe.com", "aff_unique_abbr": "UMD;Adobe", "aff_campus_unique_index": "0;0", "aff_campus_unique": "College Park;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "4IubiozIFH", "title": "Exploring the Effectiveness of Multi-Lingual Commonsense Knowledge-Aware Open-Domain Dialogue Response Generation", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Prior works have shown the promising results of commonsense knowledge-aware models in improving informativeness while reducing the hallucination issue. Nonetheless, prior works often can only use monolingual knowledge whose language is consistent with the dialogue context. Except for a few high-resource languages, such as English and Chinese, most languages suffer from insufficient knowledge issues, especially minority languages. To this end, this work proposes a new task, Multi-Lingual Commonsense Knowledge-Aware Response Generation (MCKRG), which tries to use commonsense knowledge in other languages to enhance the current dialogue generation. Then, we construct a MCKRG dataset MCK-Dialog of seven languages with multiple alignment methods. Finally, we verify the effectiveness of using multi-lingual commonsense knowledge with a proposed MCK-T5 model. Extensive experimental results demonstrate the great potential of using multi-lingual commonsense knowledge in high-resource and low-resource languages. 
To the best of our knowledge, this work is the first to explore Multi-Lingual Commonsense Knowledge-Aware Response Generation.", "keywords": "response generation;dialogue system;commonsense knowledge;multi-lingual", "primary_area": "", "supplementary_material": "", "author": "Sixing Wu;Jiong Yu;Tianshi Che;Yang Zhou;Wei Zhou", "authorids": "~Sixing_Wu2;~Jiong_Yu2;~Tianshi_Che1;~Yang_Zhou4;~Wei_Zhou9", "gender": "M;M;M;;M", "homepage": ";;https://auburn.edu;http://eng.auburn.edu/users/yangzhou/;", "dblp": "188/3013;01/6743;296/8746;07/4580-1;69/5011-11", "google_scholar": ";;jkxujjEAAAAJ;yvE8Po0AAAAJ;", "or_profile": "~Sixing_Wu2;~Jiong_Yu2;~Tianshi_Che1;~Yang_Zhou4;~Wei_Zhou9", "aff": "Yunnan University;Yunnan University;Auburn University;Auburn University;Yunnan University", "aff_domain": "ynu.edu.cn;ynu.edu.cn;auburn.edu;auburn.edu;ynu.edu.cn", "position": "Lecturer;MS student;PhD student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nwu2023exploring,\ntitle={Exploring the Effectiveness of Multi-Lingual Commonsense Knowledge-Aware Open-Domain Dialogue Response Generation},\nauthor={Sixing Wu and Jiong Yu and Tianshi Che and Yang Zhou and Wei Zhou},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=4IubiozIFH}\n}", "github": "", "project": "", "reviewers": "z5FY;wvbY;cLnM", "site": "https://openreview.net/forum?id=4IubiozIFH", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;3;5", "excitement": "3;3;3", "reproducibility": "4;4;4", "correctness": "4;4;3", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.0, "reproducibility_avg": 4.0, "correctness_avg": 3.6666666666666665, "replies_avg": 6, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0009-0003-2381-4912;;0000-0001-7839-4933;", "linkedin": ";;tianshi-che-5169891b7/;;", "aff_unique_index": "0;0;1;1;0", "aff_unique_norm": "Yunnan University;Auburn University", "aff_unique_dep": ";", "aff_unique_url": "http://www.ynu.edu.cn;https://www.auburn.edu", "aff_unique_abbr": "YNU;Auburn", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;1;0", "aff_country_unique": "China;United States" }, { "id": "4Jnjap7NSx", "title": "Distance-Based Propagation for Efficient Knowledge Graph Reasoning", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Knowledge graph completion (KGC) aims to predict unseen edges in knowledge graphs (KGs), resulting in the discovery of new facts. A new class of methods has been proposed to tackle this problem by aggregating path information. These methods have shown tremendous ability in the task of KGC. However, they are plagued by efficiency issues. Though there are a few recent attempts to address this through learnable path pruning, they often sacrifice the performance to gain efficiency. In this work, we identify two intrinsic limitations of these methods that affect the efficiency and representation quality. To address the limitations, we introduce a new method, TAGNet, which is able to efficiently propagate information. This is achieved by only aggregating paths in a fixed window for each source-target pair. We demonstrate that the complexity of TAGNet is independent of the number of layers. 
Extensive experiments demonstrate that TAGNet can cut down on the number of propagated messages by as much as $90$% while achieving competitive performance on multiple KG datasets.", "keywords": "knowledge graphs;link prediction;graph neural network", "primary_area": "", "supplementary_material": "", "author": "Harry Shomer;Yao Ma;Juanhui Li;Bo Wu;Charu C. Aggarwal;Jiliang Tang", "authorids": "~Harry_Shomer1;~Yao_Ma3;~Juanhui_Li1;~Bo_Wu1;~Charu_C._Aggarwal2;~Jiliang_Tang1", "gender": ";M;F;M;M;M", "homepage": "https://www.cse.msu.edu/~shomerha/;https://yaoma24.github.io/;https://juanhui28.github.io/;https://inside.mines.edu/~bwu/;http://www.charuaggarwal.net;https://www.cse.msu.edu/~tangjili/", "dblp": ";212/7871.html;313/9527.html;47/6534-2.html;a/CharuCAggarwal;64/10812", "google_scholar": "_6eE2vsAAAAJ;wf9TTOIAAAAJ;5J0dd-sAAAAJ;g6Wdt1YAAAAJ;x_wsduUAAAAJ;WtzKMWAAAAAJ", "or_profile": "~Harry_Shomer1;~Yao_Ma3;~Juanhui_Li1;~Bo_Wu1;~Charu_C._Aggarwal2;~Jiliang_Tang1", "aff": "Michigan State University;New Jersey Institute of Technology;Michigan State University;Colorado School of Mines;International Business Machines;Michigan State University", "aff_domain": "msu.edu;njit.edu;msu.edu;mines.edu;ibm.com;msu.edu", "position": "PhD student;Assistant Professor;PhD student;Associate Professor;Distinguished Research Staff Member;Full Professor", "bibtex": "@inproceedings{\nshomer2023distancebased,\ntitle={Distance-Based Propagation for Efficient Knowledge Graph Reasoning},\nauthor={Harry Shomer and Yao Ma and Juanhui Li and Bo Wu and Charu C. Aggarwal and Jiliang Tang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=4Jnjap7NSx}\n}", "github": "", "project": "", "reviewers": "NkaF;VWSR;jKiL", "site": "https://openreview.net/forum?id=4Jnjap7NSx", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;4;3", "excitement": "3;3;3", "reproducibility": "3;4;4", "correctness": "4;4;4", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0001-5081-1870;;0000-0003-4909-1778;0009-0001-1696-4272;0000-0003-2579-7581;0000-0001-7125-3898", "linkedin": ";;;bo-wu-47674734/;;", "aff_unique_index": "0;1;0;2;3;0", "aff_unique_norm": "Michigan State University;New Jersey Institute of Technology;Colorado School of Mines;International Business Machines Corporation", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.msu.edu;https://www.njit.edu;https://www.mines.edu;https://www.ibm.com", "aff_unique_abbr": "MSU;NJIT;CSM;IBM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "4JpybEffzH", "title": "Non-Autoregressive Document-Level Machine Translation", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Non-autoregressive translation (NAT) models achieve comparable performance and superior speed compared to auto-regressive translation (AT) models in the context of sentence-level machine translation (MT). However, their abilities are unexplored in document-level MT, hindering their usage in real scenarios. 
\nIn this paper, we conduct a comprehensive examination of typical NAT models in the context of document-level MT and further propose a simple but effective design of sentence alignment between source and target.\nExperiments show that NAT models achieve high acceleration on documents, and sentence alignment significantly enhances their performance. \n\nHowever, current NAT models still have a significant performance gap compared to their AT counterparts. Further investigation reveals that NAT models suffer more from the multi-modality and misalignment issues in the context of document-level MT, and current NAT models struggle with exploiting document context and handling discourse phenomena.\nWe delve into these challenges and provide our code at \\url{https://github.com/baoguangsheng/nat-on-doc}.", "keywords": "NAT;Document-level MT;Machine Translation", "primary_area": "", "supplementary_material": "", "author": "Guangsheng Bao;Zhiyang Teng;Hao Zhou;Jianhao Yan;Yue Zhang", "authorids": "~Guangsheng_Bao1;~Zhiyang_Teng1;~Hao_Zhou5;~Jianhao_Yan1;~Yue_Zhang7", "gender": "M;M;M;M;M", "homepage": "https://baoguangsheng.github.io/;https://zeeeyang.github.io;https://zhouh.github.io/;;http://frcchang.github.io", "dblp": "276/0515;136/8660;63/778-12;242/4255;47/722-4", "google_scholar": "cxPJx2kAAAAJ;9wOJrf8AAAAJ;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com.hk/citations?user=pnKLj_wAAAAJ;", "or_profile": "~Guangsheng_Bao1;~Zhiyang_Teng1;~Hao_Zhou5;~Jianhao_Yan1;~Yue_Zhang7", "aff": "Westlake University;Nanyang Technological University;Tsinghua University;Westlake University;Westlake University", "aff_domain": "westlake.edu.cn;ntu.edu.sg;tsinghua.edu.cn;westlake.edu.cn;westlake.edu.cn", "position": "PhD student;Researcher;Associate Professor;PhD student;Full Professor", "bibtex": "@inproceedings{\nbao2023nonautoregressive,\ntitle={Non-Autoregressive Document-Level Machine Translation},\nauthor={Guangsheng Bao and Zhiyang Teng and Hao Zhou and Jianhao Yan and Yue Zhang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=4JpybEffzH}\n}", "github": "", "project": "", "reviewers": "HkNb;3tvA;NAdw", "site": "https://openreview.net/forum?id=4JpybEffzH", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;4;3", "excitement": "4;3;4", "reproducibility": "3;4;3", "correctness": "4;3;4", "rating_avg": 3.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.6666666666666665, "replies_avg": 10, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0003-3815-3988;;;;0000-0002-5214-2268", "linkedin": ";;;;", "aff_unique_index": "0;1;2;0;0", "aff_unique_norm": "Westlake University;Nanyang Technological University;Tsinghua University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.westlake.edu.cn;https://www.ntu.edu.sg;https://www.tsinghua.edu.cn", "aff_unique_abbr": "WU;NTU;THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;0", "aff_country_unique": "China;Singapore" }, { "id": "4KRiWsfOwn", "title": "Merging Experts into One: Improving Computational Efficiency of Mixture of Experts", "track": "main", "status": "Short Main", "tldr": "", "abstract": "Scaling the size of language models usually leads to remarkable advancements in NLP tasks. But it often comes with a price of growing computational cost. 
Although a sparse Mixture of Experts (MoE) can reduce the cost by activating a small subset of parameters (e.g., one expert) for each input, its computation escalates significantly if increasing the number of activated experts, limiting its practical utility. Can we retain the advantages of adding more experts without substantially increasing the computational costs? In this paper, we first demonstrate the superiority of selecting multiple experts and then propose a computation-efficient approach called \\textbf{\\texttt{Merging Experts into One}} (MEO), which reduces the computation cost to that of a single expert. Extensive experiments show that MEO significantly improves computational efficiency, e.g., FLOPS drops from 72.0G of vanilla MoE to 28.6G (MEO). Moreover, we propose a token-level attention block that further enhances the efficiency and performance of token-level MEO, e.g., 83.3\\% (MEO) vs. 82.6\\% (vanilla MoE) average score on the GLUE benchmark. Our code will be released upon acceptance. Code will be released at: \\url{https://github.com/Shwai-He/MEO}.", "keywords": "Mixture of Experts;Computational Efficiency", "primary_area": "", "supplementary_material": "", "author": "Shwai He;Run-Ze Fan;Liang Ding;Li Shen;Tianyi Zhou;Dacheng Tao", "authorids": "~Shwai_He1;~Run-Ze_Fan1;~Liang_Ding3;~Li_Shen1;~Tianyi_Zhou1;~Dacheng_Tao1", "gender": ";M;M;M;M;", "homepage": ";https://rzfan525.github.io/;http://liamding.cc/;https://sites.google.com/site/mathshenli/home;https://tianyizhou.github.io/;", "dblp": ";355/5702;88/3340-6.html;91/3680-8;88/8205-1;", "google_scholar": ";https://scholar.google.com.hk/citations?user=mhot7AUAAAAJ;lFCLvOAAAAAJ;yVhgENIAAAAJ;OKvgizMAAAAJ;", "or_profile": "~Shwai_He1;~Run-Ze_Fan1;~Liang_Ding3;~Li_Shen1;~Tianyi_Zhou1;~Dacheng_Tao1", "aff": ";University of Chinese Academy of Sciences;JD Explore Academy, JD.com Inc.;JD Explore Academy;University of Maryland, College Park;", "aff_domain": ";mails.ucas.ac.cn;jd.com;jd.com;umd.edu;", "position": ";MS student;Research Scientist;Researcher;Assistant Professor;", "bibtex": "@inproceedings{\nhe2023merging,\ntitle={Merging Experts into One: Improving Computational Efficiency of Mixture of Experts},\nauthor={Shwai He and Run-Ze Fan and Liang Ding and Li Shen and Tianyi Zhou and Dacheng Tao},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=4KRiWsfOwn}\n}", "github": "", "project": "", "reviewers": "92Jb;tZMq;6rLC", "site": "https://openreview.net/forum?id=4KRiWsfOwn", "pdf_size": 0, "rating": "5;5;5", "confidence": "5;4;3", "excitement": "4;3;2", "reproducibility": "4;4;3", "correctness": "4;4;2", "rating_avg": 5.0, "confidence_avg": 4.0, "excitement_avg": 3.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-8505-7756;;;0000-0001-5348-0632;", "linkedin": ";;;;tianyizhou;", "aff_unique_index": "0;1;2;3", "aff_unique_norm": "University of Chinese Academy of Sciences;JD.com Inc.;JD;University of Maryland", "aff_unique_dep": ";JD Explore Academy;JD Explore Academy;", "aff_unique_url": "http://www.ucas.ac.cn;https://www.jd.com;;https://www/umd.edu", "aff_unique_abbr": "UCAS;JD.com;;UMD", "aff_campus_unique_index": "1", "aff_campus_unique": ";College Park", "aff_country_unique_index": "0;0;2", "aff_country_unique": "China;;United States" }, { "id": "4M4U3uC3Iy", "title": "ChatCoT: 
Tool-Augmented Chain-of-Thought Reasoning on Chat-based Large Language Models", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Although large language models (LLMs) have achieved excellent performance in a variety of evaluation benchmarks, they still struggle in complex reasoning tasks which require specific knowledge and multi-hop reasoning.\nTo improve the reasoning abilities, we propose $\\textbf{ChatCoT}$, a tool-augmented chain-of-thought reasoning framework for chat-based LLMs ($\\textit{e.g.,}$ ChatGPT).\nIn ChatCoT, we model the chain-of-thought~(CoT) reasoning as multi-turn conversations, to utilize tools in a more natural way through chatting.\nAt each turn, LLMs can either interact with tools or perform the reasoning. \nOur approach can effectively leverage the multi-turn conversation ability of chat-based LLMs, and integrate the thought chain following and tools manipulation in a unified way. \nSpecially, we initialize the early turns of the conversation by the knowledge about tools, tasks, and reasoning format, and propose an iterative $\\textit{tool-augmented reasoning}$ step to perform step-by-step tool-augmented reasoning.\nThe experiment results on two complex reasoning datasets (MATH and HotpotQA) have shown the effectiveness of ChatCoT on complex reasoning tasks, achieving a 7.9\\% relative improvement over the state-of-the-art baseline.", "keywords": "large language models;complex reasoning;chain-of-thought", "primary_area": "", "supplementary_material": "", "author": "Zhipeng Chen;Kun Zhou;Beichen Zhang;Zheng Gong;Xin Zhao;Ji-Rong Wen", "authorids": "~Zhipeng_Chen2;~Kun_Zhou2;~Beichen_Zhang1;~Zheng_Gong1;~Xin_Zhao10;~Ji-Rong_Wen1", "gender": "M;M;M;M;M;M", "homepage": ";https://lancelot39.github.io/;https://github.com/ToheartZhang;;https://gsai.ruc.edu.cn/addons/teacher/index/info.html?user_id=5&ruccode=20140041&ln=cn;https://gsai.ruc.edu.cn/english/jrwen", "dblp": ";48/3927-2.html;71/9257;;https://dblp.uni-trier.de/pid/52/8700.html;w/JRWen", "google_scholar": "wMoUf6wAAAAJ;bmRJVjwAAAAJ;;Y4IjeicAAAAJ;JNhNacoAAAAJ;tbxCHJgAAAAJ", "or_profile": "~Zhipeng_Chen2;~Kun_Zhou2;~Beichen_Zhang1;~Zheng_Gong1;~Xin_Zhao10;~Ji-Rong_Wen1", "aff": "Renmin University of China;Renmin University of China;Renmin University of China;Renmin University of China, Tsinghua University;Renmin University of China;Renmin University of China", "aff_domain": "ruc.edu.cn;ruc.edu.cn;ruc.edu.cn;ruc.edu.cn;ruc.edu.cn;ruc.edu.cn", "position": "Undergrad student;PhD student;MS student;MS student;Full Professor;Full Professor", "bibtex": "@inproceedings{\nchen2023chatcot,\ntitle={ChatCoT: Tool-Augmented Chain-of-Thought Reasoning on Chat-based Large Language Models},\nauthor={Zhipeng Chen and Kun Zhou and Beichen Zhang and Zheng Gong and Xin Zhao and Ji-Rong Wen},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=4M4U3uC3Iy}\n}", "github": "", "project": "", "reviewers": "iNwF;GffQ;X74Y", "site": "https://openreview.net/forum?id=4M4U3uC3Iy", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;4;4", "excitement": "2;4;2", "reproducibility": "3;3;3", "correctness": "3;3;3", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 2.6666666666666665, "reproducibility_avg": 3.0, "correctness_avg": 3.0, "replies_avg": 10, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;0000-0002-8333-6196;0000-0002-9777-9676", "linkedin": ";;;;;", 
"aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "Renmin University of China", "aff_unique_dep": "", "aff_unique_url": "http://www.ruc.edu.cn", "aff_unique_abbr": "RUC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "4MjZNeTCqZ", "title": "UniChart: A Universal Vision-language Pretrained Model for Chart Comprehension and Reasoning", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Charts are widely used for data analysis, providing visual representations and insights into complex data. To facilitate chart-based data analysis using natural language, several downstream tasks have been introduced recently such as chart question answering and chart summarization. However, existing methods for these tasks often rely on pretraining on language or vision-language tasks, neglecting the explicit modeling of chart structures (e.g., how chart elements are related to each other). To address this, we first build a large corpus of charts covering diverse topics and visual styles. We then present UniChart, a pretrained model for chart comprehension and reasoning. UniChart encodes the relevant text, data, and visual elements of charts and then uses a chart-grounded text decoder for text generation. We propose several chart-specific pretraining tasks that include: (i) low-level tasks to extract the visual elements (e.g., bars, lines) and data from charts, and (ii) high-level tasks to acquire chart understanding and reasoning skills. Our experiments demonstrate that pretraining UniChart on a large corpus with chart-specific objectives, followed by fine-tuning, yields state-of-the-art performance on four downstream tasks. Moreover, our model exhibits superior generalizability to unseen chart corpus, surpassing previous approaches that lack chart-specific objectives and utilize limited chart resources.", "keywords": "Charts;Pretraining", "primary_area": "", "supplementary_material": "", "author": "Ahmed Masry;Parsa Kavehzadeh;Do Xuan Long;Enamul Hoque;Shafiq Joty", "authorids": "~Ahmed_Masry1;~Parsa_Kavehzadeh1;~Do_Xuan_Long1;~Enamul_Hoque2;~Shafiq_Joty1", "gender": "M;M;M;;M", "homepage": "https://ahmedmasryku.github.io/;;https://dxlong2000.github.io/;https://www.yorku.ca/enamulh/;https://raihanjoty.github.io/", "dblp": "287/6325;299/1001;317/0657.html;71/4476.html;62/2078", "google_scholar": "XqPX5XcAAAAJ;jSn9dykAAAAJ;uZyF8wwAAAAJ;https://scholar.google.ca/citations?user=NySeLFcAAAAJ;hR249csAAAAJ", "or_profile": "~Ahmed_Masry1;~Parsa_Kavehzadeh1;~Do_Xuan_Long1;~Enamul_Hoque2;~Shafiq_Joty1", "aff": "York University;York University;Nanyang Technological University ;York University;SalesForce.com", "aff_domain": "yorku.ca;yorku.ca;e.ntu.edu.sg;yorku.ca;salesforce.com", "position": "Researcher;MS student;Undergrad student;Assistant Professor;Principal Researcher", "bibtex": "@inproceedings{\nmasry2023unichart,\ntitle={UniChart: A Universal Vision-language Pretrained Model for Chart Comprehension and Reasoning},\nauthor={Ahmed Masry and Parsa Kavehzadeh and Do Xuan Long and Enamul Hoque and Shafiq Joty},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=4MjZNeTCqZ}\n}", "github": "", "project": "", "reviewers": "hLdJ;8Lrx;bdEC", "site": "https://openreview.net/forum?id=4MjZNeTCqZ", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;4;4", "excitement": "4;3;4", "reproducibility": "3;3;4", "correctness": "3;3;4", 
"rating_avg": 5.0, "confidence_avg": 4.0, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.3333333333333335, "replies_avg": 10, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;", "linkedin": "ahmed-masry-ku/;;;;", "aff_unique_index": "0;0;1;0;2", "aff_unique_norm": "York University;Nanyang Technological University;Salesforce", "aff_unique_dep": ";;", "aff_unique_url": "https://www.yorku.ca;https://www.ntu.edu.sg;https://www.salesforce.com", "aff_unique_abbr": "York U;NTU;Salesforce", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;2", "aff_country_unique": "Canada;Singapore;United States" }, { "id": "4PPT1An0kY", "title": "Self-Ensemble of $N$-best Generation Hypotheses by Lexically Constrained Decoding", "track": "main", "status": "Short Main", "tldr": "", "abstract": "We propose a method that ensembles $N$-best hypotheses to improve natural language generation. \nPrevious studies have achieved notable improvements in generation quality by explicitly reranking $N$-best candidates. \nThese studies assume that there exists a hypothesis of higher quality. \nWe expand the assumption to be more practical as there exist \\emph{partly} higher quality hypotheses in the $N$-best yet they may be imperfect as the entire sentences. \nBy merging these high-quality fragments, we can obtain a higher-quality output than the single-best sentence. \nSpecifically, we first obtain $N$-best hypotheses and conduct token-level quality estimation. \nWe then apply tokens that should or should not be present in the final output as lexical constraints in decoding. \nEmpirical experiments on paraphrase generation, summarisation, and constrained text generation confirm that our method outperforms the strong $N$-best reranking methods.", "keywords": "Reranking;Lexically Constrained Decoding;Generation", "primary_area": "", "supplementary_material": "", "author": "Ryota Miyano;Tomoyuki Kajiwara;Yuki Arase", "authorids": "~Ryota_Miyano1;~Tomoyuki_Kajiwara1;~Yuki_Arase1", "gender": "M;;F", "homepage": "http://www-bigdata.ist.osaka-u.ac.jp/;https://moguranosenshi.sakura.ne.jp/cv.pdf;https://yukiar.github.io/", "dblp": ";140/3305;25/1605", "google_scholar": ";cCAR9aYAAAAJ;uoL1Wr0AAAAJ", "or_profile": "~Ryota_Miyano1;~Tomoyuki_Kajiwara1;~Yuki_Arase1", "aff": "Osaka University, School of Engineering;Ehime University;Osaka University", "aff_domain": "osaka-u.ac.jp;ehime-u.ac.jp;osaka-u.ac.jp", "position": "Undergrad student;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nmiyano2023selfensemble,\ntitle={Self-Ensemble of \\$N\\$-best Generation Hypotheses by Lexically Constrained Decoding},\nauthor={Ryota Miyano and Tomoyuki Kajiwara and Yuki Arase},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=4PPT1An0kY}\n}", "github": "", "project": "", "reviewers": "26qx;8gyk;5Mk4", "site": "https://openreview.net/forum?id=4PPT1An0kY", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;4", "excitement": "3;4;3", "reproducibility": "4;4;4", "correctness": "3;4;3", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": ";;", "aff_unique_index": "0;1;0", 
"aff_unique_norm": "Osaka University;Ehime University", "aff_unique_dep": "School of Engineering;", "aff_unique_url": "https://www.osaka-u.ac.jp;https://www.ehime-u.ac.jp", "aff_unique_abbr": "Osaka U;Ehime U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Japan" }, { "id": "4WrqZlEK3K", "title": "LMGQS: A Large-scale Dataset for Query-focused Summarization", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Query-focused summarization (QFS) aims to extract or generate a summary of an input document that directly answers or is relevant to a given query. The lack of large-scale datasets in the form of documents, queries, and summaries has hindered model development in this area. In contrast, multiple large-scale high-quality datasets for generic summarization exist. We hypothesize that there is a hidden query for each summary sentence in a generic summarization annotation, and we utilize a large-scale pretrained language model to recover it. In this way, we convert four generic summarization benchmarks into a new QFS benchmark dataset, LMGQS, which consists of over 1 million document-query-summary samples. We thoroughly investigate the properties of our proposed dataset and establish baselines with state-of-the-art summarization models. By fine-tuning a language model on LMGQS, we achieve state-of-the-art zero-shot and supervised performance on multiple existing QFS benchmarks, demonstrating the high quality and diversity of LMGQS.", "keywords": "query-focused summarization;large-scale dataset;zero-shot summarization;large language model", "primary_area": "", "supplementary_material": "", "author": "Ruochen Xu;Song Wang;Yang Liu;Shuohang Wang;Yichong Xu;Dan Iter;Pengcheng He;Chenguang Zhu;Michael Zeng", "authorids": "~Ruochen_Xu2;~Song_Wang10;~Yang_Liu50;~Shuohang_Wang1;~Yichong_Xu1;~Dan_Iter1;~Pengcheng_He2;~Chenguang_Zhu1;~Michael_Zeng1", "gender": "M;M;M;M;M;Not Specified;M;M;M", "homepage": "https://xrc10.github.io/;;https://nlp-yang.github.io/;;http://xycking.wixsite.com/yichongxu;https://daniter-cu.github.io/;;;https://www.microsoft.com/en-us/research/people/nzeng/", "dblp": "188/3515;62/3151-12;;173/5469.html;154/6421;63/10689.html;116/8665;48/7536-1.html;232/1866-1.html", "google_scholar": "HTp5S00AAAAJ;ho1SePQAAAAJ;HxTr-CtMdrsC;mN-IO6wAAAAJ;sYza2XwAAAAJ;bg8RrSkAAAAJ;https://scholar.google.com/citations?hl=en;1b2kKWoAAAAJ;", "or_profile": "~Ruochen_Xu2;~Song_Wang10;~Yang_Liu50;~Shuohang_Wang1;~Yichong_Xu1;~Dan_Iter1;~Pengcheng_He2;~Chenguang_Zhu1;~Michael_Zeng1", "aff": "Microsoft Research;Microsoft Azure AI;Microsoft;Microsoft;Microsoft;Microsoft;Microsoft;Zoom;Microsoft", "aff_domain": "research.microsoft.com;microsoft.com;microsoft.com;microsoft.com;microsoft.com;microsoft.com;microsoft.com;zoom.us;microsoft.com", "position": "Researcher;Senior Applied Scientist;Researcher;Researcher;Senior Researcher;Researcher;Principal Researcher;Principal Researcher;Vice President Research Manager", "bibtex": "@inproceedings{\nxu2023lmgqs,\ntitle={{LMGQS}: A Large-scale Dataset for Query-focused Summarization},\nauthor={Ruochen Xu and Song Wang and Yang Liu and Shuohang Wang and Yichong Xu and Dan Iter and Pengcheng He and Chenguang Zhu and Michael Zeng},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=4WrqZlEK3K}\n}", "github": "", "project": "", "reviewers": "udAj;Kn1x;HLfb", "site": 
"https://openreview.net/forum?id=4WrqZlEK3K", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;3;4", "excitement": "3;4;3", "reproducibility": "3;4;4", "correctness": "3;3;3", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.0, "replies_avg": 12, "authors#_avg": 9, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;;;;", "linkedin": "ruochenx/;;;;;daniter;;;michaelnanshanzeng/", "aff_unique_index": "0;0;0;0;0;0;0;1;0", "aff_unique_norm": "Microsoft;Zoom Video Communications Inc.", "aff_unique_dep": "Microsoft Research;", "aff_unique_url": "https://www.microsoft.com/en-us/research;https://zoom.us", "aff_unique_abbr": "MSR;Zoom", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "4aBxFtqRNa", "title": "GNAT: A General Narrative Alignment Tool", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Algorithmic sequence alignment identifies similar segments shared between pairs of documents, and is fundamental to many NLP tasks.\nBut it is difficult to recognize similarities between distant versions of narratives such as translations and retellings, particularly for summaries and abridgements which are much shorter than the original novels.\n\nWe develop a general approach to narrative alignment coupling the Smith-Waterman algorithm from bioinformatics with modern text similarity metrics. We show that the background of alignment scores fits a Gumbel distribution, enabling us to define rigorous p-values on the significance of any alignment. We apply and evaluate our general narrative alignment tool (GNAT) on four distinct problem domains differing greatly in both the relative and absolute length of documents, namely summary-to-book alignment, translated book alignment, short story alignment, and plagiarism detection---demonstrating the power and performance of our methods.", "keywords": "Text Alignment", "primary_area": "", "supplementary_material": "", "author": "Tanzir Pial;Steven Skiena", "authorids": "~Tanzir_Pial1;~Steven_Skiena1", "gender": "M;M", "homepage": ";https://www.cs.stonybrook.edu/~skiena", "dblp": "226/7216;s/StevenSkiena.html", "google_scholar": "YUcK9-MAAAAJ;https://scholar.google.com/citations?hl=en", "or_profile": "~Tanzir_Pial1;~Steven_Skiena1", "aff": "State University of New York at Stony Brook;State University of New York at Stony Brook", "aff_domain": "stonybrook.edu;stonybrook.edu", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\npial2023gnat,\ntitle={{GNAT}: A General Narrative Alignment Tool},\nauthor={Tanzir Pial and Steven Skiena},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=4aBxFtqRNa}\n}", "github": "", "project": "", "reviewers": "ynEf;jEsz;Abc9", "site": "https://openreview.net/forum?id=4aBxFtqRNa", "pdf_size": 0, "rating": "5;5;5", "confidence": "3;3;3", "excitement": "4;4;4", "reproducibility": "3;5;3", "correctness": "4;4;4", "rating_avg": 5.0, "confidence_avg": 3.0, "excitement_avg": 4.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 4.0, "replies_avg": 10, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";", "linkedin": "tanzir-pial/;", "aff_unique_index": "0;0", "aff_unique_norm": "State University of New York at Stony Brook", 
"aff_unique_dep": "", "aff_unique_url": "https://www.stonybrook.edu", "aff_unique_abbr": "SUNY Stony Brook", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Stony Brook", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "4akD4Z2BBg", "title": "Biomedical Named Entity Recognition via Dictionary-based Synonym Generalization", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Biomedical named entity recognition is one of the core tasks in biomedical natural language processing (BioNLP). To tackle this task, numerous supervised/distantly supervised approaches have been proposed. Despite their remarkable success, these approaches inescapably demand laborious human effort. To alleviate the need of human effort, dictionary-based approaches have been proposed to extract named entities simply based on a given dictionary. However, one downside of existing dictionary-based approaches is that they are challenged to identify concept synonyms that are not listed in the given dictionary, which we refer as the synonym generalization problem. \nIn this study, we propose a novel Synonym Generalization (SynGen) framework that recognizes the biomedical concepts contained in the input text using span-based predictions. In particular, SynGen introduces two regularization terms, namely, (1) a synonym distance regularizer; and (2) a noise perturbation regularizer, to minimize the synonym generalization error. To demonstrate the effectiveness of our approach, we provide a theoretical analysis of the bound of synonym generalization error. We extensively evaluate our approach on a wide range of benchmarks and the results verify that SynGen outperforms previous dictionary-based models by notable margins. Lastly, we provide a detailed analysis to further reveal the merits and inner-workings of our approach.", "keywords": "Biomedical named entity recognition;NER;BioNLP;Synonym Generalization", "primary_area": "", "supplementary_material": "", "author": "Zihao Fu;Yixuan Su;Zaiqiao Meng;Nigel Collier", "authorids": "~Zihao_Fu1;~Yixuan_Su1;~Zaiqiao_Meng1;~Nigel_Collier1", "gender": "M;M;M;M", "homepage": "https://fuzihaofzh.github.io/;https://yxuansu.github.io/;https://mengzaiqiao.github.io/;https://sites.google.com/site/nhcollier/", "dblp": ";262/3282.html;185/0748;90/2619", "google_scholar": "64CHB2YAAAAJ;VuVuWEoAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.co.uk/citations?user=ZMelBa0AAAAJ", "or_profile": "~Zihao_Fu1;~Yixuan_Su1;~Zaiqiao_Meng1;~Nigel_Collier1", "aff": "University of Cambridge;University of Cambridge;University of Glasgow;University of Cambridge", "aff_domain": "cam.ac.uk;cam.ac.uk;glasgow.ac.uk;cam.ac.uk", "position": "Postdoc;PhD student;Lecturer;Full Professor", "bibtex": "@inproceedings{\nfu2023biomedical,\ntitle={Biomedical Named Entity Recognition via Dictionary-based Synonym Generalization},\nauthor={Zihao Fu and Yixuan Su and Zaiqiao Meng and Nigel Collier},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=4akD4Z2BBg}\n}", "github": "", "project": "", "reviewers": "gSYy;JQ58;tQnM;GYVt", "site": "https://openreview.net/forum?id=4akD4Z2BBg", "pdf_size": 0, "rating": "4;4;4;4", "confidence": "2;4;3;3", "excitement": "4;4;4;4", "reproducibility": "4;5;4;4", "correctness": "4;4;4;4", "rating_avg": 4.0, "confidence_avg": 3.0, "excitement_avg": 4.0, "reproducibility_avg": 4.25, "correctness_avg": 4.0, "replies_avg": 6, 
"authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-1472-7791;;0000-0002-7230-4164", "linkedin": ";;;", "aff_unique_index": "0;0;1;0", "aff_unique_norm": "University of Cambridge;University of Glasgow", "aff_unique_dep": ";", "aff_unique_url": "https://www.cam.ac.uk;https://www.gla.ac.uk", "aff_unique_abbr": "Cambridge;Glasgow", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Cambridge;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United Kingdom" }, { "id": "4dJMzjIR2k", "title": "Hi-ArG: Exploring the Integration of Hierarchical Argumentation Graphs in Language Pretraining", "track": "main", "status": "Long Main", "tldr": "", "abstract": "The knowledge graph is a structure to store and represent knowledge, and recent studies have discussed its capability to assist language models for various applications. Some variations of knowledge graphs aim to record arguments and their relations for computational argumentation tasks. However, many must simplify semantic types to fit specific schemas, thus losing flexibility and expression ability. In this paper, we propose the **Hi**erarchical **Ar**gumentation **G**raph (Hi-ArG), a new structure to organize arguments. We also introduce two approaches to exploit Hi-ArG, including a text-graph multi-modal model GreaseArG and a new pre-training framework augmented with graph information. Experiments on two argumentation tasks have shown that after further pre-training and fine-tuning, GreaseArG supersedes same-scale language models on these tasks, while incorporating graph information during further pre-training can also improve the performance of vanilla language models. Code for this paper is available at .", "keywords": "computational argumentation;knowledge graph;abstract meaning representation", "primary_area": "", "supplementary_material": "", "author": "Jingcong Liang;Rong Ye;Meng Han;Qi Zhang;Ruofei Lai;Xinyu Zhang;Zhao Cao;Xuanjing Huang;zhongyu wei", "authorids": "~Jingcong_Liang1;~Rong_Ye1;~Meng_Han5;~Qi_Zhang8;~Ruofei_Lai1;~Xinyu_Zhang6;~Zhao_Cao1;~Xuanjing_Huang1;~zhongyu_wei1", "gender": "M;F;F;M;M;M;M;F;M", "homepage": ";https://reneeye.github.io/;;http://qizhang.info;;https://scholar.google.com/citations?hl=en&user=W_WZEQEAAAAJ;http://caozhao.hw;https://xuanjing-huang.github.io/;http://www.sdspeople.fudan.edu.cn/zywei/", "dblp": "362/5859;84/5795.html;;52/323-1;301/9182;https://dblp.uni-trier.de/pid/58/4582;69/8078;05/6735-1;31/10489", "google_scholar": "https://scholar.google.com.hk/citations?user=ef6J_a8AAAAJ;UV4u5UQAAAAJ;https://scholar.google.com/citations?hl=zh-CN;XfqR3yYAAAAJ;;https://scholar.google.com/citations?hl=en;aJmTPaoAAAAJ;RGsMgZA4H78C;AjLDxxgAAAAJ", "or_profile": "~Jingcong_Liang1;~Rong_Ye1;~Meng_Han5;~Qi_Zhang8;~Ruofei_Lai1;~Xinyu_Zhang6;~Zhao_Cao1;~Xuanjing_Huang1;~zhongyu_wei1", "aff": "Fudan University;ByteDance;;Fudan University;;Huawei Technologies Ltd.;Huawei Technologies Ltd.;Fudan University;Fudan University", "aff_domain": "fudan.edu.cn;bytedance.com;;fudan.edu.cn;;huawei.com;huawei.com;fudan.edu.cn;fudan.edu.cn", "position": "PhD student;Researcher;;Full Professor;;Principal Researcher;Principal Researcher;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nliang2023hiarg,\ntitle={Hi-ArG: Exploring the Integration of Hierarchical Argumentation Graphs in Language Pretraining},\nauthor={Jingcong Liang and Rong Ye and Meng Han and Qi Zhang and Ruofei Lai and Xinyu Zhang and Zhao Cao and Xuanjing Huang and zhongyu 
wei},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=4dJMzjIR2k}\n}", "github": "", "project": "", "reviewers": "TQxT;nF6N;zpwA", "site": "https://openreview.net/forum?id=4dJMzjIR2k", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;3;4", "excitement": "4;4;4", "reproducibility": "3;3;4", "correctness": "3;4;5", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 4.0, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 9, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0009-0000-2488-2683;;;;;0000-0002-6829-4522;0000-0002-4214-7858;0000-0001-9197-9426;", "linkedin": ";;;;;;;;", "aff_unique_index": "0;1;0;2;2;0;0", "aff_unique_norm": "Fudan University;ByteDance;Huawei", "aff_unique_dep": ";;Huawei Technologies", "aff_unique_url": "https://www.fudan.edu.cn;https://www.bytedance.com;https://www.huawei.com", "aff_unique_abbr": "Fudan;ByteDance;Huawei", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "4k5BcBYKAS", "title": "GTA: Gated Toxicity Avoidance for LM Performance Preservation", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Caution: This paper includes offensive words that could potentially cause unpleasantness. The fast-paced evolution of generative language models such as GPT-4 has demonstrated outstanding results in various NLP generation tasks. However, due to the potential generation of offensive words related to race or gender, various Controllable Text Generation (CTG) methods have been proposed to mitigate the occurrence of harmful words. However, existing CTG methods not only reduce toxicity but also negatively impact several aspects of the language model's generation performance, including topic consistency, grammar, and perplexity. This paper explores the limitations of previous methods and introduces a novel solution in the form of a simple Gated Toxicity Avoidance (GTA) that can be applied to any CTG method. We also evaluate the effectiveness of the proposed GTA by comparing it with state-of-the-art CTG methods across various datasets. 
Our findings reveal that gated toxicity avoidance efficiently achieves comparable levels of toxicity reduction to the original CTG methods while preserving the generation performance of the language model.", "keywords": "language model;controllable text generation;toxicity avoidance;nontoxic text generation;text generation;natural language generation", "primary_area": "", "supplementary_material": "", "author": "Heegyu Kim;Hyunsouk Cho", "authorids": "~Heegyu_Kim1;~Hyunsouk_Cho1", "gender": "M;M", "homepage": "https://github.com/HeegyuKim;https://sites.google.com/view/iknow-lab/professor?authuser=0", "dblp": ";116/5184", "google_scholar": ";3IQB4c0AAAAJ", "or_profile": "~Heegyu_Kim1;~Hyunsouk_Cho1", "aff": "Ajou University;Ajou University", "aff_domain": "ajou.ac.kr;ajou.ac.kr", "position": "Undergrad student;Assistant Professor", "bibtex": "@inproceedings{\nkim2023gta,\ntitle={{GTA}: Gated Toxicity Avoidance for {LM} Performance Preservation},\nauthor={Heegyu Kim and Hyunsouk Cho},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=4k5BcBYKAS}\n}", "github": "", "project": "", "reviewers": "Byj4;Gtqh;9wix;4a3Y", "site": "https://openreview.net/forum?id=4k5BcBYKAS", "pdf_size": 0, "rating": "3;3;3;3", "confidence": "4;4;5;4", "excitement": "2;4;3;3", "reproducibility": "4;5;4;3", "correctness": "3;4;3;4", "rating_avg": 3.0, "confidence_avg": 4.25, "excitement_avg": 3.0, "reproducibility_avg": 4.0, "correctness_avg": 3.5, "replies_avg": 14, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-9134-1921", "linkedin": "%ED%9D%AC%EA%B7%9C-%EA%B9%80-179680bb/?locale=en_US;", "aff_unique_index": "0;0", "aff_unique_norm": "Ajou University", "aff_unique_dep": "", "aff_unique_url": "https://www.ajou.ac.kr", "aff_unique_abbr": "Ajou", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "South Korea" }, { "id": "4kuLaebvKx", "title": "ICU: Conquering Language Barriers in Vision-and-Language Modeling by Dividing the Tasks into Image Captioning and Language Understanding", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "Most multilingual vision-and-language (V\\&L) research aims to accomplish multilingual and multimodal capabilities within one model. However, the scarcity of multilingual captions for images has hindered the development. To overcome this obstacle, we propose ICU, Image Caption Understanding, which divides a V\\&L task into two stages: a V\\&L model performs image captioning in English, and a multilingual language model (mLM), in turn, takes the caption as the alt text and performs cross-lingual language understanding. The burden of multilingual processing is lifted off V\\&L model and placed on mLM. Since the multilingual text data is relatively of higher abundance and quality, ICU can facilitate the conquering of language barriers for V\\&L models. 
In experiments on two tasks across 9 languages in the IGLUE benchmark, we show that ICU can achieve new state-of-the-art results for five languages, and comparable results for the rest.", "keywords": "Cross-lingual Language Understanding;Multimodality", "primary_area": "", "supplementary_material": "", "author": "Guojun Wu", "authorids": "~Guojun_Wu2", "gender": "M", "homepage": "https://guojun-wu.github.io/", "dblp": "", "google_scholar": "", "or_profile": "~Guojun_Wu2", "aff": "University of Zurich", "aff_domain": "uzh.ch", "position": "MS student", "bibtex": "@inproceedings{\nwu2023icu,\ntitle={{ICU}: Conquering Language Barriers in Vision-and-Language Modeling by Dividing the Tasks into Image Captioning and Language Understanding},\nauthor={Guojun Wu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=4kuLaebvKx}\n}", "github": "", "project": "", "reviewers": "UdNy;ZPdR;oSwq", "site": "https://openreview.net/forum?id=4kuLaebvKx", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;4;4", "excitement": "3;2;3", "reproducibility": "4;3;4", "correctness": "3;3;3", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 2.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 1, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "", "linkedin": "", "aff_unique_index": "0", "aff_unique_norm": "University of Zurich", "aff_unique_dep": "", "aff_unique_url": "https://www.unizh.ch", "aff_unique_abbr": "UZH", "aff_country_unique_index": "0", "aff_country_unique": "Switzerland" }, { "id": "4nQN6Z6OY3", "title": "Outlier Dimensions Encode Task Specific Knowledge", "track": "main", "status": "Short Main", "tldr": "", "abstract": "Representations from large language models (LLMs) are known to be dominated by a small subset of dimensions with exceedingly high variance. Previous works have argued that although ablating these outlier dimensions in LLM representations hurts downstream performance, outlier dimensions are detrimental to the representational quality of embeddings. In this study, we investigate how fine-tuning impacts outlier dimensions and show that 1) outlier dimensions that occur in pre-training persist in fine-tuned models and 2) a single outlier dimension can complete downstream tasks with a minimal error rate. 
Our results suggest that outlier dimensions can encode crucial task-specific knowledge and that the value of a representation in a single outlier dimension drives downstream model decisions.", "keywords": "outlier dimensions;LLMs;fine-tuning;interpretability", "primary_area": "", "supplementary_material": "", "author": "William Rudman;Catherine Chen;Carsten Eickhoff", "authorids": "~William_Rudman1;~Catherine_Chen3;~Carsten_Eickhoff1", "gender": "M;F;M", "homepage": ";https://catherineschen.github.io/;https://health-nlp.org", "dblp": "299/8116;05/5358-1;42/8700", "google_scholar": "https://scholar.google.com/citations?hl=en;rkWYq-YAAAAJ;QQi1_rAAAAAJ", "or_profile": "~William_Rudman1;~Catherine_Chen3;~Carsten_Eickhoff1", "aff": "Brown University;Brown University;Eberhard-Karls-Universit\u00e4t T\u00fcbingen", "aff_domain": "brown.edu;brown.edu;uni-tuebingen.de", "position": "PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nrudman2023outlier,\ntitle={Outlier Dimensions Encode Task Specific Knowledge},\nauthor={William Rudman and Catherine Chen and Carsten Eickhoff},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=4nQN6Z6OY3}\n}", "github": "", "project": "", "reviewers": "BqrZ;Qjdk;WcPR", "site": "https://openreview.net/forum?id=4nQN6Z6OY3", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;4;4", "excitement": "4;4;3", "reproducibility": "4;4;4", "correctness": "4;4;3", "rating_avg": 5.0, "confidence_avg": 4.0, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.0, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0009-0009-8734-436X;0000-0001-9895-4061", "linkedin": ";;", "aff_unique_index": "0;0;1", "aff_unique_norm": "Brown University;Eberhard Karls University of T\u00fcbingen", "aff_unique_dep": ";", "aff_unique_url": "https://www.brown.edu;https://www.uni-tuebingen.de/", "aff_unique_abbr": "Brown;Uni T\u00fcbingen", "aff_campus_unique_index": "1", "aff_campus_unique": ";T\u00fcbingen", "aff_country_unique_index": "0;0;1", "aff_country_unique": "United States;Germany" }, { "id": "4sgXjFtnqg", "title": "Efficient Multilingual Language Model Compression through Vocabulary Trimming", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Multilingual language models (LMs) have become a powerful tool in NLP, especially for non-English languages. Nevertheless, model parameters of multilingual LMs remain large due to the larger embedding matrix of the vocabulary covering tokens in different languages. Instead, monolingual LMs can be trained in a target language with the language-specific vocabulary only. In this paper, we propose vocabulary-trimming (VT), a method to reduce a multilingual LM vocabulary to a target language by deleting potentially irrelevant tokens from its vocabulary. In theory, VT can compress any existing multilingual LM to any language covered by the original model. In our experiments, we show that VT can retain the original performance of the multilingual LM, while being considerably smaller in size than the original multilingual LM. The evaluation is performed over four NLP tasks (two generative and two classification tasks) among four widely used multilingual LMs in seven languages. 
The results show that this methodology can keep the best of both monolingual and multilingual worlds by keeping a small size as monolingual models without the need for specifically retraining them, and can even help limit potentially harmful social biases.", "keywords": "multilingual;language model;model compression;efficiency", "primary_area": "", "supplementary_material": "", "author": "Asahi Ushio;Yi Zhou;Jose Camacho-Collados", "authorids": "~Asahi_Ushio1;~Yi_Zhou14;~Jose_Camacho-Collados1", "gender": "M;F;M", "homepage": "https://asahi417.github.io/;https://aclanthology.org/people/y/yi-zhou/;http://www.josecamachocollados.com", "dblp": ";01/1901-19;165/0790", "google_scholar": "RstIo9oAAAAJ;3BdddIMAAAAJ;NP4KdQQAAAAJ", "or_profile": "~Asahi_Ushio1;~Yi_Zhou14;~Jose_Camacho-Collados1", "aff": "Cardiff University;Cardiff University;Cardiff University", "aff_domain": "cardiff.ac.uk;cardiff.ac.uk;cardiff.ac.uk", "position": "PhD student;Postdoc;Full Professor", "bibtex": "@inproceedings{\nushio2023efficient,\ntitle={Efficient Multilingual Language Model Compression through Vocabulary Trimming},\nauthor={Asahi Ushio and Yi Zhou and Jose Camacho-Collados},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=4sgXjFtnqg}\n}", "github": "", "project": "", "reviewers": "dgGG;9JRE;XE1g", "site": "https://openreview.net/forum?id=4sgXjFtnqg", "pdf_size": 0, "rating": "3;3;3", "confidence": "2;4;5", "excitement": "3;3;3", "reproducibility": "4;4;5", "correctness": "4;4;4", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.0, "reproducibility_avg": 4.333333333333333, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0001-7009-8515;", "linkedin": ";yi-zhou-867578210/;", "aff_unique_index": "0;0;0", "aff_unique_norm": "Cardiff University", "aff_unique_dep": "", "aff_unique_url": "https://www.cardiff.ac.uk", "aff_unique_abbr": "Cardiff", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United Kingdom" }, { "id": "4to6zjnEQV", "title": "Bridging Continuous and Discrete Spaces: Interpretable Sentence Representation Learning via Compositional Operations", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Traditional sentence embedding models encode sentences into vector representations to capture useful properties such as the semantic similarity between sentences. However, in addition to similarity, sentence semantics can also be interpreted via compositional operations such as sentence fusion or difference. It is unclear whether the compositional semantics of sentences can be directly reflected as compositional operations in the embedding space. To more effectively bridge the continuous embedding and discrete text spaces, we explore the plausibility of incorporating various compositional properties into the sentence embedding space that allows us to interpret embedding transformations as compositional sentence operations. We propose InterSent, an end-to-end framework for learning interpretable sentence embeddings that supports compositional sentence operations in the embedding space. Our method optimizes operator networks and a bottleneck encoder-decoder model to produce meaningful and interpretable sentence embeddings. 
Experimental results demonstrate that our method significantly improves the interpretability of sentence embeddings on four textual generation tasks over existing approaches while maintaining strong performance on traditional semantic similarity tasks.", "keywords": "sentence embedding;representation learning", "primary_area": "", "supplementary_material": "", "author": "James Y. Huang;Wenlin Yao;Kaiqiang Song;Hongming Zhang;Muhao Chen;Dong Yu", "authorids": "~James_Y._Huang1;~Wenlin_Yao1;~Kaiqiang_Song2;~Hongming_Zhang2;~Muhao_Chen1;~Dong_Yu2", "gender": ";M;M;M;M;M", "homepage": "https://jyhuang36.github.io/;https://wenlinyao.github.io/;http://i2u.world/kqsong/;http://www.cse.ust.hk/~hzhangal/;https://muhaochen.github.io/;https://sites.google.com/view/dongyu888/", "dblp": "290/1648;203/8711;;;173/2608;71/4598-1", "google_scholar": "8-4RhoQAAAAJ;qwo2A24AAAAJ;PHoJwakAAAAJ;i5ETuuQAAAAJ;k79yEZkAAAAJ;tMY31_gAAAAJ", "or_profile": "~James_Y._Huang1;~Wenlin_Yao1;~Kaiqiang_Song2;~Hongming_Zhang2;~Muhao_Chen1;~Dong_Yu2", "aff": "University of Southern California;Tencent AI Lab;Tencent AI Lab;Tencent AI Lab Seattle;University of Southern California;Tencent AI Lab", "aff_domain": "usc.edu;tencent.com;tencent.com;tencent.com;usc.edu;tencent.com", "position": "PhD student;Researcher;Senior Researcher;Researcher;Assistant Research Professor;Distinguished Scientist", "bibtex": "@inproceedings{\nhuang2023bridging,\ntitle={Bridging Continuous and Discrete Spaces: Interpretable Sentence Representation Learning via Compositional Operations},\nauthor={James Y. Huang and Wenlin Yao and Kaiqiang Song and Hongming Zhang and Muhao Chen and Dong Yu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=4to6zjnEQV}\n}", "github": "", "project": "", "reviewers": "KAiQ;43Hp;eJVG", "site": "https://openreview.net/forum?id=4to6zjnEQV", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;4;4", "excitement": "4;5;3", "reproducibility": "4;4;4", "correctness": "4;4;3", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 4.0, "reproducibility_avg": 4.0, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;0000-0003-0118-3147;0000-0003-0520-6844", "linkedin": ";;;;;dongyu/", "aff_unique_index": "0;1;1;1;0;1", "aff_unique_norm": "University of Southern California;Tencent", "aff_unique_dep": ";Tencent AI Lab", "aff_unique_url": "https://www.usc.edu;https://ai.tencent.com", "aff_unique_abbr": "USC;Tencent AI Lab", "aff_campus_unique_index": "0;2;0", "aff_campus_unique": "Los Angeles;;Seattle", "aff_country_unique_index": "0;1;1;0;0;1", "aff_country_unique": "United States;China" }, { "id": "4toYWE7g6U", "title": "ChatEdit: Towards Multi-turn Interactive Facial Image Editing via Dialogue", "track": "main", "status": "Long Main", "tldr": "", "abstract": "This paper explores interactive facial image editing through dialogue and presents the ChatEdit benchmark dataset for evaluating image editing and conversation abilities in this context. ChatEdit is constructed from the CelebA-HQ dataset, incorporating annotated multi-turn dialogues corresponding to user editing requests on the images. The dataset is challenging, as it requires the system to dynamically track and edit images based on user requests, while generating appropriate natural language responses. 
To address these challenges, we propose a framework comprising a dialogue module for tracking user requests as well as generating responses, and an image editing module for editing images accordingly. Unlike previous approaches, our framework directly tracks the user request of the current turn from the entire dialogue history and edits the initial image instead of manipulating the output from the previous turn, mitigating error accumulation and attribute forgetting issues. Extensive experiments on the ChatEdit dataset demonstrate the superiority of our framework over previous methods and also reveal room for improvement, encouraging future research. We will release the code and data publicly to facilitate advancements in complex interactive facial image editing.", "keywords": "Interactive image editing;Task-oriented dialogue", "primary_area": "", "supplementary_material": "", "author": "Xing Cui;Zekun Li;Pei Pei Li;Yibo Hu;Hailin Shi;Chunshui Cao;Zhaofeng He", "authorids": "~Xing_Cui1;~Zekun_Li2;~Pei_Pei_Li2;~Yibo_Hu1;~Hailin_Shi4;~Chunshui_Cao2;~Zhaofeng_He1", "gender": "M;;F;M;M;M;M", "homepage": ";;;https://aberhu.github.io/;http://sites.google.com/view/hailin-shi;;https://teacher.bupt.edu.cn/zhaofenghe/zh_CN/index.htm", "dblp": ";;;23/3288-1;172/1112.html;176/1432;13/3992", "google_scholar": ";;https://scholar.google.com/citations?hl=zh-CN;wyX8dzgAAAAJ;https://scholar.google.com/citations?hl=en;GtwD2CUAAAAJ;https://scholar.google.com.hk/citations?hl=zh-CN", "or_profile": "~Xing_Cui1;~Zekun_Li2;~Pei_Pei_Li2;~Yibo_Hu1;~Hailin_Shi4;~Chunshui_Cao2;~Zhaofeng_He1", "aff": "Beijing University of Posts and Telecommunications;;Beijing University of Posts and Telecommunications;NIO;NIO;Watrix Technology;Beijing University of Post and Telecommunication", "aff_domain": "bupt.edu.cn;;bupt.edu.cn;nio.com;nio.com;watrix.ai;bupt.edu.cn", "position": "PhD student;;Assistant Professor;Researcher;Expert;Researcher;Full Professor", "bibtex": "@inproceedings{\ncui2023chatedit,\ntitle={ChatEdit: Towards Multi-turn Interactive Facial Image Editing via Dialogue},\nauthor={Xing Cui and Zekun Li and Pei Pei Li and Yibo Hu and Hailin Shi and Chunshui Cao and Zhaofeng He},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=4toYWE7g6U}\n}", "github": "", "project": "", "reviewers": "Ht4U;zAak;893r", "site": "https://openreview.net/forum?id=4toYWE7g6U", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;3", "excitement": "4;3;3", "reproducibility": "4;4;4", "correctness": "3;3;3", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.0, "correctness_avg": 3.0, "replies_avg": 10, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0001-6788-1920;;;;;;0000-0002-3433-8435", "linkedin": ";;;;;;", "aff_unique_index": "0;0;1;1;2;0", "aff_unique_norm": "Beijing University of Posts and Telecommunications;NIO;Watrix Technology", "aff_unique_dep": ";;", "aff_unique_url": "http://www.bupt.edu.cn/;;", "aff_unique_abbr": "BUPT;;", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Beijing;", "aff_country_unique_index": "0;0;2;0", "aff_country_unique": "China;;United States" }, { "id": "4uylA0mUkk", "title": "Data Factors for Better Compositional Generalization", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Recent diagnostic datasets on compositional generalization, such as SCAN (Lake and Baroni, 2018) and COGS 
(Kim and Linzen, 2020), expose severe problems in models trained from scratch on these datasets. However, in contrast to this poor performance, state-of-the-art models trained on larger and more general datasets show better generalization ability. In this work, to reconcile this inconsistency, we conduct an empirical analysis by training Transformer models on a variety of training sets with different data factors, including dataset scale, pattern complexity, example difficulty, etc. First, we show that increased dataset complexity can lead to better generalization behavior on multiple different generalization challenges. To further understand this improvement, we show two axes of the benefit from more complex datasets: they provide more diverse examples so compositional understanding becomes more effective, and they also prevent ungeneralizable memorization of the examples due to reduced example repetition frequency. Finally, we explore how training examples of different difficulty levels influence generalization differently. On synthetic datasets, simple examples invoke stronger compositionality than hard examples do. On larger-scale real language datasets, while hard examples become more important potentially to ensure decent data coverage, a balanced mixture of simple and hard examples manages to induce the strongest generalizability.", "keywords": "compositional generalization;data factors", "primary_area": "", "supplementary_material": "", "author": "Xiang Zhou;Yichen Jiang;Mohit Bansal", "authorids": "~Xiang_Zhou3;~Yichen_Jiang1;~Mohit_Bansal2", "gender": ";M;M", "homepage": "https://owenzx.github.io/;https://www.jiang-yichen.io/;https://www.cs.unc.edu/~mbansal/", "dblp": "65/5138;7206;32/5243.html", "google_scholar": "Q9gfhNMAAAAJ;JgrPIsgAAAAJ;DN8QtscAAAAJ", "or_profile": "~Xiang_Zhou3;~Yichen_Jiang1;~Mohit_Bansal2", "aff": "University of North Carolina, Chapel Hill;Department of Computer Science, University of North Carolina, Chapel Hill;University of North Carolina at Chapel Hill", "aff_domain": "cs.unc.edu;cs.unc.edu;unc.edu", "position": "PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nzhou2023data,\ntitle={Data Factors for Better Compositional Generalization},\nauthor={Xiang Zhou and Yichen Jiang and Mohit Bansal},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=4uylA0mUkk}\n}", "github": "", "project": "", "reviewers": "3qWv;CqGi;P8xF;9qDf", "site": "https://openreview.net/forum?id=4uylA0mUkk", "pdf_size": 0, "rating": "3;3;3;3", "confidence": "3;4;4;4", "excitement": "3;3;3;4", "reproducibility": "4;4;4;4", "correctness": "4;3;3;3", "rating_avg": 3.0, "confidence_avg": 3.75, "excitement_avg": 3.25, "reproducibility_avg": 4.0, "correctness_avg": 3.25, "replies_avg": 14, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": ";;", "aff_unique_index": "0;0;0", "aff_unique_norm": "University of North Carolina", "aff_unique_dep": "", "aff_unique_url": "https://www.unc.edu", "aff_unique_abbr": "UNC", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Chapel Hill", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "4wAKqlfV5t", "title": "Improving Multimodal Sentiment Analysis: Supervised Angular margin-based Contrastive Learning for Enhanced Fusion Representation", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "The effectiveness of a model is heavily 
reliant on the quality of the fusion representation of multiple modalities in multimodal sentiment analysis. Moreover, each modality is extracted from raw input and integrated with the rest to construct a multimodal representation. Although previous methods have proposed multimodal representations and achieved promising results, most of them focus on forming positive and negative pairs, neglecting the variation in sentiment scores within the same class. Additionally, they fail to capture the significance of unimodal representations in the fusion vector. To address these limitations, we introduce a framework called Supervised Angular-based Contrastive Learning for Multimodal Sentiment Analysis. This framework aims to enhance discrimination and generalizability of the multimodal representation and overcome biases in the fusion vector's modality. Our experimental results, along with visualizations on two widely used datasets, demonstrate the effectiveness of our approach.", "keywords": "Multimodal Sentiment Analysis;Contrastive Learning", "primary_area": "", "supplementary_material": "", "author": "Cong-Duy T Nguyen;Thong Thanh Nguyen;Duc Anh Vu;Anh Tuan Luu", "authorids": "~Cong-Duy_T_Nguyen1;~Thong_Thanh_Nguyen1;~Duc_Anh_Vu1;~Anh_Tuan_Luu2", "gender": "M;M;M;M", "homepage": "https://duyngtr16061999.github.io/;https://nguyentthong.github.io/;https://vuducanh0802.github.io/;https://tuanluu.github.io/", "dblp": ";29/5255.html;;81/8329.html", "google_scholar": "vIdT3F8AAAAJ;C2zb0lkAAAAJ;;https://scholar.google.com.sg/citations?hl=en", "or_profile": "~Cong-Duy_T_Nguyen1;~Thong_Thanh_Nguyen1;~Duc_Anh_Vu1;~Anh_Tuan_Luu2", "aff": "School of Computer Science and Engineering, Nanyang Technological University;National University of Singapore;Nanyang Technological University;Nanyang Technological University", "aff_domain": "scse.ntu.edu.sg;nus.edu;ntu.edu.sg;ntu.edu.sg", "position": "PhD student;PhD student;Undergrad student;Assistant Professor", "bibtex": "@inproceedings{\nnguyen2023improving,\ntitle={Improving Multimodal Sentiment Analysis: Supervised Angular margin-based Contrastive Learning for Enhanced Fusion Representation},\nauthor={Cong-Duy T Nguyen and Thong Thanh Nguyen and Duc Anh Vu and Anh Tuan Luu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=4wAKqlfV5t}\n}", "github": "", "project": "", "reviewers": "kyoU;yH9j;qMbV;qpZv", "site": "https://openreview.net/forum?id=4wAKqlfV5t", "pdf_size": 0, "rating": "3;3;3;3", "confidence": "4;1;3;5", "excitement": "4;3;2;3", "reproducibility": "4;3;3;4", "correctness": "4;2;3;2", "rating_avg": 3.0, "confidence_avg": 3.25, "excitement_avg": 3.0, "reproducibility_avg": 3.5, "correctness_avg": 2.75, "replies_avg": 14, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;", "linkedin": ";;;", "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Nanyang Technological University;National University of Singapore", "aff_unique_dep": "School of Computer Science and Engineering;", "aff_unique_url": "https://www.ntu.edu.sg;https://www.nus.edu.sg", "aff_unique_abbr": "NTU;NUS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Singapore" }, { "id": "50rXrJNqHQ", "title": "API-Assisted Code Generation for Question Answering on Varied Table Structures", "track": "main", "status": "Long Main", "tldr": "", "abstract": "A persistent challenge to table question answering 
(TableQA) by generating executable programs has been adapting to varied table structures, typically requiring domain-specific logical forms. In response, this paper introduces a unified TableQA framework that: (1) provides a unified representation for structured tables as multi-index Pandas data frames, (2) uses Python as a powerful querying language, and (3) uses few-shot prompting to translate NL questions into Python programs, which are executable on Pandas data frames. Furthermore, to answer complex relational questions with extended program functionality and external knowledge, our framework allows customized APIs that Python programs can call. We experiment with four TableQA datasets that involve tables of different structures --- relational, multi-table, and hierarchical matrix shapes --- and achieve prominent improvements over past state-of-the-art systems. In ablation studies, we (1) show benefits from our multi-index representation and APIs over baselines that use only an LLM, and (2) demonstrate that our approach is modular and can incorporate additional APIs.", "keywords": "table question answering;code generation", "primary_area": "", "supplementary_material": "", "author": "Yihan Cao;Shuyi Chen;Ryan Liu;Zhiruo Wang;Daniel Fried", "authorids": "~Yihan_Cao1;~Shuyi_Chen2;~Ryan_Liu1;~Zhiruo_Wang1;~Daniel_Fried1", "gender": "F;M;M;F;M", "homepage": ";https://shuyiryanchen.github.io/;https://theryanl.github.io;https://zorazrw.github.io;https://dpfried.github.io/", "dblp": ";;;249/2286;117/4804", "google_scholar": "OqAc0T0AAAAJ;culGbtkAAAAJ;s3McVn8AAAAJ;https://scholar.google.com/citations?hl=en;sJDqACEAAAAJ", "or_profile": "~Yihan_Cao1;~Shuyi_Chen2;~Ryan_Liu1;~Zhiruo_Wang1;~Daniel_Fried1", "aff": "Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University", "aff_domain": "andrew.cmu.edu;cmu.edu;andrew.cmu.edu;cmu.edu;cmu.edu", "position": "MS student;MS student;MS student;MS student;Assistant Professor", "bibtex": "@inproceedings{\ncao2023apiassisted,\ntitle={{API}-Assisted Code Generation for Question Answering on Varied Table Structures},\nauthor={Yihan Cao and Shuyi Chen and Ryan Liu and Zhiruo Wang and Daniel Fried},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=50rXrJNqHQ}\n}", "github": "", "project": "", "reviewers": "87ms;VuHH;rUJ6", "site": "https://openreview.net/forum?id=50rXrJNqHQ", "pdf_size": 0, "rating": "4;4;4", "confidence": "5;4;5", "excitement": "3;4;4", "reproducibility": "4;4;4", "correctness": "3;4;4", "rating_avg": 4.0, "confidence_avg": 4.666666666666667, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.0, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0003-4420-8252;0009-0005-5869-7183;;;", "linkedin": ";;ryanchenliu/;;", "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Carnegie Mellon University", "aff_unique_dep": "", "aff_unique_url": "https://www.cmu.edu", "aff_unique_abbr": "CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "51BB1xOWq1", "title": "GenKIE: Robust Generative Multimodal Document Key Information Extraction", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Key information extraction (KIE) from scanned documents has gained 
increasing attention because of its applications in various domains. Although promising results have been achieved by some recent KIE approaches, they are usually built based on discriminative models, which lack the ability to handle optical character recognition (OCR) errors and require laborious token-level labeling. In this paper, we propose a novel generative end-to-end model, named GenKIE, to address the KIE task. GenKIE is a sequence-to-sequence multimodal generative model that utilizes multimodal encoders to embed visual, layout and textual features and a decoder to generate the desired output. Well-designed prompts are leveraged to incorporate the label semantics as the weakly supervised signals and entice the generation of the key information. One notable advantage of the generative model is that it enables automatic correction of OCR errors. Besides, token-level granular annotation is not required. Extensive experiments on multiple public real-world datasets show that GenKIE effectively generalizes over different types of documents and achieves state-of-the-art results. Our experiments also validate the model's robustness against OCR errors, making GenKIE highly applicable in real-world scenarios.", "keywords": "Key information extraction;Multimodal generative model", "primary_area": "", "supplementary_material": "", "author": "Panfeng Cao;Ye Wang;Qiang Zhang;Zaiqiao Meng", "authorids": "~Panfeng_Cao1;~Ye_Wang5;~Qiang_Zhang6;~Zaiqiao_Meng1", "gender": "M;F;;M", "homepage": ";;https://qiangairesearcher.github.io;https://mengzaiqiao.github.io/", "dblp": "347/3907;;72/3527-26;185/0748", "google_scholar": ";https://scholar.google.com.hk/citations?user=rT6tzJAAAAAJ;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com/citations?hl=en", "or_profile": "~Panfeng_Cao1;~Ye_Wang5;~Qiang_Zhang6;~Zaiqiao_Meng1", "aff": ";National University of Defense Technology;Zhejiang University;University of Glasgow", "aff_domain": ";nudt.edu.cn;zju.edu.cn;glasgow.ac.uk", "position": ";PhD student;Principal Researcher;Lecturer", "bibtex": "@inproceedings{\ncao2023genkie,\ntitle={Gen{KIE}: Robust Generative Multimodal Document Key Information Extraction},\nauthor={Panfeng Cao and Ye Wang and Qiang Zhang and Zaiqiao Meng},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=51BB1xOWq1}\n}", "github": "", "project": "", "reviewers": "FmZH;Y6Ra;r4Eh;Bc8K", "site": "https://openreview.net/forum?id=51BB1xOWq1", "pdf_size": 0, "rating": "3;3;3;3", "confidence": "2;5;4;3", "excitement": "3;4;4;3", "reproducibility": "3;4;4;4", "correctness": "2;4;3;3", "rating_avg": 3.0, "confidence_avg": 3.5, "excitement_avg": 3.5, "reproducibility_avg": 3.75, "correctness_avg": 3.0, "replies_avg": 6, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0003-3778-1894;;;", "linkedin": ";;;", "aff_unique_index": "0;1;2", "aff_unique_norm": "National University of Defense Technology;Zhejiang University;University of Glasgow", "aff_unique_dep": ";;", "aff_unique_url": "http://www.nudt.edu.cn/;https://www.zju.edu.cn;https://www.gla.ac.uk", "aff_unique_abbr": "NUDT;ZJU;Glasgow", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1", "aff_country_unique": "China;United Kingdom" }, { "id": "51gbtl2VxL", "title": "Incorporating Probing Signals into Multimodal Machine Translation via Visual Question-Answering Pairs", "track": "main", "status": "Long 
Findings", "tldr": "", "abstract": "This paper presents an in-depth study of multimodal machine translation (MMT), examining the prevailing understanding that MMT systems exhibit decreased sensitivity to visual information when text inputs are complete. Instead, we attribute this phenomenon to insufficient cross-modal interaction, rather than image information redundancy. A novel approach is proposed to generate parallel Visual Question-Answering (VQA) style pairs from the source text, fostering more robust cross-modal interaction. Using Large Language Models (LLMs), we explicitly model the probing signal in MMT to convert it into VQA-style data to create the Multi30K-VQA dataset. An MMT-VQA multitask learning framework is introduced to incorporate explicit probing signals from the dataset into the MMT training process. Experimental results on two widely-used benchmarks demonstrate the effectiveness of this novel approach. Our code and data would be available at: \\url{https://github.com/libeineu/MMT-VQA}.", "keywords": "Multimodal machine translation;Large language models;VQA;Probing tasks", "primary_area": "", "supplementary_material": "", "author": "Yuxin Zuo;Bei Li;Chuanhao Lv;Tong Zheng;Tong Xiao;JingBo Zhu", "authorids": "~Yuxin_Zuo1;~Bei_Li1;~Chuanhao_Lv2;~Tong_Zheng1;~Tong_Xiao4;~JingBo_Zhu2", "gender": ";M;;M;M;", "homepage": ";https://libeineu.github.io/;;https://kidzheng.github.io/;https://www.nlplab.com/members/xiaotong.html;https://dblp.org/pid/73/2129.html", "dblp": ";;;;05/5091;", "google_scholar": ";wzbJ5EIAAAAJ;;https://scholar.google.com/citations?hl=zh-CN;-fov7zkAAAAJ;", "or_profile": "~Yuxin_Zuo1;~Bei_Li1;~Chuanhao_Lv2;~Tong_Zheng1;~Tong_Xiao4;~JingBo_Zhu2", "aff": ";Northeastern University;;;Northeastern University;Northeastern University", "aff_domain": ";neu.edu.cn;;;mail.neu.edu.cn;mail.neu.edu.cn", "position": ";PhD student;;;Full Professor;Full Professor", "bibtex": "@inproceedings{\nzuo2023incorporating,\ntitle={Incorporating Probing Signals into Multimodal Machine Translation via Visual Question-Answering Pairs},\nauthor={Yuxin Zuo and Bei Li and Chuanhao Lv and Tong Zheng and Tong Xiao and JingBo Zhu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=51gbtl2VxL}\n}", "github": "", "project": "", "reviewers": "6B61;gdQP;acWW;EZME;ntqX", "site": "https://openreview.net/forum?id=51gbtl2VxL", "pdf_size": 0, "rating": "3;3;3;3;3", "confidence": "3;4;1;4;4", "excitement": "4;3;3;3;2", "reproducibility": "3;4;4;4;2", "correctness": "3;4;4;3;2", "rating_avg": 3.0, "confidence_avg": 3.2, "excitement_avg": 3.0, "reproducibility_avg": 3.4, "correctness_avg": 3.2, "replies_avg": 15, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0000-0002-3472-4387;;", "linkedin": ";;;;tong-xiao-168bb081/;", "aff_unique_index": "0;0;0", "aff_unique_norm": "Northeastern University", "aff_unique_dep": "", "aff_unique_url": "https://www.northeastern.edu", "aff_unique_abbr": "NEU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "54WhV6RTzi", "title": "Style-Aware Radiology Report Generation with RadGraph and Few-Shot Prompting", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Automatically generated reports from medical images promise to improve the workflow of radiologists. 
Existing methods consider an image-to-report modeling task by directly generating a fully-fledged report from an image. However, this conflates the content of the report (e.g., findings and their attributes) with its style (e.g., format and choice of words), which can lead to clinically inaccurate reports. To address this, we propose a two-step approach for radiology report generation. First, we extract the content from an image; then, we verbalize the extracted content into a report that matches the style of a specific radiologist. For this, we leverage RadGraph---a graph representation of reports---together with large language models (LLMs). In our quantitative evaluations, we find that our approach leads to beneficial performance. Our human evaluation with clinical raters highlights that the AI-generated reports are indistinguishably tailored to the style of individual radiologist despite leveraging only a few examples as context.", "keywords": "language model;radiology;report generation;few-shot prompting;in-context learning;radgraph", "primary_area": "", "supplementary_material": "", "author": "Benjamin Yan;Ruochen Liu;David E Kuo;Subathra Adithan;Eduardo Pontes Reis;Stephen Kwak;Vasantha Kumar Venugopal;Chloe P O'Connell;Agustina Saenz;Pranav Rajpurkar;Michael Moor", "authorids": "~Benjamin_Yan1;~Ruochen_Liu4;~David_E_Kuo1;~Subathra_Adithan1;~Eduardo_Pontes_Reis1;~Stephen_Kwak1;~Vasantha_Kumar_Venugopal1;~Chloe_P_O'Connell1;~Agustina_Saenz1;~Pranav_Rajpurkar1;~Michael_Moor1", "gender": "M;F;M;F;M;M;M;F;;;", "homepage": ";;https://github.com/davidekuo;;;;;;;;", "dblp": ";;;;;;;;;;", "google_scholar": ";;;;;;LSnAkaUAAAAJ;Y32AlcYAAAAJ;;;", "or_profile": "~Benjamin_Yan1;~Ruochen_Liu4;~David_E_Kuo1;~Subathra_Adithan1;~Eduardo_Pontes_Reis1;~Stephen_Kwak1;~Vasantha_Kumar_Venugopal1;~Chloe_P_O'Connell1;~Agustina_Saenz1;~Pranav_Rajpurkar1;~Michael_Moor1", "aff": "Stanford University;Stanford University;Stanford University;Jawaharlal Institute of Postgraduate Medical Education and Research;Hospital Israelita Albert Einstein;Johns Hopkins University;;;Harvard Medical School ;;", "aff_domain": "stanford.edu;stanford.edu;stanford.edu;jipmer.edu.in;einstein.br;jh.edu;;;hms.edu;;", "position": "Undergrad student;MS student;MS student;Associate Professor;Researcher;Instructor;;;Instructor;;", "bibtex": "@inproceedings{\nyan2023styleaware,\ntitle={Style-Aware Radiology Report Generation with RadGraph and Few-Shot Prompting},\nauthor={Benjamin Yan and Ruochen Liu and David E Kuo and Subathra Adithan and Eduardo Pontes Reis and Stephen Kwak and Vasantha Kumar Venugopal and Chloe P O'Connell and Agustina Saenz and Pranav Rajpurkar and Michael Moor},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=54WhV6RTzi}\n}", "github": "", "project": "", "reviewers": "SCYV;aXs3;5JVq", "site": "https://openreview.net/forum?id=54WhV6RTzi", "pdf_size": 0, "rating": "2;2;2", "confidence": "5;4;4", "excitement": "4;2;4", "reproducibility": "4;5;4", "correctness": "3;2;3", "rating_avg": 2.0, "confidence_avg": 4.333333333333333, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.333333333333333, "correctness_avg": 2.6666666666666665, "replies_avg": 10, "authors#_avg": 11, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0000-0003-1464-9911;0000-0001-5110-457X;0000-0001-9430-503X;0000-0003-1357-6941;;0000-0003-3666-9075;;", "linkedin": 
"benjaminbyan/;ruochen99;;subathra-adithan-a117659/;;;;chloeoco/;;;", "aff_unique_index": "0;0;0;1;2;3;4", "aff_unique_norm": "Stanford University;Jawaharlal Institute of Postgraduate Medical Education and Research;Hospital Israelita Albert Einstein;Johns Hopkins University;Harvard University", "aff_unique_dep": ";;;;Medical School", "aff_unique_url": "https://www.stanford.edu;https://www.jipmer.puducherry.gov.in;https://www.hospitalisraelita.org.br;https://www.jhu.edu;https://hms.harvard.edu", "aff_unique_abbr": "Stanford;JIPMER;;JHU;HMS", "aff_campus_unique_index": "0;0;0;2", "aff_campus_unique": "Stanford;;Boston", "aff_country_unique_index": "0;0;0;1;2;0;0", "aff_country_unique": "United States;India;Brazil" }, { "id": "56UYArtXyA", "title": "FreeAL: Towards Human-Free Active Learning in the Era of Large Language Models", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Collecting high-quality labeled data for model training is notoriously time-consuming and labor-intensive for various NLP tasks. While copious solutions, such as active learning for small language models (SLMs) and prevalent in-context learning in the era of large language models (LLMs), have been proposed and alleviate the labeling burden to some extent, their performances are still subject to human intervention. It is still underexplored how to reduce the annotation cost in the LLMs era. To bridge this, we revolutionize traditional active learning and propose an innovative collaborative learning framework FreeAL to interactively distill and filter the task-specific knowledge from LLMs. During collaborative training, an LLM serves as an active annotator inculcating its coarse-grained knowledge, while a downstream SLM is incurred as a student to filter out high-quality in-context samples to feedback LLM for the subsequent label refinery. 
Extensive experiments on eight benchmark datasets demonstrate that FreeAL largely enhances the zero-shot performances for both SLM and LLM without any human supervision.", "keywords": "Active learning;Large language model;Human-free zero-shot learning", "primary_area": "", "supplementary_material": "", "author": "Ruixuan Xiao;Yiwen Dong;Junbo Zhao;Runze Wu;Minmin Lin;Gang Chen;Haobo Wang", "authorids": "~Ruixuan_Xiao1;~Yiwen_Dong2;~Junbo_Zhao1;~Runze_Wu1;~Minmin_Lin1;~Gang_Chen6;~Haobo_Wang1", "gender": "M;F;M;M;F;M;M", "homepage": "https://github.com/Justherozen;https://github.com/Ace424;http://jakezhao.net/;https://wu-runze.github.io/;https://scholar.google.co.jp/citations?user=HhRZ0gEAAAAJ&hl=zh-CN;;https://hbzju.github.io/", "dblp": "312/5605;274/6496-3;191/6665;;;67/6383-1;", "google_scholar": "OLQeOJgAAAAJ;;8ipao8MAAAAJ;8Uxbo9AAAAAJ;https://scholar.google.co.jp/citations?user=HhRZ0gEAAAAJ;;DnN-rggAAAAJ", "or_profile": "~Ruixuan_Xiao1;~Yiwen_Dong2;~Junbo_Zhao1;~Runze_Wu1;~Minmin_Lin1;~Gang_Chen6;~Haobo_Wang1", "aff": "Zhejiang University;Zhejiang University;Zhejiang University;NetEase Corp;NetEase, Inc.;College of Computer Science and Technology, Zhejiang University;Zhejiang University", "aff_domain": "zju.edu.cn;zju.edu.cn;zju.edu.cn;netease.com;netease.com;cs.zju.edu.cn;zju.edu.cn", "position": "MS student;MS student;Assistant Professor;Principal Researcher;Postdoc;Full Professor;PhD student", "bibtex": "@inproceedings{\nxiao2023freeal,\ntitle={Free{AL}: Towards Human-Free Active Learning in the Era of Large Language Models},\nauthor={Ruixuan Xiao and Yiwen Dong and Junbo Zhao and Runze Wu and Minmin Lin and Gang Chen and Haobo Wang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=56UYArtXyA}\n}", "github": "", "project": "", "reviewers": "3RFu;3J9a;7Urc", "site": "https://openreview.net/forum?id=56UYArtXyA", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;4;3", "excitement": "4;5;3", "reproducibility": "4;4;3", "correctness": "4;4;4", "rating_avg": 5.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 4.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0000-0002-6986-5825;;0000-0002-7483-0045;0000-0001-8586-3048", "linkedin": ";;;;;;", "aff_unique_index": "0;0;0;1;2;0;0", "aff_unique_norm": "Zhejiang University;NetEase Corporation;NetEase, Inc.", "aff_unique_dep": ";;", "aff_unique_url": "https://www.zju.edu.cn;https://www.163.com;https://www.163.com", "aff_unique_abbr": "ZJU;NetEase;NetEase", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "57yfvVESPE", "title": "Tunable Soft Prompts are Messengers in Federated Learning", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Federated learning (FL) enables multiple participants to collaboratively train machine learning models using decentralized data sources, alleviating privacy concerns that arise from directly sharing local data. However, the lack of model privacy protection in FL becomes an unneglectable challenge, especially when people want to federally finetune models based on a proprietary large language model. In this study, we propose a novel FL training approach that accomplishes information exchange among participants via tunable soft prompts. 
These soft prompts, updated and transmitted between the server and clients, assume the role of the global model parameters and serve as messengers to deliver useful knowledge from the local data and global model. As the global model itself is not required to be shared and the local training is conducted based on an auxiliary model with fewer parameters than the global model, the proposed approach provides protection for the global model while reducing communication and computation costs in FL. Extensive experiments show the effectiveness of the proposed approach compared to several baselines. We have released the source code at https://github.com/alibaba/FederatedScope/tree/fedsp/federatedscope/nlp/fedsp.", "keywords": "federated learning", "primary_area": "", "supplementary_material": "", "author": "Chenhe Dong;Yuexiang Xie;Bolin Ding;Ying Shen;Yaliang Li", "authorids": "~Chenhe_Dong1;~Yuexiang_Xie1;~Bolin_Ding3;~Ying_Shen1;~Yaliang_Li1", "gender": ";M;M;F;M", "homepage": ";https://xieyxclack.github.io/;https://bolinding.github.io/;http://ise.sysu.edu.cn/teacher/teacher02/1371452.htm;https://sites.google.com/site/yaliangli/", "dblp": "254/8252;232/2045;46/3522.html;01/8558-1;https://dblp.org/pers/hd/l/Li:Yaliang", "google_scholar": "iDp0iYkAAAAJ;https://scholar.google.com/citations?hl=zh-CN;AjYkTi8AAAAJ;rVpl7SIAAAAJ;CCPBcdYAAAAJ", "or_profile": "~Chenhe_Dong1;~Yuexiang_Xie1;~Bolin_Ding3;~Ying_Shen1;~Yaliang_Li1", "aff": "Sun Yat-sen University;Alibaba Group;Alibaba Group;SUN YAT-SEN UNIVERSITY, Tsinghua University;Alibaba Group", "aff_domain": "sysu.edu.cn;alibaba-inc.com;alibaba-inc.com;sysu.edu.cn;alibaba-inc.com", "position": "MS student;Staff;Senior Director;Associate Professor;Staff Engineer", "bibtex": "@inproceedings{\ndong2023tunable,\ntitle={Tunable Soft Prompts are Messengers in Federated Learning},\nauthor={Chenhe Dong and Yuexiang Xie and Bolin Ding and Ying Shen and Yaliang Li},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=57yfvVESPE}\n}", "github": "", "project": "", "reviewers": "d21D;Mx16;4Hpf", "site": "https://openreview.net/forum?id=57yfvVESPE", "pdf_size": 0, "rating": "2;2;2", "confidence": "4;4;3", "excitement": "3;3;2", "reproducibility": "5;5;4", "correctness": "3;3;3", "rating_avg": 2.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 2.6666666666666665, "reproducibility_avg": 4.666666666666667, "correctness_avg": 3.0, "replies_avg": 12, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-2211-5138;0009-0005-6545-7882;;0000-0002-3220-904X;0000-0002-4204-6096", "linkedin": ";;bolin-ding-50a0119/;;", "aff_unique_index": "0;1;1;0;1", "aff_unique_norm": "Sun Yat-sen University;Alibaba Group", "aff_unique_dep": ";", "aff_unique_url": "http://www.sysu.edu.cn/;https://www.alibaba.com", "aff_unique_abbr": "SYSU;Alibaba", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "58jpJdPgKi", "title": "Representation Projection Invariance Mitigates Representation Collapse", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Fine-tuning contextualized representations learned by pre-trained language models remains a prevalent practice in NLP. However, fine-tuning can lead to representation degradation (also known as representation collapse), which may result in instability, sub-optimal performance, and weak generalization. 
In this paper, we propose Representation Projection Invariance (REPINA), a novel regularization method to maintain the information content of representation and reduce representation collapse during fine-tuning by discouraging undesirable changes in the representations. We study the empirical behavior of the proposed regularization in comparison to 5 comparable baselines across 13 language understanding tasks (GLUE benchmark and six additional datasets). When evaluating in-domain performance, REPINA consistently outperforms other baselines on most tasks (10 out of 13). Additionally, REPINA improves out-of-distribution performance. We also demonstrate its effectiveness in few-shot settings and robustness to label perturbation. As a by-product, we extend previous studies of representation collapse and propose several metrics to quantify it. Our empirical findings show that our approach is significantly more effective at mitigating representation collapse.", "keywords": "representation learning;generalization;representation collapse", "primary_area": "", "supplementary_material": "", "author": "Anastasia Razdaibiedina;Ashish Khetan;Zohar Karnin;Daniel Khashabi;Vivek Madan", "authorids": "~Anastasia_Razdaibiedina1;~Ashish_Khetan1;~Zohar_Karnin1;~Daniel_Khashabi2;~Vivek_Madan2", "gender": "F;M;;M;M", "homepage": "https://ca.linkedin.com/in/anastasia-razdaibiedina-438929197;http://khetan2.web.engr.illinois.edu/;;http://danielkhashabi.com/;", "dblp": "251/9666;175/1775;16/4051;71/10515;52/11466.html", "google_scholar": "1whPOfwAAAAJ;AaauqDAAAAAJ;;pK2kQvgAAAAJ;", "or_profile": "~Anastasia_Razdaibiedina1;~Ashish_Khetan1;~Zohar_Karnin1;~Daniel_Khashabi2;~Vivek_Madan2", "aff": "Toronto University;Amazon;Amazon;Johns Hopkins University;Amazon", "aff_domain": "utoronto.ca;amazon.com;amazon.com;jhu.edu;amazon.com", "position": "PhD student;Applied Scientist;Principal Researcher;Assistant Professor;Scientist", "bibtex": "@inproceedings{\nrazdaibiedina2023representation,\ntitle={Representation Projection Invariance Mitigates Representation Collapse},\nauthor={Anastasia Razdaibiedina and Ashish Khetan and Zohar Karnin and Daniel Khashabi and Vivek Madan},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=58jpJdPgKi}\n}", "github": "", "project": "", "reviewers": "1NmN;DTwA;4e8F;E5eQ;HLED", "site": "https://openreview.net/forum?id=58jpJdPgKi", "pdf_size": 0, "rating": "2;2;2;2;2", "confidence": "2;3;4;4;4", "excitement": "3;4;3;3;3", "reproducibility": "4;4;3;5;4", "correctness": "4;3;2;4;3", "rating_avg": 2.0, "confidence_avg": 3.4, "excitement_avg": 3.2, "reproducibility_avg": 4.0, "correctness_avg": 3.2, "replies_avg": 7, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;", "linkedin": ";ashishkhetan09/;;;", "aff_unique_index": "0;1;1;2;1", "aff_unique_norm": "University of Toronto;Amazon;Johns Hopkins University", "aff_unique_dep": ";Amazon.com, Inc.;", "aff_unique_url": "https://www.utoronto.ca;https://www.amazon.com;https://www.jhu.edu", "aff_unique_abbr": "U of T;Amazon;JHU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1;1", "aff_country_unique": "Canada;United States" }, { "id": "59gI2XQPmH", "title": "Alignment Precedes Fusion: Open-Vocabulary Named Entity Recognition as Context-Type Semantic Matching", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Despite the significant progress in developing 
named entity recognition models, scaling to novel-emerging types still remains challenging in real-world scenarios. Continual learning and zero-shot learning approaches have been explored to handle novel-emerging types with less human supervision, but they have not been as successfully adopted as supervised approaches. Meanwhile, humans possess a much larger vocabulary size than these approaches and have the ability to learn the alignment between entities and concepts effortlessly through natural supervision. In this paper, we consider a more realistic and challenging setting called open-vocabulary named entity recognition (OVNER) to imitate human-level ability. OVNER aims to recognize entities in novel types by their textual names or descriptions. Specifically, we formulate OVNER as a semantic matching task and propose a novel and scalable two-stage method called Context-Type SemAntiC Alignment and FusiOn (CACAO). In the pre-training stage, we adopt Dual-Encoder for context-type semantic alignment and pre-train Dual-Encoder on 80M context-type pairs which are easily accessible through natural supervision. In the fine-tuning stage, we use Cross-Encoder for context-type semantic fusion and fine-tune Cross-Encoder on base types with human supervision. Experimental results show that our method outperforms the previous state-of-the-art methods on three challenging OVNER benchmarks by 9.7%, 9.5%, and 1.8% F1-score of novel types. Moreover, CACAO also demonstrates its flexible transfer ability in cross-domain NER.", "keywords": "Open-Vocabulary;Named Entity Recognition", "primary_area": "", "supplementary_material": "", "author": "Zhuoran Jin;Pengfei Cao;Zhitao He;Yubo Chen;Kang Liu;Jun Zhao", "authorids": "~Zhuoran_Jin1;~Pengfei_Cao1;~Zhitao_He1;~Yubo_Chen1;~Kang_Liu1;~Jun_Zhao4", "gender": "M;;M;M;M;M", "homepage": "https://scholar.google.com/citations?user=Am8WsCkAAAAJ;https://cpf-nlpr.github.io/;;http://www.nlpr.ia.ac.cn/cip/yubochen/index.html;http://www.nlpr.ia.ac.cn/cip/~liukang/index.html;http://nlpr-web.ia.ac.cn/cip/english/~junzhao/index.html", "dblp": "320/9888;182/7941;;https://dblp.uni-trier.de/pid/90/7879.html;42/4903.html;https://dblp.uni-trier.de/pid/47/2026-1.html", "google_scholar": "Am8WsCkAAAAJ;lP5_LJIAAAAJ;ULvoYXgAAAAJ;https://scholar.google.com.hk/citations?user=9z7GPxIAAAAJ;DtZCfl0AAAAJ;https://scholar.google.com.hk/citations?user=HljRttwAAAAJ", "or_profile": "~Zhuoran_Jin1;~Pengfei_Cao1;~Zhitao_He1;~Yubo_Chen1;~Kang_Liu1;~Jun_Zhao4", "aff": "Institute of Automation, Chinese Academy of Sciences;Institute of Automation, Chinese Academy of Sciences;Institute of Automation, Chinese Academy of Sciences;Institute of automation, Chinese academy of science;Institute of Automation, Chinese Academy of Sciences;Institute of automation, Chinese academy of science", "aff_domain": "nlpr.ia.ac.cn;ia.ac.cn;ia.cas.cn;nlpr.ia.ac.cn;ia.ac.cn;nlpr.ia.ac.cn", "position": "PhD student;PhD student;MS student;Associate Professor;Professor;Full Professor", "bibtex": "@inproceedings{\njin2023alignment,\ntitle={Alignment Precedes Fusion: Open-Vocabulary Named Entity Recognition as Context-Type Semantic Matching},\nauthor={Zhuoran Jin and Pengfei Cao and Zhitao He and Yubo Chen and Kang Liu and Jun Zhao},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=59gI2XQPmH}\n}", "github": "", "project": "", "reviewers": "79yH;3cyn;tqWV", "site": "https://openreview.net/forum?id=59gI2XQPmH", "pdf_size": 0, "rating": 
"3;3;3", "confidence": "3;3;3", "excitement": "3;3;4", "reproducibility": "3;4;3", "correctness": "3;4;3", "rating_avg": 3.0, "confidence_avg": 3.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.3333333333333335, "replies_avg": 10, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0009-0003-3317-1260;;;", "linkedin": ";;;;;", "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "Chinese Academy of Sciences", "aff_unique_dep": "Institute of Automation", "aff_unique_url": "http://www.ia.cas.cn", "aff_unique_abbr": "CAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "5Az3d5TkMJ", "title": "LIMIT: Language Identification, Misidentification, and Translation using Hierarchical Models in 350+ Languages", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Knowing the language of an input text/audio is a necessary first step for using almost every NLP tool such as taggers, parsers, or translation systems. Language identification is a well-studied problem, sometimes even considered solved; in reality, due to lack of data and computational challenges, current systems cannot accurately identify most of the world's 7000 languages. To tackle this bottleneck, we first compile a corpus, MCS-350, of 50K multilingual and parallel children's stories in 350+ languages. MCS-350 can serve as a benchmark for language identification of short texts and for 1400+ new translation directions in low-resource Indian and African languages. Second, we propose a novel misprediction-resolution hierarchical model, LIMIT, for language identification that reduces error by 55% (from 0.71 to 0.32) on our compiled children's stories dataset and by 40% (from 0.23 to 0.14) on the FLORES-200 benchmark. 
Our method can expand language identification coverage into low-resource languages by relying solely on systemic misprediction patterns, bypassing the need to retrain large models from scratch.", "keywords": "language identification;resource creation;machine translation;low-resource languages", "primary_area": "", "supplementary_material": "", "author": "Milind Agarwal;Md Mahfuz Ibn Alam;Antonios Anastasopoulos", "authorids": "~Milind_Agarwal1;~Md_Mahfuz_Ibn_Alam1;~Antonios_Anastasopoulos1", "gender": "M;M;M", "homepage": "https://milind-agarwal.github.io/;https://mahfuzibnalam.github.io/;http://www.cs.gmu.edu/~antonis/", "dblp": "280/9306;281/0378;148/9479", "google_scholar": "eRaPEZ0AAAAJ;6khjEYoAAAAJ;g_G_SNAAAAAJ", "or_profile": "~Milind_Agarwal1;~Md_Mahfuz_Ibn_Alam1;~Antonios_Anastasopoulos1", "aff": "George Mason University;George Mason University;George Mason University", "aff_domain": "gmu.edu;gmu.edu;gmu.edu", "position": "PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nagarwal2023limit,\ntitle={{LIMIT}: Language Identification, Misidentification, and Translation using Hierarchical Models in 350+ Languages},\nauthor={Milind Agarwal and Md Mahfuz Ibn Alam and Antonios Anastasopoulos},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=5Az3d5TkMJ}\n}", "github": "", "project": "", "reviewers": "g5ek;Ri3q;BgTR", "site": "https://openreview.net/forum?id=5Az3d5TkMJ", "pdf_size": 0, "rating": "4;4;4", "confidence": "5;5;3", "excitement": "4;3;4", "reproducibility": "4;4;5", "correctness": "4;3;4", "rating_avg": 4.0, "confidence_avg": 4.333333333333333, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.333333333333333, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0002-8544-246X", "linkedin": ";;", "aff_unique_index": "0;0;0", "aff_unique_norm": "George Mason University", "aff_unique_dep": "", "aff_unique_url": "https://www.gmu.edu", "aff_unique_abbr": "GMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "5BWvVIa5Uz", "title": "Emergent Inabilities? Inverse Scaling Over the Course of Pretraining", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "Does inverse scaling only occur as a function of model size, or can it also occur over the course of training? We carry out an exploratory study investigating whether the performance of language models on specific tasks can decrease (while general performance remains high) during training on the language modeling task. We find 8 tasks on which Pythia 12B (Biderman et al., 2023) shows decreased performance over the course of training. Five of these tasks (TruthfulQA-MC1, TruthfulQA-MC2, Hindsight Neglect, Memo Trap, and Pattern Match Suppression) additionally show a consistent relationship whereby larger language models show a greater decrease in performance the more they are trained, despite showing standard (positive) scaling overall. 
This highlights the importance of testing performance at all relevant benchmarks any time models are trained on additional data, even if their overall performance improves.", "keywords": "language models;inverse scaling;transformers;training dynamics", "primary_area": "", "supplementary_material": "", "author": "James Michaelov;Ben Bergen", "authorids": "~James_Michaelov1;~Ben_Bergen1", "gender": "M;M", "homepage": "https://jmichaelov.com/;https://cogsci.ucsd.edu/~bkbergen/", "dblp": "276/5493;12/3783-1.html", "google_scholar": "https://scholar.google.co.uk/citations?user=_Urm8X4AAAAJ;pJ8u7AQAAAAJ", "or_profile": "~James_Michaelov1;~Benjamin_Bergen1", "aff": "University of California, San Diego;University of California, San Diego", "aff_domain": "ucsd.edu;ucsd.edu", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\nmichaelov2023emergent,\ntitle={Emergent Inabilities? Inverse Scaling Over the Course of Pretraining},\nauthor={James Michaelov and Ben Bergen},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=5BWvVIa5Uz}\n}", "github": "", "project": "", "reviewers": "7HVU;qpm9;K1CC", "site": "https://openreview.net/forum?id=5BWvVIa5Uz", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;4", "excitement": "2;4;4", "reproducibility": "5;4;5", "correctness": "3;4;3", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.666666666666667, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0003-2913-1103;0000-0002-9395-9151", "linkedin": ";", "aff_unique_index": "0;0", "aff_unique_norm": "University of California, San Diego", "aff_unique_dep": "", "aff_unique_url": "https://www.ucsd.edu", "aff_unique_abbr": "UCSD", "aff_campus_unique_index": "0;0", "aff_campus_unique": "San Diego", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "5DUhBxRqKR", "title": "Drilling Down into the Discourse Structure with LLMs for Long Document Question Answering", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "We address the task of evidence retrieval for long document question answering, which involves locating relevant paragraphs within a document to answer a question. We aim to assess the applicability of large language models (LLMs) in the task of zero-shot long document evidence retrieval, owing to their unprecedented performance across various NLP tasks. However, currently the LLMs can consume limited context lengths as input, thus providing document chunks as inputs might overlook the global context while missing out on capturing the inter-segment dependencies. Moreover, directly feeding the large input sets can incur significant computational costs, particularly when processing the entire document (and potentially incurring monetary expenses with enterprise APIs like OpenAI's GPT variants). To address these challenges, we propose a suite of techniques that exploit the discourse structure commonly found in documents. By utilizing this structure, we create a condensed representation of the document, enabling a more comprehensive understanding and analysis of relationships between different parts. We retain $99.6$% of the best zero-shot approach's performance, while processing only $26$% of the total tokens used by the best approach in the information seeking evidence retrieval setup. 
We also show how our approach can be combined with *self-ask* reasoning agent to achieve best zero-shot performance in complex multi-hop question answering, just $\\approx 4$% short of zero-shot performance using gold evidence.", "keywords": "Long Document Question Answering;Large Language Model;Zero-shot Prompting;Evidence Retrieval", "primary_area": "", "supplementary_material": "", "author": "Inderjeet Jayakumar Nair;Shwetha S;Apoorv Saxena;Koustava Goswami", "authorids": "~Inderjeet_Jayakumar_Nair1;~Shwetha_S1;~Apoorv_Saxena1;~Koustava_Goswami1", "gender": "M;F;M;M", "homepage": "https://inderjeetnair.github.io/;;;https://apoorvumang.github.io", "dblp": "308/3471;361/7441;266/1147;225/2859", "google_scholar": "_C4CkDEAAAAJ;;cGrM2NQAAAAJ;", "or_profile": "~Inderjeet_Jayakumar_Nair1;~Shwetha_S1;~Koustava_Goswami1;~Apoorv_Umang_Saxena1", "aff": "Adobe Systems;Adobe Systems;Insight Centre for Data Analytics;Adobe Systems", "aff_domain": "adobe.com;adobe.com;insight-centre.org;adobe.com", "position": "Researcher;Researcher;PhD student;Researcher", "bibtex": "@inproceedings{\nnair2023drilling,\ntitle={Drilling Down into the Discourse Structure with {LLM}s for Long Document Question Answering},\nauthor={Inderjeet Jayakumar Nair and Shwetha S and Apoorv Saxena and Koustava Goswami},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=5DUhBxRqKR}\n}", "github": "", "project": "", "reviewers": "nCeH;yp1i;SsPB;4fXv", "site": "https://openreview.net/forum?id=5DUhBxRqKR", "pdf_size": 0, "rating": "4;4;4;4", "confidence": "4;4;4;4", "excitement": "4;3;3;4", "reproducibility": "4;4;3;4", "correctness": "3;3;3;3", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.5, "reproducibility_avg": 3.75, "correctness_avg": 3.0, "replies_avg": 14, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;", "linkedin": "inderjeet-nair-145b62177/;shwetha-s-0312/;koustava-goswami-0952a3116;", "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Adobe;Insight Centre for Data Analytics", "aff_unique_dep": "Adobe Systems Incorporated;", "aff_unique_url": "https://www.adobe.com;https://insight-centre.org", "aff_unique_abbr": "Adobe;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "United States;Ireland" }, { "id": "5EHI2FGf1D", "title": "Unsupervised Binary Code Translation with Application to Code Clone Detection and Vulnerability Discovery", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Binary code analysis has immense importance in the research domain of software security. Today, software is very often compiled for various Instruction Set Architectures (ISAs). As a result, cross-architecture binary code analysis has become an emerging problem. Recently, deep learning-based binary analysis has shown promising success. It is widely known that training a deep learning model requires a massive amount of data. However, for some low-resource ISAs, an adequate amount of data is hard to find, preventing deep learning from being widely adopted for binary analysis. To overcome the data scarcity problem and facilitate cross-architecture binary code analysis, we propose to apply the ideas and techniques in Neural Machine Translation (NMT) to binary code analysis. Our insight is that a binary, after disassembly, is represented in some assembly language. 
Given a binary in a low-resource ISA, we translate it to a binary in a high-resource ISA (e.g., x86). Then we can use a model that has been trained on the high-resource ISA to test the translated binary. We have implemented the model called UNSUPERBINTRANS, and conducted experiments to evaluate its performance. Specifically, we conducted two downstream tasks, including code similarity detection and vulnerability discovery. In both tasks, we achieved high accuracies.", "keywords": "NLP;Neural Machine Translation;Binary Code Analysis;Vulnerability Discovery;Code Clone Detection", "primary_area": "", "supplementary_material": "", "author": "Iftakhar Ahmad;Lannan Luo", "authorids": "~Iftakhar_Ahmad1;~Lannan_Luo3", "gender": "M;F", "homepage": ";https://lannan.github.io", "dblp": "184/6560;153/5297", "google_scholar": "I8ePaCwAAAAJ;https://scholar.google.com.tw/citations?user=JPXjw04AAAAJ", "or_profile": "~Iftakhar_Ahmad1;~Lannan_Luo3", "aff": "University of South Carolina;George Mason University", "aff_domain": "sc.edu;gmu.edu", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\nahmad2023unsupervised,\ntitle={Unsupervised Binary Code Translation with Application to Code Clone Detection and Vulnerability Discovery},\nauthor={Iftakhar Ahmad and Lannan Luo},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=5EHI2FGf1D}\n}", "github": "", "project": "", "reviewers": "qjhC;SuGi;6b4d", "site": "https://openreview.net/forum?id=5EHI2FGf1D", "pdf_size": 0, "rating": "2;2;2", "confidence": "4;4;3", "excitement": "3;4;2", "reproducibility": "3;4;3", "correctness": "3;4;2", "rating_avg": 2.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.0, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";", "linkedin": "iftakharahmad/;", "aff_unique_index": "0;1", "aff_unique_norm": "University of South Carolina;George Mason University", "aff_unique_dep": ";", "aff_unique_url": "https://www.sc.edu;https://www.gmu.edu", "aff_unique_abbr": "USC;GMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "5IFMe8TuSy", "title": "Exploring Jiu-Jitsu Argumentation for Writing Peer Review Rebuttals", "track": "main", "status": "Long Main", "tldr": "", "abstract": "In many domains of argumentation, people\u2019s arguments are driven by so-called attitude roots, i.e., underlying beliefs and world views, and their corresponding attitude themes. Given the strength of these latent drivers of arguments, recent work in psychology suggests that instead of directly countering surface-level reasoning (e.g., falsifying the premises), one should follow an argumentation style inspired by the Jiu-Jitsu ``soft'' combat system: first, identify an arguer's attitude roots and themes, and then choose a prototypical rebuttal that is aligned with those drivers instead of trying to invalidate those. In this work, we are the first to explore Jiu-Jitsu argumentation for peer reviews by proposing the novel task of attitude and theme-guided rebuttal generation. To this end, we enrich an existing dataset for discourse structure in peer reviews with attitude roots, attitude themes, and canonical rebuttals. 
To facilitate this process, we recast established annotation concepts from the domain of peer reviews (e.g., aspects a review sentence is relating to) and train domain-specific models. We then propose strong rebuttal generation strategies, which we benchmark on our novel dataset for the task of end-to-end attitude and theme-guided rebuttal generation and two subtasks.", "keywords": "Peer Reviews;Rebuttals;Attitude Roots;Jiu-Jitsu Persuasion", "primary_area": "", "supplementary_material": "", "author": "Sukannya Purkayastha;Anne Lauscher;Iryna Gurevych", "authorids": "~Sukannya_Purkayastha1;~Anne_Lauscher1;~Iryna_Gurevych1", "gender": "F;;", "homepage": ";;", "dblp": "255/8545;209/6857;", "google_scholar": "SAhTZJIAAAAJ;https://scholar.google.it/citations?user=IbJS3UEAAAAJ;", "or_profile": "~Sukannya_Purkayastha1;~Anne_Lauscher1;~Iryna_Gurevych1", "aff": "Technische Universit\u00e4t Darmstadt;Universit\u00e4t Hamburg;", "aff_domain": "tu-darmstadt.de;uni-hamburg.de;", "position": "PhD student;Associate Professor;", "bibtex": "@inproceedings{\npurkayastha2023exploring,\ntitle={Exploring Jiu-Jitsu Argumentation for Writing Peer Review Rebuttals},\nauthor={Sukannya Purkayastha and Anne Lauscher and Iryna Gurevych},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=5IFMe8TuSy}\n}", "github": "", "project": "", "reviewers": "b1L5;7PTc;5iUR", "site": "https://openreview.net/forum?id=5IFMe8TuSy", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;3;3", "excitement": "4;4;4", "reproducibility": "4;4;4", "correctness": "3;4;4", "rating_avg": 4.0, "confidence_avg": 3.0, "excitement_avg": 4.0, "reproducibility_avg": 4.0, "correctness_avg": 3.6666666666666665, "replies_avg": 10, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": "sukannya-purkayastha-5144a3118/;;", "aff_unique_index": "0;1", "aff_unique_norm": "Technische Universit\u00e4t Darmstadt;University of Hamburg", "aff_unique_dep": ";", "aff_unique_url": "https://www.tu-darmstadt.de;https://www.uni-hamburg.de", "aff_unique_abbr": "TUD;UHH", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Germany" }, { "id": "5K2fiOlcGG", "title": "Sparse Frame Grouping Network with Action Centered for Untrimmed Video Paragraph Captioning", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Generating paragraph captions for untrimmed videos without event annotations is challenging, especially when aiming to enhance precision and minimize repetition at the same time. To address this challenge, we propose a module called Sparse Frame Grouping (SFG). It dynamically groups event information with the help of action information for the entire video and excludes redundant frames within pre-defined clips. To enhance the performance, an Intra Contrastive Learning technique is designed to align the SFG module with the core event content in the paragraph, and an Inter Contrastive Learning technique is employed to learn action-guided context with reduced static noise simultaneously. Extensive experiments are conducted on two benchmark datasets (ActivityNet Captions and YouCook2). 
Results demonstrate that SFG outperforms the state-of-the-art methods on all metrics.", "keywords": "video paragraph captioning;transformer;grouping;action centered;contrastive learning", "primary_area": "", "supplementary_material": "", "author": "Guorui Yu;Yimin Hu;Yuejie Zhang;Rui Feng;Tao Zhang;Shang Gao", "authorids": "~Guorui_Yu1;~Yimin_Hu1;~Yuejie_Zhang2;~Rui_Feng2;~Tao_Zhang11;~Shang_Gao5", "gender": "F;F;F;;M;F", "homepage": "https://github.com/GuoruiYuh;;http://www.cs.fudan.edu.cn/?page_id=5518;;https://sime.sufe.edu.cn/teacher/show/33/main.psp;https://experts.deakin.edu.au/719-shang-gao/about", "dblp": ";;09/5786;;15/4777-22;28/435-3", "google_scholar": ";;;;;https://scholar.google.com.au/citations?user=lkgneeAAAAAJ", "or_profile": "~Guorui_Yu1;~Yimin_Hu1;~Yuejie_Zhang2;~Rui_Feng2;~Tao_Zhang11;~Shang_Gao5", "aff": "Fudan University;Fudan University;Fudan University;;Shanghai University of Finance and Economics;Deakin University", "aff_domain": "fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;;shufe.edu.cn;deakin.edu.au", "position": "MS student;MS student;Full Professor;;Full Professor;senior lecturer", "bibtex": "@inproceedings{\nyu2023sparse,\ntitle={Sparse Frame Grouping Network with Action Centered for Untrimmed Video Paragraph Captioning},\nauthor={Guorui Yu and Yimin Hu and Yuejie Zhang and Rui Feng and Tao Zhang and Shang Gao},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=5K2fiOlcGG}\n}", "github": "", "project": "", "reviewers": "UFoE;UvCg;71yn;tkpR", "site": "https://openreview.net/forum?id=5K2fiOlcGG", "pdf_size": 0, "rating": "4;4;4;4", "confidence": "5;3;3;4", "excitement": "2;4;4;3", "reproducibility": "3;4;4;3", "correctness": "2;3;4;3", "rating_avg": 4.0, "confidence_avg": 3.75, "excitement_avg": 3.25, "reproducibility_avg": 3.5, "correctness_avg": 3.0, "replies_avg": 13, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0001-7993-7223;;0000-0001-7561-0143;0000-0002-2947-7780", "linkedin": ";yimin-hu-564019185;;;;shang-gao-4a633a5/", "aff_unique_index": "0;0;0;1;2", "aff_unique_norm": "Fudan University;Shanghai University of Finance and Economics;Deakin University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.fudan.edu.cn;http://www.sufe.edu.cn;https://www.deakin.edu.au", "aff_unique_abbr": "Fudan;SUFE;Deakin", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;1", "aff_country_unique": "China;Australia" }, { "id": "5NMl0TYLey", "title": "InfoCL: Alleviating Catastrophic Forgetting in Continual Text Classification from An Information Theoretic Perspective", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Continual learning (CL) aims to constantly learn new knowledge over time while avoiding catastrophic forgetting on old tasks.\nWe focus on continual text classification under the class-incremental setting.\nRecent CL studies have identified the severe performance decrease on analogous classes as a key factor for catastrophic forgetting.\nIn this paper, through an in-depth exploration of the representation learning process in CL, we discover that the compression effect of the information bottleneck leads to confusion on analogous classes.\nTo enable the model to learn more sufficient representations, we propose a novel replay-based continual text classification method, InfoCL.\nOur approach utilizes fast-slow and current-past contrastive learning to perform mutual 
information maximization and better recover the previously learned representations. \nIn addition, InfoCL incorporates an adversarial memory augmentation strategy to alleviate the overfitting problem of replay.\nExperimental results demonstrate that InfoCL effectively mitigates forgetting and achieves state-of-the-art performance on three text classification tasks.", "keywords": "continual learning;text classification", "primary_area": "", "supplementary_material": "", "author": "Yifan Song;Peiyi Wang;Weimin Xiong;Dawei Zhu;Tianyu Liu;Zhifang Sui;Sujian Li", "authorids": "~Yifan_Song2;~Peiyi_Wang1;~Weimin_Xiong1;~Dawei_Zhu2;~Tianyu_Liu3;~Zhifang_Sui1;~Sujian_Li1", "gender": "M;M;M;;M;F;F", "homepage": "https://yifan-song793.github.io/;;https://github.com/WeiminXiong;;;http://eecs.pku.edu.cn/EN/People/Faculty/Detail/?ID=6024;https://pku-tangent.github.io/", "dblp": ";236/6569.html;342/9246;;134/1099-1;;05/4288", "google_scholar": ";K0uQ3ygAAAAJ;UwYq5tgAAAAJ;oD2HPaYAAAAJ;https://scholar.google.com.hk/citations?user=6hHbBwwAAAAJ;;https://scholar.google.com.tw/citations?user=RvBDhSwAAAAJ", "or_profile": "~Yifan_Song2;~Peiyi_Wang1;~Weimin_Xiong1;~Dawei_Zhu2;~Tianyu_Liu3;~Zhifang_Sui1;~Sujian_Li1", "aff": "Peking University;Peking University;Peking University;Peking University;Tencent Cloud AI (LLM);Peking University;Peking University", "aff_domain": "pku.edu.cn;pku.edu.cn;pku.edu.cn;pku.edu.cn;tencent.com;pku.edu.cn;pku.edu.cn", "position": "PhD student;PhD student;Undergrad student;PhD student;Senior Researcher;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nsong2023infocl,\ntitle={Info{CL}: Alleviating Catastrophic Forgetting in Continual Text Classification from An Information Theoretic Perspective},\nauthor={Yifan Song and Peiyi Wang and Weimin Xiong and Dawei Zhu and Tianyu Liu and Zhifang Sui and Sujian Li},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=5NMl0TYLey}\n}", "github": "", "project": "", "reviewers": "JheW;AarQ;EZuw", "site": "https://openreview.net/forum?id=5NMl0TYLey", "pdf_size": 0, "rating": "4;4;4", "confidence": "2;3;3", "excitement": "3;3;3", "reproducibility": "4;2;3", "correctness": "3;3;3", "rating_avg": 4.0, "confidence_avg": 2.6666666666666665, "excitement_avg": 3.0, "reproducibility_avg": 3.0, "correctness_avg": 3.0, "replies_avg": 9, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;;", "linkedin": ";;;;;;", "aff_unique_index": "0;0;0;0;1;0;0", "aff_unique_norm": "Peking University;Tencent", "aff_unique_dep": ";LLM", "aff_unique_url": "http://www.pku.edu.cn;https://cloud.tencent.com", "aff_unique_abbr": "Peking U;Tencent AI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "5Ob6DsDv2V", "title": "A Comprehensive Evaluation of Biomedical Entity Linking Models", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Biomedical entity linking (BioEL) is the process of connecting entities referenced in documents to entries in biomedical databases such as the Unified Medical Language System (UMLS) or Medical Subject Headings (MeSH). The study objective was to comprehensively evaluate nine recent state-of-the-art biomedical entity linking models under a unified framework. 
We compare these models along axes of (1) accuracy, (2) speed, (3) ease of use, (4) generalization, and (5) adaptability to new ontologies and datasets. We additionally quantify the impact of various preprocessing choices such as abbreviation detection. Systematic evaluation reveals several notable gaps in current methods. In particular, current methods struggle to correctly link genes and proteins and often have difficulty effectively incorporating context into linking decisions. To expedite future development and baseline testing, we release our unified evaluation framework and all included models on GitHub at https://github.com/davidkartchner/biomedical-entity-linking", "keywords": "Entity Linking;Entity Normalization;Candidate Generation;Biomedical Natural Language Processing", "primary_area": "", "supplementary_material": "", "author": "David Kartchner;Jennifer Deng;Shubham Lohiya;Tejasri Kopparthi;Prasanth Bathala;Daniel Domingo-Fern\u00e1ndez;Cassie S. Mitchell", "authorids": "~David_Kartchner1;~Jennifer_Deng1;~Shubham_Lohiya1;~Tejasri_Kopparthi1;~Prasanth_Bathala1;~Daniel_Domingo-Fern\u00e1ndez1;~Cassie_S._Mitchell1", "gender": "M;F;M;F;M;M;F", "homepage": "https://davidkartchner.com/;;https://shubhlohiya.github.io/;https://www.linkedin.com/in/tejasri-kopparthi-7483a9132/;;;", "dblp": ";;;;;;222/3800", "google_scholar": "di5ncfUAAAAJ;;UEZIZVcAAAAJ;;87tlD5wAAAAJ;U87tAVgAAAAJ;https://scholar.google.com/citations?hl=en", "or_profile": "~David_Kartchner1;~Jennifer_Deng1;~Shubham_Lohiya1;~Tejasri_Kopparthi1;~Prasanth_Bathala1;~Daniel_Domingo-Fern\u00e1ndez1;~Cassie_S._Mitchell1", "aff": "Georgia Institute of Technology;Georgia Institute of Technology;Georgia Institute of Technology;;Georgia Institute of Technology;Enveda;Georgia Institute of Technology", "aff_domain": "gatech.edu;gatech.edu;gatech.edu;;gatech.edu;uni-bonn.de;gatech.edu", "position": "PhD student;Undergrad student;MS student;;MS student;Researcher;Associate Professor", "bibtex": "@inproceedings{\nkartchner2023a,\ntitle={A Comprehensive Evaluation of Biomedical Entity Linking Models},\nauthor={David Kartchner and Jennifer Deng and Shubham Lohiya and Tejasri Kopparthi and Prasanth Bathala and Daniel Domingo-Fern{\\'a}ndez and Cassie S. 
Mitchell},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=5Ob6DsDv2V}\n}", "github": "", "project": "", "reviewers": "wM6u;LFDp;iWzH", "site": "https://openreview.net/forum?id=5Ob6DsDv2V", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;5;4", "excitement": "3;4;3", "reproducibility": "4;4;3", "correctness": "3;4;3", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 9, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0003-1937-1840;;0009-0002-7703-0462;;0009-0003-4457-5412;;", "linkedin": "david-s-kartchner/;jennifer-deng-316359185/;lohiya-shubham/;tejasri-kopparthi-7483a9132/;prasanthbathala/;;", "aff_unique_index": "0;0;0;0;1;0", "aff_unique_norm": "Georgia Institute of Technology;Enveda", "aff_unique_dep": ";", "aff_unique_url": "https://www.gatech.edu;", "aff_unique_abbr": "Georgia Tech;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States;" }, { "id": "5PvFFNRTbp", "title": "A Frustratingly Easy Post-Training Quantization Scheme for LLMs", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Efficient inference has become crucial for hyper-scale AI models, including large language models, as their parameter count continues to increase for enhanced performance.\nThis necessity holds true regardless of the computing environment, whether it be mobile devices or cloud servers.\nQuantization emerges as a solution to alleviate the computational burden during inference.\nBy representing models with a reduced bit-width, quantization minimizes the frequency of DRAM access while fully exploiting the parallelism of operations through a dense matrix format.\nConsequently, quantized models achieve low end-to-end latency and optimize resource utilization by addressing both memory and computing bottlenecks.\nIn this paper, we propose a straightforward post-training quantization scheme, called \\textsc{Z-Fold}, that fully utilizes the feature of the Transformer structure widely employed in large language models.", "keywords": "Quantization;Efficient LLM;Model Compression", "primary_area": "", "supplementary_material": "", "author": "Yongkweon Jeon;Chungman Lee;Kyungphil Park;Ho-young Kim", "authorids": "~Yongkweon_Jeon1;~Chungman_Lee1;~Kyungphil_Park1;~Ho-young_Kim1", "gender": ";;;", "homepage": ";;;", "dblp": ";245/8270;;", "google_scholar": ";https://scholar.google.co.kr/citations?user=cyYuNF8AAAAJ;;", "or_profile": "~Yongkweon_Jeon1;~Chungman_Lee1;~Kyungphil_Park1;~Ho-young_Kim1", "aff": ";Samsung Research;Samsung Research;Samsung Electronics", "aff_domain": ";samsung.com;samsung.com;samsung.com", "position": ";Researcher;Researcher;Researcher", "bibtex": "@inproceedings{\njeon2023a,\ntitle={A Frustratingly Easy Post-Training Quantization Scheme for {LLM}s},\nauthor={Yongkweon Jeon and Chungman Lee and Kyungphil Park and Ho-young Kim},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=5PvFFNRTbp}\n}", "github": "", "project": "", "reviewers": "UHNK;GYyE;XxAx", "site": "https://openreview.net/forum?id=5PvFFNRTbp", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;2;4", "excitement": "3;3;3", "reproducibility": "4;4;3", "correctness": "4;4;2", 
"rating_avg": 4.0, "confidence_avg": 3.0, "excitement_avg": 3.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 10, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0003-1843-4627;;", "linkedin": ";;kyung-phil-park-7b5a2a162/;ho-young-kim-b406301bb", "aff_unique_index": "0;0;0", "aff_unique_norm": "Samsung", "aff_unique_dep": "Samsung Research", "aff_unique_url": "https://research.samsung.com", "aff_unique_abbr": "Samsung", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "South Korea" }, { "id": "5QNpjtdjD8", "title": "Exploring the Boundaries of GPT-4 in Radiology", "track": "main", "status": "Long Main", "tldr": "", "abstract": "The recent success of general-domain large language models (LLMs) has significantly changed the natural language processing paradigm towards a unified foundation model across domains and applications. In this paper, we focus on assessing the performance of GPT-4, the most capable LLM so far, on the text-based applications for radiology reports, comparing against state-of-the-art (SOTA) radiology-specific models. Exploring various prompting strategies, we evaluated GPT-4 on a diverse range of common radiology tasks and we found GPT-4 either outperforms or is on par with current SOTA radiology models. With zero-shot prompting, GPT-4 already obtains substantial gains \n($\\approx$ 10\\% absolute improvement) over radiology models in temporal sentence similarity classification (accuracy) and natural language inference ($F_1$). For tasks that require learning dataset-specific style or schema (e.g. findings summarisation), GPT-4 improves with example-based prompting and matches supervised SOTA. Our extensive error analysis with a board-certified radiologist shows GPT-4 has a sufficient level of radiology knowledge with only occasional errors in complex context that require nuanced domain knowledge. For findings summarisation, GPT-4 outputs are found to be overall comparable with existing manually-written impressions.", "keywords": "benchmarking GPT-4;radiology;evaluation;large language model", "primary_area": "", "supplementary_material": "", "author": "Qianchu Liu;Stephanie Hyland;Shruthi Bannur;Kenza Bouzid;Daniel C. Castro;Maria Teodora Wetscherek;Robert Tinn;Harshita Sharma;Fernando P\u00e9rez-Garc\u00eda;Anton Schwaighofer;Pranav Rajpurkar;Sameer Tajdin Khanna;Hoifung Poon;Naoto Usuyama;Anja Thieme;Aditya V. Nori;Matthew P. 
Lungren;Ozan Oktay;Javier Alvarez-Valle", "authorids": "~Qianchu_Liu1;~Stephanie_Hyland1;~Shruthi_Bannur1;~Kenza_Bouzid1;~Daniel_C._Castro1;~Maria_Teodora_Wetscherek1;~Robert_Tinn1;~Harshita_Sharma1;~Fernando_P\u00e9rez-Garc\u00eda1;~Anton_Schwaighofer1;~Pranav_Rajpurkar1;~Sameer_Tajdin_Khanna1;~Hoifung_Poon1;~Naoto_Usuyama1;~Anja_Thieme1;~Aditya_V._Nori1;~Matthew_P._Lungren1;~Ozan_Oktay3;~Javier_Alvarez-Valle1", "gender": ";F;;;;F;;F;M;M;;;M;M;F;M;;;M", "homepage": "https://qianchu.github.io/;https://sthy.land;https://www.microsoft.com/en-us/research/people/shbannur/;;;;;https://www.drharshitasharma.com/;http://www.fepegar.com;;;;https://www.microsoft.com/en-us/research/people/hoifung/;https://www.microsoft.com/en-us/research/people/naotous/;https://www.designandwellbeing.com/;https://www.microsoft.com/en-us/research/people/adityan/;;https://www.microsoft.com/en-us/research/people/ozoktay/;", "dblp": "219/5575.html;170/0022;227/8068;;255/6967;;https://dblp.dagstuhl.de/pid/271/4302.html;166/4771;260/6785;21/4279;;;78/4609;154/3752;;n/AdityaVNori;;;281/7037", "google_scholar": "https://scholar.google.co.uk/citations?user=xkRPN6gAAAAJ;https://scholar.google.ch/citations?user=0xSZkHoAAAAJ;;;UT-RATkAAAAJ;https://scholar.google.co.uk/citations?user=GrXVUD8AAAAJ;;https://scholar.google.co.uk/citations?user=sTHDnuEAAAAJ;https://scholar.google.co.uk/;;;79PQ_bMAAAAJ;yqqmVbkAAAAJ;;UjbEYJQAAAAJ;qXTt3dUAAAAJ;z1UtMSYAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.co.uk/citations?user=ojoRDc4AAAAJ", "or_profile": "~Qianchu_Liu1;~Stephanie_Hyland1;~Shruthi_Bannur1;~Kenza_Bouzid1;~Daniel_C._Castro1;~Maria_Teodora_Wetscherek1;~Robert_Tinn1;~Harshita_Sharma1;~Fernando_P\u00e9rez-Garc\u00eda1;~Anton_Schwaighofer1;~Pranav_Rajpurkar1;~Sameer_Tajdin_Khanna1;~Hoifung_Poon1;~Naoto_Usuyama1;~Anja_Thieme1;~Aditya_V._Nori1;~Matthew_P._Lungren1;~Ozan_Oktay3;~Javier_Alvarez-Valle1", "aff": "Microsoft Research;Microsoft Research;Microsoft;Microsoft;Imperial College London;Cambridge University Hospitals NHS Foundation Trust;Microsoft;Microsoft;;Microsoft;;Fortinet;Microsoft;Microsoft;Microsoft;Microsoft Research;Microsoft;Microsoft Research;Microsoft", "aff_domain": "research.microsoft.com;microsoft.com;microsoft.com;microsoft.com;imperial.ac.uk;cuh.nhs.uk;microsoft.com;microsoft.com;;microsoft.com;;fortinet.com;microsoft.com;microsoft.com;microsoft.com;microsoft.com;microsoft.com;research.microsoft.com;microsoft.com", "position": "Researcher;Researcher;Applied Researcher;Researcher;Honorary Research Fellow;Consultant Cardiothoracic Radiologist;Researcher;Researcher;;Researcher;;Researcher;General Manager;Researcher;Researcher;Researcher;Principal Researcher;Principal Researcher;Senior Director of Biomedical Imaging", "bibtex": "@inproceedings{\nliu2023exploring,\ntitle={Exploring the Boundaries of {GPT}-4 in Radiology},\nauthor={Qianchu Liu and Stephanie Hyland and Shruthi Bannur and Kenza Bouzid and Daniel C. Castro and Maria Teodora Wetscherek and Robert Tinn and Harshita Sharma and Fernando P{\\'e}rez-Garc{\\'\\i}a and Anton Schwaighofer and Pranav Rajpurkar and Sameer Tajdin Khanna and Hoifung Poon and Naoto Usuyama and Anja Thieme and Aditya V. Nori and Matthew P. 
Lungren and Ozan Oktay and Javier Alvarez-Valle},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=5QNpjtdjD8}\n}", "github": "", "project": "", "reviewers": "cPeU;Qzoi;rA88", "site": "https://openreview.net/forum?id=5QNpjtdjD8", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;5", "excitement": "4;4;3", "reproducibility": "3;4;5", "correctness": "4;4;4", "rating_avg": 4.0, "confidence_avg": 4.333333333333333, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.0, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 19, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0001-5750-7628;;0000-0002-6829-7045;0000-0003-2924-7587;;0000-0003-4683-2606;0000-0001-9090-3024;;;;0000-0002-9067-0918;0000-0003-0888-929X;0000-0002-9639-5531;;;;0000-0003-0906-4177", "linkedin": ";;;kenza-bouzid/;;;robert-tinn/;sharmaharshita1/;fernandoperezgarcia/;;;sameer-khanna/;hoifung-poon-9559943/;;;adityanori/;;;javieralvarezvalle/", "aff_unique_index": "0;0;0;0;1;2;0;0;0;3;0;0;0;0;0;0;0", "aff_unique_norm": "Microsoft;Imperial College London;Cambridge University Hospitals NHS Foundation Trust;Fortinet", "aff_unique_dep": "Microsoft Research;;;", "aff_unique_url": "https://www.microsoft.com/en-us/research;https://www.imperial.ac.uk;https://www.cuh.nhs.uk;https://www.fortinet.com", "aff_unique_abbr": "MSR;ICL;;Fortinet", "aff_campus_unique_index": "1", "aff_campus_unique": ";Cambridge", "aff_country_unique_index": "0;0;0;0;1;1;0;0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States;United Kingdom" }, { "id": "5TEfD2GBUc", "title": "FANToM: A Benchmark for Stress-testing Machine Theory of Mind in Interactions", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Theory of mind (ToM) evaluations currently focus on testing models using passive narratives that inherently lack interactivity. We introduce FANToM, a new benchmark designed to stress-test ToM within information-asymmetric conversational contexts via question answering. Our benchmark draws upon important theoretical requisites from psychology and necessary empirical considerations when evaluating large language models (LLMs). In particular, we formulate multiple types of questions that demand the same underlying reasoning to identify illusory or false sense of ToM capabilities in LLMs. 
We show that FANToM is challenging for state-of-the-art LLMs, which perform significantly worse than humans even with chain-of-thought reasoning or fine-tuning.", "keywords": "theory of mind;benchmark;interaction;conversation;large language model;llm", "primary_area": "", "supplementary_material": "", "author": "Hyunwoo Kim;Melanie Sclar;Xuhui Zhou;Ronan Le Bras;Gunhee Kim;Yejin Choi;Maarten Sap", "authorids": "~Hyunwoo_Kim3;~Melanie_Sclar1;~Xuhui_Zhou1;~Ronan_Le_Bras1;~Gunhee_Kim1;~Yejin_Choi1;~Maarten_Sap1", "gender": "M;F;M;M;M;F;M", "homepage": "http://hyunwookim.com;https://msclar.github.io;https://xuhuizhou.github.io/;https://rlebras.github.io/index.html;http://vision.snu.ac.kr/gunhee/;https://yejinc.github.io/;http://maartensap.com", "dblp": "02/8768-2;274/6796;;;45/115;89/579-1;153/9519", "google_scholar": "https://scholar.google.co.kr/citations?user=PAXFuxsAAAAJ;4uNPtZgAAAAJ;CKyX_Y8AAAAJ;8dXLDSsAAAAJ;https://scholar.google.co.kr/citations?user=CiSdOV0AAAAJ;vhP-tlcAAAAJ;gFN4QUYAAAAJ", "or_profile": "~Hyunwoo_Kim3;~Melanie_Sclar1;~Xuhui_Zhou1;~Ronan_Le_Bras1;~Gunhee_Kim1;~Yejin_Choi1;~Maarten_Sap1", "aff": "Seoul National University;University of Washington, Seattle;Carnegie Mellon University;Allen Institute for Artificial Intelligence;Seoul National University;Department of Computer Science, University of Washington;Carnegie Mellon University", "aff_domain": "snu.ac.kr;uw.edu;andrew.cmu.edu;allenai.org;snu.ac.kr;cs.washington.edu;cmu.edu", "position": "PhD student;PhD student;PhD student;Researcher;Full Professor;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nkim2023fantom,\ntitle={{FANT}oM: A Benchmark for Stress-testing Machine Theory of Mind in Interactions},\nauthor={Hyunwoo Kim and Melanie Sclar and Xuhui Zhou and Ronan Le Bras and Gunhee Kim and Yejin Choi and Maarten Sap},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=5TEfD2GBUc}\n}", "github": "", "project": "", "reviewers": "8JV5;iirp;o4fC;we1R", "site": "https://openreview.net/forum?id=5TEfD2GBUc", "pdf_size": 0, "rating": "4;4;4;4", "confidence": "3;3;1;2", "excitement": "4;2;3;4", "reproducibility": "3;5;4;4", "correctness": "4;4;3;3", "rating_avg": 4.0, "confidence_avg": 2.25, "excitement_avg": 3.25, "reproducibility_avg": 4.0, "correctness_avg": 3.5, "replies_avg": 14, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0009-0002-2714-1287;;;;0000-0002-9543-7453;;", "linkedin": "hyunw-kim/;melanie-sclar-077047b5/;;;;;", "aff_unique_index": "0;1;2;3;0;1;2", "aff_unique_norm": "Seoul National University;University of Washington;Carnegie Mellon University;Allen Institute for Artificial Intelligence", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.snu.ac.kr;https://www.washington.edu;https://www.cmu.edu;https://allenai.org", "aff_unique_abbr": "SNU;UW;CMU;AI2", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Seattle", "aff_country_unique_index": "0;1;1;1;0;1;1", "aff_country_unique": "South Korea;United States" }, { "id": "5UW6Mivj9M", "title": "Let GPT be a Math Tutor: Teaching Math Word Problem Solvers with Customized Exercise Generation", "track": "main", "status": "Long Main", "tldr": "", "abstract": "In this paper, we present a novel approach for distilling math word problem solving capabilities from large language models (LLMs) into smaller, more efficient student models. 
Our approach is designed to consider the student model's weaknesses and foster a tailored learning experience by generating targeted exercises aligned with educational science principles, such as knowledge tracing and personalized learning. Concretely, we let GPT-3 be a math tutor and run two steps iteratively: 1) assessing the student model's current learning status on a GPT-generated exercise book, and 2) improving the student model by training it with tailored exercise samples generated by GPT-3. Experimental results reveal that our approach outperforms LLMs (e.g., GPT-3 and PaLM) in accuracy across three distinct benchmarks while employing significantly fewer parameters. Furthermore, we provide a comprehensive analysis of the various components within our methodology to substantiate their efficacy.", "keywords": "Mathematical Reasoning;Large Languague Models;Customized Learning", "primary_area": "", "supplementary_material": "", "author": "Zhenwen Liang;Wenhao Yu;Tanmay Rajpurohit;Peter Clark;Xiangliang Zhang;Ashwin Kalyan", "authorids": "~Zhenwen_Liang1;~Wenhao_Yu2;~Tanmay_Rajpurohit1;~Peter_Clark1;~Xiangliang_Zhang1;~Ashwin_Kalyan6", "gender": "M;M;M;M;F;M", "homepage": "https://zhenwen-nlp.github.io/;https://wyu97.github.io/;;https://allenai.org/team/peterc;https://sites.nd.edu/xiangliang-zhang/;http://ashwinkalyan.com/", "dblp": "226/6083;159/8117-2.html;;34/1184;74/1890-1;173/5217", "google_scholar": "4rKhF2AAAAAJ;z4qSdX8AAAAJ;B4NztA8AAAAJ;o-5vyEsAAAAJ;BhRJe4wAAAAJ;KYHL9aIAAAAJ", "or_profile": "~Zhenwen_Liang1;~Wenhao_Yu2;~Tanmay_Rajpurohit1;~Peter_Clark1;~Xiangliang_Zhang1;~Ashwin_Kalyan_Vijayakumar1", "aff": "University of Notre Dame;University of Notre Dame;Independent Researcher;Allen Institute for Artificial Intelligence;University of Notre Dame;Allen Institute for Artificial Intelligence", "aff_domain": "nd.edu;nd.edu;tanmay.one;allenai.org;nd.edu;allenai.org", "position": "PhD student;PhD student;Researcher;Senior Research Manager;Associate Professor;Research Scientist", "bibtex": "@inproceedings{\nliang2023let,\ntitle={Let {GPT} be a Math Tutor: Teaching Math Word Problem Solvers with Customized Exercise Generation},\nauthor={Zhenwen Liang and Wenhao Yu and Tanmay Rajpurohit and Peter Clark and Xiangliang Zhang and Ashwin Kalyan},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=5UW6Mivj9M}\n}", "github": "", "project": "", "reviewers": "weic;xeUf;Stk4", "site": "https://openreview.net/forum?id=5UW6Mivj9M", "pdf_size": 0, "rating": "5;5;5", "confidence": "3;5;3", "excitement": "4;3;4", "reproducibility": "3;2;3", "correctness": "4;4;4", "rating_avg": 5.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 2.6666666666666665, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-4075-5980;;;0000-0002-3574-5665;", "linkedin": ";;tanmay-rajpurohit-b13942125/;peter-clark-a8b556/;;", "aff_unique_index": "0;0;1;2;0;2", "aff_unique_norm": "University of Notre Dame;Independent Researcher;Allen Institute for Artificial Intelligence", "aff_unique_dep": ";;", "aff_unique_url": "https://www.nd.edu;;https://allenai.org", "aff_unique_abbr": "Notre Dame;;AI2", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States;" }, { "id": "5ZHznxXCIb", "title": "Context-faithful Prompting for 
Large Language Models", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Large language models (LLMs) encode parametric knowledge about world facts and have shown remarkable performance in knowledge-driven NLP tasks. However, their reliance on parametric knowledge may cause them to overlook contextual cues, leading to incorrect predictions in context-sensitive NLP tasks (e.g., knowledge acquisition tasks). In this paper, we seek to assess and enhance LLMs' contextual faithfulness in two aspects: knowledge conflict and prediction with abstention. We demonstrate that LLMs' faithfulness can be significantly improved using carefully designed prompting strategies. In particular, we identify opinion-based prompts and counterfactual demonstrations as the most effective methods. Opinion-based prompts reframe the context as a narrator's statement and inquire about the narrator's opinions, while counterfactual demonstrations use instances containing false facts to improve faithfulness in knowledge conflict situations. Neither technique requires additional training. We conduct experiments on three datasets of two standard NLP tasks, machine reading comprehension and relation extraction, and the results demonstrate significant improvement in faithfulness to contexts. Code and data are released at https://github.com/wzhouad/context-faithful-llm.", "keywords": "Large language models;knowledge update;prompt", "primary_area": "", "supplementary_material": "", "author": "Wenxuan Zhou;Sheng Zhang;Hoifung Poon;Muhao Chen", "authorids": "~Wenxuan_Zhou2;~Sheng_Zhang9;~Hoifung_Poon1;~Muhao_Chen1", "gender": "M;M;M;M", "homepage": "https://wzhouad.github.io/;https://sheng-z.github.io/;https://www.microsoft.com/en-us/research/people/hoifung/;https://muhaochen.github.io/", "dblp": ";69/6137-12;78/4609;173/2608", "google_scholar": "https://scholar.google.com/citations?hl=en;-LVEXQ8AAAAJ;yqqmVbkAAAAJ;k79yEZkAAAAJ", "or_profile": "~Wenxuan_Zhou2;~Sheng_Zhang9;~Hoifung_Poon1;~Muhao_Chen1", "aff": "University of Southern California;Microsoft;Microsoft;University of Southern California", "aff_domain": "usc.edu;microsoft.com;microsoft.com;usc.edu", "position": "PhD student;Researcher;General Manager;Assistant Research Professor", "bibtex": "@inproceedings{\nzhou2023contextfaithful,\ntitle={Context-faithful Prompting for Large Language Models},\nauthor={Wenxuan Zhou and Sheng Zhang and Hoifung Poon and Muhao Chen},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=5ZHznxXCIb}\n}", "github": "", "project": "", "reviewers": "RhhF;FEGa;8jFq", "site": "https://openreview.net/forum?id=5ZHznxXCIb", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;3;1", "excitement": "3;4;3", "reproducibility": "5;4;4", "correctness": "4;3;3", "rating_avg": 4.0, "confidence_avg": 2.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.333333333333333, "correctness_avg": 3.3333333333333335, "replies_avg": 10, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0002-9067-0918;0000-0003-0118-3147", "linkedin": ";sheng-z/;hoifung-poon-9559943/;", "aff_unique_index": "0;1;1;0", "aff_unique_norm": "University of Southern California;Microsoft", "aff_unique_dep": ";Microsoft Corporation", "aff_unique_url": "https://www.usc.edu;https://www.microsoft.com", "aff_unique_abbr": "USC;Microsoft", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Los Angeles;", 
"aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "5jc17fMzqf", "title": "1-PAGER: One Pass Answer Generation and Evidence Retrieval", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "We present 1-Pager the first system that answers a question and retrieves evidence using a single Transformer-based model and decoding process.\n1-Pager incrementally partitions the retrieval corpus using constrained decoding to select a document and answer string, and we show that this is competitive with comparable retrieve-and-read alternatives according to both retrieval and answer accuracy metrics. 1-Pager also outperforms the equivalent `closed-book' question answering model, by grounding predictions in an evidence corpus.\nWhile 1-Pager is not yet on-par with more expensive systems that read many more documents before generating an answer, we argue that it provides an important step toward attributed generation by folding retrieval into the sequence-to-sequence paradigm that is currently dominant in NLP.\nWe also show that the search paths used to partition the corpus are easy to read and understand, paving a way forward for interpretable neural retrieval.", "keywords": "retrieval;openbook qa;generative retrieval", "primary_area": "", "supplementary_material": "", "author": "Palak Jain;Livio Baldini Soares;Tom Kwiatkowski", "authorids": "~Palak_Jain3;~Livio_Baldini_Soares2;~Tom_Kwiatkowski1", "gender": "F;M;M", "homepage": ";https://research.google.com/pubs/105075.html;https://liviosoares.github.io/", "dblp": "221/3845-6;33/9012;178/3562", "google_scholar": "Y6rXrwsAAAAJ;https://scholar.google.no/citations?user=MpZ6dTEAAAAJ;C3s1jqIAAAAJ", "or_profile": "~Palak_Jain3;~Tom_Kwiatkowski1;~Livio_Baldini_Soares1", "aff": "Google;;Google Deepmind", "aff_domain": "google.com;;google.com", "position": "Researcher;;Software Engineer", "bibtex": "@inproceedings{\njain2023pager,\ntitle={1-{PAGER}: One Pass Answer Generation and Evidence Retrieval},\nauthor={Palak Jain and Livio Baldini Soares and Tom Kwiatkowski},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=5jc17fMzqf}\n}", "github": "", "project": "", "reviewers": "Sf64;waPh;RLpb", "site": "https://openreview.net/forum?id=5jc17fMzqf", "pdf_size": 0, "rating": "4;4;4", "confidence": "1;4;4", "excitement": "3;3;4", "reproducibility": "3;4;4", "correctness": "2;3;4", "rating_avg": 4.0, "confidence_avg": 3.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-4344-9356;;", "linkedin": "palak-jain-a083a879/;;", "aff_unique_index": "0;1", "aff_unique_norm": "Google;DeepMind", "aff_unique_dep": "Google;DeepMind", "aff_unique_url": "https://www.google.com;https://deepmind.com", "aff_unique_abbr": "Google;DeepMind", "aff_campus_unique_index": "0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;1", "aff_country_unique": "United States;United Kingdom" }, { "id": "5kV1ZwKMeQ", "title": "A Confederacy of Models: a Comprehensive Evaluation of LLMs on Creative Writing", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "We evaluate a range of recent LLMs on English creative writing, a challenging and complex task that requires imagination, coherence, and style. 
We use a difficult, open-ended scenario chosen to avoid training data reuse: an epic narration of a single combat between Ignatius J. Reilly, the protagonist of the Pulitzer Prize-winning novel A Confederacy of Dunces (1980), and a pterodactyl, a prehistoric flying reptile. We ask several LLMs and humans to write such a story and conduct a human evaluation involving various criteria such as fluency, coherence, originality, humor, and style. Our results show that some state-of-the-art commercial LLMs match or slightly outperform our writers in most dimensions; whereas open-source LLMs lag behind. Humans retain an edge in creativity, while humor shows a binary divide between LLMs that can handle it comparably to humans and those that fail at it. We discuss the implications and limitations of our study and suggest directions for future research.", "keywords": "LLMs;language models;creative writing;evaluation;text generation;storytelling;creativity", "primary_area": "", "supplementary_material": "", "author": "Carlos G\u00f3mez-Rodr\u00edguez;Paul Williams", "authorids": "~Carlos_G\u00f3mez-Rodr\u00edguez1;~Paul_Williams1", "gender": "M;M", "homepage": "http://www.grupolys.org/~cgomezr;https://www.paulwilliamsauthor.com", "dblp": "95/3319;", "google_scholar": "BeNhySQAAAAJ;-MoFHNQAAAAJ", "or_profile": "~Carlos_G\u00f3mez-Rodr\u00edguez1;~Paul_Williams1", "aff": "Universidade da Coru\u00f1a;University of the Sunshine Coast", "aff_domain": "udc.es;usc.edu.au", "position": "Full Professor;Lecturer", "bibtex": "@inproceedings{\ng{\\'o}mez-rodr{\\'\\i}guez2023a,\ntitle={A Confederacy of Models: a Comprehensive Evaluation of {LLM}s on Creative Writing},\nauthor={Carlos G{\\'o}mez-Rodr{\\'\\i}guez and Paul Williams},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=5kV1ZwKMeQ}\n}", "github": "", "project": "", "reviewers": "FQuN;UZhg;35Yu", "site": "https://openreview.net/forum?id=5kV1ZwKMeQ", "pdf_size": 0, "rating": "4;4;4", "confidence": "5;4;4", "excitement": "4;2;4", "reproducibility": "4;3;4", "correctness": "5;2;4", "rating_avg": 4.0, "confidence_avg": 4.333333333333333, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0003-0752-8812;0000-0001-8250-4744", "linkedin": ";", "aff_unique_index": "0;1", "aff_unique_norm": "University of A Coru\u00f1a;University of the Sunshine Coast", "aff_unique_dep": ";", "aff_unique_url": "https://www.udc.es;https://www.usc.edu.au", "aff_unique_abbr": "UDC;USC", "aff_campus_unique_index": "0", "aff_campus_unique": "A Coru\u00f1a;", "aff_country_unique_index": "0;1", "aff_country_unique": "Spain;Australia" }, { "id": "5nHLFcj7Y9", "title": "Text Representation Distillation via Information Bottleneck Principle", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Pre-trained language models (PLMs) have recently shown great success in text representation field. However, the high computational cost and high-dimensional representation of PLMs pose significant challenges for practical applications. To make models more accessible, an effective method is to distill large models into smaller representation models. In order to relieve the issue of performance degradation after distillation, we propose a novel Knowledge Distillation method called \textbf{IBKD}. 
This approach is motivated by the Information Bottleneck principle and aims to maximize the mutual information between the final representation of the teacher and student model, while simultaneously reducing the mutual information between the student model's representation and the input data. This enables the student model to preserve important learned information while avoiding unnecessary information, thus reducing the risk of over-fitting. Empirical studies on two main downstream applications of text representation (Semantic Textual Similarity and Dense Retrieval tasks) demonstrate the effectiveness of our proposed approach.", "keywords": "knowledge distillation;text representation;text retrieval;language model", "primary_area": "", "supplementary_material": "", "author": "Yanzhao Zhang;Dingkun Long;Zehan Li;Pengjun Xie", "authorids": "~Yanzhao_Zhang1;~Dingkun_Long1;~Zehan_Li1;~Pengjun_Xie2", "gender": "M;M;M;M", "homepage": ";;https://github.com/jordane95;", "dblp": "244/0823;190/7094.html;;212/1755.html", "google_scholar": ";;;", "or_profile": "~Yanzhao_Zhang1;~Dingkun_Long1;~Zehan_Li1;~Pengjun_Xie2", "aff": "Alibaba Group;Alibaba Group;Beihang University;Alibaba Group", "aff_domain": "alibaba-inc.com;alibaba-inc.com;buaa.edu.cn;alibaba-inc.com", "position": "Researcher;Researcher;MS student;Researcher", "bibtex": "@inproceedings{\nzhang2023text,\ntitle={Text Representation Distillation via Information Bottleneck Principle},\nauthor={Yanzhao Zhang and Dingkun Long and Zehan Li and Pengjun Xie},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=5nHLFcj7Y9}\n}", "github": "", "project": "", "reviewers": "gS4Y;BBAJ;QEfC", "site": "https://openreview.net/forum?id=5nHLFcj7Y9", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;3", "excitement": "3;4;4", "reproducibility": "5;4;4", "correctness": "3;4;5", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.333333333333333, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0009-0003-6581-7783;0000-0001-6570-9406;;", "linkedin": ";;;", "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Alibaba Group;Beihang University", "aff_unique_dep": ";", "aff_unique_url": "https://www.alibaba.com;http://www.buaa.edu.cn/", "aff_unique_abbr": "Alibaba;BUAA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "5o4a4OjhQW", "title": "What Comes Next? Evaluating Uncertainty in Neural Text Generators Against Human Production Variability", "track": "main", "status": "Long Main", "tldr": "", "abstract": "In Natural Language Generation (NLG) tasks, for any input, multiple communicative goals are plausible, and any goal can be put into words, or produced, in multiple ways. We characterise the extent to which human production varies lexically, syntactically, and semantically across four NLG tasks, connecting human production variability to aleatoric or data uncertainty. We then inspect the space of output strings shaped by a generation system's predicted probability distribution and decoding algorithm to probe its uncertainty. For each test input, we measure the generator's calibration to human production variability. 
Following this instance-level approach, we analyse NLG models and decoding strategies, demonstrating that probing a generator with multiple samples and, when possible, multiple references, provides the level of detail necessary to gain understanding of a model's representation of uncertainty.", "keywords": "uncertainty;NLG;variability;language production;text generation", "primary_area": "", "supplementary_material": "", "author": "Mario Giulianelli;Joris Baan;Wilker Aziz;Raquel Fern\u00e1ndez;Barbara Plank", "authorids": "~Mario_Giulianelli1;~Joris_Baan1;~Wilker_Aziz1;~Raquel_Fern\u00e1ndez1;~Barbara_Plank2", "gender": "M;M;M;F;", "homepage": "https://glnmario.github.io;https://jorisbaan.nl;http://wilkeraziz.github.io;http://www.illc.uva.nl/~raquel;https://bplank.github.io/", "dblp": "205/2569;242/8448.html;51/10489;02/5384;46/521", "google_scholar": "https://scholar.google.it/citations?user=ABZghWYAAAAJ;https://scholar.google.be/citations?user=wYjlvvwAAAAJ;phgBJXYAAAAJ;https://scholar.google.com/citations?hl=en;", "or_profile": "~Mario_Giulianelli1;~Joris_Baan1;~Wilker_Aziz1;~Raquel_Fern\u00e1ndez1;~Barbara_Plank2", "aff": "University of Amsterdam;University of Amsterdam;University of Amsterdam;University of Amsterdam;IT University of Copenhagen", "aff_domain": "uva.nl;uva.nl;uva.nl;uva.nl;itu.dk", "position": "PhD student;PhD student;Assistant Professor;Associate Professor;Full Professor", "bibtex": "@inproceedings{\ngiulianelli2023what,\ntitle={What Comes Next? Evaluating Uncertainty in Neural Text Generators Against Human Production Variability},\nauthor={Mario Giulianelli and Joris Baan and Wilker Aziz and Raquel Fern{\\'a}ndez and Barbara Plank},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=5o4a4OjhQW}\n}", "github": "", "project": "", "reviewers": "mAJR;hk61;haSd", "site": "https://openreview.net/forum?id=5o4a4OjhQW", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;4;3", "excitement": "4;4;3", "reproducibility": "2;3;3", "correctness": "4;3;4", "rating_avg": 5.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 2.6666666666666665, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0000-0001-5540-5943;", "linkedin": ";joris-baan-669324b3/;;raquel-fernandez-13578148/;", "aff_unique_index": "0;0;0;0;1", "aff_unique_norm": "University of Amsterdam;IT University of Copenhagen", "aff_unique_dep": ";", "aff_unique_url": "https://www.uva.nl;https://itu.dk", "aff_unique_abbr": "UvA;ITU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;1", "aff_country_unique": "Netherlands;Denmark" }, { "id": "5sGLPiG1vE", "title": "When are Lemons Purple? The Concept Association Bias of Vision-Language Models", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Large-scale vision-language models such as CLIP have shown impressive performance on zero-shot image classification and image-to-text retrieval. However, such performance does not realize in tasks that require a finer-grained correspondence between vision and language, such as Visual Question Answering (VQA). We investigate why this is the case, and report an interesting phenomenon of vision-language models, which we call the Concept Association Bias (CAB), as a potential cause of the difficulty of applying these models to VQA and similar tasks. 
We find that models with CAB tend to treat input as a bag of concepts and attempt to fill in the other missing concept crossmodally, leading to an unexpected zero-shot prediction. We demonstrate CAB by showing that CLIP's zero-shot classification performance greatly suffers when there is a strong concept association between an object (e.g. eggplant) and an attribute (e.g. color purple). We also show that the strength of CAB predicts the performance on VQA. We observe that CAB is prevalent in vision-language models trained with contrastive losses, even when autoregressive losses are jointly employed. However, a model that solely relies on autoregressive loss seems to exhibit minimal or no signs of CAB.", "keywords": "vision and language;bias", "primary_area": "", "supplementary_material": "", "author": "Yingtian Tang;Yutaro Yamada;Yoyo Minzhi Zhang;Ilker Yildirim", "authorids": "~Yingtian_Tang1;~Yutaro_Yamada1;~Yoyo_Minzhi_Zhang1;~Ilker_Yildirim2", "gender": "M;;F;M", "homepage": "https://yingtiandt.github.io/;;;http://cncl.yale.edu/", "dblp": "295/0111;172/1440;;", "google_scholar": ";0ktnXXMAAAAJ;;", "or_profile": "~Yingtian_Tang1;~Yutaro_Yamada1;~Yoyo_Minzhi_Zhang1;~Ilker_Yildirim2", "aff": "University of Pennsylvania;Yale University;Yale University;Yale University", "aff_domain": "upenn.edu;yale.edu;yale.edu;yale.edu", "position": "MS student;PhD student;Intern;Assistant Professor", "bibtex": "@inproceedings{\ntang2023when,\ntitle={When are Lemons Purple? The Concept Association Bias of Vision-Language Models},\nauthor={Yingtian Tang and Yutaro Yamada and Yoyo Minzhi Zhang and Ilker Yildirim},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=5sGLPiG1vE}\n}", "github": "", "project": "", "reviewers": "9Cqi;ftk9;JUDd", "site": "https://openreview.net/forum?id=5sGLPiG1vE", "pdf_size": 0, "rating": "5;5;5", "confidence": "3;3;2", "excitement": "4;3;4", "reproducibility": "4;3;5", "correctness": "4;3;4", "rating_avg": 5.0, "confidence_avg": 2.6666666666666665, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.0, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0009-0004-9870-5574;;;", "linkedin": ";;minzhi-yoyo-zhang;", "aff_unique_index": "0;1;1;1", "aff_unique_norm": "University of Pennsylvania;Yale University", "aff_unique_dep": ";", "aff_unique_url": "https://www.upenn.edu;https://www.yale.edu", "aff_unique_abbr": "UPenn;Yale", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "5uZQ6spv9u", "title": "BRAINTEASER: Lateral Thinking Puzzles for Large Language Models", "track": "main", "status": "Long Main", "tldr": "", "abstract": "The success of language models has inspired the NLP community to attend to tasks that require implicit and complex reasoning, relying on human-like commonsense mechanisms. While such vertical thinking tasks have been relatively popular, lateral thinking puzzles have received little attention. To bridge this gap, we devise BrainTeaser: a multiple-choice Question Answering task designed to test the model\u2019s ability to exhibit lateral thinking and defy default commonsense associations. 
We design a three-step procedure for creating the first lateral thinking benchmark, consisting of data collection, distractor generation, and generation of adversarial examples, leading to 1,100 puzzles with high-quality annotations. To assess the consistency of lateral reasoning by models, we enrich BrainTeaser based on a semantic and contextual reconstruction of its questions. Our experiments with state-of-the-art instruction- and commonsense language models reveal a significant gap between human and model performance, which is further widened when consistency across adversarial formats is considered. We make all of our code and data available to stimulate work on developing and evaluating lateral thinking models.", "keywords": "commonsense reasoning;adversarial robustness;computational creativity", "primary_area": "", "supplementary_material": "", "author": "Yifan Jiang;Filip Ilievski;Kaixin Ma;Zhivar Sourati", "authorids": "~Yifan_Jiang4;~Filip_Ilievski1;~Kaixin_Ma1;~Zhivar_Sourati1", "gender": "M;M;;M", "homepage": "https://yifanjiang-921.github.io//;http://www.ilievski.info;;https://zhpinkman.github.io/", "dblp": ";167/4770;203/9347;317/2968", "google_scholar": "npRM7lYAAAAJ;4ZScBc0AAAAJ;gDIMQp4AAAAJ;giqWNAwAAAAJ", "or_profile": "~Yifan_Jiang4;~Filip_Ilievski1;~Kaixin_Ma1;~Zhivar_Sourati1", "aff": "University of Southern California;University of Southern California;Carnegie Mellon University;University of Southern California", "aff_domain": "usc.edu;usc.edu;cmu.edu;usc.edu", "position": "MS student;Assistant Professor;PhD student;PhD student", "bibtex": "@inproceedings{\njiang2023brainteaser,\ntitle={{BRAINTEASER}: Lateral Thinking Puzzles for Large Language Models},\nauthor={Yifan Jiang and Filip Ilievski and Kaixin Ma and Zhivar Sourati},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=5uZQ6spv9u}\n}", "github": "", "project": "", "reviewers": "KrFG;c7pC;aqde", "site": "https://openreview.net/forum?id=5uZQ6spv9u", "pdf_size": 0, "rating": "5;5;5", "confidence": "2;3;2", "excitement": "4;4;4", "reproducibility": "4;5;4", "correctness": "4;3;4", "rating_avg": 5.0, "confidence_avg": 2.3333333333333335, "excitement_avg": 4.0, "reproducibility_avg": 4.333333333333333, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0003-2851-9210;;;0000-0003-2129-6165", "linkedin": "yifan-jiang-29199122a/;;;zhivarsourati/", "aff_unique_index": "0;0;1;0", "aff_unique_norm": "University of Southern California;Carnegie Mellon University", "aff_unique_dep": ";", "aff_unique_url": "https://www.usc.edu;https://www.cmu.edu", "aff_unique_abbr": "USC;CMU", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Los Angeles;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "5vOHRbLNE7", "title": "HierarchicalContrast: A Coarse-to-Fine Contrastive Learning Framework for Cross-Domain Zero-Shot Slot Filling", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "In task-oriented dialogue scenarios, cross-domain zero-shot slot filling plays a vital role in leveraging source domain knowledge to learn a model with high generalization ability in unknown target domain where annotated data is unavailable. 
However, the existing state-of-the-art zero-shot slot filling methods have limited generalization ability in the target domain: they only show effective knowledge transfer on seen slots and perform poorly on unseen slots. To alleviate this issue, we present a novel Hierarchical Contrastive Learning Framework (HiCL) for zero-shot slot filling. Specifically, we propose a coarse- to fine-grained contrastive learning based on Gaussian-distributed embedding to learn the generalized deep semantic relations between utterance-tokens, by optimizing inter- and intra-token distribution distance. This encourages HiCL to generalize to the slot types unseen at training phase. Furthermore, we present a new iterative label set semantics inference method to unbiasedly and separately evaluate the performance of unseen slot types which are entangled with their counterparts (i.e., seen slot types) in the previous zero-shot slot filling evaluation methods. The extensive empirical experiments on four datasets demonstrate that the proposed method achieves comparable or even better performance than the current state-of-the-art zero-shot slot filling approaches.", "keywords": "slot-filling;task-oriented dialogue;contrastive learning;cross-domain adaption;zero-shot learning", "primary_area": "", "supplementary_material": "", "author": "Junwen Zhang;Yin Zhang", "authorids": "~Junwen_Zhang2;~Yin_Zhang1", "gender": "M;", "homepage": "https://github.com/ai-agi/;https://person.zju.edu.cn/en/0099160", "dblp": ";", "google_scholar": ";", "or_profile": "~Junwen_Zhang2;~Yin_Zhang1", "aff": "Zhejiang University;Zhejiang University", "aff_domain": "zju.edu.cn;zju.edu.cn", "position": "PhD student;Associate Professor", "bibtex": "@inproceedings{\nzhang2023hierarchicalcontrast,\ntitle={HierarchicalContrast: A Coarse-to-Fine Contrastive Learning Framework for Cross-Domain Zero-Shot Slot Filling},\nauthor={Junwen Zhang and Yin Zhang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=5vOHRbLNE7}\n}", "github": "", "project": "", "reviewers": "8SoH;ohcU;GdPM;vCNz", "site": "https://openreview.net/forum?id=5vOHRbLNE7", "pdf_size": 0, "rating": "3;3;3;3", "confidence": "4;4;4;4", "excitement": "3;3;2;4", "reproducibility": "4;3;2;4", "correctness": "3;2;1;3", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 3.0, "reproducibility_avg": 3.25, "correctness_avg": 2.25, "replies_avg": 14, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";", "linkedin": ";", "aff_unique_index": "0;0", "aff_unique_norm": "Zhejiang University", "aff_unique_dep": "", "aff_unique_url": "https://www.zju.edu.cn", "aff_unique_abbr": "ZJU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "id": "5x5Vxclc1K", "title": "SMoP: Towards Efficient and Effective Prompt Tuning with Sparse Mixture-of-Prompts", "track": "main", "status": "Short Main", "tldr": "", "abstract": "Prompt tuning has emerged as a successful parameter-efficient alternative to the full fine-tuning of language models. However, prior works on prompt tuning often utilize long soft prompts of up to 100 tokens to improve performance, overlooking the inefficiency associated with extended inputs. 
In this paper, we propose a novel prompt tuning method $SMoP$ ($S$parse $M$ixture-$o$f-$P$rompts) that utilizes short soft prompts for efficient training and inference while maintaining performance gains typically induced from longer soft prompts. To achieve this, $SMoP$ employs a gating mechanism to train multiple short soft prompts specialized in handling different subsets of the data, providing an alternative to relying on a single long soft prompt to cover the entire data. Experimental results demonstrate that $SMoP$ outperforms baseline methods while reducing training and inference costs. We release our code at https://github.com/jyjohnchoi/SMoP.", "keywords": "Natural Language Processing;Prompt Tuning;Parameter-Efficient Fine-tuning;Mixture-of-Experts", "primary_area": "", "supplementary_material": "", "author": "Joon-Young Choi;Junho Kim;Jun-Hyung Park;Wing-Lam Mok;SangKeun Lee", "authorids": "~Joon-Young_Choi1;~Junho_Kim6;~Jun-Hyung_Park1;~Wing-Lam_Mok1;~SangKeun_Lee1", "gender": "M;M;;;M", "homepage": ";;https://www.jhpark.info;;http://dilab.korea.ac.kr", "dblp": ";;16/716;;73/3458-1", "google_scholar": "a-wQlOEAAAAJ;8BpIZoUAAAAJ;https://scholar.google.com/citations?hl=en;;BGSUpLgAAAAJ", "or_profile": "~Joon-Young_Choi1;~Junho_Kim6;~Jun-Hyung_Park1;~Wing-Lam_Mok1;~SangKeun_Lee1", "aff": "Korea University;Korea University;Korea University;;Korea University", "aff_domain": "korea.ac.kr;korea.ac.kr;korea.ac.kr;;korea.ac.kr", "position": "MS student;PhD student;PhD student;;Full Professor", "bibtex": "@inproceedings{\nchoi2023smop,\ntitle={{SM}oP: Towards Efficient and Effective Prompt Tuning with Sparse Mixture-of-Prompts},\nauthor={Joon-Young Choi and Junho Kim and Jun-Hyung Park and Wing-Lam Mok and SangKeun Lee},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=5x5Vxclc1K}\n}", "github": "", "project": "", "reviewers": "xer5;k8Ru;qdfJ;hgaH", "site": "https://openreview.net/forum?id=5x5Vxclc1K", "pdf_size": 0, "rating": "4;4;4;4", "confidence": "4;4;4;3", "excitement": "3;4;2;3", "reproducibility": "4;3;4;4", "correctness": "3;4;3;3", "rating_avg": 4.0, "confidence_avg": 3.75, "excitement_avg": 3.0, "reproducibility_avg": 3.75, "correctness_avg": 3.25, "replies_avg": 14, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0002-7900-3743;;0000-0002-6249-8217", "linkedin": "joon-young-choi-894317220/;junho-kim-637383253/;jun-hyung-park-901a62252;;", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Korea University", "aff_unique_dep": "", "aff_unique_url": "https://www.korea.ac.kr", "aff_unique_abbr": "KU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "South Korea" }, { "id": "63UKbaiyAe", "title": "Discourse Sense Flows: Modelling the Rhetorical Style of Documents across Various Domains", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Recent research on shallow discourse parsing has given renewed attention to the role of discourse relation signals, in particular explicit connectives and so-called alternative lexicalizations.\nIn our work, we first develop new models for extracting signals and classifying their senses, both for explicit connectives and alternative lexicalizations, based on the Penn Discourse Treebank v3 corpus. 
Thereafter, we apply these models to various raw corpora, and\nwe introduce 'discourse sense flows', a new way of modeling the rhetorical style of a document by the linear order of coherence relations, as captured by the PDTB senses. The corpora span several genres and domains, and we undertake comparative analyses of the sense flows, as well as experiments on automatic genre/domain discrimination using discourse sense flow patterns as features.\nWe find that n-gram patterns are indeed stronger predictors than simple sense (unigram) distributions.", "keywords": "rhetorical style;cross-domain;discourse parsing;discourse signals;connecting phrases;sense recognition", "primary_area": "", "supplementary_material": "", "author": "Rene Knaebel;Manfred Stede", "authorids": "~Rene_Knaebel1;~Manfred_Stede1", "gender": "M;M", "homepage": "https://www.ling.uni-potsdam.de/~stede/;https://www.reneknaebel.de", "dblp": "30/5655;https://dblp.uni-trier.de/pid/243/2670", "google_scholar": "https://scholar.google.de/citations?user=I1wvHnIAAAAJ;https://scholar.google.com/citations?hl=en", "or_profile": "~Manfred_Stede1;~Ren\u00e9_Knaebel1", "aff": "Universit\u00e4t Potsdam;Universit\u00e4t Potsdam", "aff_domain": "uni-potsdam.de;uni-potsdam.de", "position": "Full Professor;PhD student", "bibtex": "@inproceedings{\nknaebel2023discourse,\ntitle={Discourse Sense Flows: Modelling the Rhetorical Style of Documents across Various Domains},\nauthor={Rene Knaebel and Manfred Stede},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=63UKbaiyAe}\n}", "github": "", "project": "", "reviewers": "zypT;jZkU;n3Y1", "site": "https://openreview.net/forum?id=63UKbaiyAe", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;3", "excitement": "4;3;3", "reproducibility": "4;4;3", "correctness": "4;3;3", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0001-6819-2043;", "linkedin": ";", "aff_unique_index": "0;0", "aff_unique_norm": "University of Potsdam", "aff_unique_dep": "", "aff_unique_url": "https://www.uni-potsdam.de", "aff_unique_abbr": "UP", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Germany" }, { "id": "65teZsn7HR", "title": "Exploring Linguistic Properties of Monolingual BERTs with Typological Classification among Languages", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "The impressive achievements of transformers force NLP researchers to delve into how these models represent the underlying structure of natural language.\nIn this paper, we propose a novel standpoint to investigate the above issue: using typological similarities among languages to observe how their respective monolingual models encode structural information.\nWe aim to layer-wise compare transformers for typologically similar languages to observe whether these similarities emerge for particular layers. For this investigation, we propose to use Centered Kernel Alignment to measure similarity among weight matrices. We found that syntactic typological similarity is consistent with the similarity between the weights in the middle layers, which are the pretrained BERT layers to which syntax encoding is generally attributed. 
Moreover, we observe that a domain adaptation on semantically equivalent texts enhances this similarity among weight matrices.", "keywords": "interpretability;CKA;typological similarity;BERT", "primary_area": "", "supplementary_material": "", "author": "Elena Sofia Ruzzetti;Federico Ranaldi;Felicia Logozzo;Michele Mastromattei;Leonardo Ranaldi;Fabio Massimo Zanzotto", "authorids": "~Elena_Sofia_Ruzzetti1;~Federico_Ranaldi1;~Felicia_Logozzo1;~Michele_Mastromattei1;~Leonardo_Ranaldi1;~Fabio_Massimo_Zanzotto1", "gender": "F;M;F;M;M;M", "homepage": ";;https://online.unistrasi.it/docenti/Persona.asp?ID=347;https://itsmattei.github.io/;;http://art.uniroma2.it/zanzotto", "dblp": "302/4055;330/3541.html;;302/4594.html;278/7831;32/797", "google_scholar": "XRi2_woAAAAJ;4hU1e4AAAAAJ;;CmQYOW0AAAAJ;https://scholar.google.com/citations?hl=ien;https://scholar.google.it/citations?user=azv7Qr4AAAAJ", "or_profile": "~Elena_Sofia_Ruzzetti1;~Federico_Ranaldi1;~Felicia_Logozzo1;~Michele_Mastromattei1;~Leonardo_Ranaldi1;~Fabio_Massimo_Zanzotto1", "aff": "Universit\u00e0 degli Studi di Roma Tor Vergata;;University of Italian Studies for Foreigners of Siena;Campus Bio-Medico University of Rome;Universit\u00e0 degli studi Roma Tor Vergata;University of Rome Tor Vergata", "aff_domain": "uniroma2.it;;unistrasi.it;unicampus.it;uniroma2.it;uniroma2.it", "position": "PhD student;;Associate Professor;PhD student;Postdoc;Associate Professor", "bibtex": "@inproceedings{\nruzzetti2023exploring,\ntitle={Exploring Linguistic Properties of Monolingual {BERT}s with Typological Classification among Languages},\nauthor={Elena Sofia Ruzzetti and Federico Ranaldi and Felicia Logozzo and Michele Mastromattei and Leonardo Ranaldi and Fabio Massimo Zanzotto},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=65teZsn7HR}\n}", "github": "", "project": "", "reviewers": "DrBj;WpkQ;j8oq", "site": "https://openreview.net/forum?id=65teZsn7HR", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;3;3", "excitement": "4;3;3", "reproducibility": "4;3;3", "correctness": "4;3;3", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0002-4504-087X;;0000-0001-8488-4146;0000-0002-7301-3596", "linkedin": ";;;;;fabio-massimo-zanzotto-b027831/", "aff_unique_index": "0;1;2;0;3", "aff_unique_norm": "Universit\u00e0 degli Studi di Roma Tor Vergata;University of Siena;Campus Bio-Medico University;University of Rome Tor Vergata", "aff_unique_dep": ";Italian Studies for Foreigners;;", "aff_unique_url": "https://www.uniroma2.it;https://www.unisi.it;https://www.unicampus.it;https://www.uniroma2.it", "aff_unique_abbr": "Uniroma2;Unisi;;UniRoma2", "aff_campus_unique_index": "0;2;0;0", "aff_campus_unique": "Tor Vergata;;Rome", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "Italy" }, { "id": "68A4GE4nqf", "title": "Evaluating Subjective Cognitive Appraisals of Emotions from Large Language Models", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "The emotions we experience involve complex processes; besides physiological aspects, research in psychology has studied cognitive appraisals where people assess their situations subjectively, according to their own values (Scherer, 2005). 
Thus, the same situation can often result in different emotional experiences. While the detection of emotion is a well-established task, there is very limited work so far on the automatic prediction of cognitive appraisals. This work fills the gap by presenting CovidET-Appraisals, the most comprehensive dataset to-date that assesses 24 appraisal dimensions, each with a natural language rationale, across 241 Reddit posts. CovidET-Appraisals presents an ideal testbed to evaluate the ability of large language models \u2014 excelling at a wide range of NLP tasks \u2014 to automatically assess and explain cognitive appraisals. We found that while the best models are performant, open-sourced LLMs fall short at this task, presenting a new challenge in the future development of emotionally intelligent models. We release our dataset at https://github.com/honglizhan/CovidET-Appraisals-Public.", "keywords": "Emotion;Cognitive Appraisal;Large Language Models", "primary_area": "", "supplementary_material": "", "author": "Hongli Zhan;Desmond Ong;Junyi Jessy Li", "authorids": "~Hongli_Zhan1;~Desmond_Ong1;~Junyi_Jessy_Li2", "gender": "M;M;F", "homepage": "https://honglizhan.github.io/;https://cascoglab.psy.utexas.edu/desmond;https://jessyli.com", "dblp": "331/8489;176/0245.html;148/9553", "google_scholar": "https://scholar.google.com/citations?hl=en;;tJGm3-YAAAAJ", "or_profile": "~Hongli_Zhan1;~Desmond_Ong1;~Junyi_Jessy_Li2", "aff": "University of Texas at Austin;University of Texas at Austin;University of Texas at Austin", "aff_domain": "utexas.edu;utexas.edu;utexas.edu", "position": "PhD student;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nzhan2023evaluating,\ntitle={Evaluating Subjective Cognitive Appraisals of Emotions from Large Language Models},\nauthor={Hongli Zhan and Desmond Ong and Junyi Jessy Li},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=68A4GE4nqf}\n}", "github": "", "project": "", "reviewers": "8u8A;TGaY;jg4L", "site": "https://openreview.net/forum?id=68A4GE4nqf", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;3;3", "excitement": "4;3;3", "reproducibility": "4;4;3", "correctness": "3;3;3", "rating_avg": 3.0, "confidence_avg": 3.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": ";;", "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Texas at Austin", "aff_unique_dep": "", "aff_unique_url": "https://www.utexas.edu", "aff_unique_abbr": "UT Austin", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Austin", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "6DKS4tb387", "title": "Gradually Excavating External Knowledge for Implicit Complex Question Answering", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Recently, large language models (LLMs) have gained much attention for the emergence of human-comparable capabilities and huge potential. However, for open-domain implicit question-answering problems, LLMs may not be the ultimate solution due to the reasons of: 1) uncovered or out-of-date domain knowledge, 2) one-shot generation and hence restricted comprehensiveness. 
To this end, this work proposes a gradual knowledge excavation framework for open-domain complex question answering, where LLMs iteratively and actively acquire extrinsic information, then reason based on acquired historical knowledge. Specifically, during each step of the solving process, the model selects an action to execute, such as querying external knowledge or performing a single logical reasoning step, to gradually progress toward a final answer. Our method can effectively leverage plug-and-play external knowledge and dynamically adjust the strategy for solving complex questions. Evaluated on the StrategyQA dataset, our method achieves 78.17% accuracy with less than 6% parameters of its competitors, setting new SOTA in the ~10B LLM class.", "keywords": "Question answering;Knowledge Retrieval;Multi-step question answering;Large Language Model", "primary_area": "", "supplementary_material": "", "author": "Chang Liu;Xiaoguang Li;Lifeng Shang;Xin Jiang;Qun Liu;Edmund Y. Lam;Ngai Wong", "authorids": "~Chang_Liu35;~Xiaoguang_Li1;~Lifeng_Shang1;~Xin_Jiang1;~Qun_Liu1;~Edmund_Y._Lam1;~Ngai_Wong1", "gender": ";;M;M;M;M;M", "homepage": ";;;;http://liuquncn.github.io/;https://www.eee.hku.hk/~elam/;https://www.eee.hku.hk/~nwong/", "dblp": ";;70/4288;42/4142-2;75/4402-1;87/5852;88/3656", "google_scholar": ";;https://scholar.google.com.hk/citations?user=jMQIjYoAAAAJ;DUfcez0AAAAJ;2HhiGzcAAAAJ;;PM_uMYIAAAAJ", "or_profile": "~Chang_Liu35;~Xiaoguang_Li1;~Lifeng_Shang1;~Xin_Jiang1;~Qun_Liu1;~Edmund_Y._Lam1;~Ngai_Wong1", "aff": ";;Huawei Technologies Ltd.;Noah\u2019s Ark Lab, Huawei Technologies;Huawei Noah's Ark Lab;The University of Hong Kong;The University of Hong Kong", "aff_domain": ";;huawei.com;huawei.com;huawei.com;hku.hk;hku.hk", "position": ";;Researcher;Principal Researcher;Chief Scientist of Speech and Language Computing;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nliu2023gradually,\ntitle={Gradually Excavating External Knowledge for Implicit Complex Question Answering},\nauthor={Chang Liu and Xiaoguang Li and Lifeng Shang and Xin Jiang and Qun Liu and Edmund Y. 
Lam and Ngai Wong},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=6DKS4tb387}\n}", "github": "", "project": "", "reviewers": "4HSn;2gMb;DMUD;Ab27", "site": "https://openreview.net/forum?id=6DKS4tb387", "pdf_size": 0, "rating": "4;4;4;4", "confidence": "4;5;4;3", "excitement": "3;3;3;3", "reproducibility": "4;4;3;2", "correctness": "3;3;3;3", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.0, "reproducibility_avg": 3.25, "correctness_avg": 3.0, "replies_avg": 6, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0000-0002-9117-8247;0000-0002-7000-1792;0000-0001-6268-950X;0000-0002-3026-0108", "linkedin": ";;;xin-jiang-9577b76/;qunliu/;;", "aff_unique_index": "0;0;0;1;1", "aff_unique_norm": "Huawei;University of Hong Kong", "aff_unique_dep": "Huawei Technologies;", "aff_unique_url": "https://www.huawei.com;https://www.hku.hk", "aff_unique_abbr": "Huawei;HKU", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "6DMhUhx5oy", "title": "Fighting Fire with Fire: The Dual Role of LLMs in Crafting and Detecting Elusive Disinformation", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Recent ubiquity and disruptive impacts of large language models (LLMs) have raised concerns about their potential to be misused (*.i.e, generating large-scale harmful and misleading content*). To combat this emerging risk of LLMs, we propose a novel \"***Fighting Fire with Fire***\" (F3) strategy that harnesses modern LLMs' generative and emergent reasoning capabilities to counter human-written and LLM-generated disinformation. First, we leverage GPT-3.5-turbo to synthesize authentic and deceptive LLM-generated content through paraphrase-based and perturbation-based prefix-style prompts, respectively. Second, we apply zero-shot in-context semantic reasoning techniques with cloze-style prompts to discern genuine from deceptive posts and news articles. In our extensive experiments, we observe GPT-3.5-turbo's zero-shot superiority for both in-distribution and out-of-distribution datasets, where GPT-3.5-turbo consistently achieved accuracy at 68-72%, unlike the decline observed in previous customized and fine-tuned disinformation detectors. 
Our codebase and dataset are available at https://github.com/mickeymst/F3.", "keywords": "LLM;Prompt Engineering;Disinformation Detection;Natural Language Inference;Semantic Reasoning;In-context Learning", "primary_area": "", "supplementary_material": "", "author": "Jason S Lucas;Adaku Uchendu;Michiharu Yamashita;Jooyoung Lee;Shaurya Rohatgi;Dongwon Lee", "authorids": "~Jason_S_Lucas1;~Adaku_Uchendu1;~Michiharu_Yamashita1;~Jooyoung_Lee4;~Shaurya_Rohatgi1;~Dongwon_Lee1", "gender": "M;F;M;F;M;M", "homepage": "https://www.jasonslucas.com/;https://adauchendu.github.io/;https://mickeymst.github.io/;https://brit7777.github.io;;https://pike.psu.edu/dongwon", "dblp": "359/3158.html;244/0488;234/2706;;;l/DongwonLee", "google_scholar": "XU1WN6YAAAAJ;A4be1l4AAAAJ;https://scholar.google.co.jp/citations?user=rHmPeHQAAAAJ;iDFc3nAAAAAJ;UpHQFasAAAAJ;MzL-WnEAAAAJ", "or_profile": "~Jason_S_Lucas1;~Adaku_Uchendu1;~Michiharu_Yamashita1;~Jooyoung_Lee4;~Shaurya_Rohatgi1;~Dongwon_Lee1", "aff": "Pennsylvania State University;Pennsylvania State University;Pennsylvania State University;Pennsylvania State University;;The Pennsylvania State University", "aff_domain": "psu.edu;psu.edu;psu.edu;psu.edu;;psu.edu", "position": "PhD student;PhD student;PhD student;PhD student;;Full Professor", "bibtex": "@inproceedings{\nlucas2023fighting,\ntitle={Fighting Fire with Fire: The Dual Role of {LLM}s in Crafting and Detecting Elusive Disinformation},\nauthor={Jason S Lucas and Adaku Uchendu and Michiharu Yamashita and Jooyoung Lee and Shaurya Rohatgi and Dongwon Lee},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=6DMhUhx5oy}\n}", "github": "", "project": "", "reviewers": "xt8f;r37Z;h75L", "site": "https://openreview.net/forum?id=6DMhUhx5oy", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;3;4", "excitement": "2;4;3", "reproducibility": "4;4;0", "correctness": "2;3;3", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.0, "reproducibility_avg": 2.6666666666666665, "correctness_avg": 2.6666666666666665, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0009-0000-3494-6935;;0009-0002-3802-8618;;;0000-0001-8371-7629", "linkedin": "jslu/;;;;;", "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Pennsylvania State University", "aff_unique_dep": "", "aff_unique_url": "https://www.psu.edu", "aff_unique_abbr": "PSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "6Jqa4YmUMf", "title": "Investigating the Effectiveness of Multiple Expert Models Collaboration", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "This paper aims to investigate the effectiveness of several machine translation (MT) models and aggregation methods in a multi-domain setting under fair conditions and explore a direction for tackling multi-domain MT. We mainly compare the performance of the single model approach by jointly training all domains and the multi-expert models approach with a particular aggregation strategy. 
We conduct experiments on multiple domain datasets and demonstrate that a combination of smaller domain expert models can outperform a larger model trained for all domain data.", "keywords": "machine translation;multi-domain translation;multiple model collaboration", "primary_area": "", "supplementary_material": "", "author": "Ikumi Ito;Takumi Ito;Jun Suzuki;Kentaro Inui", "authorids": "~Ikumi_Ito1;~Takumi_Ito2;~Jun_Suzuki1;~Kentaro_Inui1", "gender": "M;M;M;M", "homepage": "https://ikumi-ito.github.io/;https://www.takumi-ito.com/;https://www.nlp.ecei.tohoku.ac.jp/~jun/;http://www.cl.ecei.tohoku.ac.jp/~inui/", "dblp": ";95/261.html;78/6923;90/3315", "google_scholar": ";https://scholar.google.com/citations?authuser=2;https://scholar.google.co.jp/citations?user=XO5CrIsAAAAJ;https://scholar.google.co.jp/citations?user=38_o3-kAAAAJ", "or_profile": "~Ikumi_Ito1;~Takumi_Ito2;~Jun_Suzuki1;~Kentaro_Inui1", "aff": "Tohoku University;Tohoku University;Tohoku University;Tohoku University", "aff_domain": "tohoku.ac.jp;tohoku.ac.jp;tohoku.ac.jp;tohoku.ac.jp", "position": "Undergrad student;PhD student;Full Professor;Full Professor", "bibtex": "@inproceedings{\nito2023investigating,\ntitle={Investigating the Effectiveness of Multiple Expert Models Collaboration},\nauthor={Ikumi Ito and Takumi Ito and Jun Suzuki and Kentaro Inui},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=6Jqa4YmUMf}\n}", "github": "", "project": "", "reviewers": "Va6z;6uVB;wUg4", "site": "https://openreview.net/forum?id=6Jqa4YmUMf", "pdf_size": 0, "rating": "3;3;3", "confidence": "5;4;4", "excitement": "3;2;3", "reproducibility": "5;3;4", "correctness": "3;3;3", "rating_avg": 3.0, "confidence_avg": 4.333333333333333, "excitement_avg": 2.6666666666666665, "reproducibility_avg": 4.0, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-7380-2587;0000-0003-2108-1340;0000-0001-6510-604X", "linkedin": ";;;kentaro-inui-52401a31/", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Tohoku University", "aff_unique_dep": "", "aff_unique_url": "https://www.tohoku.ac.jp", "aff_unique_abbr": "Tohoku U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Japan" }, { "id": "6KyZrSp8y3", "title": "Unnatural language processing: How do language models handle machine-generated prompts?", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Language model prompt optimization research has shown that semantically and grammatically well-formed manually crafted prompts are routinely outperformed by automatically generated token sequences with no apparent meaning or syntactic structure, including sequences of vectors from a model's embedding space. We use machine-generated prompts to probe how models respond to input that is not composed of natural language expressions. We study the behavior of models of different sizes in multiple semantic tasks in response to both continuous and discrete machine-generated prompts, and compare it to the behavior in response to human-generated natural-language prompts. Even when producing a similar output, machine-generated and human prompts trigger different response patterns through the network processing pathways, including different perplexities, different attention and output entropy distributions, and different unit activation profiles. 
\nWe provide preliminary insight into the nature of the units activated by different prompt types, suggesting that only natural language prompts recruit a genuinely linguistic circuit.", "keywords": "prompting;interpretability;large language modelling;unnatural language processing", "primary_area": "", "supplementary_material": "", "author": "Corentin Kervadec;Francesca Franzon;Marco Baroni", "authorids": "~Corentin_Kervadec2;~Francesca_Franzon1;~Marco_Baroni1", "gender": "M;;M", "homepage": "https://corentinkervadec.github.io/;https://franfranz.github.io/;http://marcobaroni.org", "dblp": "224/0222;;http://dblp.uni-trier.de/pers/hd/b/Baroni:Marco", "google_scholar": "https://scholar.google.fr/citations?user=Rx507eQAAAAJ;https://scholar.google.it/citations?user=5csBiuYAAAAJ;https://scholar.google.com/citations?hl=en", "or_profile": "~Corentin_Kervadec2;~Francesca_Franzon1;~Marco_Baroni1", "aff": "Universitat Pompeu Fabra;Universitat Pompeu Fabra;Universitat Pompeu Fabra", "aff_domain": "upf.edu;upf.edu;upf.edu", "position": "Postdoc;Postdoc;Full Professor", "bibtex": "@inproceedings{\nkervadec2023unnatural,\ntitle={Unnatural language processing: How do language models handle machine-generated prompts?},\nauthor={Corentin Kervadec and Francesca Franzon and Marco Baroni},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=6KyZrSp8y3}\n}", "github": "", "project": "", "reviewers": "oqtz;ZpBf;jWaw", "site": "https://openreview.net/forum?id=6KyZrSp8y3", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;4;4", "excitement": "4;4;2", "reproducibility": "3;4;5", "correctness": "4;4;2", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0003-0503-2792;", "linkedin": ";;", "aff_unique_index": "0;0;0", "aff_unique_norm": "Universitat Pompeu Fabra", "aff_unique_dep": "", "aff_unique_url": "https://www.upf.edu/", "aff_unique_abbr": "UPF", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Spain" }, { "id": "6LkytBaTy9", "title": "Bias Neutralization in Non-Parallel Texts: A Cyclic Approach with Auxiliary Guidance", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Objectivity is a goal for Wikipedia and many news sites, as well as a guiding principle of many large language models. Indeed, several methods have recently been developed for automatic subjective bias neutralization. These methods, however, typically rely on parallel text for training (i.e. a biased sentence coupled with a non-biased sentence), demonstrate poor transfer to new domains, and can lose important bias-independent context. Toward expanding the reach of bias neutralization, we propose in this paper a new approach called FairBalance. Three of its unique features are: i) a cycle consistent adversarial network enables bias neutralization without the need for parallel text; ii) the model design preserves bias-independent content; and iii) through auxiliary guidance, the model highlights sequences of bias-inducing words, yielding strong results in terms of bias neutralization quality. 
Extensive experiments demonstrate how FairBalance significantly improves subjective bias neutralization compared to other methods.", "keywords": "Bias Correction;Subjective Bias;Generative Adversarial Networks;Unsupervised Learning;Auxiliary Guidance", "primary_area": "", "supplementary_material": "", "author": "Karthic Madanagopal;James Caverlee", "authorids": "~Karthic_Madanagopal1;~James_Caverlee2", "gender": ";M", "homepage": ";https://people.engr.tamu.edu/caverlee/", "dblp": "159/1366.html;55/3697.html", "google_scholar": "3tz3LCkAAAAJ;LB1dq_sAAAAJ", "or_profile": "~Karthic_Madanagopal1;~James_Caverlee2", "aff": "Texas A&M University - College Station;Google", "aff_domain": "tamu.edu;google.com", "position": "PhD student;Researcher", "bibtex": "@inproceedings{\nmadanagopal2023bias,\ntitle={Bias Neutralization in Non-Parallel Texts: A Cyclic Approach with Auxiliary Guidance},\nauthor={Karthic Madanagopal and James Caverlee},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=6LkytBaTy9}\n}", "github": "", "project": "", "reviewers": "fXMK;htDX;THyJ", "site": "https://openreview.net/forum?id=6LkytBaTy9", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;2;3", "excitement": "3;4;3", "reproducibility": "3;3;2", "correctness": "3;3;3", "rating_avg": 4.0, "confidence_avg": 3.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 2.6666666666666665, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0001-8060-7985;0000-0001-8350-8528", "linkedin": "karthic-madanagopal-8b40b915/;", "aff_unique_index": "0;1", "aff_unique_norm": "Texas A&M University;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.tamu.edu;https://www.google.com", "aff_unique_abbr": "TAMU;Google", "aff_campus_unique_index": "0;1", "aff_campus_unique": "College Station;Mountain View", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "6P32h3LTC1", "title": "A Multi-Modal Multilingual Benchmark for Document Image Classification", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Document image classification is different from plain-text document classification and consists of classifying a document by understanding the content and structure of documents such as forms, emails, and other such documents. We show that the only existing dataset for this task (Lewis et al., 2006) has several limitations and we introduce two newly curated multilingual datasets WIKI-DOC and MULTIEURLEX-DOC that overcome these limitations. We further undertake a comprehensive study of popular visually-rich document understanding or Document AI models in previously untested settings in document image classification, such as 1) multi-label classification, and 2) zero-shot cross-lingual transfer setup. Experimental results show limitations of multilingual Document AI models on cross-lingual transfer across typologically distant languages. 
Our datasets and findings open the door for future research into improving Document AI models.", "keywords": "Document AI;layout-aware models;visually-rich document understanding;multilingual document image classification", "primary_area": "", "supplementary_material": "", "author": "Yoshinari Fujinuma;Siddharth Varia;Nishant Sankaran;Srikar Appalaraju;Bonan Min;Yogarshi Vyas", "authorids": "~Yoshinari_Fujinuma1;~Siddharth_Varia2;~Nishant_Sankaran1;~Srikar_Appalaraju2;~Bonan_Min1;~Yogarshi_Vyas1", "gender": "M;;M;;M;M", "homepage": ";;;;https://bnmin.github.io/;http://www.cs.umd.edu/~yogarshi/", "dblp": "174/7392;;171/5622;;69/5238;147/9150", "google_scholar": ";;;;RHK03FAAAAAJ;k6k7i1IAAAAJ", "or_profile": "~Yoshinari_Fujinuma1;~Siddharth_Varia2;~Nishant_Sankaran1;~Srikar_Appalaraju2;~Bonan_Min1;~Yogarshi_Vyas1", "aff": "AWS AI Labs;;Amazon;;Tufts University;Amazon", "aff_domain": "amazon.com;;amazon.com;;tufts.edu;amazon.com", "position": "Applied Scientist;;Researcher;;Adjunct faculty;Applied Scientist", "bibtex": "@inproceedings{\nfujinuma2023a,\ntitle={A Multi-Modal Multilingual Benchmark for Document Image Classification},\nauthor={Yoshinari Fujinuma and Siddharth Varia and Nishant Sankaran and Srikar Appalaraju and Bonan Min and Yogarshi Vyas},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=6P32h3LTC1}\n}", "github": "", "project": "", "reviewers": "VmrD;BLSd;choU", "site": "https://openreview.net/forum?id=6P32h3LTC1", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;5;3", "excitement": "3;3;3", "reproducibility": "4;4;3", "correctness": "3;3;3", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;", "linkedin": ";;;;bonan-min-b3bb90a;", "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Amazon;Tufts University", "aff_unique_dep": "AWS AI Labs;", "aff_unique_url": "https://aws.amazon.com;https://www.tufts.edu", "aff_unique_abbr": "AWS;Tufts", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "6RQTvSLbgi", "title": "IEKG: A Commonsense Knowledge Graph for Idiomatic Expressions", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Idiomatic expression (IE) processing and comprehension have challenged pre-trained language models (PTLMs) because their meanings are non-compositional. Unlike prior works that enable IE comprehension through fine-tuning PTLMs with sentences containing IEs, in this work, we construct IEKG, a commonsense knowledge graph for figurative interpretations of IEs. This extends the established ${ATOMIC}_{20}^{20}$ converting PTLMs into knowledge models (KMs) that encode and infer commonsense knowledge related to IE use. Experiments show that various PTLMs can be converted into KMs with IEKG. We verify the quality of IEKG and the ability of the trained KMs with automatic and human evaluation. 
Through applications in natural language understanding, we show that a PTLM injected with knowledge from IEKG exhibits improved IE comprehension ability and can generalize to IEs unseen during training.", "keywords": "idiomatic expression;figurative semantics;commonsense knowledge;idiomatic expression comprehension;natural language understanding", "primary_area": "", "supplementary_material": "", "author": "Ziheng Zeng;Kellen Tan Cheng;Srihari Venkat Nanniyur;Jianing Zhou;Suma Bhat", "authorids": "~Ziheng_Zeng1;~Kellen_Tan_Cheng1;~Srihari_Venkat_Nanniyur1;~Jianing_Zhou1;~Suma_Bhat1", "gender": "M;;M;M;", "homepage": ";https://kellentan.github.io;;https://www.zhjjn.com/;", "dblp": ";364/0264;;159/6589;66/9013", "google_scholar": ";https://scholar.google.com/citations?hl=en;;5LrgBS8AAAAJ;https://scholar.google.com/citations?hl=en", "or_profile": "~Ziheng_Zeng1;~Kellen_Tan_Cheng1;~Srihari_Venkat_Nanniyur1;~Jianing_Zhou1;~Suma_Bhat1", "aff": "University of Illinois, Urbana Champaign;Princeton University;Washington University, Saint Louis;University of Illinois, Urbana Champaign;University of Illinois, Urbana Champaign", "aff_domain": "illinois.edu;princeton.edu;wustl.edu;illinois.edu;illinois.edu", "position": "PhD student;PhD student;Undergrad student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nzeng2023iekg,\ntitle={{IEKG}: A Commonsense Knowledge Graph for Idiomatic Expressions},\nauthor={Ziheng Zeng and Kellen Tan Cheng and Srihari Venkat Nanniyur and Jianing Zhou and Suma Bhat},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=6RQTvSLbgi}\n}", "github": "", "project": "", "reviewers": "qKwD;vAg2;Ch1Z", "site": "https://openreview.net/forum?id=6RQTvSLbgi", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;2;2", "excitement": "3;3;4", "reproducibility": "5;4;4", "correctness": "4;3;5", "rating_avg": 4.0, "confidence_avg": 2.3333333333333335, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.333333333333333, "correctness_avg": 4.0, "replies_avg": 10, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;", "linkedin": ";ktcheng1/;sriharinanniyur/;;", "aff_unique_index": "0;1;2;0;0", "aff_unique_norm": "University of Illinois Urbana-Champaign;Princeton University;Washington University in St. Louis", "aff_unique_dep": ";;", "aff_unique_url": "https://illinois.edu;https://www.princeton.edu;https://wustl.edu", "aff_unique_abbr": "UIUC;Princeton;WUSTL", "aff_campus_unique_index": "0;2;0;0", "aff_campus_unique": "Urbana-Champaign;;Saint Louis", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "6RuXWFEQzg", "title": "Open-world Semi-supervised Generalized Relation Discovery Aligned in a Real-world Setting", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Open-world Relation Extraction (OpenRE) has recently garnered significant attention. However, existing approaches tend to oversimplify the problem by assuming that all instances of unlabeled data belong to novel classes, thereby limiting the practicality of these methods. We argue that the OpenRE setting should be more aligned with the characteristics of real-world data. Specifically, we propose two key improvements: (a) unlabeled data should encompass known and novel classes, including negative instances; and (b) the set of novel classes should represent long-tail relation types. 
Furthermore, we observe that popular relations can often be implicitly inferred through specific patterns, while long-tail relations tend to be explicitly expressed. Motivated by these insights, we present a method called KNoRD (Known and Novel Relation Discovery), which effectively classifies explicitly and implicitly expressed relations from known and novel classes within unlabeled data. Experimental evaluations on several Open-world RE benchmarks demonstrate that KNoRD consistently outperforms other existing methods, achieving significant performance gains.", "keywords": "Information extraction;relation extraction", "primary_area": "", "supplementary_material": "", "author": "William P Hogan;Jiacheng Li;Jingbo Shang", "authorids": "~William_P_Hogan1;~Jiacheng_Li2;~Jingbo_Shang2", "gender": "M;M;M", "homepage": "https://wphogan.github.io/;https://jiachengli1995.github.io/;https://shangjingbo1226.github.io/", "dblp": "180/3567.html;18/5576-3;151/3145.html", "google_scholar": "wEzTKU4AAAAJ;Vdm6jq4AAAAJ;0SkFI4MAAAAJ", "or_profile": "~William_P_Hogan1;~Jiacheng_Li2;~Jingbo_Shang2", "aff": "University of California, San Diego;University of California, San Diego;University of California, San Diego", "aff_domain": "ucsd.edu;ucsd.edu;ucsd.edu", "position": "PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nhogan2023openworld,\ntitle={Open-world Semi-supervised Generalized Relation Discovery Aligned in a Real-world Setting},\nauthor={William P Hogan and Jiacheng Li and Jingbo Shang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=6RuXWFEQzg}\n}", "github": "", "project": "", "reviewers": "ZBDY;sj25;h8YH", "site": "https://openreview.net/forum?id=6RuXWFEQzg", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;3;4", "excitement": "4;4;4", "reproducibility": "5;3;4", "correctness": "4;4;4", "rating_avg": 5.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 4.0, "reproducibility_avg": 4.0, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": "wphogan/;;", "aff_unique_index": "0;0;0", "aff_unique_norm": "University of California, San Diego", "aff_unique_dep": "", "aff_unique_url": "https://www.ucsd.edu", "aff_unique_abbr": "UCSD", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "San Diego", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "6UklbMESHZ", "title": "An Empirical Investigation of Implicit and Explicit Knowledge-Enhanced Methods for Ad Hoc Dataset Retrieval", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Ad hoc dataset retrieval has become an important way of finding data on the Web, where the underlying problem is how to measure the relevance of a dataset to a query. State-of-the-art solutions for this task are still lexical methods, which cannot capture semantic similarity. Semantics-aware knowledge-enhanced retrieval methods, which achieved promising results on other tasks, have yet to be systematically studied on this specialized task. To fill the gap, in this paper, we present an empirical investigation of the task where we implement and evaluate, on two test collections, a set of implicit and explicit knowledge-enhancement retrieval methods in various settings. 
Our results reveal the unique features of the task and suggest an interpolation of different kinds of methods as the current best practice.", "keywords": "ad hoc dataset retrieval;dataset search;dense retrieval;semantic search", "primary_area": "", "supplementary_material": "", "author": "Weiqing Luo;Qiaosheng Chen;Zhiyang Zhang;Zixian Huang;Gong Cheng", "authorids": "~Weiqing_Luo1;~Qiaosheng_Chen1;~Zhiyang_Zhang2;~Zixian_Huang1;~Gong_Cheng3", "gender": "M;M;M;M;M", "homepage": ";https://cqsss.github.io;;;http://ws.nju.edu.cn/~gcheng", "dblp": "313/4765;323/9650;;;69/1215-1", "google_scholar": "jkqWLN4AAAAJ;https://scholar.google.com/citations?hl=zh-CN;JBL6iKkAAAAJ;FiqiYDUAAAAJ;_ncKAiwAAAAJ", "or_profile": "~Weiqing_Luo1;~Qiaosheng_Chen1;~Zhiyang_Zhang2;~Zixian_Huang1;~Gong_Cheng3", "aff": "Tencent;Nanjing University;Nanjing University;Nanjing University;Nanjing University", "aff_domain": "tencent.com;nju.edu.cn;nju.edu.cn;nju.edu.cn;nju.edu.cn", "position": "Intern;MS student;MS student;PhD student;Full Professor", "bibtex": "@inproceedings{\nluo2023an,\ntitle={An Empirical Investigation of Implicit and Explicit Knowledge-Enhanced Methods for Ad Hoc Dataset Retrieval},\nauthor={Weiqing Luo and Qiaosheng Chen and Zhiyang Zhang and Zixian Huang and Gong Cheng},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=6UklbMESHZ}\n}", "github": "", "project": "", "reviewers": "Afe6;RyqX;SVC3;Pb56", "site": "https://openreview.net/forum?id=6UklbMESHZ", "pdf_size": 0, "rating": "4;4;4;4", "confidence": "4;4;4;4", "excitement": "3;2;2;3", "reproducibility": "4;3;3;4", "correctness": "4;3;2;4", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 2.5, "reproducibility_avg": 3.5, "correctness_avg": 3.25, "replies_avg": 14, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0009-0002-0610-7725;;;0000-0003-3539-7776", "linkedin": ";qiaosheng-chen/;;;gongcheng/", "aff_unique_index": "0;1;1;1;1", "aff_unique_norm": "Tencent;Nanjing University", "aff_unique_dep": "Tencent Holdings Limited;", "aff_unique_url": "https://www.tencent.com;https://www.nju.edu.cn", "aff_unique_abbr": "Tencent;Nanjing U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "6WcsVlZE5I", "title": "Towards a Deep Understanding of Multilingual End-to-End Speech Translation", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "In this paper, we employ Singular Value Canonical Correlation Analysis (SVCCA) to analyze representations learnt in a multilingual end-to-end speech translation model trained over 22 languages. SVCCA enables us to estimate representational similarity across languages and layers, enhancing our understanding of the functionality of multilingual speech translation and its potential connection to multilingual neural machine translation. The multilingual speech translation model is trained on the CoVoST 2 dataset in all possible directions, and we utilize LASER to extract parallel bitext data for SVCCA analysis. We derive three major findings from our analysis: (I) Linguistic similarity loses its efficacy in multilingual speech translation when the training data for a specific language is limited. 
(II) Enhanced encoder representations and well-aligned audio-text data significantly improve translation quality, surpassing the bilingual counterparts when the training data is not compromised. (III) The encoder representations of multilingual speech translation demonstrate superior performance in predicting phonetic features in linguistic typology prediction. With these findings, we propose that releasing the constraint of limited data for low-resource languages and subsequently combining them with linguistically related high-resource languages could offer a more effective approach for multilingual end-to-end speech translation.", "keywords": "Multilingual Speech Translation;Multilinguality", "primary_area": "", "supplementary_material": "", "author": "Haoran Sun;Xiaohu Zhao;Yikun Lei;shaolin Zhu;Deyi Xiong", "authorids": "~Haoran_Sun7;~Xiaohu_Zhao1;~Yikun_Lei1;~shaolin_Zhu1;~Deyi_Xiong2", "gender": ";M;M;M;M", "homepage": ";;https://tjunlp-lab.github.io/members/students/;https://zsl-nlp.github.io/;https://dyxiong.github.io", "dblp": ";;293/8759;206/8937;55/6548", "google_scholar": ";https://scholar.google.com/citations?hl=zh-CN;mxpXRBYAAAAJ;https://scholar.google.com/citations?hl=zh-CN;QPLO3myO5PkC", "or_profile": "~Haoran_Sun7;~Xiaohu_Zhao1;~Yikun_Lei1;~shaolin_Zhu1;~Deyi_Xiong2", "aff": "Tianjin University;Tianjin University;Tianjin University;Tianjin University;Tianjin University", "aff_domain": "tju.edu.cn;tju.edu.cn;tju.edu;tju.edu.cn;tju.edu.cn", "position": "MS student;MS student;MS student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nsun2023towards,\ntitle={Towards a Deep Understanding of Multilingual End-to-End Speech Translation},\nauthor={Haoran Sun and Xiaohu Zhao and Yikun Lei and shaolin Zhu and Deyi Xiong},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=6WcsVlZE5I}\n}", "github": "", "project": "", "reviewers": "JjpA;bZDJ;RGSM", "site": "https://openreview.net/forum?id=6WcsVlZE5I", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;5;4", "excitement": "4;3;3", "reproducibility": "4;4;4", "correctness": "3;3;2", "rating_avg": 3.0, "confidence_avg": 4.333333333333333, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.0, "correctness_avg": 2.6666666666666665, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;0000-0002-2353-5038", "linkedin": ";;;;", "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Tianjin University", "aff_unique_dep": "", "aff_unique_url": "http://www.tju.edu.cn", "aff_unique_abbr": "TJU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "6YQ1uh9IGG", "title": "A Survey on Out-of-Distribution Detection in NLP", "track": "main", "status": "Reject", "tldr": "", "abstract": "Out-of-distribution (OOD) detection is essential for the reliable and safe deployment of machine learning systems in the real world. Great progress has been made over the past years. This paper presents the first review of recent advances in OOD detection with a particular focus on natural language processing approaches. First, we provide a formal definition of OOD detection and discuss several related fields. 
We then categorize recent algorithms into three classes according to the data they used: (1) OOD data available, (2) OOD data unavailable + in-distribution (ID) label available, and (3) OOD data unavailable + ID label unavailable. Third, we introduce datasets, applications, and metrics. Finally, we summarize existing work and present potential future research topics.", "keywords": "OOD Detection", "primary_area": "", "supplementary_material": "", "author": "Hao Lang;Yinhe Zheng;Yixuan Li;Jian SUN;Fei Huang;Yongbin Li", "authorids": "~Hao_Lang1;~Yinhe_Zheng1;~Yixuan_Li1;~Jian_SUN9;~Fei_Huang1;~Yongbin_Li2", "gender": "M;;F;;;M", "homepage": "https://github.com/langhaobeijing;;http://pages.cs.wisc.edu/~sharonli/;;;https://yongbin-li.github.io/", "dblp": "71/6934.html;;144/6087-1;;;", "google_scholar": "0UGQL9QAAAAJ;;https://scholar.google.com/citations?hl=en;;;xF5VrokAAAAJ", "or_profile": "~Hao_Lang1;~Yinhe_Zheng1;~Yixuan_Li1;~Jian_SUN9;~Fei_Huang1;~Yongbin_Li2", "aff": "Tongyi Lab, Alibaba Group;;Cornell University;;;Alibaba Group", "aff_domain": "alibaba-inc.com;;cornell.edu;;;alibaba-inc.com", "position": "Researcher;;Graduate Student;;;Researcher", "bibtex": "@misc{\nlang2023a,\ntitle={A Survey on Out-of-Distribution Detection in {NLP}},\nauthor={Hao Lang and Yinhe Zheng and Yixuan Li and Jian SUN and Fei Huang and Yongbin Li},\nyear={2023},\nurl={https://openreview.net/forum?id=6YQ1uh9IGG}\n}", "github": "", "project": "", "reviewers": "1ooe;iVQ5;aNPD", "site": "https://openreview.net/forum?id=6YQ1uh9IGG", "pdf_size": 0, "rating": "2;2;2", "confidence": "3;4;3", "excitement": "3;3;3", "reproducibility": "", "correctness": "4;3;3", "rating_avg": 2.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.0, "reproducibility_avg": 0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;", "linkedin": ";;liyixuan;;;", "aff_unique_index": "0;1;0", "aff_unique_norm": "Alibaba Group;Cornell University", "aff_unique_dep": "Tongyi Lab;", "aff_unique_url": "https://www.alibaba.com;https://www.cornell.edu", "aff_unique_abbr": "Alibaba;Cornell", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "China;United States" }, { "id": "6c2s6HddQ4", "title": "The Locality and Symmetry of Positional Encodings", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Positional Encodings (PEs) are used to inject word-order information into\ntransformer-based language models. While they can significantly enhance\nthe quality of sentence representations, their specific contribution to\nlanguage models is not fully understood, \nespecially given recent findings that various positional encodings are insensitive to word order.\nIn this work, we conduct a systematic study of positional encodings in \\textbf{Bidirectional Masked Language Models} (BERT-style)\n, which complements existing work in three aspects: (1) We uncover the core function of PEs by identifying two common properties, Locality and Symmetry; \n(2) We show that the two properties are closely correlated with the performances of downstream tasks;\n(3) We quantify the weakness of current PEs by introducing two new probing tasks, on which current PEs perform poorly. 
\nWe believe that these results are the basis for developing better PEs for transformer-based language models.", "keywords": "Positional Encodings;Sentence Representations;Pre-trained Language Models", "primary_area": "", "supplementary_material": "", "author": "Lihu Chen;Gael Varoquaux;Fabian M. Suchanek", "authorids": "~Lihu_Chen1;~Gael_Varoquaux1;~Fabian_M._Suchanek1", "gender": "M;M;", "homepage": "https://chenlihu.com;http://gael-varoquaux.info;", "dblp": ";36/7585;", "google_scholar": "oRs8regAAAAJ;https://scholar.google.fr/citations?user=OGGu384AAAAJ;", "or_profile": "~Lihu_Chen1;~Gael_Varoquaux1;~Fabian_M._Suchanek1", "aff": "Institut Polytechnique de Paris;INRIA;", "aff_domain": "telecom-paristech.fr;inria.fr;", "position": "PhD student;Full Professor;", "bibtex": "@inproceedings{\nchen2023the,\ntitle={The Locality and Symmetry of Positional Encodings},\nauthor={Lihu Chen and Gael Varoquaux and Fabian M. Suchanek},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=6c2s6HddQ4}\n}", "github": "", "project": "", "reviewers": "R38Q;ftCy;5QeA", "site": "https://openreview.net/forum?id=6c2s6HddQ4", "pdf_size": 0, "rating": "3;3;3", "confidence": "2;3;3", "excitement": "3;3;3", "reproducibility": "4;3;4", "correctness": "3;3;4", "rating_avg": 3.0, "confidence_avg": 2.6666666666666665, "excitement_avg": 3.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": ";;", "aff_unique_index": "0;1", "aff_unique_norm": "Institut Polytechnique de Paris;INRIA", "aff_unique_dep": ";", "aff_unique_url": "https://www.ipparis.fr;https://www.inria.fr", "aff_unique_abbr": "IP Paris;INRIA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "France" }, { "id": "6dyvFZLRX8", "title": "BotPercent: Estimating Bot Populations in Twitter Communities", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Twitter bot detection is vital in combating misinformation and safeguarding the integrity of social media discourse. While malicious bots are becoming more and more sophisticated and personalized, standard bot detection approaches are still agnostic to social environments (henceforth, communities) the bots operate at. In this work, we introduce community-specific bot detection, estimating the percentage of bots given the context of a community. Our method---BotPercent---is an amalgamation of Twitter bot detection datasets and feature-, text-, and graph-based models, adjusted to a particular community on Twitter. We introduce an approach that performs confidence calibration across bot detection models, which addresses generalization issues in existing community-agnostic models targeting individual bots and leads to more accurate community-level bot estimations. Experiments demonstrate that BotPercent achieves state-of-the-art performance in community-level Twitter bot detection across both balanced and imbalanced class distribution settings, presenting a less biased estimator of Twitter bot populations within the communities we analyze. We then analyze bot rates in several Twitter groups, including users who engage with partisan news media, political communities in different countries, and more. 
Our results reveal that the presence of Twitter bots is not homogeneous, but exhibiting a spatial-temporal distribution with considerable heterogeneity that should be taken into account for content moderation and social media policy making. The implementation of BotPercent is available at https://github.com/TamSiuhin/BotPercent.", "keywords": "Twitter bot detection;social network analysis", "primary_area": "", "supplementary_material": "", "author": "Zhaoxuan Tan;Shangbin Feng;Melanie Sclar;Herun Wan;Minnan Luo;Yejin Choi;Yulia Tsvetkov", "authorids": "~Zhaoxuan_Tan1;~Shangbin_Feng1;~Melanie_Sclar1;~Herun_Wan1;~Minnan_Luo1;~Yejin_Choi1;~Yulia_Tsvetkov1", "gender": "M;M;F;M;F;F;F", "homepage": "https://tamsiuhin.github.io/;https://bunsenfeng.github.io/;https://msclar.github.io;;https://gr.xjtu.edu.cn/web/minnluo;https://yejinc.github.io/;https://homes.cs.washington.edu/~yuliats/", "dblp": "301/7706;295/9571;274/6796;295/9587;99/10051;89/579-1;75/8157", "google_scholar": "0KE2CZsAAAAJ;Y3rLP9UAAAAJ;4uNPtZgAAAAJ;2Mrur7QAAAAJ;https://scholar.google.com/citations?hl=zh-CN;vhP-tlcAAAAJ;SEDPkrsAAAAJ", "or_profile": "~Zhaoxuan_Tan1;~Shangbin_Feng1;~Melanie_Sclar1;~Herun_Wan1;~Minnan_Luo1;~Yejin_Choi1;~Yulia_Tsvetkov1", "aff": "Xi'an Jiaotong University;University of Washington;University of Washington, Seattle;Xi'an Jiaotong University;Xi'an Jiaotong University;Department of Computer Science, University of Washington;Department of Computer Science, University of Washington", "aff_domain": "xjtu.edu.cn;cs.washington.edu;uw.edu;xjtu.edu.cn;xjtu.edu.cn;cs.washington.edu;cs.washington.edu", "position": "Undergrad student;PhD student;PhD student;PhD student;Full Professor;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\ntan2023botpercent,\ntitle={BotPercent: Estimating Bot Populations in Twitter Communities},\nauthor={Zhaoxuan Tan and Shangbin Feng and Melanie Sclar and Herun Wan and Minnan Luo and Yejin Choi and Yulia Tsvetkov},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=6dyvFZLRX8}\n}", "github": "", "project": "", "reviewers": "Rurt;q2rU;TcPZ", "site": "https://openreview.net/forum?id=6dyvFZLRX8", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;4", "excitement": "4;4;2", "reproducibility": "4;5;4", "correctness": "3;4;3", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.333333333333333, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0001-8230-6238;0000-0002-4133-1987;;0000-0002-3294-3383;0000-0002-0140-7860;;0000-0002-4634-7128", "linkedin": "zhaoxuan-tan-927132213/;;melanie-sclar-077047b5/;;;;", "aff_unique_index": "0;1;1;0;0;1;1", "aff_unique_norm": "Xi'an Jiao Tong University;University of Washington", "aff_unique_dep": ";", "aff_unique_url": "https://www.xjtu.edu.cn;https://www.washington.edu", "aff_unique_abbr": "XJTU;UW", "aff_campus_unique_index": "1;1;1", "aff_campus_unique": ";Seattle", "aff_country_unique_index": "0;1;1;0;0;1;1", "aff_country_unique": "China;United States" }, { "id": "6eBgIRnlGA", "title": "Mitigating Temporal Misalignment by Discarding Outdated Facts", "track": "main", "status": "Long Main", "tldr": "", "abstract": "While large language models are able to retain vast amounts of world knowledge seen during pretraining, such knowledge is prone to going out of date and is nontrivial to update. 
Furthermore, these models are often used under temporal misalignment, tasked with answering questions about the present, despite having only been trained on data collected in the past. To mitigate the effects of temporal misalignment, we propose fact duration prediction: the task of predicting how long a given fact will remain true. In our experiments, we demonstrate that identifying which facts are prone to rapid change can help models avoid reciting outdated information and determine which predictions require seeking out up-to-date knowledge sources. We also show how modeling fact duration improves calibration for knowledge-intensive tasks, such as open-retrieval question answering, under temporal misalignment, by discarding volatile facts.", "keywords": "Question Answering;Temporal", "primary_area": "", "supplementary_material": "", "author": "Michael JQ Zhang;Eunsol Choi", "authorids": "~Michael_JQ_Zhang1;~Eunsol_Choi1", "gender": ";", "homepage": "https://eunsol.github.io/;https://mikejqzhang.github.io/", "dblp": "116/2765;301/8020", "google_scholar": "6wulN88AAAAJ;https://scholar.google.com/citations?view_op=list_works", "or_profile": "~Eunsol_Choi1;~Michael_J_Zhang1", "aff": "University of Texas, Austin;University of Texas at Austin", "aff_domain": "cs.utexas.edu;utexas.edu", "position": "Assistant Professor;PhD student", "bibtex": "@inproceedings{\nzhang2023mitigating,\ntitle={Mitigating Temporal Misalignment by Discarding Outdated Facts},\nauthor={Michael JQ Zhang and Eunsol Choi},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=6eBgIRnlGA}\n}", "github": "", "project": "", "reviewers": "TZQm;BbjM;Cm26", "site": "https://openreview.net/forum?id=6eBgIRnlGA", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;2;3", "excitement": "4;3;4", "reproducibility": "4;4;3", "correctness": "4;4;4", "rating_avg": 5.0, "confidence_avg": 3.0, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0003-3607-9104;", "linkedin": ";", "aff_unique_index": "0;0", "aff_unique_norm": "University of Texas at Austin", "aff_unique_dep": "", "aff_unique_url": "https://www.utexas.edu", "aff_unique_abbr": "UT Austin", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Austin", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "6i98agKoZ1", "title": "Self-Improvement of Non-autoregressive Model via Sequence-Level Distillation", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Although Non-autoregressive Transformer (NAT) models have achieved great success in terms of fast inference speed, this speedup comes with a performance drop due to the inherent \\emph{multi-modality} problem of the NAT model. Previous works commonly alleviate this problem by replacing the target side of the raw data with distilled data generated by Autoregressive Transformer (AT) models. However, the multi-modality problem in the distilled data is still significant and thus limits further improvement of the NAT models. In this paper, we propose a method called Sequence-Level Self-Distillation (SLSD), which aims to generate distilled data by the NAT model itself, eliminating the need for additional teacher networks. 
Furthermore, SLSD can adapt to different NAT models without precise adjustments since the self-distilled data is generated from the same types of NAT models. We conduct extensive experiments on WMT14 EN$\\leftrightarrow$DE and WMT16 EN$\\leftrightarrow$RO and choose four classic NAT models as the backbones to validate the generality and effectiveness of SLSD. The results show that our approach can consistently improve all models on both raw data and distilled data without sacrificing the inference speed.", "keywords": "non-autoregressive transformers;self-improvement;distillation", "primary_area": "", "supplementary_material": "", "author": "Yusheng Liao;Shuyang Jiang;Yiqi Li;Yu Wang;Yanfeng Wang", "authorids": "~Yusheng_Liao1;~Shuyang_Jiang2;~Yiqi_Li1;~Yu_Wang40;~Yanfeng_Wang1", "gender": "M;M;;M;M", "homepage": ";;;https://mediabrain.sjtu.edu.cn/yuwang/;https://cmic.sjtu.edu.cn/wangyanfeng/", "dblp": "37/4774.html;153/1949;17/10445;02/5889-27.html;55/5407-1.html", "google_scholar": "ErjimggAAAAJ;slwTiOUAAAAJ;;;https://scholar.google.com/citations?hl=zh-CN", "or_profile": "~Yusheng_Liao1;~Shuyang_Jiang2;~Yiqi_Li1;~Yu_Wang40;~Yanfeng_Wang1", "aff": "Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University", "aff_domain": "sjtu.edu.cn;cs.sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn", "position": "PhD student;Undergrad student;Undergrad student;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nliao2023selfimprovement,\ntitle={Self-Improvement of Non-autoregressive Model via Sequence-Level Distillation},\nauthor={Yusheng Liao and Shuyang Jiang and Yiqi Li and Yu Wang and Yanfeng Wang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=6i98agKoZ1}\n}", "github": "", "project": "", "reviewers": "Gn82;orC1;nVwZ", "site": "https://openreview.net/forum?id=6i98agKoZ1", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;2", "excitement": "4;3;3", "reproducibility": "3;3;4", "correctness": "4;3;3", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0001-7549-3944;;;0000-0001-9500-081X;0000-0002-3196-2347", "linkedin": ";%E4%B9%A6%E6%B4%8B-%E6%B1%9F-b8288223a/;;;", "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Shanghai Jiao Tong University", "aff_unique_dep": "", "aff_unique_url": "https://www.sjtu.edu.cn", "aff_unique_abbr": "SJTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "6j7JZnEzf4", "title": "Language Models with Rationality", "track": "main", "status": "Long Main", "tldr": "", "abstract": "While large language models (LLMs) are proficient at question-answering (QA), it is not always clear how (or even if) an answer follows from their latent \"beliefs\". This lack of interpretability is a growing impediment to widespread use of LLMs. To address this, our goals are to make model beliefs and their inferential relationships explicit, and to resolve inconsistencies that may exist, so that answers are supported by interpretable chains of reasoning drawn from a consistent network of beliefs. 
Our approach, which we call REFLEX, is to add a **rational, self-reflecting layer** on top of the LLM. First, given a question, we construct a **belief graph** using a backward-chaining process to materialize relevant model beliefs (including beliefs about answer candidates) and their inferential relationships. Second, we identify and minimize contradictions in that graph using a formal constraint reasoner. We find that REFLEX significantly improves consistency (by 8%-11% absolute) without harming overall answer accuracy, resulting in answers supported by faithful chains of reasoning drawn from a more consistent belief system. This suggests a new style of system architecture in which an LLM extended with a rational layer can provide an interpretable window into system beliefs, add a systematic reasoning capability, and repair latent inconsistencies present in the LLM.", "keywords": "Interpretability;question answering;belief;entailment;belief graphs;consistency", "primary_area": "", "supplementary_material": "", "author": "Nora Kassner;Oyvind Tafjord;Ashish Sabharwal;Kyle Richardson;Hinrich Schuetze;Peter Clark", "authorids": "~Nora_Kassner1;~Oyvind_Tafjord2;~Ashish_Sabharwal1;~Kyle_Richardson1;~Hinrich_Schuetze3;~Peter_Clark1", "gender": ";M;M;M;M;M", "homepage": ";;;https://www.nlp-kyle.com/;https://www.cis.uni-muenchen.de/schuetze/;https://allenai.org/team/peterc", "dblp": ";178/8640;13/154;38/9169;s/HinrichSchutze;34/1184", "google_scholar": ";https://scholar.google.com/citations?hl=en;7VspfeAAAAAJ;LmJN-n4AAAAJ;;o-5vyEsAAAAJ", "or_profile": "~Nora_Kassner1;~Oyvind_Tafjord2;~Ashish_Sabharwal1;~Kyle_Richardson1;~Hinrich_Schuetze3;~Peter_Clark1", "aff": ";Allen Institute for Artificial Intelligence;Allen Institute for AI;Allen Institute for Artificial Intelligence;Center for Information and Language Processing;Allen Institute for Artificial Intelligence", "aff_domain": ";allenai.org;allenai.org;allenai.org;lmu.de;allenai.org", "position": ";Researcher;Principal Researcher;Research Scientist;Full Professor;Senior Research Manager", "bibtex": "@inproceedings{\nkassner2023language,\ntitle={Language Models with Rationality},\nauthor={Nora Kassner and Oyvind Tafjord and Ashish Sabharwal and Kyle Richardson and Hinrich Schuetze and Peter Clark},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=6j7JZnEzf4}\n}", "github": "", "project": "", "reviewers": "mJrZ;udQV;jJr9", "site": "https://openreview.net/forum?id=6j7JZnEzf4", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;4;4", "excitement": "4;5;4", "reproducibility": "3;4;4", "correctness": "4;4;4", "rating_avg": 5.0, "confidence_avg": 4.0, "excitement_avg": 4.333333333333333, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 4.0, "replies_avg": 10, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0003-4190-5618;;;;", "linkedin": ";;ashish-sabharwal-82a2b661;;;peter-clark-a8b556/", "aff_unique_index": "0;1;0;2;0", "aff_unique_norm": "Allen Institute for Artificial Intelligence;Allen Institute for AI;Center for Information and Language Processing", "aff_unique_dep": ";;", "aff_unique_url": "https://allenai.org;https://allenai.org;", "aff_unique_abbr": "AI2;AI2;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States;" }, { "id": "6jik3wCbTr", "title": "Mitigating Data Imbalance and Representation Degeneration in 
Multilingual Machine Translation", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Despite advances in multilingual neural machine translation (MNMT), we argue that there are still two major challenges in this area: data imbalance and representation degeneration. The data imbalance problem refers to the imbalance in the amount of parallel corpora for all language pairs, especially for long-tail languages (i.e., very low-resource languages). The representation degeneration problem refers to the problem of encoded tokens tending to appear only in a small subspace of the full space available to the MNMT model. To solve these two issues, we propose Bi-ACL, a framework which only requires target-side monolingual data and a bilingual dictionary to improve the performance of the MNMT model. We define two modules, named bidirectional autoencoder and bidirectional contrastive learning, which we combine with an online constrained beam search and a curriculum learning sampling strategy. Extensive experiments show that our proposed method is more effective than strong baselines both in long-tail languages and in high-resource languages. We also demonstrate that our approach is capable of transferring knowledge between domains and languages in zero-shot scenarios.", "keywords": "Data Imbalance;Representation Degeneration;Multilingual Machine Translation", "primary_area": "", "supplementary_material": "", "author": "Wen Lai;Alexandra Chronopoulou;Alexander Fraser", "authorids": "~Wen_Lai1;~Alexandra_Chronopoulou1;~Alexander_Fraser1", "gender": "M;F;M", "homepage": "https://wenlai-lavine.github.io;https://alexandra-chron.github.io/;https://alexfraser.github.io/", "dblp": "223/0040;178/7008;145/8377.html", "google_scholar": "TKnzMwMAAAAJ;XiwRCRIAAAAJ;4ZIZK08AAAAJ", "or_profile": "~Wen_Lai1;~Alexandra_Chronopoulou1;~Alexander_Fraser1", "aff": "Ludwig-Maximilians-Universit\u00e4t M\u00fcnchen;Ludwig-Maximilians-Universit\u00e4t M\u00fcnchen;LMU Munich", "aff_domain": "lmu.de;lmu.de;lmu.de", "position": "PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nlai2023mitigating,\ntitle={Mitigating Data Imbalance and Representation Degeneration in Multilingual Machine Translation},\nauthor={Wen Lai and Alexandra Chronopoulou and Alexander Fraser},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=6jik3wCbTr}\n}", "github": "", "project": "", "reviewers": "G5qY;LrBM;G7Tr", "site": "https://openreview.net/forum?id=6jik3wCbTr", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;3;3", "excitement": "3;4;3", "reproducibility": "3;3;3", "correctness": "2;4;3", "rating_avg": 3.0, "confidence_avg": 3.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.0, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": ";alexandra-chronopoulou/;", "aff_unique_index": "0;0;1", "aff_unique_norm": "Ludwig-Maximilians-Universit\u00e4t M\u00fcnchen;Ludwig Maximilian University of Munich", "aff_unique_dep": ";", "aff_unique_url": "https://www.lmu.de;https://www.lmu.de", "aff_unique_abbr": "LMU;LMU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Munich", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Germany" }, { "id": "6lXuQBMsyM", "title": "DetGPT: Detect What You Need via Reasoning", "track": "main", "status": "Long Main", "tldr": "", "abstract": "In recent years, the field of 
computer vision has seen significant advancements thanks to the development of large language models (LLMs). These models have enabled more effective and sophisticated interactions between humans and machines, paving the way for novel techniques that blur the lines between human and machine intelligence. In this paper, we introduce a new paradigm for object detection that we call reasoning-based object detection. Unlike conventional object detection methods that rely on specific object names, our approach enables users to interact with the system using natural language instructions, allowing for a higher level of interactivity. Our proposed method, called DetGPT, leverages state-of-the-art multi-modal models and open-vocabulary object detectors to perform reasoning within the context of the user\u2019s instructions and the visual scene. This enables DetGPT to automatically locate the object of interest based on the user\u2019s expressed desires, even if the object is not explicitly mentioned. For instance, if a user expresses a desire for a cold beverage, DetGPT can analyze the image, identify a fridge, and use its knowledge of typical fridge contents to locate the beverage. This flexibility makes our system applicable across a wide range of fields, from robotics and automation to autonomous driving. Overall, our proposed paradigm and DetGPT demonstrate the potential for more sophisticated and intuitive interactions between humans and machines. We hope that our proposed paradigm and approach will provide inspiration to the community and open the door to more interactive and versatile object detection systems.", "keywords": "embodied AI;multi-modal learning;object detection", "primary_area": "", "supplementary_material": "", "author": "Renjie Pi;Jiahui Gao;Shizhe Diao;Rui Pan;Hanze Dong;Jipeng Zhang;Lewei Yao;Jianhua Han;Hang Xu;Lingpeng Kong;Tong Zhang", "authorids": "~Renjie_Pi1;~Jiahui_Gao2;~Shizhe_Diao2;~Rui_Pan4;~Hanze_Dong1;~Jipeng_Zhang1;~Lewei_Yao1;~Jianhua_Han1;~Hang_Xu1;~Lingpeng_Kong1;~Tong_Zhang2", "gender": "M;;;M;M;M;M;M;M;M;M", "homepage": ";;;;https://hendrydong.github.io/;https://2003pro.github.io/;;;;https://ikekonglp.github.io/;http://tongzhang-ml.org", "dblp": "67/2156;;;74/9957;228/7798;;254/1943.html;29/6207;;144/7656;07/4227-1", "google_scholar": "XUq0HwcAAAAJ;;;;g9WLzWoAAAAJ;q0De288AAAAJ;hqDyTg8AAAAJ;OEPMQEMAAAAJ;https://scholar.google.com.hk/citations?user=J_8TX6sAAAAJ;f1hBi5wAAAAJ;LurWtuYAAAAJ", "or_profile": "~Renjie_Pi1;~Jiahui_Gao2;~Shizhe_Diao2;~Rui_Pan4;~Hanze_Dong1;~Jipeng_Zhang1;~Lewei_Yao1;~Jianhua_Han1;~Hang_Xu1;~Lingpeng_Kong1;~Tong_Zhang2", "aff": "Hong Kong University of Science and Technology;;;Hong Kong University of Science and Technology;Hong Kong University of Science and Technology;Department of Computer Science and Engineering, The Hong Kong University of Science and Technology;Hong Kong University of Science and Technology;Huawei Technologies Ltd.;Huawei Noah\u2018s Ark Lab;Department of Computer Science, The University of Hong Kong;Hong Kong University of Science and Technology", "aff_domain": "ust.hk;;;ust.hk;ust.hk;cse.ust.hk;ust.hk;huawei.com;huawei.com;cs.hku.hk;ust.hk", "position": "PhD student;;;MS student;PhD student;PhD student;PhD student;Researcher;Researcher;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\npi2023detgpt,\ntitle={Det{GPT}: Detect What You Need via Reasoning},\nauthor={Renjie Pi and Jiahui Gao and Shizhe Diao and Rui Pan and Hanze Dong and Jipeng Zhang and Lewei Yao and Jianhua Han and Hang Xu and 
Lingpeng Kong and Tong Zhang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=6lXuQBMsyM}\n}", "github": "", "project": "", "reviewers": "Hwbg;MiAP;5jA8", "site": "https://openreview.net/forum?id=6lXuQBMsyM", "pdf_size": 0, "rating": "4;4;4", "confidence": "5;4;3", "excitement": "3;4;4", "reproducibility": "3;4;3", "correctness": "3;4;4", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 11, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0000-0001-7217-0656;;;;;0000-0003-3645-8972;;0000-0002-5511-2558", "linkedin": ";;;;hanze-dong/;;;;;;", "aff_unique_index": "0;0;0;0;0;1;1;2;0", "aff_unique_norm": "Hong Kong University of Science and Technology;Huawei;University of Hong Kong", "aff_unique_dep": ";Huawei Technologies;Department of Computer Science", "aff_unique_url": "https://www.ust.hk;https://www.huawei.com;https://www.hku.hk", "aff_unique_abbr": "HKUST;Huawei;HKU", "aff_campus_unique_index": "0;0;0;0;0;0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "6mPs06irie", "title": "GlobalBench: A Benchmark for Global Progress in Natural Language Processing", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Despite the major advances in NLP, significant disparities in NLP system performance across languages still exist. Arguably, these are due to uneven resource allocation and sub-optimal incentives to work on less resourced languages. To track and further incentivize the global development of equitable language technology, we introduce GlobalBench. Prior multilingual benchmarks are static and have focused on a limited number of tasks and languages. In contrast, GlobalBench is an ever-expanding collection that aims to dynamically track progress on all NLP datasets in all languages. Rather than solely measuring accuracy, GlobalBench also tracks the estimated per-speaker utility and equity of technology across all languages, providing a multi-faceted view of how language technology is serving people of the world. Furthermore, GlobalBench is designed to identify the most under-served languages, and rewards research efforts directed towards those languages. At present, the most under-served languages are the ones with a relatively high population, but nonetheless overlooked by composite multilingual benchmarks (like Punjabi, Portuguese, and Wu Chinese). 
Currently, GlobalBench covers 966 datasets in 190 languages, and has 1,128 system submissions spanning 62 languages.", "keywords": "Multilingual benchmark;Leaderboard", "primary_area": "", "supplementary_material": "", "author": "Yueqi Song;Simran Khanuja;Pengfei Liu;Fahim Faisal;Alissa Ostapenko;Genta Indra Winata;Alham Fikri Aji;Samuel Cahyawijaya;Yulia Tsvetkov;Antonios Anastasopoulos;Graham Neubig", "authorids": "~Yueqi_Song1;~Simran_Khanuja1;~Pengfei_Liu1;~Fahim_Faisal1;~Alissa_Ostapenko1;~Genta_Indra_Winata1;~Alham_Fikri_Aji1;~Samuel_Cahyawijaya1;~Yulia_Tsvetkov1;~Antonios_Anastasopoulos1;~Graham_Neubig1", "gender": "F;F;M;M;;M;M;M;F;M;M", "homepage": "https://yueqis.github.io/;https://simran-khanuja.github.io/;http://pfliu.com/;;https://ostapen.github.io/;https://gentawinata.com/;;https://samuelcahyawijaya.github.io/;https://homes.cs.washington.edu/~yuliats/;http://www.cs.gmu.edu/~antonis/;http://phontron.com", "dblp": "348/6067;255/5469;34/3381-3;245/7489;259/6258.html;https://dblp.uni-trier.de/pers/hd/w/Winata:Genta_Indra;188/8762;235/2988.html;75/8157;148/9479;03/8155", "google_scholar": "https://scholar.google.com/citations?hl=en;yInhszwAAAAJ;oIz_CYEAAAAJ;4NgtQ2EAAAAJ;;7QxkToIAAAAJ;0Cyfqv4AAAAJ;w5w_WZEAAAAJ;SEDPkrsAAAAJ;g_G_SNAAAAAJ;wlosgkoAAAAJ", "or_profile": "~Yueqi_Song1;~Simran_Khanuja1;~Pengfei_Liu1;~Fahim_Faisal1;~Alissa_Ostapenko1;~Genta_Indra_Winata1;~Alham_Fikri_Aji1;~Samuel_Cahyawijaya1;~Yulia_Tsvetkov1;~Antonios_Anastasopoulos1;~Graham_Neubig1", "aff": "Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University;, George Mason University;;Bloomberg;Amazon;Hong Kong University of Science and Technology;Department of Computer Science, University of Washington;George Mason University;Carnegie Mellon University", "aff_domain": "cmu.edu;andrew.cmu.edu;cmu.edu;cs.gmu.edu;;bloomberg.net;amazon.com;ust.hk;cs.washington.edu;gmu.edu;cmu.edu", "position": "Undergrad student;PhD student;Postdoc;PhD student;;Researcher;Researcher;PhD student;Assistant Professor;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nsong2023globalbench,\ntitle={GlobalBench: A Benchmark for Global Progress in Natural Language Processing},\nauthor={Yueqi Song and Simran Khanuja and Pengfei Liu and Fahim Faisal and Alissa Ostapenko and Genta Indra Winata and Alham Fikri Aji and Samuel Cahyawijaya and Yulia Tsvetkov and Antonios Anastasopoulos and Graham Neubig},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=6mPs06irie}\n}", "github": "", "project": "", "reviewers": "8Lhb;kyHY;bbS1", "site": "https://openreview.net/forum?id=6mPs06irie", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;4;4", "excitement": "4;4;4", "reproducibility": "4;0;4", "correctness": "4;4;3", "rating_avg": 5.0, "confidence_avg": 4.0, "excitement_avg": 4.0, "reproducibility_avg": 2.6666666666666665, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 11, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;;;0000-0002-9891-1608;0000-0002-4634-7128;0000-0002-8544-246X;", "linkedin": "yueqi-song-26664622b;simran-khanuja-6b80b6144/;;;alissaostapenko/;gentaiscool/;;samuelcahyawijaya/;;;", "aff_unique_index": "0;0;0;1;2;3;4;5;1;0", "aff_unique_norm": "Carnegie Mellon University;George Mason University;Bloomberg;Amazon;Hong Kong University of Science and Technology;University of Washington", "aff_unique_dep": ";;;Amazon.com, Inc.;;Department of Computer Science", 
"aff_unique_url": "https://www.cmu.edu;https://www.gmu.edu;https://www.bloomberg.com;https://www.amazon.com;https://www.ust.hk;https://www.washington.edu", "aff_unique_abbr": "CMU;GMU;Bloomberg;Amazon;HKUST;UW", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Hong Kong SAR;Seattle", "aff_country_unique_index": "0;0;0;0;0;0;1;0;0;0", "aff_country_unique": "United States;China" }, { "id": "6mZIF4OxSq", "title": "K-HATERS: A Hate Speech Detection Corpus in Korean with Target-Specific Ratings", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Numerous datasets have been proposed to combat the spread of online hate. Despite these efforts, a majority of these resources are English-centric, primarily focusing on overt forms of hate. This research gap calls for developing high-quality corpora in diverse languages that also encapsulate more subtle hate expressions. This study introduces K-HATERS, a new corpus for hate speech detection in Korean, comprising approximately 192K news comments with target-specific offensiveness ratings. This resource is the largest offensive language corpus in Korean and is the first to offer target-specific ratings on a three-point Likert scale, enabling the detection of hate expressions in Korean across varying degrees of offensiveness. We conduct experiments showing the effectiveness of the proposed corpus, including a comparison with existing datasets. Additionally, to address potential noise and bias in human annotations, we explore a novel idea of adopting the Cognitive Reflection Test, which is widely used in social science for assessing an individual's cognitive ability, as a proxy of labeling quality. Findings indicate that annotations from individuals with the lowest test scores tend to yield detection models that make biased predictions toward specific target groups and are less accurate. This study contributes to the NLP research on hate speech detection and resource construction. 
The code and dataset can be accessed at https://github.com/ssu-humane/K-HATERS.", "keywords": "Hate speech;Offensive language;Dataset construction;Fairness;Explainability", "primary_area": "", "supplementary_material": "", "author": "Chaewon Park;Soohwan Kim;Kyubyong Park;Kunwoo Park", "authorids": "~Chaewon_Park3;~Soohwan_Kim1;~Kyubyong_Park1;~Kunwoo_Park1", "gender": "F;M;M;M", "homepage": "https://github.com/BakChaewon;https://sooftware.io/;https://github.com/kyubyong;https://bywords.github.io", "dblp": ";;;48/6841", "google_scholar": ";;;xiZ1ImoAAAAJ", "or_profile": "~Chaewon_Park3;~Soohwan_Kim1;~Kyubyong_Park1;~Kunwoo_Park1", "aff": "Soongsil University;;;Soongsil University", "aff_domain": "ssu.ac.kr;;;ssu.ac.kr", "position": "Undergrad student;;;Assistant Professor", "bibtex": "@inproceedings{\npark2023khaters,\ntitle={K-{HATERS}: A Hate Speech Detection Corpus in Korean with Target-Specific Ratings},\nauthor={Chaewon Park and Soohwan Kim and Kyubyong Park and Kunwoo Park},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=6mZIF4OxSq}\n}", "github": "", "project": "", "reviewers": "vzY9;dMtf;9cnA", "site": "https://openreview.net/forum?id=6mZIF4OxSq", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;5;4", "excitement": "2;3;4", "reproducibility": "3;3;2", "correctness": "3;3;3", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.0, "reproducibility_avg": 2.6666666666666665, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0000-0003-2913-9711", "linkedin": ";Soo-hwan/;;", "aff_unique_index": "0;0", "aff_unique_norm": "Soongsil University", "aff_unique_dep": "", "aff_unique_url": "https://www.soongsil.ac.kr", "aff_unique_abbr": "SSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "South Korea" }, { "id": "6muz29kMQu", "title": "Multilingual Holistic Bias: Extending Descriptors and Patterns to Unveil Demographic Biases in Languages at Scale", "track": "main", "status": "Long Main", "tldr": "", "abstract": "We introduce a multilingual extension of the HolisticBias dataset, the largest English template-based taxonomy of textual people references: Multilingual HolisticBias. This extension consists of 20,459 sentences in 50 languages distributed across 13 demographic axes. Source sentences are built from combinations of 118 demographic descriptors and three patterns, excluding nonsensical combinations. Multilingual translations include alternatives for gendered languages that cover gendered translations when there is ambiguity in English. Our dataset is intended to uncover demographic imbalances and be the tool to quantify mitigations towards them. Our initial findings show that translation quality for EN-to-XX translations is an average of almost 8 spBLEU better when evaluating with the masculine human reference compared to feminine. In the opposite direction, XX-to-EN, we compare the robustness of the model when the source input only differs in gender (masculine or feminine) and masculine translations are an average of almost 4 spBLEU better than feminine. 
When embedding sentences to a joint multilingual sentence representations space, we find that for most languages masculine translations are significantly closer to the English neutral sentences when embedded.", "keywords": "Multilingual holistic bias;Machine Translation;Sentence Embeddings", "primary_area": "", "supplementary_material": "", "author": "Marta R. Costa-juss\u00e0;Pierre Andrews;Eric Michael Smith;Prangthip Hansanti;Christophe Ropers;Elahe Kalbassi;Cynthia Gao;Daniel Edward Licht;Carleigh Wood", "authorids": "~Marta_R._Costa-juss\u00e01;~Pierre_Andrews1;~Eric_Michael_Smith1;~Prangthip_Hansanti1;~Christophe_Ropers1;~Elahe_Kalbassi1;~Cynthia_Gao1;~Daniel_Edward_Licht1;~Carleigh_Wood1", "gender": "F;M;Non-Binary;;;F;F;M;", "homepage": "https://www.costa-jussa.com;;;;http://www.chrisropers.net;;https://www.linkedin.com/in/cynthiarfgao/;;https://www.linkedin.com/in/carleigh-wood/", "dblp": "17/2183;46/3930;;;324/2505;;;;", "google_scholar": "ESqQ7FoAAAAJ;DiJPt0EAAAAJ;uOK8DfQAAAAJ;;;;;;", "or_profile": "~Marta_R._Costa-juss\u00e01;~Pierre_Andrews1;~Eric_Michael_Smith1;~Prangthip_Hansanti1;~Christophe_Ropers1;~Elahe_Kalbassi1;~Cynthia_Gao1;~Daniel_Edward_Licht1;~Carleigh_Wood1", "aff": "Meta;Meta;Meta AI;;Syntexys Inc;;;Meta [FAIR];", "aff_domain": "fb.com;meta.com;meta.com;;syntexys.com;;;facebook.com;", "position": "Research Scientist;Researcher;Researcher;;Linguist, CRO;;;Researcher;", "bibtex": "@inproceedings{\ncosta-juss{\\`a}2023multilingual,\ntitle={Multilingual Holistic Bias: Extending Descriptors and Patterns to Unveil Demographic Biases in Languages at Scale},\nauthor={Marta R. Costa-juss{\\`a} and Pierre Andrews and Eric Michael Smith and Prangthip Hansanti and Christophe Ropers and Elahe Kalbassi and Cynthia Gao and Daniel Edward Licht and Carleigh Wood},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=6muz29kMQu}\n}", "github": "", "project": "", "reviewers": "seQ7;QhKn;Uryd", "site": "https://openreview.net/forum?id=6muz29kMQu", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;4;4", "excitement": "4;4;4", "reproducibility": "3;4;4", "correctness": "3;3;3", "rating_avg": 5.0, "confidence_avg": 4.0, "excitement_avg": 4.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 9, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0001-6780-7798;;;;;;;", "linkedin": ";pandrews/;;prangthip-hansanti-ba477913/;;ekalbassi;;lichtphyz/;carleigh-wood/", "aff_unique_index": "0;0;0;1;0", "aff_unique_norm": "Meta;Syntexys Inc", "aff_unique_dep": "Meta Platforms, Inc.;", "aff_unique_url": "https://meta.com;", "aff_unique_abbr": "Meta;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States;" }, { "id": "6nLdWdTeos", "title": "Learning Dynamic Representations for Discourse Dependency Parsing", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Transition systems have been widely used for the discourse dependency parsing task. Existing works often characterize transition states by examining a certain number of elementary discourse units (EDUs), while neglecting the arcs obtained from the transition history. In this paper, we propose to employ GAT-based encoder to learn dynamic representations for sub-trees constructed in previous transition steps. 
By incorporating these representations, our model is able to retain accessibility to all parsed EDUs through the obtained arcs, thus better utilizing the structural information of the document, particularly when handling lengthy text spans with complex structures. For the discourse relation recognition task, we employ edge-featured GATs to derive better representations for EDU pairs. Experimental results show that our model can achieve state-of-the-art performance on widely adopted datasets including RST-DT, SciDTB and CDTB. Our code is available at $\\href{https://github.com/lty-lty/Discourse-Dependency-Parsing}{https://github.com/lty-lty/Discourse-Dependency-Parsing}$.", "keywords": "Discourse dependency parsing;Transition systems;Dynamic sub-tree representations;Graph attention networks", "primary_area": "", "supplementary_material": "", "author": "Tianyi Liu;Yansong Feng;Dongyan Zhao", "authorids": "~Tianyi_Liu8;~Yansong_Feng1;~Dongyan_Zhao2", "gender": "M;M;M", "homepage": "https://github.com/lty-lty;https://yansongfeng.github.io/;https://www.wict.pku.edu.cn/zhaodongyan/en/", "dblp": ";25/2643-2.html;63/1870", "google_scholar": ";https://scholar.google.com.tw/citations?user=67qAw_wAAAAJ;lhR8-68AAAAJ", "or_profile": "~Tianyi_Liu8;~Yansong_Feng1;~Dongyan_Zhao2", "aff": "Peking University;Peking University;Peking University", "aff_domain": "pku.edu.cn;pku.edu.cn;pku.edu.cn", "position": "MS student;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nliu2023learning,\ntitle={Learning Dynamic Representations for Discourse Dependency Parsing},\nauthor={Tianyi Liu and Yansong Feng and Dongyan Zhao},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=6nLdWdTeos}\n}", "github": "", "project": "", "reviewers": "pLZG;m6QC;Hmaa", "site": "https://openreview.net/forum?id=6nLdWdTeos", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;3;2", "excitement": "3;4;3", "reproducibility": "3;4;3", "correctness": "3;4;3", "rating_avg": 3.0, "confidence_avg": 2.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": ";;", "aff_unique_index": "0;0;0", "aff_unique_norm": "Peking University", "aff_unique_dep": "", "aff_unique_url": "http://www.pku.edu.cn", "aff_unique_abbr": "Peking U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "6ne78DBkxl", "title": "PaRaDe: Passage Ranking using Demonstrations with LLMs", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "Recent studies show that large language models (LLMs) can be instructed to effectively perform zero-shot passage re-ranking, in which the results of a first stage retrieval method, such as BM25, are rated and reordered to improve relevance. In this work, we improve LLM-based re-ranking by algorithmically selecting few-shot demonstrations to include in the prompt. Our analysis investigates the conditions where demonstrations are most helpful, and shows that adding even one demonstration is significantly beneficial. We propose a novel demonstration selection strategy based on difficulty rather than the commonly used semantic similarity. Furthermore, we find that demonstrations helpful for ranking are also effective at question generation. 
We hope our work will spur more principled research into question generation and passage ranking.", "keywords": "in-context learning;demonstration;query likelihood;re-ranking;question generation", "primary_area": "", "supplementary_material": "", "author": "Andrew Drozdov;Honglei Zhuang;Zhuyun Dai;Zhen Qin;Razieh Rahimi;Xuanhui Wang;Dana Alon;Mohit Iyyer;Andrew McCallum;Donald Metzler;Kai Hui", "authorids": "~Andrew_Drozdov1;~Honglei_Zhuang1;~Zhuyun_Dai1;~Zhen_Qin5;~Razieh_Rahimi1;~Xuanhui_Wang1;~Dana_Alon1;~Mohit_Iyyer1;~Andrew_McCallum1;~Donald_Metzler1;~Kai_Hui1", "gender": "M;M;;M;;M;;M;M;M;M", "homepage": "http://mrdrozdov.github.io;https://hongleizhuang.github.io/;;http://alumni.cs.ucr.edu/~zqin001/;;;;http://cs.umass.edu/~miyyer;http://www.cs.umass.edu/~mccallum;https://research.google/people/DonaldMetzler/;https://khui.github.io/", "dblp": "200/8508;10/9988;148/4531;;;67/2661;136/8637;148/9178;m/AndrewMcCallum;95/2272;37/10077", "google_scholar": "glt2HXQAAAAJ;FxEDj4wAAAAJ;9bbHwJIAAAAJ;Kv1yk3YAAAAJ;;;0WEF4fkAAAAJ;rBVA5tcAAAAJ;yILa1y0AAAAJ;bmXpOd8AAAAJ;VorTj3AAAAAJ", "or_profile": "~Andrew_Drozdov1;~Honglei_Zhuang1;~Zhuyun_Dai1;~Zhen_Qin5;~Razieh_Rahimi1;~Xuanhui_Wang1;~Dana_Alon1;~Mohit_Iyyer1;~Andrew_McCallum1;~Donald_Metzler1;~Kai_Hui1", "aff": "Department of Computer Science, University of Massachusetts, Amherst;Google DeepMind;Google;Google Deepmind;;Google;Research, Google;University of Massachusetts Amherst;University of Massachusetts Amherst;Google;Google", "aff_domain": "cs.umass.edu;google.com;google.com;google.com;;google.com;research.google.com;cs.umass.edu;cs.umass.edu;google.com;google.com", "position": "PhD student;Research Scientist;Researcher;Researcher;;Software Engineer;Researcher;Assistant Professor;Distinguished Professor;Research Scientist;Software Engineer", "bibtex": "@inproceedings{\ndrozdov2023parade,\ntitle={PaRaDe: Passage Ranking using Demonstrations with {LLM}s},\nauthor={Andrew Drozdov and Honglei Zhuang and Zhuyun Dai and Zhen Qin and Razieh Rahimi and Xuanhui Wang and Dana Alon and Mohit Iyyer and Andrew McCallum and Donald Metzler and Kai Hui},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=6ne78DBkxl}\n}", "github": "", "project": "", "reviewers": "E2cL;99xq;auEx", "site": "https://openreview.net/forum?id=6ne78DBkxl", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;4;3", "excitement": "3;3;3", "reproducibility": "3;3;4", "correctness": "4;3;2", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.0, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.0, "replies_avg": 10, "authors#_avg": 11, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-1025-5715;0000-0001-8134-1509;;0000-0001-6739-134X;;;;;0009-0004-5487-2848;0000-0003-4276-6269;0000-0002-3110-7404", "linkedin": ";;;;;;dana-alon;;andrew-mccallum-a412;donmetzler/;", "aff_unique_index": "0;1;1;2;1;1;0;0;1;1", "aff_unique_norm": "University of Massachusetts Amherst;Google;DeepMind", "aff_unique_dep": "Department of Computer Science;Google DeepMind;DeepMind", "aff_unique_url": "https://www.umass.edu;https://deepmind.com;https://deepmind.com", "aff_unique_abbr": "UMass Amherst;DeepMind;DeepMind", "aff_campus_unique_index": "0;2;2;2;0;0;2;2", "aff_campus_unique": "Amherst;;Mountain View", "aff_country_unique_index": "0;1;0;1;0;0;0;0;0;0", "aff_country_unique": "United States;United Kingdom" }, { "id": "6pPCKWzYw4", "title": "Code-Switching 
Metrics Using Intonation Units", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Code-switching (CS) metrics in NLP that are based on word-level units are misaligned with true bilingual CS behavior. Crucially, CS is not equally likely between any two words, but follows syntactic and prosodic rules. We adapt two metrics, multilinguality and CS probability, and apply them to transcribed bilingual speech, for the first time putting forward Intonation Units (IUs) \u2013 prosodic speech segments \u2013 as basic tokens for NLP tasks. In addition, we calculate these two metrics separately for distinct mixing types: alternating-language multi-word strings and single-word incorporations from one language into another. Results indicate that individual differences according to the two CS metrics are independent. However, there is a shared tendency among bilinguals for multi-word CS to occur across, rather than within, IU boundaries. That is, bilinguals tend to prosodically separate their two languages. This constraint is blurred when metric calculations do not distinguish multi-word and single-word items. These results call for a reconsideration of units of analysis in future development of CS datasets for NLP tasks.", "keywords": "Computationally-aided linguistic analysis;Linguistic Diversity;Multilingualism and Cross-Lingual NLP;Spanish-English Code-Switching", "primary_area": "", "supplementary_material": "", "author": "Rebecca Pattichis;Dora LaCasse;Sonya Mitrovich Trawick;Rena Torres Cacoullos", "authorids": "~Rebecca_Pattichis1;~Dora_LaCasse1;~Sonya_Mitrovich_Trawick1;~Rena_Torres_Cacoullos1", "gender": "F;Not Specified;F;", "homepage": "https://rpattichis.github.io/;https://www.umt.edu/world-languages-culture/spanish/faculty.php?ID=6260;https://www.linkedin.com/in/sonya-trawick;https://sites.psu.edu/rct11/", "dblp": ";;;", "google_scholar": ";;;", "or_profile": "~Rebecca_Pattichis1;~Dora_LaCasse1;~Sonya_Mitrovich_Trawick1;~Rena_Torres_Cacoullos1", "aff": "University of California, Los Angeles;University of Montana;;Pennsylvania State University", "aff_domain": "ucla.edu;umt.edu;;psu.edu", "position": "MS student;Assistant Professor;;Full Professor", "bibtex": "@inproceedings{\npattichis2023codeswitching,\ntitle={Code-Switching Metrics Using Intonation Units},\nauthor={Rebecca Pattichis and Dora LaCasse and Sonya Mitrovich Trawick and Rena Torres Cacoullos},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=6pPCKWzYw4}\n}", "github": "", "project": "", "reviewers": "AZWa;VWCM;CGqL", "site": "https://openreview.net/forum?id=6pPCKWzYw4", "pdf_size": 0, "rating": "5;5;5", "confidence": "5;4;4", "excitement": "3;3;4", "reproducibility": "2;5;4", "correctness": "3;3;4", "rating_avg": 5.0, "confidence_avg": 4.333333333333333, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0000-0001-6130-0371", "linkedin": "rebecca-pattichis/;;;", "aff_unique_index": "0;1;2", "aff_unique_norm": "University of California, Los Angeles;University of Montana;Pennsylvania State University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ucla.edu;https://www.umt.edu;https://www.psu.edu", "aff_unique_abbr": "UCLA;UM;PSU", "aff_campus_unique_index": "0", "aff_campus_unique": "Los Angeles;", "aff_country_unique_index": "0;0;0", 
"aff_country_unique": "United States" }, { "id": "6srsYdjLnV", "title": "Hi Guys or Hi Folks? Benchmarking Gender-Neutral Machine Translation with the GeNTE Corpus", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Gender inequality is embedded in our communication practices and perpetuated in translation technologies. This becomes particularly apparent when translating into grammatical gender languages, where machine translation (MT) often defaults to masculine and stereotypical representations by making undue binary gender assumptions. Our work addresses the rising demand for inclusive language by focusing head-on on gender-neutral translation from English to Italian. We start from the essentials: proposing a dedicated benchmark and exploring automated evaluation methods. First, we introduce GeNTE, a natural, bilingual test set for gender-neutral translation, whose creation was informed by a survey on the perception and use of neutral language. Based on GeNTE, we then overview existing reference-based evaluation approaches, highlight their limits, and propose a reference-free method more suitable to assess gender-neutral translation.", "keywords": "inclusivity;machine translation;gender;non-binary;evaluation;benchmark", "primary_area": "", "supplementary_material": "", "author": "Andrea Piergentili;Beatrice Savoldi;Dennis Fucci;Matteo Negri;Luisa Bentivogli", "authorids": "~Andrea_Piergentili1;~Beatrice_Savoldi2;~Dennis_Fucci1;~Matteo_Negri1;~Luisa_Bentivogli1", "gender": ";M;M;F;F", "homepage": "https://ict.fbk.eu/people/detail/andrea-piergentili/;;https://ict.fbk.eu/people/detail/matteo-negri/;https://mt.fbk.eu/author/bentivogli/;https://ict.fbk.eu/people/detail/beatrice-savoldi/", "dblp": "338/6224;319/9730;95/3678;50/1445;267/2355", "google_scholar": "SceVs8kAAAAJ;https://scholar.google.com/citations?hl=en;NTTQbJsAAAAJ;https://scholar.google.com/citations?hl=en;r4XNIh0AAAAJ", "or_profile": "~Andrea_Piergentili1;~Dennis_Fucci1;~Matteo_Negri1;~Luisa_Bentivogli1;~beatrice_savoldi1", "aff": "Fondazione Bruno Kessler;University of Trento;Fondazione Bruno Kessler;Fondazione Bruno Kessler;Fondazione Bruno Kessler", "aff_domain": "fbk.eu;unitn.it;fbk.eu;fbk.eu;fbk.eu", "position": "PhD student;PhD student;Senior researcher;Researcher;Postdoc", "bibtex": "@inproceedings{\npiergentili2023hi,\ntitle={Hi Guys or Hi Folks? 
Benchmarking Gender-Neutral Machine Translation with the Ge{NTE} Corpus},\nauthor={Andrea Piergentili and Beatrice Savoldi and Dennis Fucci and Matteo Negri and Luisa Bentivogli},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=6srsYdjLnV}\n}", "github": "", "project": "", "reviewers": "sWdu;VboY;qfqY;yjUc", "site": "https://openreview.net/forum?id=6srsYdjLnV", "pdf_size": 0, "rating": "5;5;5;5", "confidence": "3;4;4;3", "excitement": "4;3;4;4", "reproducibility": "4;3;3;3", "correctness": "4;3;4;4", "rating_avg": 5.0, "confidence_avg": 3.5, "excitement_avg": 3.75, "reproducibility_avg": 3.25, "correctness_avg": 3.75, "replies_avg": 14, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0003-2117-1338;0000-0002-0940-5595;0000-0002-8811-4330;0000-0001-7480-2231;0000-0002-3061-8317", "linkedin": ";;negrimatteo/;luisa-bentivogli-89577587/;beatrice-savoldi-67a851101/", "aff_unique_index": "0;1;0;0;0", "aff_unique_norm": "Fondazione Bruno Kessler;University of Trento", "aff_unique_dep": ";", "aff_unique_url": "https://www.fbk.eu;https://www.unitn.it", "aff_unique_abbr": "FBK;UniTN", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "Italy" }, { "id": "6tW1WEHIJe", "title": "Is the Answer in the Text? Challenging ChatGPT with Evidence Retrieval from Instructive Text", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "Generative language models have recently shown remarkable success in generating answers to questions in a given textual context. \nHowever, these answers may suffer from hallucination, wrongly cite evidence, and spread misleading information.\nIn this work, we address this problem by employing ChatGPT, a state-of-the-art generative model, as a machine-reading system.\nWe ask it to retrieve answers to lexically varied and open-ended questions from trustworthy instructive texts.\n\nWe introduce WHERE (WikiHow Evidence REtrieval), a new high-quality evaluation benchmark of a set of WikiHow articles exhaustively annotated with evidence sentences to questions that comes with a special challenge: All questions are about the article's topic, but not all can be answered using the provided context.\nWe interestingly find that when using a regular question-answering prompt, ChatGPT neglects to detect the unanswerable cases.\nWhen provided with a few examples, it learns to better judge whether a text provides answer evidence or not.\nAlongside this important finding, our dataset defines a new benchmark for evidence retrieval in question answering, which we argue is one of the necessary next steps for making large language models more trustworthy.", "keywords": "question answering;hallucination;evidence retrieval;dataset creation;generative language models;chatgpt", "primary_area": "", "supplementary_material": "", "author": "Sophie Henning;Talita Anthonio;Wei Zhou;Heike Adel;Mohsen Mesgar;Annemarie Friedrich", "authorids": "~Sophie_Henning1;~Talita_Anthonio1;~Wei_Zhou13;~Heike_Adel1;~Mohsen_Mesgar1;~Annemarie_Friedrich2", "gender": "F;F;F;F;M;F", "homepage": ";https://talitaanthonio.github.io/;;https://sites.google.com/view/heikeadel;https://mohsen-mesgar.io;https://annefried.github.io", "dblp": "222/3050;;;132/6980;140/3476;126/8745", "google_scholar": 
";;;https://scholar.google.de/citations?user=Fejbq9kAAAAJ;vKwpx9gAAAAJ;https://scholar.google.de/citations?user=8CVIK-UAAAAJ", "or_profile": "~Sophie_Henning1;~Talita_Anthonio1;~Wei_Zhou13;~Heike_Adel1;~Mohsen_Mesgar1;~Annemarie_Friedrich2", "aff": "Bosch Center for Artificial Intelligence;University of Stuttgart, Universit\u00e4t Stuttgart;Universit\u00e4t Stuttgart;Robert Bosch GmbH, Bosch;Bosch;Bosch Center for Artificial Intelligence", "aff_domain": "bosch.com;ims.uni-stuttgart.de;uni-stuttgart.de;de.bosch.com;bosch.com;bosch.com", "position": "PhD student;PhD student;MS student;Research scientist;Researcher;Researcher", "bibtex": "@inproceedings{\nhenning2023is,\ntitle={Is the Answer in the Text? Challenging Chat{GPT} with Evidence Retrieval from Instructive Text},\nauthor={Sophie Henning and Talita Anthonio and Wei Zhou and Heike Adel and Mohsen Mesgar and Annemarie Friedrich},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=6tW1WEHIJe}\n}", "github": "", "project": "", "reviewers": "zJHM;cu6q;Vngq", "site": "https://openreview.net/forum?id=6tW1WEHIJe", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;3;4", "excitement": "3;3;2", "reproducibility": "3;3;4", "correctness": "3;3;3", "rating_avg": 3.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 2.6666666666666665, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0009-0006-8617-8337;;;0000-0001-8771-7634", "linkedin": ";;%E7%BB%B4-%E5%91%A8-12518b186/;;mohsen-mesgar/;annemariefriedrich/", "aff_unique_index": "0;1;1;2;2;0", "aff_unique_norm": "Bosch Center for Artificial Intelligence;University of Stuttgart;Robert Bosch GmbH", "aff_unique_dep": "Center for Artificial Intelligence;;", "aff_unique_url": "https://www.bosch-ai.com;https://www.uni-stuttgart.de;https://www.bosch.com", "aff_unique_abbr": "BCAI;Uni Stuttgart;Bosch", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "Germany" }, { "id": "6wj8Xczqkn", "title": "INarIG: Iterative Non-autoregressive Instruct Generation Model For Word-Level Auto Completion", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Computer-aided translation (CAT) aims to enhance human translation efficiency and is still important in scenarios where machine translation cannot meet quality requirements. One fundamental task within this field is Word-Level Auto Completion (WLAC). WLAC predicts a target word given a source sentence, translation context, and a human typed character sequence. Previous works either employ word classification models to exploit contextual information from both sides of the target word or directly disregarded the dependencies from the right-side context. Furthermore, the key information, i.e. human typed sequences, is only used as prefix constraints in the decoding module. In this paper, we propose the INarIG (Iterative Non-autoregressive Instruct Generation) model, which constructs the human typed sequence into Instruction Unit and employs iterative decoding with subwords to fully utilize input information given in the task. 
Our model is more competent in dealing with low-frequency words (core scenario of this task), and achieves state-of-the-art results on the WMT22 and benchmark datasets, with a maximum increase of over 10\\% prediction accuracy.", "keywords": "Word-Level Auto Completion;Computer-Aided Translation", "primary_area": "", "supplementary_material": "", "author": "Hengchao Shang;Zongyao Li;Daimeng Wei;Jiaxin GUO;Minghan Wang;Xiaoyu Chen;lizhi Lei;Hao Yang", "authorids": "~Hengchao_Shang1;~Zongyao_Li1;~Daimeng_Wei1;~Jiaxin_GUO1;~Minghan_Wang1;~Xiaoyu_Chen5;~lizhi_Lei1;~Hao_Yang7", "gender": "M;M;M;M;M;;;M", "homepage": ";https://github.com/lizongyao123;;;;;;https://github.com/yanghaocsg", "dblp": "268/1964.html;244/7588-1;166/0470.html;189/7455;228/4495;;;54/4089-7", "google_scholar": "BkhK0BMAAAAJ;https://scholar.google.com/citations?hl=en;v5eYxNUAAAAJ;RLPmDoUAAAAJ;F6nm6awAAAAJ;https://scholar.google.com/citations?hl=en;;lOsjM5sAAAAJ", "or_profile": "~Hengchao_Shang1;~Zongyao_Li1;~Daimeng_Wei1;~Jiaxin_GUO1;~Minghan_Wang1;~Xiaoyu_Chen5;~lizhi_Lei1;~Hao_Yang7", "aff": ";Huawei Technologies Ltd.;Huawei Technologies Ltd.;Huawei Technologies Ltd.;Huawei Technologies Ltd.;Huawei Technologies Ltd.;;Huawei Technologies Ltd.", "aff_domain": ";huawei.com;huawei.com;huawei.com;huawei.com;huawei.com;;huawei.com", "position": ";Researcher;Researcher;Researcher;Researcher;Researcher;;Principal Researcher", "bibtex": "@inproceedings{\nshang2023inarig,\ntitle={{IN}ar{IG}: Iterative Non-autoregressive Instruct Generation Model For Word-Level Auto Completion},\nauthor={Hengchao Shang and Zongyao Li and Daimeng Wei and Jiaxin GUO and Minghan Wang and Xiaoyu Chen and lizhi Lei and Hao Yang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=6wj8Xczqkn}\n}", "github": "", "project": "", "reviewers": "Tsoo;4ENL;DbUp", "site": "https://openreview.net/forum?id=6wj8Xczqkn", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;3;4", "excitement": "4;2;3", "reproducibility": "4;3;3", "correctness": "3;2;3", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.0, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 2.6666666666666665, "replies_avg": 11, "authors#_avg": 8, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0001-7196-2782;;;;;;0000-0001-8861-7010", "linkedin": ";%E5%AE%97%E8%80%80-%E6%9D%8E-769a05104/;;;;;;", "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "Huawei", "aff_unique_dep": "Huawei Technologies", "aff_unique_url": "https://www.huawei.com", "aff_unique_abbr": "Huawei", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "6zSuMMtUjO", "title": "IntenDD: A Unified Contrastive Learning Approach for Intent Detection and Discovery", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Identifying intents from dialogue utterances forms an integral component of task-oriented dialogue systems. Intent-related tasks are typically formulated either as a classification task, where the utterances are classified into predefined categories or as a clustering task when new and previously unknown intent categories need to be discovered from these utterances. Further, the intent classification may be modeled in a multiclass (MC) or multilabel (ML) setup. 
While typically these tasks are modeled as separate tasks, we propose IntenDD a unified approach leveraging a shared utterance encoding backbone. IntenDD uses an entirely unsupervised contrastive learning strategy for representation learning, where pseudo-labels for the unlabeled utterances are generated based on their lexical features. Additionally, we introduce a two-step post-processing setup for the classification tasks using modified adsorption. Here, first, the residuals in the training data are propagated followed by smoothing the labels both modeled in a transductive setting. Through extensive evaluations on various benchmark datasets, we find that our approach consistently outperforms competitive baselines across all three tasks. On average, IntenDD reports percentage improvements of 2.32 %, 1.26 %, and 1.52 % in their respective metrics for few-shot MC, few-shot ML, and the intent discovery tasks respectively.", "keywords": "Intent Discovery;Intent Detection;Contrastive Learning;Modified Adsorption;Label Propagation", "primary_area": "", "supplementary_material": "", "author": "Bhavuk Singhal;Ashim Gupta;Shivasankaran V P;Amrith Krishna", "authorids": "~Bhavuk_Singhal1;~Ashim_Gupta1;~Shivasankaran_V_P1;~Amrith_Krishna1", "gender": "M;M;M;M", "homepage": ";https://ashim95.github.io/;https://shiva-sankaran.github.io/;http://www.amrith.tech", "dblp": ";238/6405;;160/4306", "google_scholar": "s4rtlKcAAAAJ;0ymeLxQAAAAJ;;https://scholar.google.co.in/citations?user=45Ju3DgAAAAJ", "or_profile": "~Bhavuk_Singhal1;~Ashim_Gupta1;~Shivasankaran_V_P1;~Amrith_Krishna1", "aff": "Uniphore;University of Utah;Indian Institute of Technology, Gandhinagar;Uniphore Inc", "aff_domain": "uniphore.com;utah.edu;iitgn.ac.in;uniphore.com", "position": "Associate AI Scientist;PhD student;Undergrad student;Staff NLP Scientist", "bibtex": "@inproceedings{\nsinghal2023intendd,\ntitle={Inten{DD}: A Unified Contrastive Learning Approach for Intent Detection and Discovery},\nauthor={Bhavuk Singhal and Ashim Gupta and Shivasankaran V P and Amrith Krishna},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=6zSuMMtUjO}\n}", "github": "", "project": "", "reviewers": "WzPH;DQWw;MW7X;cGUN", "site": "https://openreview.net/forum?id=6zSuMMtUjO", "pdf_size": 0, "rating": "3;3;3;3", "confidence": "3;5;5;3", "excitement": "4;3;3;2", "reproducibility": "3;3;3;2", "correctness": "3;3;3;3", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 3.0, "reproducibility_avg": 2.75, "correctness_avg": 3.0, "replies_avg": 14, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;", "linkedin": "bhavuk0909/?trk=opento_sprofile_goalscard;;;amrith-krishna-53183b15/", "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Uniphore Software Systems;University of Utah;Indian Institute of Technology Gandhinagar;Uniphore Inc", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.uniphore.com;https://www.utah.edu;https://www.iitgn.ac.in;https://www.uniphore.com", "aff_unique_abbr": "Uniphore;Utah;IITGN;", "aff_campus_unique_index": "1", "aff_campus_unique": ";Gandhinagar", "aff_country_unique_index": "0;1;0;1", "aff_country_unique": "India;United States" }, { "id": "71Lz8HW3NE", "title": "Addressing NER Annotation Noises with Uncertainty-Guided Tree-Structured CRFs", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Real-world named entity recognition (NER) datasets are notorious for their noisy 
nature, attributed to annotation errors, inconsistencies, and subjective interpretations. Such noises present a substantial challenge for traditional supervised learning methods.\nIn this paper, we present a new and unified approach to tackle annotation noises for NER. Our method considers NER as a constituency tree parsing problem, utilizing a tree-structured Conditional Random Fields (CRFs) with uncertainty evaluation for integration.\nThrough extensive experiments conducted on four real-world datasets, we demonstrate the effectiveness of our model in addressing both partial and incorrect annotation errors. Remarkably, our model exhibits superb performance even in extreme scenarios with 90\\% annotation noise.", "keywords": "Named Entity Recognition (NER);Partial and Incorrect Annotation;Uncertainty;constituency tree parsing", "primary_area": "", "supplementary_material": "", "author": "Jian Liu;Weichang Liu;Yufeng Chen;Jinan Xu;Zhe Zhao", "authorids": "~Jian_Liu7;~Weichang_Liu1;~Yufeng_Chen1;~Jinan_Xu1;~Zhe_Zhao1", "gender": "M;;F;M;M", "homepage": "http://jianliu-ml.github.io;https://github.com/feili583;;;http://faculty.bjtu.edu.cn/8300/", "dblp": ";;64/5715;28/6429-6.html;67/3124", "google_scholar": "https://scholar.google.de/citations?hl=en;;;https://scholar.google.com.hk/citations?hl=zh-CN;wMuW0W4AAAAJ", "or_profile": "~Jian_Liu7;~Weichang_Liu1;~Yufeng_Chen1;~Zhe_Zhao1;~Xu_Jinan1", "aff": "Beijing Jiaotong University;Beijing Jiaotong University;Beijing jiaotong univercity;Tencent AI Lab;Beijing Jiaotong University", "aff_domain": "bjtu.edu.cn;bjtu.edu.cn;bjtu.edu.cn;tencent.com;bjtu.edu.cn", "position": "Lecturer;MS student;Assistant Professor;Researcher;Full Professor", "bibtex": "@inproceedings{\nliu2023addressing,\ntitle={Addressing {NER} Annotation Noises with Uncertainty-Guided Tree-Structured {CRF}s},\nauthor={Jian Liu and Weichang Liu and Yufeng Chen and Jinan Xu and Zhe Zhao},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=71Lz8HW3NE}\n}", "github": "", "project": "", "reviewers": "hCvH;TyYs;LrZJ", "site": "https://openreview.net/forum?id=71Lz8HW3NE", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;3;4", "excitement": "3;4;4", "reproducibility": "3;2;4", "correctness": "3;3;4", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;", "linkedin": ";;;;jinan-xu-3544b137/", "aff_unique_index": "0;0;0;1;0", "aff_unique_norm": "Beijing Jiao Tong University;Tencent", "aff_unique_dep": ";Tencent AI Lab", "aff_unique_url": "http://www.njtu.edu.cn/en;https://ai.tencent.com", "aff_unique_abbr": "BJTU;Tencent AI Lab", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "73kjtIZ4pt", "title": "TELeR: A General Taxonomy of LLM Prompts for Benchmarking Complex Tasks", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "While LLMs have shown great success in understanding and generating text in traditional conversational settings, their potential for performing ill-defined complex tasks is largely under-studied and yet to be benchmarked. 
However, conducting such benchmarking studies is challenging because of the large variations in LLMs' performance when different prompt types/styles are used and different degrees of detail are provided in the prompts. To address this issue, this paper proposes a general taxonomy that can be used to design prompts with specific properties in order to perform a wide range of complex tasks. This taxonomy will allow future benchmarking studies to report the specific categories of prompts used as part of the study, enabling meaningful comparisons across different studies. Also, by establishing a common standard through this taxonomy, researchers will be able to draw more accurate conclusions about LLMs' performance on a specific complex task.", "keywords": "Large Language Models;Prompt Engineering;Prompt Taxonomy;Benchmarking", "primary_area": "", "supplementary_material": "", "author": "Shubhra Kanti Karmaker Santu;Dongji Feng", "authorids": "~Shubhra_Kanti_Karmaker_Santu2;~Dongji_Feng1", "gender": "M;M", "homepage": "https://dzf0023.github.io/;https://karmake2.github.io/", "dblp": "329/5925;", "google_scholar": "QPIRdRMAAAAJ;y6pZKT4AAAAJ", "or_profile": "~Dongji_Feng1;~Shubhra_Kanti_Karmaker1", "aff": "Auburn University;Auburn University", "aff_domain": "auburn.edu;auburn.edu", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\nsantu2023teler,\ntitle={{TEL}eR: A General Taxonomy of {LLM} Prompts for Benchmarking Complex Tasks},\nauthor={Shubhra Kanti Karmaker Santu and Dongji Feng},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=73kjtIZ4pt}\n}", "github": "", "project": "", "reviewers": "SYHs;YfVr;gD5n", "site": "https://openreview.net/forum?id=73kjtIZ4pt", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;4;4", "excitement": "2;2;2", "reproducibility": "", "correctness": "3;3;3", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 2.0, "reproducibility_avg": 0, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-2470-4825;0000-0001-5744-6925", "linkedin": "dongjifeng/;shubhra-kanti-karmaker-676893a4/", "aff_unique_index": "0;0", "aff_unique_norm": "Auburn University", "aff_unique_dep": "", "aff_unique_url": "https://www.auburn.edu", "aff_unique_abbr": "Auburn", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "77h6pSkw4N", "title": "DocSplit: Simple Contrastive Pretraining for Large Document Embeddings", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "Existing model pretraining methods only consider local information. For example, in the popular token masking strategy, the words closer to the masked token are more important for prediction than words far away. This results in pretrained models that generate high-quality sentence embeddings, but low-quality embeddings for large documents. We propose a new pretraining method called DocSplit which forces models to consider the entire global context of a large document. Our method uses a contrastive loss where the positive examples are randomly sampled sections of the input document, and negative examples are randomly sampled sections of unrelated documents. Like previous pretraining methods, DocSplit is fully unsupervised, easy to implement, and can be used to pretrain any model architecture. 
Our experiments show that DocSplit outperforms other pretraining methods for document classification, few shot learning, and information retrieval tasks.", "keywords": "Natural Language Processing; Machine Learning;Text Embeddings", "primary_area": "", "supplementary_material": "", "author": "Yujie Wang;Mike Izbicki", "authorids": "~Yujie_Wang6;~Mike_Izbicki1", "gender": "F;M", "homepage": ";https://izbicki.me", "dblp": ";164/7267", "google_scholar": "tD2vNacAAAAJ;", "or_profile": "~Yujie_Wang6;~Mike_Izbicki1", "aff": "Claremont Graduate University;", "aff_domain": "cgu.edu;", "position": "PhD student;", "bibtex": "@inproceedings{\nwang2023docsplit,\ntitle={DocSplit: Simple Contrastive Pretraining for Large Document Embeddings},\nauthor={Yujie Wang and Mike Izbicki},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=77h6pSkw4N}\n}", "github": "", "project": "", "reviewers": "c7xp;7Pji;yVuF", "site": "https://openreview.net/forum?id=77h6pSkw4N", "pdf_size": 0, "rating": "2;2;2", "confidence": "3;3;5", "excitement": "3;4;3", "reproducibility": "4;3;4", "correctness": "4;3;3", "rating_avg": 2.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 10, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";", "linkedin": ";", "aff_unique_index": "0", "aff_unique_norm": "Claremont Graduate University", "aff_unique_dep": "", "aff_unique_url": "https://www.cgu.edu", "aff_unique_abbr": "CGU", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "id": "7CTp2gwqin", "title": "TLM: Token-Level Masking for Transformers", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Structured dropout approaches, such as attention dropout and DropHead, have been investigated to regularize the multi-head attention mechanism in Transformers. In this paper, we propose a new regularization scheme based on token-level rather than structure-level to reduce overfitting. Specifically, we devise a novel Token-Level Masking (TLM) training strategy for Transformers to regularize the connections of self-attention, which consists of two masking techniques that are effective and easy to implement. The underlying idea is to manipulate the connections between tokens in the multi-head attention via masking, where the networks are forced to exploit partial neighbors\u2019 information to produce a meaningful representation. The generality and effectiveness of TLM are thoroughly evaluated via extensive experiments on 4 diversified NLP tasks across 18 datasets, including natural language understanding benchmark GLUE, ChineseGLUE, Chinese Grammatical Error Correction, and data-to-text generation. The results indicate that TLM can consistently outperform attention dropout and DropHead, e.g., it increases by 0.5 points relative to DropHead with BERT-large on GLUE. Moreover, TLM can establish a new record on the data-to-text benchmark Rotowire (18.93 BLEU). 
Our code will be publicly available at https://github.com/Young1993/tlm.", "keywords": "Token-Level;Masking;Transformers;Overfitting", "primary_area": "", "supplementary_material": "", "author": "Yangjun Wu;Kebin Fang;Dongxiang Zhang;Han Wang;Hao Zhang;Gang Chen", "authorids": "~Yangjun_Wu1;~Kebin_Fang1;~Dongxiang_Zhang2;~Han_Wang27;~Hao_Zhang65;~Gang_Chen6", "gender": "M;M;M;M;M;M", "homepage": ";https://dblp.org/pid/317/1382.html;https://person.zju.edu.cn/zhangdongxiang;;https://www.linkedin.com/in/hao-zhang-a2a28627b/;", "dblp": "304/5895.html;317/1382.html;89/6013;;;67/6383-1", "google_scholar": ";;nYN9A3IAAAAJ;https://scholar.google.com/citations?hl=zh-CN;;", "or_profile": "~Yangjun_Wu1;~Kebin_Fang1;~Dongxiang_Zhang2;~Han_Wang27;~Hao_Zhang65;~Gang_Chen6", "aff": ";;Zhejiang University;Zhejiang University;University of Sussex;College of Computer Science and Technology, Zhejiang University", "aff_domain": ";;zju.edu.cn;zju.edu.cn;sussex.ac.uk;cs.zju.edu.cn", "position": ";;Researcher;MS student;PhD student;Full Professor", "bibtex": "@inproceedings{\nwu2023tlm,\ntitle={{TLM}: Token-Level Masking for Transformers},\nauthor={Yangjun Wu and Kebin Fang and Dongxiang Zhang and Han Wang and Hao Zhang and Gang Chen},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=7CTp2gwqin}\n}", "github": "", "project": "", "reviewers": "NQeB;2Lox;Lwg8;e4DL", "site": "https://openreview.net/forum?id=7CTp2gwqin", "pdf_size": 0, "rating": "5;5;5;5", "confidence": "3;4;5;3", "excitement": "3;3;4;4", "reproducibility": "3;4;4;4", "correctness": "3;3;4;4", "rating_avg": 5.0, "confidence_avg": 3.75, "excitement_avg": 3.5, "reproducibility_avg": 3.75, "correctness_avg": 3.5, "replies_avg": 14, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;0000-0002-7483-0045", "linkedin": ";;;;;", "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Zhejiang University;University of Sussex", "aff_unique_dep": ";", "aff_unique_url": "https://www.zju.edu.cn;https://www.sussex.ac.uk", "aff_unique_abbr": "ZJU;Sussex", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "China;United Kingdom" }, { "id": "7D4TPisEBk", "title": "Selective Demonstrations for Cross-domain Text-to-SQL", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Large language models (LLMs) with in-context learning have demonstrated impressive generalization capabilities in the cross-domain text-to-SQL task, without the use of in-domain annotations. However, incorporating in-domain demonstration examples has been found to greatly enhance LLMs' performance. In this paper, we delve into the key factors within in-domain examples that contribute to the improvement and explore whether we can harness these benefits without relying on in-domain annotations. Based on our findings, we propose a demonstration selection framework, ODIS, which utilizes both out-of-domain examples and synthetically generated in-domain examples to construct demonstrations. By retrieving demonstrations from hybrid sources, ODIS leverages the advantages of both, showcasing its effectiveness compared to baseline methods that rely on a single data source. 
Furthermore, ODIS outperforms state-of-the-art approaches on two cross-domain text-to-SQL datasets, with improvements of 1.1 and 11.8 points in execution accuracy, respectively.", "keywords": "text-to-SQL;semantic parsing;in-context learning", "primary_area": "", "supplementary_material": "", "author": "Shuaichen Chang;Eric Fosler-Lussier", "authorids": "~Shuaichen_Chang1;~Eric_Fosler-Lussier1", "gender": "M;M", "homepage": "https://shuaichenchang.github.io/;http://web.cse.ohio-state.edu/~fosler/", "dblp": "230/4596;80/6326", "google_scholar": "https://scholar.google.com/citations?hl=en;https://scholar.google.com.tw/citations?user=AlsMV98AAAAJ", "or_profile": "~Shuaichen_Chang1;~Eric_Fosler-Lussier1", "aff": "Ohio State University;Ohio State University", "aff_domain": "osu.edu;osu.edu", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\nchang2023selective,\ntitle={Selective Demonstrations for Cross-domain Text-to-{SQL}},\nauthor={Shuaichen Chang and Eric Fosler-Lussier},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=7D4TPisEBk}\n}", "github": "", "project": "", "reviewers": "E6z7;TqYu;cHQZ", "site": "https://openreview.net/forum?id=7D4TPisEBk", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;4;3", "excitement": "3;3;3", "reproducibility": "4;3;5", "correctness": "4;3;3", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.0, "reproducibility_avg": 4.0, "correctness_avg": 3.3333333333333335, "replies_avg": 10, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0001-8004-5169", "linkedin": ";eric-fosler-lussier-8b394210/", "aff_unique_index": "0;0", "aff_unique_norm": "Ohio State University", "aff_unique_dep": "", "aff_unique_url": "https://www.osu.edu", "aff_unique_abbr": "OSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "7DueCuvmgM", "title": "Incorporating Structured Representations into Pretrained Vision \\& Language Models Using Scene Graphs", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Vision and language models (VLMs) have demonstrated remarkable zero-shot (ZS) performance in a variety of tasks. However, recent works have shown that even the best VLMs struggle to capture aspects of compositional scene understanding, such as object attributes, relations, and action states. In contrast, obtaining structured annotations, such as scene graphs (SGs), that could improve these models is time-consuming and costly, and thus cannot be used on a large scale. Here we ask whether small SG datasets can provide sufficient information for enhancing structured understanding of pretrained VLMs. We show that it is indeed possible to improve VLMs when learning from SGs by integrating components that incorporate structured information into both visual and textual representations. For the visual side, we incorporate a special ''SG Component'' in the image transformer trained to predict SG information, while for the textual side, we utilize SGs to generate fine-grained captions that highlight different compositional aspects of the scene. 
Our method improves the performance of several popular VLMs on multiple VL datasets with only a mild degradation in ZS capabilities.", "keywords": "Vision and language models; Scene graphs; Visio-linguistic compositionality", "primary_area": "", "supplementary_material": "", "author": "Roei Herzig;Alon Mendelson;Leonid Karlinsky;Assaf Arbelle;Rogerio Feris;Trevor Darrell;Amir Globerson", "authorids": "~Roei_Herzig2;~Alon_Mendelson1;~Leonid_Karlinsky3;~Assaf_Arbelle1;~Rogerio_Feris1;~Trevor_Darrell2;~Amir_Globerson1", "gender": "M;M;M;M;M;M;M", "homepage": "https://roeiherz.github.io/;;;https://www.linkedin.com/in/assaf-arbelle-74065876/?originalSubdomain=il;http://rogerioferis.com;http://www.cs.tau.ac.il/~gamir/;https://people.eecs.berkeley.edu/~trevor/", "dblp": "215/5165;;05/4463;168/5494;;08/4162.html;d/TrevorDarrell", "google_scholar": "https://scholar.google.co.il/citations?user=6Q-289IAAAAJ;;https://scholar.google.co.il/citations?user=WbO7tjYAAAAJ;https://scholar.google.co.uk/citations?user=uU_V_PsAAAAJ;xt3XLjcAAAAJ;https://scholar.google.com.tw/citations?user=5JserkUAAAAJ;https://scholar.google.com.tw/citations?user=bh-uRFMAAAAJ", "or_profile": "~Roei_Herzig2;~Alon_Mendelson1;~Leonid_Karlinsky3;~Assaf_Arbelle1;~Rogerio_Feris1;~Amir_Globerson1;~trevor_darrell1", "aff": "Tel Aviv University;School of Computer Science, Tel Aviv University;International Business Machines;International Business Machines;International Business Machines;Tel Aviv University;Electrical Engineering & Computer Science Department", "aff_domain": "tau.ac.il;cs.tau.ac.il;ibm.com;ibm.com;ibm.com;tau.ac.il;eecs.berkeley.edu", "position": "PhD student;MS student;Principal Researcher;Researcher;Research Manager;Associate Professor;Professor", "bibtex": "@inproceedings{\nherzig2023incorporating,\ntitle={Incorporating Structured Representations into Pretrained Vision {\\textbackslash}\\& Language Models Using Scene Graphs},\nauthor={Roei Herzig and Alon Mendelson and Leonid Karlinsky and Assaf Arbelle and Rogerio Feris and Trevor Darrell and Amir Globerson},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=7DueCuvmgM}\n}", "github": "", "project": "", "reviewers": "VrYK;dPsS;639u", "site": "https://openreview.net/forum?id=7DueCuvmgM", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;4;4", "excitement": "3;4;3", "reproducibility": "4;4;4", "correctness": "3;4;4", "rating_avg": 5.0, "confidence_avg": 4.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.0, "correctness_avg": 3.6666666666666665, "replies_avg": 10, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0000-0001-6559-2316;;;", "linkedin": "roei-herzig-7534615a/;alon-mendelson/;;assaf-arbelle-74065876/?originalSubdomain=il;;;", "aff_unique_index": "0;0;1;1;1;0;2", "aff_unique_norm": "Tel Aviv University;International Business Machines Corporation;Electrical Engineering & Computer Science Department", "aff_unique_dep": ";;Electrical Engineering & Computer Science", "aff_unique_url": "https://www.tau.ac.il;https://www.ibm.com;", "aff_unique_abbr": "TAU;IBM;", "aff_campus_unique_index": "1", "aff_campus_unique": ";Tel Aviv", "aff_country_unique_index": "0;0;1;1;1;0", "aff_country_unique": "Israel;United States;" }, { "id": "7F5w5AQrv7", "title": "Task-Aware Self-Supervised Framework for Dialogue Discourse Parsing", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Dialogue discourse parsing is a 
fundamental natural language processing task. It can benefit a series of conversation-related downstream tasks including dialogue summarization and emotion recognition in conversations. However, existing parsing approaches are constrained by predefined relation types, which can impede the adaptability of the parser for downstream tasks. To this end, we propose to introduce a task-aware paradigm to improve the versatility of the parser in this paper. Moreover, to alleviate error propagation and learning bias, we design a graph-based discourse parsing model termed DialogDP. Building upon the symmetrical property of matrix-embedded parsing graphs, we have developed an innovative self-supervised mechanism that leverages both bottom-up and top-down parsing strategies. This approach allows the parsing graphs to mutually regularize and enhance each other. Empirical studies on dialogue discourse parsing datasets and a downstream task demonstrate the effectiveness and flexibility of our framework.", "keywords": "dialogue discourse parsing;emotion recognition in conversations;self-supervision;Soft-window triangular mask", "primary_area": "", "supplementary_material": "", "author": "Wei Li;Luyao Zhu;Wei Shao;Zonglin Yang;Erik Cambria", "authorids": "~Wei_Li74;~Luyao_Zhu1;~Wei_Shao5;~Zonglin_Yang1;~Erik_Cambria1", "gender": "M;F;M;M;M", "homepage": "https://maxwe11y.github.io/;https://cyn7hia.github.io;;https://zongliny.github.io/;https://sentic.net/erikcambria/", "dblp": "64/6025-76;;;238/0094-1;80/7421", "google_scholar": "https://scholar.google.com/citations?hl=en;vANgO5cAAAAJ;4o57IEAAAAAJ;cTTRbeMAAAAJ;ilSYpW0AAAAJ", "or_profile": "~Wei_Li74;~Luyao_Zhu1;~Wei_Shao5;~Zonglin_Yang1;~Erik_Cambria1", "aff": "Nanyang Technological University;Nanyang Technological University;City University of Hong Kong;Nanyang Technological University;Nanyang Technological University", "aff_domain": "ntu.edu.sg;ntu.edu.sg;cityu.edu.hk;ntu.edu;ntu.edu.sg", "position": "PhD student;PhD student;PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nli2023taskaware,\ntitle={Task-Aware Self-Supervised Framework for Dialogue Discourse Parsing},\nauthor={Wei Li and Luyao Zhu and Wei Shao and Zonglin Yang and Erik Cambria},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=7F5w5AQrv7}\n}", "github": "", "project": "", "reviewers": "yNEK;jNsR;okwz", "site": "https://openreview.net/forum?id=7F5w5AQrv7", "pdf_size": 0, "rating": "3;3;3", "confidence": "5;4;2", "excitement": "2;3;3", "reproducibility": "2;4;3", "correctness": "3;3;4", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 2.6666666666666665, "reproducibility_avg": 3.0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-8077-7025;0000-0002-7422-7318;;0000-0002-8059-6654;0000-0002-3030-1280", "linkedin": "wei-li-299348240/;;;;erikcambria/", "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "Nanyang Technological University;City University of Hong Kong", "aff_unique_dep": ";", "aff_unique_url": "https://www.ntu.edu.sg;https://www.cityu.edu.hk", "aff_unique_abbr": "NTU;CityU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;1;0;0", "aff_country_unique": "Singapore;China" }, { "id": "7FXgefa9lU", "title": "This Reads Like That: Deep Learning for Interpretable Natural Language 
Processing", "track": "main", "status": "Short Main", "tldr": "", "abstract": "Prototype learning, a popular machine learning method designed for inherently interpretable decisions, leverages similarities to learned prototypes for classifying new data. While it is mainly applied in computer vision, in this work, we build upon prior research and further explore the extension of prototypical networks to natural language processing. We introduce a learned weighted similarity measure that enhances the similarity computation by focusing on informative dimensions of pre-trained sentence embeddings. \nAdditionally, we propose a post-hoc explainability mechanism that extracts prediction-relevant words from both the prototype and input sentences. Finally, we empirically demonstrate that our proposed method not only improves predictive performance on the AG News and RT Polarity datasets over a previous prototype-based approach, but also improves the faithfulness of explanations compared to rationale-based recurrent convolutions.", "keywords": "interpretability;natural language processing;deep learning", "primary_area": "", "supplementary_material": "", "author": "Claudio Fanconi;Moritz Vandenhirtz;Severin Husmann;Julia E Vogt", "authorids": "~Claudio_Fanconi1;~Moritz_Vandenhirtz1;~Severin_Husmann1;~Julia_E_Vogt1", "gender": "M;M;;F", "homepage": ";;;http://mds.inf.ethz.ch", "dblp": ";;;13/8412", "google_scholar": "Uko0QY4AAAAJ;H2cG0BwAAAAJ;;UoeV-8kAAAAJ", "or_profile": "~Claudio_Fanconi1;~Moritz_Vandenhirtz1;~Severin_Husmann1;~Julia_E_Vogt1", "aff": "ETHZ - ETH Zurich;ETHZ - ETH Zurich;;Swiss Federal Institute of Technology", "aff_domain": "ethz.ch;ethz.ch;;ethz.ch", "position": "MS student;PhD student;;Assistant Professor", "bibtex": "@inproceedings{\nfanconi2023this,\ntitle={This Reads Like That: Deep Learning for Interpretable Natural Language Processing},\nauthor={Claudio Fanconi and Moritz Vandenhirtz and Severin Husmann and Julia E Vogt},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=7FXgefa9lU}\n}", "github": "", "project": "", "reviewers": "pffd;L24G;AbxD", "site": "https://openreview.net/forum?id=7FXgefa9lU", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;3;2", "excitement": "4;4;3", "reproducibility": "4;4;4", "correctness": "4;4;3", "rating_avg": 4.0, "confidence_avg": 2.6666666666666665, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.0, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;", "linkedin": ";moritz-simon-vandenhirtz-488b0b16b/;severinhusmann/;julia-vogt-50b53895", "aff_unique_index": "0;0;1", "aff_unique_norm": "ETH Zurich;Swiss Federal Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.ethz.ch;https://www.ethz.ch", "aff_unique_abbr": "ETHZ;ETH Zurich", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Switzerland" }, { "id": "7FaWK7HpKK", "title": "Interpreting Answers to Yes-No Questions in User-Generated Content", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Interpreting answers to yes-no questions in social media is difficult. Yes and no keywords are uncommon, and the few answers that include them are rarely to be interpreted what the keywords suggest. In this paper, we present a new corpus of 4,442 yes-no question-answer pairs from Twitter. 
We discuss linguistic characteristics of answers whose interpretation is yes or no, as well as answers whose interpretation is unknown. We show that large language models are far from solving this problem, even after fine-tuning and blending other corpora for the same problem but outside social media.", "keywords": "yes-no questions;question answering", "primary_area": "", "supplementary_material": "", "author": "Shivam Mathur;Keun Hee Park;Dhivya Chinnappa;Saketh Kotamraju;Eduardo Blanco", "authorids": "~Shivam_Mathur1;~Keun_Hee_Park1;~Dhivya_Chinnappa1;~Saketh_Kotamraju1;~Eduardo_Blanco1", "gender": ";M;F;M;M", "homepage": ";;https://dhivyachinnappa.com/;;https://eduardoblanco.github.io/", "dblp": ";348/5286;220/2021;;32/369-2", "google_scholar": "AoD5dgEAAAAJ;-QseKLUAAAAJ;rTO6XDkAAAAJ;wBQFH8oAAAAJ;AqGa3-MAAAAJ", "or_profile": "~Shivam_Mathur1;~Keun_Hee_Park1;~Dhivya_Chinnappa1;~Saketh_Kotamraju1;~Eduardo_Blanco1", "aff": "Arizona State University;Arizona State University;Thomson Reuters;University of Texas at Austin;University of Arizona", "aff_domain": "asu.edu;asu.edu;thomsonreuters.com;utexas.edu;arizona.edu", "position": "MS student;MS student;Research Scientist;Undergrad student;Associate Professor", "bibtex": "@inproceedings{\nmathur2023interpreting,\ntitle={Interpreting Answers to Yes-No Questions in User-Generated Content},\nauthor={Shivam Mathur and Keun Hee Park and Dhivya Chinnappa and Saketh Kotamraju and Eduardo Blanco},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=7FaWK7HpKK}\n}", "github": "", "project": "", "reviewers": "v4HH;heYF;8ozY;9N5z", "site": "https://openreview.net/forum?id=7FaWK7HpKK", "pdf_size": 0, "rating": "3;3;3;3", "confidence": "4;4;4;4", "excitement": "4;4;2;3", "reproducibility": "5;5;2;4", "correctness": "4;4;2;3", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 3.25, "reproducibility_avg": 4.0, "correctness_avg": 3.25, "replies_avg": 12, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0009-0004-2912-7248;0000-0002-8318-3337;;", "linkedin": "shivam--mathur/;keun-park-b08411a9/;dhivyachinnappa/;sakethkotamraju/;", "aff_unique_index": "0;0;1;2;3", "aff_unique_norm": "Arizona State University;Thomson Reuters;University of Texas at Austin;University of Arizona", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.asu.edu;https://www.thomsonreuters.com;https://www.utexas.edu;https://www.arizona.edu", "aff_unique_abbr": "ASU;TR;UT Austin;UA", "aff_campus_unique_index": "1", "aff_campus_unique": ";Austin", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "7GxY4WVBzc", "title": "Arabic Mini-ClimateGPT : A Climate Change and Sustainability Tailored Arabic LLM", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "Climate change is one of the most significant challenges we face together as a society. Creating awareness and educating policy makers the wide-ranging impact of climate change is an essential step towards a sustainable future. Recently, Large Language Models (LLMs) like ChatGPT and Bard have shown impressive conversational abilities and excel in a wide variety of NLP tasks. While these models are close-source, recently alternative open-source LLMs such as Stanford Alpaca and Vicuna have shown promising results. 
However, these open-source models are not specifically tailored for climate related domain specific information and also struggle to generate meaningful responses in other languages such as, Arabic. To this end, we propose a light-weight Arabic Mini-ClimateGPT that is built on an open-source LLM and is specifically fine-tuned on a conversational-style instruction tuning curated Arabic dataset Clima500-Instruct with over 500k instructions about climate change and sustainability. Further, our model also utilizes a vector embedding based retrieval mechanism during inference. We validate our proposed model through quantitative and qualitative evaluations on climate-related queries. Our model surpasses the baseline LLM in 88.3% of cases during ChatGPT-based evaluation. Furthermore, our human expert evaluation reveals an 81.6% preference for our model's responses over multiple popular open-source models. Our open-source demos, models and curated instruction sets are available here : https://github.com/mbzuai-oryx/ClimateGPT", "keywords": "Large Language Model;Climate change;Sustainability;Arabic NLP", "primary_area": "", "supplementary_material": "", "author": "Sahal Shaji Mullappilly;Abdelrahman M Shaker;Omkar Chakradhar Thawakar;Hisham Cholakkal;Rao Muhammad Anwer;Salman Khan;Fahad Khan", "authorids": "~Sahal_Shaji_Mullappilly1;~Abdelrahman_M_Shaker1;~Omkar_Chakradhar_Thawakar1;~Hisham_Cholakkal2;~Rao_Muhammad_Anwer2;~Salman_Khan4;~Fahad_Khan1", "gender": "M;M;M;M;;M;M", "homepage": "https://github.com/sahalshajim;https://amshaker.github.io/;https://omkarthawakar.github.io/;https://mbzuai.ac.ae/pages/hisham-cholakkal/;;https://salman-h-khan.github.io/;https://sites.google.com/view/fahadkhans/home", "dblp": "318/2751;;254/4317;129/2046;;32/11535-1;05/8618", "google_scholar": "LJWxVpUAAAAJ;eEz4Wu4AAAAJ;flvl5YQAAAAJ;bZ3YBRcAAAAJ;;https://scholar.google.es/citations?user=M59O9lkAAAAJ;zvaeYnUAAAAJ", "or_profile": "~Sahal_Shaji_Mullappilly1;~Abdelrahman_M_Shaker1;~Omkar_Chakradhar_Thawakar1;~Hisham_Cholakkal2;~Rao_Muhammad_Anwer2;~Salman_Khan4;~Fahad_Khan1", "aff": "Mohamed bin Zayed University of Artificial Intelligence;Mohamed bin Zayed University of Artificial Intelligence;MBZUAI;MBZUAI;;Australian National University;Link\u00f6ping University", "aff_domain": "mbzuai.ac.ae;mbzuai.ac.ae;mbzuai.ac.ae;mbzuai.ac.ae;;anu.edu.au;liu.se", "position": "MS student;PhD student;Researcher;Assistant Professor;;Lecturer;Associate Professor", "bibtex": "@inproceedings{\nmullappilly2023arabic,\ntitle={Arabic Mini-Climate{GPT} : A Climate Change and Sustainability Tailored Arabic {LLM}},\nauthor={Sahal Shaji Mullappilly and Abdelrahman M Shaker and Omkar Chakradhar Thawakar and Hisham Cholakkal and Rao Muhammad Anwer and Salman Khan and Fahad Khan},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=7GxY4WVBzc}\n}", "github": "", "project": "", "reviewers": "qPCA;juqn;mnNP", "site": "https://openreview.net/forum?id=7GxY4WVBzc", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;3;4", "excitement": "3;3;3", "reproducibility": "5;4;4", "correctness": "3;4;3", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.0, "reproducibility_avg": 4.333333333333333, "correctness_avg": 3.3333333333333335, "replies_avg": 10, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0003-3509-2614;0000-0001-7651-4057;;;;0000-0002-9502-1749;", "linkedin": 
"sahalshajim/;abdel-rahman-shaker-74274b9b/;omkar-thawakar-8b9521161/;;;;", "aff_unique_index": "0;0;0;0;1;2", "aff_unique_norm": "Mohamed bin Zayed University of Artificial Intelligence;Australian National University;Link\u00f6ping University", "aff_unique_dep": ";;", "aff_unique_url": "https://mbzuai.ac.ae;https://www.anu.edu.au;https://www.liu.se", "aff_unique_abbr": "MBZUAI;ANU;LiU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;1;2", "aff_country_unique": "United Arab Emirates;Australia;Sweden" }, { "id": "7Gy8FXaTv6", "title": "CRUSH4SQL: Collective Retrieval Using Schema Hallucination For Text2SQL", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Existing Text-to-SQL generators require the entire schema to be encoded with the user text. This is expensive or impractical for large databases with tens of thousands of columns. Standard dense retrieval techniques are inadequate for schema subsetting of a large structured database, where the correct semantics of retrieval demands that we rank sets of schema elements rather than individual documents. In response, we propose a two-stage process for effective coverage during retrieval. First, we use an LLM to hallucinate a minimal DB schema that it deems adequate to answer the query. We use the hallucinated schema to retrieve a subset of the actual schema, by composing the results from multiple dense retrievals. Remarkably, hallucination --- generally considered a nuisance --- turns out to be actually useful as a bridging mechanism. Since no existing benchmarks exist for schema subsetting on large databases, we introduce two benchmarks: (1) A semi-synthetic dataset of 4502 schema elements, by taking a union of schema on the well-known SPIDER dataset, and (2) A real-life benchmark called SocialDB sourced from an actual large data warehouse comprising of 17844 schema elements. 
We show that our method leads to significantly higher recall than SOTA retrieval-based augmentation methods.", "keywords": "Text-to-SQL;LLM;Retrieval augmentation;Query decomposition", "primary_area": "", "supplementary_material": "", "author": "Mayank Kothyari;Dhruva Dhingra;Sunita Sarawagi;Soumen Chakrabarti", "authorids": "~Mayank_Kothyari1;~Dhruva_Dhingra1;~Sunita_Sarawagi1;~Soumen_Chakrabarti1", "gender": ";M;F;Not Specified", "homepage": ";;https://www.cse.iitb.ac.in/~sunita/;https://www.cse.iitb.ac.in/~soumen/", "dblp": ";;s/SunitaSarawagi;c/SChakrabarti", "google_scholar": "https://scholar.google.com/citations?hl=en;;https://scholar.google.com.tw/citations?user=Hg4HmTAAAAAJ;https://scholar.google.com.tw/citations?user=LfF2zfQAAAAJ", "or_profile": "~Mayank_Kothyari1;~Dhruva_Dhingra1;~Sunita_Sarawagi1;~Soumen_Chakrabarti1", "aff": "Indian Institute of Technology Bombay, Indian Institute of Technology, Bombay;Indian Institute of Technology Bombay, Indian Institute of Technology, Bombay;IIT Bombay;Indian Institute of Technology Bombay", "aff_domain": "cse.iitb.ac.in;cse.iitb.ac.in;iitb.ac.in;iitb.ac.in", "position": "Researcher;Undergrad student;Full Professor;Professor", "bibtex": "@inproceedings{\nkothyari2023crushsql,\ntitle={{CRUSH}4{SQL}: Collective Retrieval Using Schema Hallucination For Text2{SQL}},\nauthor={Mayank Kothyari and Dhruva Dhingra and Sunita Sarawagi and Soumen Chakrabarti},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=7Gy8FXaTv6}\n}", "github": "", "project": "", "reviewers": "H9wu;1yLK;yafE", "site": "https://openreview.net/forum?id=7Gy8FXaTv6", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;4;4", "excitement": "4;4;4", "reproducibility": "4;5;4", "correctness": "4;4;4", "rating_avg": 5.0, "confidence_avg": 4.0, "excitement_avg": 4.0, "reproducibility_avg": 4.333333333333333, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;", "linkedin": ";dhruva-dhingra;;", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Indian Institute of Technology Bombay", "aff_unique_dep": "", "aff_unique_url": "https://www.iitb.ac.in", "aff_unique_abbr": "IIT Bombay", "aff_campus_unique_index": "0;0;1;0", "aff_campus_unique": "Bombay;Mumbai", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "India" }, { "id": "7H45HfXsJb", "title": "KCTS: Knowledge-Constrained Tree Search Decoding with Token-Level Hallucination Detection", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Large Language Models (LLMs) have demonstrated remarkable human-level natural language generation capabilities. However, their potential to generate misinformation, often called the *hallucination* problem, poses a significant risk to their deployment. A common approach to address this issue is to retrieve relevant knowledge and fine-tune the LLM with the knowledge in its input. Unfortunately, this method incurs high training costs and may cause catastrophic forgetting for multi-tasking models. To overcome these limitations, we propose a knowledge-constrained decoding method called KCTS (Knowledge-Constrained Tree Search), which guides a frozen LM to generate text aligned with the reference knowledge at each decoding step using a knowledge classifier score and MCTS (Monte-Carlo Tree Search). 
To adapt the sequence-level knowledge classifier to token-level guidance, we also propose a novel token-level hallucination detection method called RIPA (Reward Inflection Point Approximation). Our empirical results on knowledge-grounded dialogue and abstractive summarization demonstrate the strength of KCTS as a plug-and-play, model-agnostic decoding method that can effectively reduce hallucinations in natural language generation.", "keywords": "Hallucination;Knowledge Grounding;Natural Language Generation;Constrained Decoding;Controllable Text Generation", "primary_area": "", "supplementary_material": "", "author": "Sehyun Choi;Tianqing Fang;Zhaowei Wang;Yangqiu Song", "authorids": "~Sehyun_Choi1;~Tianqing_Fang1;~Zhaowei_Wang2;~Yangqiu_Song1", "gender": "M;M;M;M", "homepage": "https://syncdoth.github.io;http://fangtq.com/;https://zhaowei-wang-nlp.github.io/;https://www.cse.ust.hk/~yqsong/", "dblp": ";283/4921;120/1278-3;86/2159", "google_scholar": "https://scholar.google.com/citations?hl=en;https://scholar.google.com.hk/citations?user=Tb3rc34AAAAJ;5dzojAsAAAAJ;MdQZ-q8AAAAJ", "or_profile": "~Sehyun_Choi1;~Tianqing_Fang1;~Zhaowei_Wang2;~Yangqiu_Song1", "aff": "Department of Computer Science and Engineering, Hong Kong University of Science and Technology;Hong Kong University of Science and Technology;Department of Computer Science and Engineering, Hong Kong University of Science and Technology;Hong Kong University of Science and Technology", "aff_domain": "cse.ust.hk;ust.hk;cse.ust.hk;ust.hk", "position": "MS student;PhD student;MS student;Associate Professor", "bibtex": "@inproceedings{\nchoi2023kcts,\ntitle={{KCTS}: Knowledge-Constrained Tree Search Decoding with Token-Level Hallucination Detection},\nauthor={Sehyun Choi and Tianqing Fang and Zhaowei Wang and Yangqiu Song},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=7H45HfXsJb}\n}", "github": "", "project": "", "reviewers": "jKoz;nrcv;rGKp", "site": "https://openreview.net/forum?id=7H45HfXsJb", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;3;4", "excitement": "4;4;3", "reproducibility": "3;3;3", "correctness": "4;4;4", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.0, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0009-0009-3225-9379;;0000-0001-5539-8181;0000-0002-7818-6090", "linkedin": "sehyun-choi-synch;;zhaowei-wang-571943221/;yqsong/", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Hong Kong University of Science and Technology", "aff_unique_dep": "Department of Computer Science and Engineering", "aff_unique_url": "https://www.ust.hk", "aff_unique_abbr": "HKUST", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Hong Kong SAR", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "7IB8gZRptd", "title": "SUT: Active Defects Probing for Transcompiler Models", "track": "main", "status": "Short Main", "tldr": "", "abstract": "Automatic Program translation has enormous application value and hence has been attracting significant interest from AI researchers. However, we observe that current program translation models still make elementary syntax errors, particularly, when the target language does not have syntax elements in the source language. Metrics like BLUE, CodeBLUE and computation accuracy may not expose these issues. 
In this paper we introduce a new metrics for programming language translation and these metrics address these basic syntax errors. We develop a novel active defects probing suite called Syntactic Unit Tests (SUT) which includes a highly interpretable evaluation harness for accuracy and test scoring. Experiments have shown that even powerful models like ChatGPT still make mistakes on these basic unit tests. Specifically, compared to previous program translation task evaluation dataset, its pass rate on our unit tests has decreased by 26.15%. Further our evaluation harness reveal syntactic element errors in which these models exhibit deficiencies.", "keywords": "Program translation;LLM Evaluation;Unit Test;Syntax Error Analysis", "primary_area": "", "supplementary_material": "", "author": "Mengnan Qi;Yufan Huang;Maoquan Wang;Yongqiang Yao;Zihan Liu;Bin Gu;Colin Clement;Neel Sundaresan", "authorids": "~Mengnan_Qi2;~Yufan_Huang3;~Maoquan_Wang2;~Yongqiang_Yao2;~Zihan_Liu4;~Bin_Gu1;~Colin_Clement1;~Neel_Sundaresan3", "gender": "M;M;M;M;;M;;", "homepage": "https://github.com/Mnangua;https://www.microsoft.com/en-us/research/people/yufanhuang/;https://github.com/ms-maoquan;https://github.com/yongqiang-yao;https://github.com/zhliuworks;https://mbzuai.ac.ae/study/faculty/bin-gu/;https://cbclement.com;https://www.linkedin.com/in/neel-sundaresan-a964a2/", "dblp": "305/9760.html;;;;;29/1758-1;;s/NeelSundaresan.html", "google_scholar": ";;;;;Vo8OgCgAAAAJ;J2aZLEYAAAAJ;", "or_profile": "~Mengnan_Qi2;~Yufan_Huang3;~Maoquan_Wang2;~Yongqiang_Yao2;~Zihan_Liu4;~Bin_Gu1;~Colin_Clement1;~Neel_Sundaresan3", "aff": ";;Microsoft;Microsoft;Shanghai Jiaotong University;Mohamed bin Zayed University of Artificial Intelligence;Microsoft;University of California, Santa Cruz", "aff_domain": ";;microsoft.com;microsoft.com;sjtu.edu.cn;mbzuai.ac.ae;microsoft.com;ucsc.edu", "position": ";;Researcher;Researcher;MS student;Assistant Professor;Senior Research Manager;Full Professor (adjunct)", "bibtex": "@inproceedings{\nqi2023sut,\ntitle={{SUT}: Active Defects Probing for Transcompiler Models},\nauthor={Mengnan Qi and Yufan Huang and Maoquan Wang and Yongqiang Yao and Zihan Liu and Bin Gu and Colin Clement and Neel Sundaresan},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=7IB8gZRptd}\n}", "github": "", "project": "", "reviewers": "ZEWW;aNwM;cbZo", "site": "https://openreview.net/forum?id=7IB8gZRptd", "pdf_size": 0, "rating": "4;4;4", "confidence": "5;3;1", "excitement": "3;4;1", "reproducibility": "4;2;3", "correctness": "4;3;1", "rating_avg": 4.0, "confidence_avg": 3.0, "excitement_avg": 2.6666666666666665, "reproducibility_avg": 3.0, "correctness_avg": 2.6666666666666665, "replies_avg": 11, "authors#_avg": 8, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;0000-0001-6049-1815;0000-0002-3727-7308;", "linkedin": ";;maoquan-wang-0917b520a/;;;;colin-b-clement/;neel-sundaresan-a964a2/", "aff_unique_index": "0;0;1;2;0;3", "aff_unique_norm": "Microsoft;Shanghai Jiao Tong University;Mohamed bin Zayed University of Artificial Intelligence;University of California, Santa Cruz", "aff_unique_dep": "Microsoft Corporation;;;", "aff_unique_url": "https://www.microsoft.com;https://www.sjtu.edu.cn;https://mbzuai.ac.ae;https://www.ucsc.edu", "aff_unique_abbr": "Microsoft;SJTU;MBZUAI;UCSC", "aff_campus_unique_index": "1", "aff_campus_unique": ";Santa Cruz", "aff_country_unique_index": "0;0;1;2;0;0", "aff_country_unique": 
"United States;China;United Arab Emirates" }, { "id": "7IcVI11lkO", "title": "Improving Transformer-based Program Repair Model through False Behavior Diagnosis", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Research on automated program repairs using transformer-based models has recently gained considerable attention. The comprehension of the erroneous behavior of a model enables the identification of its inherent capacity and provides insights for improvement. However, the current landscape of research on program repair models lacks an investigation of their false behavior. Thus, we propose a methodology for diagnosing and treating the false behaviors of transformer-based program repair models. Specifically, we propose 1) a behavior vector that quantifies the behavior of the model when it generates an output, 2) a behavior discriminator (BeDisc) that identifies false behaviors, and 3) two methods for false behavior treatment. Through a large-scale experiment on 55,562 instances employing four datasets and three models, the BeDisc exhibited a balanced accuracy of 86.6\\% for false behavior classification. The first treatment, namely, early abortion, successfully eliminated 60.4\\% of false behavior while preserving 97.4\\% repair accuracy. Furthermore, the second treatment, namely, masked bypassing, resulted in an average improvement of 40.5\\% in the top-1 repair accuracy. These experimental results demonstrated the importance of investigating false behaviors in program repair models.", "keywords": "Transformer;Program Repair;False Behavior", "primary_area": "", "supplementary_material": "", "author": "Youngkyoung Kim;Misoo Kim;Eunseok Lee", "authorids": "~Youngkyoung_Kim1;~Misoo_Kim1;~Eunseok_Lee2", "gender": ";F;M", "homepage": ";https://sites.google.com/view/misoo-kim;https://sites.google.com/view/skkuselab/people/professor", "dblp": "219/4400.html;221/1657.html;16/3590", "google_scholar": "-6QLK7gAAAAJ;qiPQBNsAAAAJ;pvUFE7EAAAAJ", "or_profile": "~Youngkyoung_Kim1;~Misoo_Kim1;~Eunseok_Lee2", "aff": "Sung Kyun Kwan University;Sungkyun Kwan University;Mitsubishi Electric", "aff_domain": "skku.edu;skku.edu;mitsubishi.co.jp", "position": "PhD student;Postdoc;Principal Researcher", "bibtex": "@inproceedings{\nkim2023improving,\ntitle={Improving Transformer-based Program Repair Model through False Behavior Diagnosis},\nauthor={Youngkyoung Kim and Misoo Kim and Eunseok Lee},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=7IcVI11lkO}\n}", "github": "", "project": "", "reviewers": "GLov;aJHn;1EGD", "site": "https://openreview.net/forum?id=7IcVI11lkO", "pdf_size": 0, "rating": "", "confidence": "4;3;3", "excitement": "3;4;4", "reproducibility": "3;3;4", "correctness": "3;4;3", "rating_avg": 0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0, "corr_rating_correctness": 0, "orcid": ";0000-0002-8274-5457;0000-0002-6557-8087", "linkedin": ";;eunseok-lee-9011b1a9/", "aff_unique_index": "0;1;2", "aff_unique_norm": "Sungkyunkwan University;Sungkyun Kwan University;Mitsubishi Electric Corporation", "aff_unique_dep": ";;", "aff_unique_url": "https://www.skku.edu;https://www.skku.edu;https://www.mitsubishielectric.com", "aff_unique_abbr": "SKKU;SKKU;MELCO", "aff_campus_unique_index": "", "aff_campus_unique": "", 
"aff_country_unique_index": "0;0;1", "aff_country_unique": "South Korea;Japan" }, { "id": "7Jis2yiiEZ", "title": "Syllogistic Reasoning for Legal Judgment Analysis", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Legal judgment assistants are developing fast due to impressive progress of large language models (LLMs). However, people can hardly trust the results generated by a model without reliable analysis of legal judgement. For legal practitioners, it is common practice to utilize syllogistic reasoning to select and evaluate the arguments of the parties as part of the legal decision-making process. But the development of syllogistic reasoning for legal judgment analysis is hindered by the lack of resources: (1) there is no large-scale syllogistic reasoning dataset for legal judgment analysis, and (2) there is no set of established benchmarks for legal judgment analysis. In this paper, we construct and manually correct a syllogistic reasoning dataset for legal judgment analysis. The dataset contains 11,239 criminal cases which cover 4 criminal elements, 80 charges and 124 articles. We also select a set of large language models as benchmarks, and conduct a in-depth analysis of the capacity of their legal judgment analysis.", "keywords": "Legal Judgment Analysis;Syllogism;Syllogistic Reasoning", "primary_area": "", "supplementary_material": "", "author": "Wentao Deng;Jiahuan Pei;Keyi Kong;Zhe Chen;Furu Wei;Yujun Li;Zhaochun Ren;Zhumin Chen;Pengjie Ren", "authorids": "~Wentao_Deng1;~Jiahuan_Pei1;~Keyi_Kong1;~Zhe_Chen13;~Furu_Wei1;~Yujun_Li3;~Zhaochun_Ren1;~Zhumin_Chen1;~Pengjie_Ren1", "gender": "M;F;M;M;M;M;M;;", "homepage": "https://github.com/dengwentao99;https://jiahuan-pei.github.io/;https://github.com/luxinyayaya;https://github.com/xiaobo-Chen;https://www.microsoft.com/en-us/research/people/fuwei/;;https://renzhaochun.github.io/;https://ir.sdu.edu.cn/~zhuminchen/~zhuminchen_en.htm;", "dblp": "265/1234;190/7893;359/3282;;72/5870;;58/10440;88/1081;", "google_scholar": "https://scholar.google.com/citations?view_op=list_works;cnhyEW0AAAAJ;https://scholar.google.com/citations?hl=zh-CN;;G-V1VpwAAAAJ;https://scholar.google.com/citations?hl=zh-CN;fPcIPt0AAAAJ;;", "or_profile": "~Wentao_Deng1;~Jiahuan_Pei1;~Keyi_Kong1;~Zhe_Chen13;~Furu_Wei1;~Yujun_Li3;~Zhaochun_Ren1;~Zhumin_Chen1;~Pengjie_Ren1", "aff": "Shandong University;Amazon;Shandong University;;Microsoft Research;Shandong University;Shandong University;Shandong University;", "aff_domain": "sdu.edu.cn;amazon.com;sdu.edu.cn;;microsoft.com;sdu.edu.cn;sdu.edu.cn;sdu.edu.cn;", "position": "PhD student;Applied Scientist;Undergrad student;;Distinguished Scientist;Full Professor;Full Professor;Full Professor;", "bibtex": "@inproceedings{\ndeng2023syllogistic,\ntitle={Syllogistic Reasoning for Legal Judgment Analysis},\nauthor={Wentao Deng and Jiahuan Pei and Keyi Kong and Zhe Chen and Furu Wei and Yujun Li and Zhaochun Ren and Zhumin Chen and Pengjie Ren},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=7Jis2yiiEZ}\n}", "github": "", "project": "", "reviewers": "vyvp;mGTi;JXs1", "site": "https://openreview.net/forum?id=7Jis2yiiEZ", "pdf_size": 0, "rating": "5;5;5", "confidence": "5;4;4", "excitement": "3;2;4", "reproducibility": "4;3;4", "correctness": "3;3;5", "rating_avg": 5.0, "confidence_avg": 4.333333333333333, "excitement_avg": 3.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.6666666666666665, "replies_avg": 10, 
"authors#_avg": 9, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0001-6951-8340;0009-0001-2646-4781;;;;0000-0002-9076-6565;0000-0003-4592-4074;", "linkedin": ";jiahuan-pei-b4b507b4/;;;;;zhaochun-ren-460491296/?locale=nl_NL;;", "aff_unique_index": "0;1;0;2;0;0;0", "aff_unique_norm": "Shandong University;Amazon;Microsoft", "aff_unique_dep": ";Amazon.com, Inc.;Microsoft Research", "aff_unique_url": "http://www.sdu.edu.cn;https://www.amazon.com;https://www.microsoft.com/en-us/research", "aff_unique_abbr": "SDU;Amazon;MSR", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;1;0;0;0", "aff_country_unique": "China;United States" }, { "id": "7LBhEJ1DII", "title": "Quantifying Character Similarity with Vision Transformers", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Record linkage is a bedrock of quantitative social science, as analyses often require linking data from multiple, noisy sources. Off-the-shelf string matching methods are widely used, as they are straightforward and cheap to implement and scale. Not all character substitutions are equally probable, and for some settings there are widely used handcrafted lists denoting which string substitutions are more likely, that improve the accuracy of string matching. However, such lists do not exist for many settings, skewing research with linked datasets towards a few high-resource contexts that are not representative of the diversity of human societies. This study develops an extensible way to measure character substitution costs for OCR'ed documents, by employing large-scale self-supervised training of vision transformers (ViT) with augmented digital fonts. For each language written with the CJK script, we contrastively learn a metric space where different augmentations of the same character are represented nearby. In this space, homoglyphic characters - those with similar appearance such as \"O\" and \"0\" - have similar vector representations. Using the cosine distance between characters' representations as the substitution cost in an edit distance matching algorithm significantly improves record linkage compared to other widely used string matching methods, as OCR errors tend to be homoglyphic in nature. Homoglyphs can plausibly capture character visual similarity across any script, including low-resource settings. We illustrate this by creating homoglyph sets for 3,000 year old ancient Chinese characters, which are highly pictorial. 
Fascinatingly, a ViT is able to capture relationships in how different abstract concepts were conceptualized by ancient societies, that have been noted in the archaeological literature.", "keywords": "record linkage;homoglyphs;character similarity", "primary_area": "", "supplementary_material": "", "author": "Xinmei Yang;Abhishek Arora;Shao-Yu Jheng;Melissa Dell", "authorids": "~Xinmei_Yang1;~Abhishek_Arora1;~Shao-Yu_Jheng1;~Melissa_Dell1", "gender": "F;M;;", "homepage": "https://xinmeiyang.weebly.com;https://econabhishek.github.io/;https://sites.google.com/view/jheng-shao-yu;", "dblp": ";344/4529;;", "google_scholar": ";https://scholar.google.com/citations?hl=en;UVXrX9cAAAAJ;", "or_profile": "~Xinmei_Yang1;~Abhishek_Arora1;~Shao-Yu_Jheng1;~Melissa_Dell1", "aff": "Renmin University of China;Harvard University, Harvard University;Harvard University;", "aff_domain": "ruc.edu.cn;fas.harvard.edu;harvard.edu;", "position": "Undergrad student;Researcher;Researcher;", "bibtex": "@inproceedings{\nyang2023quantifying,\ntitle={Quantifying Character Similarity with Vision Transformers},\nauthor={Xinmei Yang and Abhishek Arora and Shao-Yu Jheng and Melissa Dell},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=7LBhEJ1DII}\n}", "github": "", "project": "", "reviewers": "3GU8;BJgQ;Qkwc", "site": "https://openreview.net/forum?id=7LBhEJ1DII", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;3;4", "excitement": "2;4;4", "reproducibility": "2;4;4", "correctness": "3;4;4", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;", "linkedin": ";abhishek-arora1996/;;", "aff_unique_index": "0;1;1", "aff_unique_norm": "Renmin University of China;Harvard University", "aff_unique_dep": ";", "aff_unique_url": "http://www.ruc.edu.cn;https://www.harvard.edu", "aff_unique_abbr": "RUC;Harvard", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1", "aff_country_unique": "China;United States" }, { "id": "7MmYaN93lb", "title": "Is Robustness Transferable across Languages in Multilingual Neural Machine Translation?", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Robustness, the ability of models to maintain performance in the face of perturbations, is critical for developing reliable NLP systems. Recent studies have shown promising results in improving the robustness of models through adversarial training and data augmentation. However, in machine translation, most of these studies have focused on bilingual machine translation with a single translation direction. In this paper, we investigate the transferability of robustness across different languages in multilingual neural machine translation. We propose a robustness transfer analysis protocol and conduct a series of experiments. In particular, we use character-, word-, and multi-level noises to attack the specific translation direction of the multilingual neural machine translation model and evaluate the robustness of other translation directions. Our findings demonstrate that the robustness gained in one translation direction can indeed transfer to other translation directions. 
Additionally, we empirically find scenarios where robustness to character-level noise and word-level noise is more likely to transfer.", "keywords": "robustness;transfer learning;multilingual neural machine translation", "primary_area": "", "supplementary_material": "", "author": "Leiyu Pan;Supryadi;Deyi Xiong", "authorids": "~Leiyu_Pan1;~Supryadi1;~Deyi_Xiong2", "gender": "M;M;M", "homepage": ";;https://dyxiong.github.io", "dblp": "359/6474.html;;55/6548", "google_scholar": ";https://scholar.google.com/citations?hl=en;QPLO3myO5PkC", "or_profile": "~Leiyu_Pan1;~Supryadi1;~Deyi_Xiong2", "aff": "Northeastern University;Tianjin University;Tianjin University", "aff_domain": "neu.edu.cn;tju.edu.cn;tju.edu.cn", "position": "Undergrad student;PhD student;Full Professor", "bibtex": "@inproceedings{\npan2023is,\ntitle={Is Robustness Transferable across Languages in Multilingual Neural Machine Translation?},\nauthor={Leiyu Pan and Supryadi and Deyi Xiong},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=7MmYaN93lb}\n}", "github": "", "project": "", "reviewers": "SHa5;USAS;xN8k", "site": "https://openreview.net/forum?id=7MmYaN93lb", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;4", "excitement": "2;3;3", "reproducibility": "2;4;4", "correctness": "3;3;4", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 2.6666666666666665, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0009-0002-5304-0914;0009-0001-8316-1840;0000-0002-2353-5038", "linkedin": ";;", "aff_unique_index": "0;1;1", "aff_unique_norm": "Northeastern University;Tianjin University", "aff_unique_dep": ";", "aff_unique_url": "https://www.northeastern.edu;http://www.tju.edu.cn", "aff_unique_abbr": "NEU;TJU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1", "aff_country_unique": "United States;China" }, { "id": "7O9bTjLgTQ", "title": "VISIT: Visualizing and Interpreting the Semantic Information Flow of Transformers", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Recent advances in interpretability suggest we can project weights and hidden states of transformer-based language models (LMs) to their vocabulary, a transformation that makes them more human interpretable. In this paper, we investigate LM attention heads and memory values, the vectors the models dynamically create and recall while processing a given input. By analyzing the tokens they represent through this projection, we identify patterns in the information flow inside the attention mechanism. Based on our discoveries, we create a tool to visualize a forward pass of Generative Pre-trained Transformers (GPTs) as an interactive flow graph, with nodes representing neurons or hidden states and edges representing the interactions between them. Our visualization simplifies huge amounts of data into easy-to-read plots that can reflect the models' internal processing, uncovering the contribution of each component to the models' final prediction. 
Our visualization also unveils new insights about the role of layer norms as semantic filters that influence the models' output, and about neurons that are always activated during forward passes and act as regularization vectors.", "keywords": "attention;interpretability;visualization", "primary_area": "", "supplementary_material": "", "author": "Shahar Katz;Yonatan Belinkov", "authorids": "~Shahar_Katz1;~Yonatan_Belinkov1", "gender": "M;M", "homepage": "https://www.belinkov.com;", "dblp": "136/8705;348/4607", "google_scholar": "https://scholar.google.com/citations?authorid=K-6ujU4AAAAJ;https://scholar.google.co.il/citations?user=0k2nOjgAAAAJ", "or_profile": "~Yonatan_Belinkov1;~Shachar_Katz1", "aff": "Technion, Technion;Computer Science Department, Technion - Israel Institute of Technology", "aff_domain": "technion.ac.il;cs.technion.ac.il", "position": "Assistant Professor;MS student", "bibtex": "@inproceedings{\nkatz2023visit,\ntitle={{VISIT}: Visualizing and Interpreting the Semantic Information Flow of Transformers},\nauthor={Shahar Katz and Yonatan Belinkov},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=7O9bTjLgTQ}\n}", "github": "", "project": "", "reviewers": "arzY;mg6v;PqVU", "site": "https://openreview.net/forum?id=7O9bTjLgTQ", "pdf_size": 0, "rating": "2;2;2", "confidence": "3;4;3", "excitement": "4;4;3", "reproducibility": "4;2;4", "correctness": "3;4;3", "rating_avg": 2.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";", "linkedin": ";shachar-katz-ab024a219?lipi=urn%3Ali%3Apage%3Ad_flagship3_profile_view_base_contact_details%3BryB62fS%2FRdKt5CxZYixxiw%3D%3D", "aff_unique_index": "0;0", "aff_unique_norm": "Technion - Israel Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.technion.ac.il/en/", "aff_unique_abbr": "Technion", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Israel" }, { "id": "7QSa2w5Wai", "title": "Transitioning Representations between Languages for Cross-lingual Event Detection via Langevin Dynamics", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "Cross-lingual transfer learning (CLTL) for event detection (ED) aims to develop models in high-resource source languages that can be directly applied to produce effective performance for lower-resource target languages. Previous research in this area has focused on representation matching methods to develop a language-universal representation space into which source- and target-language example representations can be mapped to achieve cross-lingual transfer. However, as this approach modifies the representations for the source-language examples, the models might lose discriminative features for ED that are learned over training data of the source language to prevent effective predictions. To this end, our work introduces a novel approach for cross-lingual ED where we only aim to transition the representations for the target-language examples into the source-language space, thus preserving the representations in the source language and their discriminative information. 
Our method introduces Langevin Dynamics to perform representation transition and a semantic preservation framework to retain event type features during the transition process. Extensive experiments over three languages demonstrate the state-of-the-art performance for ED in CLTL.", "keywords": "Event Detection;Information Extraction;Cross-lingual Transfer Learning;Langevin Dynamics", "primary_area": "", "supplementary_material": "", "author": "Chien Van Nguyen;Huy Huu Nguyen;Franck Dernoncourt;Thien Huu Nguyen", "authorids": "~Chien_Van_Nguyen1;~Huy_Huu_Nguyen1;~Franck_Dernoncourt1;~Thien_Huu_Nguyen1", "gender": "M;M;;M", "homepage": "https://chiennv2000.github.io/;;http://francky.me;http://ix.cs.uoregon.edu/~thien", "dblp": "351/5540;;132/4043;17/9407", "google_scholar": "fW5HEnEAAAAJ;;kz2aIc8AAAAJ;Da2FhegAAAAJ", "or_profile": "~Chien_Van_Nguyen1;~Huy_Huu_Nguyen1;~Franck_Dernoncourt1;~Thien_Huu_Nguyen1", "aff": "Hanoi University of Science and Technology;Hanoi University of Science and Technology;Adobe Systems;University of Oregon", "aff_domain": "hust.edu.vn;hust.edu.vn;adobe.com;cs.uoregon.edu", "position": "Student;Undergrad student;Researcher;Assistant Professor", "bibtex": "@inproceedings{\nnguyen2023transitioning,\ntitle={Transitioning Representations between Languages for Cross-lingual Event Detection via Langevin Dynamics},\nauthor={Chien Van Nguyen and Huy Huu Nguyen and Franck Dernoncourt and Thien Huu Nguyen},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=7QSa2w5Wai}\n}", "github": "", "project": "", "reviewers": "87k9;9TYZ;dUHy;xAXc", "site": "https://openreview.net/forum?id=7QSa2w5Wai", "pdf_size": 0, "rating": "3;3;3;3", "confidence": "4;4;5;4", "excitement": "3;3;3;3", "reproducibility": "3;3;3;4", "correctness": "3;3;2;3", "rating_avg": 3.0, "confidence_avg": 4.25, "excitement_avg": 3.0, "reproducibility_avg": 3.25, "correctness_avg": 2.75, "replies_avg": 13, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0002-1119-1346;", "linkedin": "chiennv2000/;h%E1%BB%AFu-huy-nguy%E1%BB%85n-97832a1b4/;franckdernoncourt;thien-huu-nguyen-7a193030/", "aff_unique_index": "0;0;1;2", "aff_unique_norm": "Hanoi University of Science and Technology;Adobe;University of Oregon", "aff_unique_dep": ";Adobe Systems Incorporated;", "aff_unique_url": "https://www.hust.edu.vn;https://www.adobe.com;https://www.uoregon.edu", "aff_unique_abbr": "HUST;Adobe;UO", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Hanoi;", "aff_country_unique_index": "0;0;1;1", "aff_country_unique": "Vietnam;United States" }, { "id": "7QSvLXXHQt", "title": "Who Wrote it and Why? Prompting Large-Language Models for Authorship Verification", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "Authorship verification (AV) is a fundamental task in natural language processing (NLP) and computational linguistics, with applications in forensic analysis, plagiarism detection, and identification of deceptive content. Existing AV techniques, including traditional stylometric and deep learning approaches, face limitations in terms of data requirements and lack of explainability. To address these limitations, this paper proposes \\textsf{PromptAV}, a novel technique that leverages Large-Language Models (LLMs) for AV by providing step-by-step stylometric explanation prompts. 
\\textsf{PromptAV} outperforms state-of-the-art baselines, operates effectively with limited training data, and enhances interpretability through intuitive explanations, showcasing its potential as an effective and interpretable solution for the AV task.", "keywords": "Authorship Verification;Chain-of-Thought Prompting", "primary_area": "", "supplementary_material": "", "author": "Chia-Yu Hung;Zhiqiang Hu;Yujia Hu;Roy Ka-Wei Lee", "authorids": "~Chia-Yu_Hung1;~Zhiqiang_Hu3;~Yujia_Hu1;~Roy_Ka-Wei_Lee1", "gender": "M;;;M", "homepage": ";https://hzq950419.github.io/HomePage/;https://www.linkedin.com/in/yujia-hu-170a41198/;https://www.socialai.studio/team", "dblp": ";;;139/2266", "google_scholar": "RgUB8xgAAAAJ;vjQQUnwAAAAJ;;https://scholar.google.com.sg/citations?user=uQxdOlsAAAAJ", "or_profile": "~Chia-Yu_Hung1;~Zhiqiang_Hu3;~Yujia_Hu1;~Roy_Ka-Wei_Lee1", "aff": "Singapore University of Technology and Design;Singapore University of Technology and Design;KU Leuven;Singapore University of Technology and Design", "aff_domain": "sutd.edu.sg;sutd.edu.sg;kuleuven.be;sutd.edu.sg", "position": "Undergrad student;PhD student;MS student;Assistant Professor", "bibtex": "@inproceedings{\nhung2023who,\ntitle={Who Wrote it and Why? Prompting Large-Language Models for Authorship Verification},\nauthor={Chia-Yu Hung and Zhiqiang Hu and Yujia Hu and Roy Ka-Wei Lee},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=7QSvLXXHQt}\n}", "github": "", "project": "", "reviewers": "ug9M;Waug;4xL7", "site": "https://openreview.net/forum?id=7QSvLXXHQt", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;5", "excitement": "4;3;3", "reproducibility": "3;4;4", "correctness": "3;3;4", "rating_avg": 3.0, "confidence_avg": 4.333333333333333, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0000-0002-1986-7750", "linkedin": "hungchiayu/;;;", "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Singapore University of Technology and Design;Katholieke Universiteit Leuven", "aff_unique_dep": ";", "aff_unique_url": "https://www.sutd.edu.sg;https://www.kuleuven.be", "aff_unique_abbr": "SUTD;KU Leuven", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "Singapore;Belgium" }, { "id": "7RzRbVXWPN", "title": "AfriSenti: A Twitter Sentiment Analysis Benchmark for African Languages", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Africa is home to over 2,000 languages from over six language families and has the highest linguistic diversity among all continents. This includes 75 languages with at least one million speakers each. Yet, there is little NLP research conducted on African languages. Crucial in enabling such research is the availability of high-quality annotated datasets. In this paper, we introduce AfriSenti, a sentiment analysis benchmark that contains a total of >110,000 tweets in 14 African languages (Amharic, Algerian Arabic, Hausa, Igbo, Kinyarwanda, Moroccan Arabic, Mozambican Portuguese, Nigerian Pidgin, Oromo, Swahili, Tigrinya, Twi, Xitsonga, and Yoruba) from four language families. The tweets were annotated by native speakers and used in the AfriSenti-SemEval shared task (with over 200 participants, see website: https://afrisenti-semeval.github.io). 
We describe the data collection methodology, annotation process, and the challenges we dealt with when curating each dataset. We further report baseline experiments conducted on the AfriSenti datasets and discuss their usefulness.", "keywords": "Africa;Sentiment;Dataset;NLP", "primary_area": "", "supplementary_material": "", "author": "Shamsuddeen Hassan Muhammad;Idris Abdulmumin;Abinew Ali Ayele;Nedjma OUSIDHOUM;David Ifeoluwa Adelani;Seid Muhie Yimam;Ibrahim Said Ahmad;Meriem Beloucif;Saif M. Mohammad;Sebastian Ruder;Oumaima Hourrane;Alipio Jorge;Pavel Brazdil;Felermino D. M. A. Ali;Davis David;Salomey Osei;Bello Shehu-Bello;Falalu Ibrahim Lawan;Tajuddeen Gwadabe;Samuel Rutunda;Tadesse Destaw Belay;Wendimu Baye Messelle;Hailu Beshada Balcha;Sisay Adugna Chala;Hagos Tesfahun Gebremichael;Bernard Opoku;Stephen Arthur", "authorids": "~Shamsuddeen_Hassan_Muhammad1;~Idris_Abdulmumin1;~Abinew_Ali_Ayele1;~Nedjma_OUSIDHOUM1;~David_Ifeoluwa_Adelani1;~Seid_Muhie_Yimam1;~Ibrahim_Said_Ahmad1;~Meriem_Beloucif1;~Saif_M._Mohammad1;~Sebastian_Ruder2;~Oumaima_Hourrane1;~Alipio_Jorge1;~Pavel_Brazdil2;~Felermino_D._M._A._Ali1;~Davis_David1;~Salomey_Osei1;~Bello_Shehu-Bello1;~Falalu_Ibrahim_Lawan1;~Tajuddeen_Gwadabe1;~Samuel_Rutunda1;~Tadesse_Destaw_Belay1;~Wendimu_Baye_Messelle2;~Hailu_Beshada_Balcha1;~Sisay_Adugna_Chala1;~Hagos_Tesfahun_Gebremichael1;~Bernard_Opoku1;~Stephen_Arthur1", "gender": ";M;M;F;M;M;M;;M;;F;M;M;M;M;F;M;M;M;M;M;M;M;;M;M;M", "homepage": ";https://abumafrim.github.io;https://www.inf.uni-hamburg.de/en/inst/ab/lt/people/abinew-ali.html;https://nedjmaou.github.io/;https://dadelani.github.io/;https://seyyaw.github.io/;https://isahmadbbr.github.io;https://www.inf.uni-hamburg.de/en/inst/ab/lt/people/meriem-beloucif.html;http://saifmohammad.com;;;https://sigarra.up.pt/fcup/pt/func_geral.formview?p_codigo=232938;http://www.liaad.up.pt/area/pbrazdil/pavel-brazdil;https://felerminoali.github.io/#about;http://davisdavid.com/;;;;;https://digitalumuganda.com;https://tadesse-destaw.github.io/;https://ir.bdu.edu.et/handle/123456789/10546;;;;;", "dblp": ";254/2143;;248/2832;230/6973;136/8659;https://dblp.uni-trier.de/pid/270/7405;136/9157;58/380;;;j/AlipioMarioJorge.html;https://dblp.uni-trier.de/pid/14/2530.html;;;;;;;;312/3078;;;;;;", "google_scholar": ";ANvTtmYAAAAJ;g2m1wH4AAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.ca/citations?user=W9sTkS0AAAAJ;https://scholar.google.de/citations?user=rDKEGNgAAAAJ;QA4Aib4AAAAJ;https://scholar.google.com.hk/citations?user=yRo5n7cAAAAJ;zJHymXh9EVwC;;4gQwz54AAAAJ;wMjqg7QAAAAJ;https://scholar.google.pt/citations?user=SaB1VPQAAAAJ;bzFT8cIAAAAJ;;32M1HMsAAAAJ;https://scholar.google.co.uk/citations?user=DdAYSqUAAAAJ;IeAUfwYAAAAJ;Fu1ru8YAAAAJ;e-tCsjYAAAAJ;8S7ilV0AAAAJ;;https://scholar.google.com/citations?hl=en;AhwEye4AAAAJ;;f5tqjbkAAAAJ;", "or_profile": "~Shamsuddeen_Hassan_Muhammad1;~Idris_Abdulmumin1;~Abinew_Ali_Ayele1;~Nedjma_OUSIDHOUM1;~David_Ifeoluwa_Adelani1;~Seid_Muhie_Yimam1;~Ibrahim_Said_Ahmad1;~Meriem_Beloucif1;~Saif_M._Mohammad1;~Sebastian_Ruder2;~Oumaima_Hourrane1;~Alipio_Jorge1;~Pavel_Brazdil2;~Felermino_D._M._A._Ali1;~Davis_David1;~Salomey_Osei1;~Bello_Shehu-Bello1;~Falalu_Ibrahim_Lawan1;~Tajuddeen_Gwadabe1;~Samuel_Rutunda1;~Tadesse_Destaw_Belay1;~Wendimu_Baye_Messelle2;~Hailu_Beshada_Balcha1;~Sisay_Adugna_Chala1;~Hagos_Tesfahun_Gebremichael1;~Bernard_Opoku1;~Stephen_Arthur1", "aff": ";Bayero University Kano;Universit\u00e4t Hamburg;University of Cambridge;University College London, University of London;Universit\u00e4t 
Hamburg;Bayero University Kano;Uppsala University;National Research Council Canada;;;INESC TEC;University of Porto;Universidade do Porto;;Universidad de Deusto;;Kaduna State University;Masakhane Research Foundation;Digital Umuganda;Instituto Polit\u00e9cnico Nacional, Centro de Investigaci\u00f3n en Computaci\u00f3n;Bahir Dar University;Jimma University;Fraunhofer FIT;Bahir Dar University;Accra Institute of Technology;", "aff_domain": ";buk.edu.ng;uni-hamburg.de;cam.ac.uk;ucl.ac.uk;uni-hamburg.de;buk.edu.ng;uu.se;nrc-cnrc.gc.ca;;;inesctec.pt;umass.edu;up.pt;;deusto.es;;kasuportal.net;masakhane.io;digitalumuganda.com;cic.ipn.mx;bdu.edu.et;ju.edu.et;fit.fraunhofer.de;bdu.edu.et;ait.edu.gh;", "position": ";PhD student;PhD student;Postdoc;Postdoc;Researcher;Lecturer;Assistant Professor;Researcher;;;Researcher;Full Professor;PhD student;;PhD student;;Lecturer;Researcher;Principal Researcher;PhD student;Lecturer;Lecturer;Postdoc;Lecturer;Instructor;", "bibtex": "@inproceedings{\nmuhammad2023afrisenti,\ntitle={AfriSenti: A Twitter Sentiment Analysis Benchmark for African Languages},\nauthor={Shamsuddeen Hassan Muhammad and Idris Abdulmumin and Abinew Ali Ayele and Nedjma OUSIDHOUM and David Ifeoluwa Adelani and Seid Muhie Yimam and Ibrahim Said Ahmad and Meriem Beloucif and Saif M. Mohammad and Sebastian Ruder and Oumaima Hourrane and Alipio Jorge and Pavel Brazdil and Felermino D. M. A. Ali and Davis David and Salomey Osei and Bello Shehu-Bello and Falalu Ibrahim Lawan and Tajuddeen Gwadabe and Samuel Rutunda and Tadesse Destaw Belay and Wendimu Baye Messelle and Hailu Beshada Balcha and Sisay Adugna Chala and Hagos Tesfahun Gebremichael and Bernard Opoku and Stephen Arthur},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=7RzRbVXWPN}\n}", "github": "", "project": "", "reviewers": "j4yL;TvNi;DhE5", "site": "https://openreview.net/forum?id=7RzRbVXWPN", "pdf_size": 0, "rating": "5;5;5", "confidence": "3;4;4", "excitement": "3;4;4", "reproducibility": "3;3;2", "correctness": "3;4;4", "rating_avg": 5.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 2.6666666666666665, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 27, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-3795-8381;0000-0003-4686-5053;;0000-0002-0193-2083;0000-0002-8289-388X;0000-0001-9514-1807;;0000-0003-2716-7516;;;0000-0002-5475-1382;0000-0002-4720-0486;0009-0003-4101-8979;;my-orcid?orcid=0000-0003-1900-3124;;0000-0003-3310-0326;;;0000-0003-0883-984X;;;;0000-0003-0216-2727;0000-0003-3104-9897;", "linkedin": ";;;;david-adelani-7557b337/;seyaw;ibrahim-said-ahmad-42419775/;;;;;;;;davis-david-783461124/;salomey-osei-4b08a5b8/;;https://linkedin.com/in/falalu-ibrahim-003283114;;samuel-rutunda-07bb818a/;tadesse-belay/;;hailu-beshada-10572868/;;;bernard-kwabena-opoku-965653b1/;stephen-arthur-315b0712b/", "aff_unique_index": "0;1;2;3;1;0;4;5;6;7;8;9;10;11;12;13;14;15;16;14;17", "aff_unique_norm": "Bayero University;University of Hamburg;University of Cambridge;University College London;Uppsala University;National Research Council Canada;INESC TEC;University of Porto;Universidade do Porto;Universidad de Deusto;Kaduna State University;Masakhane Research Foundation;Digital Umuganda;Instituto Polit\u00e9cnico Nacional;Bahir Dar University;Jimma University;Fraunhofer Institute for Integrated Circuits;Accra Institute of Technology", 
"aff_unique_dep": ";;;;;;;;;;;;;Centro de Investigaci\u00f3n en Computaci\u00f3n;;;;", "aff_unique_url": "https://www.buk.edu.ng;https://www.uni-hamburg.de;https://www.cam.ac.uk;https://www.ucl.ac.uk;https://www.uu.se;https://www.nrc-cnrc.gc.ca;https://www.inesctec.pt;https://www.up.pt;https://www.up.pt;https://www.deusto.es;https://www.kasu.edu.ng;https://www.masakhane.io;;https://www.ipn.mx;https://www.bdu.edu.et;https://www.ju.edu.et;https://www.fit.fraunhofer.de/;https://www.aitech.edu.gh", "aff_unique_abbr": "Bayero;UHH;Cambridge;UCL;UU;NRC-CNRC;INESC TEC;UPorto;UPorto;Deusto;KASU;;;IPN;BDU;JU;FIT;AIT", "aff_campus_unique_index": "0;2;0", "aff_campus_unique": "Kano;;Cambridge", "aff_country_unique_index": "0;1;2;2;1;0;3;4;5;5;5;6;0;7;8;9;10;10;1;10;11", "aff_country_unique": "Nigeria;Germany;United Kingdom;Sweden;Canada;Portugal;Spain;South Africa;Rwanda;Mexico;Ethiopia;Ghana" }, { "id": "7SaXczaBpG", "title": "RWKV: Reinventing RNNs for the Transformer Era", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Transformers have revolutionized almost all natural language processing (NLP) tasks but suffer from memory and computational complexity that scales quadratically with sequence length. In contrast, recurrent neural networks (RNNs) exhibit linear scaling in memory and computational requirements but struggle to match the same performance as Transformers due to limitations in parallelization and scalability. We propose a novel model architecture, Receptance Weighted Key Value (RWKV), that combines the efficient parallelizable training of transformers with the efficient inference of RNNs.\n\nOur approach leverages a linear attention mechanism and allows us to formulate the model as either a Transformer or an RNN, thus parallelizing computations during training and maintains constant computational and memory complexity during inference. We scale our models as large as 14 billion parameters, by far the largest dense RNN ever trained, and find RWKV performs on par with similarly sized Transformers, suggesting future work can leverage this architecture to create more efficient models. This work presents a significant step towards reconciling trade-offs between computational efficiency and model performance in sequence processing tasks.", "keywords": "large language model;scaling laws;open source;pretraining", "primary_area": "", "supplementary_material": "", "author": "Bo Peng;Eric Alcaide;Quentin Gregory Anthony;Alon Albalak;Samuel Arcadinho;Stella Biderman;Huanqi Cao;Xin Cheng;Michael Nguyen Chung;Leon Derczynski;Xingjian Du;Matteo Grella;Kranthi Kiran GV;Xuzheng He;Haowen Hou;Przemyslaw Kazienko;Jan Kocon;Jiaming Kong;Bart\u0142omiej Koptyra;Hayden Lau;Jiaju Lin;Krishna Sri Ipsit Mantri;Ferdinand Mom;Atsushi Saito;Guangyu Song;Xiangru Tang;Johan S. 
Wind;Stanis\u0142aw Wo\u017aniak;Zhenyuan Zhang;Qinghua Zhou;Jian Zhu;Rui-Jie Zhu", "authorids": "~Bo_Peng21;~Eric_Alcaide2;~Quentin_Gregory_Anthony1;~Alon_Albalak1;~Samuel_Arcadinho1;~Stella_Biderman1;~Huanqi_Cao1;~Xin_Cheng2;~Michael_Nguyen_Chung1;~Leon_Derczynski1;~Xingjian_Du1;~Matteo_Grella1;~Kranthi_Kiran_GV1;~Xuzheng_He1;~Haowen_Hou1;~Przemyslaw_Kazienko1;~Jan_Kocon1;~Jiaming_Kong1;~Bart\u0142omiej_Koptyra1;~Hayden_Lau1;~Jiaju_Lin1;~Krishna_Sri_Ipsit_Mantri1;~Ferdinand_Mom1;~Atsushi_Saito1;~Guangyu_Song1;~Xiangru_Tang2;~Johan_S._Wind2;~Stanis\u0142aw_Wo\u017aniak2;~Zhenyuan_Zhang4;~Qinghua_Zhou2;~Jian_Zhu2;~Rui-Jie_Zhu2", "gender": "M;;M;;;F;M;;Non-Binary;M;Non-Binary;M;M;M;;M;M;M;M;M;Non-Binary;;M;M;;M;;M;M;M;;M", "homepage": "https://www.rwkv.com;;https://quentin-anthony.github.io/;https://alon-albalak.github.io/;;http://www.stellabiderman.com;https://gitbub.com/Blealtan;;https://databites.ca/;https://www.derczynski.com/itu/;;http://www.matteogrella.com/;https://kranthigv.com;https://github.com/cyclekiller;;https://kazienko.eu/en;;https://github.com/jiamingkong;;https://sites.google.com/view/lazertc/home?authuser=2;https://jiaju-lin-97.github.io/;;https://3outeille.github.io/;;;https://xiangrutang.github.io/;;;https://cryscan.github.io/profile;;https://lingjzhu.github.io/;https://ruijie-zhu.github.io", "dblp": ";;;283/4427;238/0363;239/5641;214/8159.html;;;66/8157;;;250/5514;;;k/PrzemyslawKazienko;117/2896;;340/8735;;;;;44/3636.html;;246/8064;;;;;;317/4836", "google_scholar": ";;https://scholar.google.com/citations?hl=en;F6J_7d8AAAAJ;;bO7H0DAAAAAJ;;;;https://scholar.google.dk/citations?user=d8iwqa8AAAAJ;UqBl_VMAAAAJ;;L_OIp_oAAAAJ;;;https://scholar.google.pl/citations?user=cxLgNccAAAAJ;pmQHb5IAAAAJ;;9IAbTK8AAAAJ;;JCAH3OoAAAAJ;;;;;;;q49nN_kAAAAJ;;;jLtpcLgAAAAJ;08ITzJsAAAAJ", "or_profile": "~Bo_Peng21;~Eric_Alcaide2;~Quentin_Gregory_Anthony1;~Alon_Albalak1;~Samuel_Arcadinho1;~Stella_Biderman1;~Huanqi_Cao1;~Xin_Cheng2;~Michael_Nguyen_Chung1;~Leon_Derczynski1;~Xingjian_Du1;~Matteo_Grella1;~Kranthi_Kiran_GV1;~Xuzheng_He1;~Haowen_Hou1;~Przemyslaw_Kazienko1;~Jan_Kocon1;~Jiaming_Kong1;~Bart\u0142omiej_Koptyra1;~Hayden_Lau1;~Jiaju_Lin1;~Krishna_Sri_Ipsit_Mantri1;~Ferdinand_Mom1;~Atsushi_Saito1;~Guangyu_Song1;~Xiangru_Tang2;~Johan_S._Wind2;~Stanis\u0142aw_Wo\u017aniak2;~Zhenyuan_Zhang4;~Qinghua_Zhou2;~Jian_Zhu2;~Rui-Jie_Zhu2", "aff": "University of Hong Kong;;Ohio State University, Columbus;University of California, Santa Barbara;Outsystems;Booz Allen Hamilton;Tsinghua University;;;University of Washington;ByteDance AI Lab;EXOP GmbH;New York University;;;Wroclaw University of Science and Technology;Wroclaw University of Science and Technology;;Wroc\u0142aw University of Science and Technology;;East China Normal University;;Ecole Pour l'Informatique et les Techniques Avancees;;;Yale University;;Technical University of Wroclaw;University of Michigan - Ann Arbor;Ohio State University, Columbus;University of British Columbia;University of Electronic Science and Technology of China", "aff_domain": "hku.hk;;osu.edu;ucsb.edu;outsystems.com;boozallen.com;cs.tsinghua.edu.cn;;;uw.edu;bytedance.com;exop-group.com;nyu.edu;;;pwr.edu.pl;pwr.edu.pl;;pwr.edu.pl;;ecnu.edu.cn;;epita.fr;;;yale.edu;;pwr.edu.pl;umich.edu;osu.edu;ubc.ca;uestc.edu.cn", "position": "Undergrad student;;PhD student;PhD student;Researcher;Industry researcher;PhD student;;;Visiting Professor;Researcher;Associate Director;MS student;;;Full Professor;Assistant Professor;;MS student;;MS student;;MS student;;;PhD student;;PhD student;MS 
student;PhD student;Assistant Professor;Undergrad student", "bibtex": "@inproceedings{\npeng2023rwkv,\ntitle={{RWKV}: Reinventing {RNN}s for the Transformer Era},\nauthor={Bo Peng and Eric Alcaide and Quentin Gregory Anthony and Alon Albalak and Samuel Arcadinho and Stella Biderman and Huanqi Cao and Xin Cheng and Michael Nguyen Chung and Leon Derczynski and Xingjian Du and Matteo Grella and Kranthi Kiran GV and Xuzheng He and Haowen Hou and Przemyslaw Kazienko and Jan Kocon and Jiaming Kong and Bart{\\l}omiej Koptyra and Hayden Lau and Jiaju Lin and Krishna Sri Ipsit Mantri and Ferdinand Mom and Atsushi Saito and Guangyu Song and Xiangru Tang and Johan S. Wind and Stanis{\\l}aw Wo{\\'z}niak and Zhenyuan Zhang and Qinghua Zhou and Jian Zhu and Rui-Jie Zhu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=7SaXczaBpG}\n}", "github": "", "project": "", "reviewers": "HDNB;85wr;rSzx;Zd3h", "site": "https://openreview.net/forum?id=7SaXczaBpG", "pdf_size": 0, "rating": "3;3;3;3", "confidence": "3;4;4;5", "excitement": "4;5;4;3", "reproducibility": "4;4;3;4", "correctness": "4;2;3;3", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 4.0, "reproducibility_avg": 3.75, "correctness_avg": 3.0, "replies_avg": 14, "authors#_avg": 32, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0002-6823-9080;0000-0003-0809-1704;;0000-0001-8228-1042;;;;0000-0002-8656-3431;;0000-0003-3069-2039;;;;0000-0001-5868-356X;my-orcid?orcid=0000-0002-7665-6896;;0009-0005-9938-305X;;;;;;;;;0000-0001-8761-1629;;;;", "linkedin": ";;quentin-anthony;alonalbalak;samuel-arcadinho/;stellabiderman;;;michael-c-31662a38/;leon-derczynski/;;https://it.linkedin.com/in/matteogrella;;;;;jankocon/;;;hayden-lau-7b9958276/;;;;;;;;;;qinghua-zhou-740ab9125/;;", "aff_unique_index": "0;1;2;3;4;5;6;7;8;9;10;10;11;12;13;14;15;16;1;17;18", "aff_unique_norm": "University of Hong Kong;Ohio State University;University of California, Santa Barbara;OutSystems;Booz Allen Hamilton;Tsinghua University;University of Washington;ByteDance;EXOP GmbH;New York University;Wroclaw University of Science and Technology;Wroc\u0142aw University of Science and Technology;East China Normal University;Ecole Pour l'Informatique et les Techniques Avancees;Yale University;Wroclaw University of Technology;University of Michigan;University of British Columbia;University of Electronic Science and Technology of China", "aff_unique_dep": ";;;;;;;AI Lab;;;;;;;;;;;", "aff_unique_url": "https://www.hku.hk;https://www.osu.edu;https://www.ucsb.edu;https://www.outsystems.com;https://www.boozallen.com;https://www.tsinghua.edu.cn;https://www.washington.edu;https://www.bytedance.com;;https://www.nyu.edu;https://www.pwr.edu.pl;https://www.pwr.edu.pl;http://www.ecnu.edu.cn;https://www.epita.fr;https://www.yale.edu;https://www.pwr.edu.pl;https://www.umich.edu;https://www.ubc.ca;https://www.uestc.edu.cn", "aff_unique_abbr": "HKU;OSU;UCSB;OutSystems;BAH;THU;UW;ByteDance;;NYU;WUST;WUST;ECNU;EPITA;Yale;WUT;UM;UBC;UESTC", "aff_campus_unique_index": "0;1;2;4;1", "aff_campus_unique": "Hong Kong SAR;Columbus;Santa Barbara;;Ann Arbor", "aff_country_unique_index": "0;1;1;2;1;0;1;0;3;1;4;4;4;0;5;1;4;1;1;6;0", "aff_country_unique": "China;United States;Portugal;Germany;Poland;France;Canada" }, { "id": "7TKKvwyQef", "title": "DialGuide: Aligning Dialogue Model Behavior with Developer Guidelines", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Dialogue 
models are able to generate coherent and fluent responses, but they can still be challenging to control and may produce non-engaging, unsafe results. This unpredictability diminishes user trust and can hinder the use of the models in the real world. To address this, we introduce DialGuide, a novel framework for controlling dialogue model behavior using natural language rules, or guidelines. These guidelines provide information about the context they are applicable to and what should be included in the response, allowing the models to generate responses that are more closely aligned with the developer's expectations and intent. We evaluate DialGuide on three tasks in open-domain dialogue response generation: guideline selection, response generation, and response entailment verification. Our dataset contains 10,737 positive and 15,467 negative dialogue context-response-guideline triplets across two domains - chit-chat and safety. We provide baseline models for the tasks and benchmark their performance. We also demonstrate that DialGuide is effective in the dialogue safety domain, producing safe and engaging responses that follow developer guidelines.", "keywords": "dialogue;safety;generation", "primary_area": "", "supplementary_material": "", "author": "Prakhar Gupta;Yang Liu;Di Jin;Behnam Hedayatnia;Spandana Gella;Sijia Liu;Patrick L. Lange;Julia Hirschberg;Dilek Hakkani-Tur", "authorids": "~Prakhar_Gupta1;~Yang_Liu60;~Di_Jin1;~Behnam_Hedayatnia1;~Spandana_Gella2;~Sijia_Liu4;~Patrick_L._Lange1;~Julia_Hirschberg1;~Dilek_Hakkani-Tur1", "gender": "M;F;M;M;F;M;F;F;F", "homepage": "https://prakharguptaz.github.io/;;https://jind11.github.io/;;https://scholar.google.com/citations?user=fChTW6MAAAAJ&hl=en&oi=ao;https://patricklange.dev/;http://www.cs.columbia.edu/~julia/;https://siebelschool.illinois.edu/about/people/faculty/dilek;https://sijial430.github.io/", "dblp": "121/0747;51/3710-4;;194/7461;146/3968.html;188/8957;h/JuliaHirschberg;h/DilekZHakkaniTur;", "google_scholar": "YuFcRF0AAAAJ;w90wOucAAAAJ;x5QTK9YAAAAJ;MTG_OgQAAAAJ;fChTW6MAAAAJ;https://scholar.google.com/citations?hl=en;Qrd7FCoAAAAJ;GMcL_9kAAAAJ;WMMaqKkAAAAJ", "or_profile": "~Prakhar_Gupta1;~Yang_Liu60;~Di_Jin1;~Behnam_Hedayatnia1;~Spandana_Gella2;~Patrick_L._Lange1;~Julia_Hirschberg1;~Dilek_Hakkani_Tur1;~Sijia_Liu9", "aff": "Carnegie Mellon University;Amazon;Amazon;Amazon;Amazon;Amazon - Alexa AI;Columbia University;Amazon;Amazon AGI", "aff_domain": "cmu.edu;amazon.com;amazon.com;amazon.com;amazon.com;amazon.com;cs.columbia.edu;amazon.com;amazon.com", "position": "PhD student;Principal Researcher;Researcher;Researcher;Research Scientist;Researcher;Full Professor;Snr Principal Scientist;Researcher", "bibtex": "@inproceedings{\ngupta2023dialguide,\ntitle={DialGuide: Aligning Dialogue Model Behavior with Developer Guidelines},\nauthor={Prakhar Gupta and Yang Liu and Di Jin and Behnam Hedayatnia and Spandana Gella and Sijia Liu and Patrick L. 
Lange and Julia Hirschberg and Dilek Hakkani-Tur},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=7TKKvwyQef}\n}", "github": "", "project": "", "reviewers": "mumb;EDyR;QdKf", "site": "https://openreview.net/forum?id=7TKKvwyQef", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;4", "excitement": "3;4;3", "reproducibility": "4;4;3", "correctness": "3;3;3", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.0, "replies_avg": 10, "authors#_avg": 9, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;0000-0003-3935-663X;;0000-0001-5246-2117;", "linkedin": "prakhar-gupta-100/;yang-liu-8555143/;;behnam-h-68872238/;spandana-gella-313b7019/;langep/;;dilek-hakkani-tur-9517543/;letti-sijia-liu/", "aff_unique_index": "0;1;1;1;1;1;2;1;1", "aff_unique_norm": "Carnegie Mellon University;Amazon;Columbia University", "aff_unique_dep": ";Amazon.com, Inc.;", "aff_unique_url": "https://www.cmu.edu;https://www.amazon.com;https://www.columbia.edu", "aff_unique_abbr": "CMU;Amazon;Columbia", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "7UVOFuNk27", "title": "e-THERAPIST: I suggest you to cultivate a mindset of positivity and nurture uplifting thoughts", "track": "main", "status": "Long Main", "tldr": "", "abstract": "The shortage of therapists for mental health patients emphasizes the importance of globally accessible dialogue systems alleviating their issues. To have effective interpersonal psychotherapy, these systems must exhibit politeness and empathy when needed. However, these factors may vary as per the user's gender, age, persona, and sentiment. Hence, in order to establish trust and provide a personalized cordial experience, it is essential that generated responses should be tailored to individual profiles and attributes. Focusing on this objective, we propose e-THERAPIST, a novel polite interpersonal psychotherapy dialogue system to address issues like depression, anxiety, schizophrenia, etc. We begin by curating a unique conversational dataset for psychotherapy, called PsyCon. It is annotated at two levels: (i) dialogue-level - including user's profile information (gender, age, persona) and therapist's psychotherapeutic approach; and (ii) utterance-level - encompassing user's sentiment and therapist's politeness, and interpersonal behaviour. Then, we devise a novel reward model to adapt correct polite interpersonal behaviour and use it to train e-THERAPIST on PsyCon employing NLPO loss. 
Our extensive empirical analysis validates the effectiveness of each component of the proposed e-THERAPIST demonstrating its potential impact in psychotherapy settings.", "keywords": "Dialogue Systems;Psychotherapy;Persona;Sentiment;Politeness;Interpersonal communication Strategy;Rewards;Reinforcement Learning", "primary_area": "", "supplementary_material": "", "author": "Kshitij Mishra;Priyanshu Priya;Manisha Burja;Asif Ekbal", "authorids": "~Kshitij_Mishra1;~Priyanshu_Priya1;~Manisha_Burja1;~Asif_Ekbal1", "gender": "M;F;F;M", "homepage": ";;;https://ekbalasif.github.io", "dblp": "254/5221;321/1747;;11/3590", "google_scholar": "https://scholar.google.com/citations?hl=en;-A-CDPkAAAAJ;_pQ3kAUAAAAJ;https://scholar.google.co.in/citations?user=IAL_F04AAAAJ", "or_profile": "~Kshitij_Mishra1;~Priyanshu_Priya1;~Manisha_Burja1;~Asif_Ekbal1", "aff": "Indian Institute of Technology, Patna;Indian Institute of Technology, Patna;Indian Institute of Technology, Patna.;Indian Institute of Technology, Patna", "aff_domain": "iitp.ac.in;iitp.ac.in;iitp.ac.in;iitp.ac.in", "position": "PhD student;PhD student;MS student;Associate Professor", "bibtex": "@inproceedings{\nmishra2023etherapist,\ntitle={e-{THERAPIST}: I suggest you to cultivate a mindset of positivity and nurture uplifting thoughts},\nauthor={Kshitij Mishra and Priyanshu Priya and Manisha Burja and Asif Ekbal},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=7UVOFuNk27}\n}", "github": "", "project": "", "reviewers": "MZbm;jwGD;YcLm", "site": "https://openreview.net/forum?id=7UVOFuNk27", "pdf_size": 0, "rating": "4;4;4", "confidence": "5;3;4", "excitement": "2;4;3", "reproducibility": "3;4;4", "correctness": "3;3;4", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0001-6474-2757;;;0000-0003-3612-8834", "linkedin": "https://linkedin.com/in/kshitij-mishra-6770451b1;priyanshu-priya-33;manishaburja/;asif-ekbal-3b8a4517/?originalSubdomain=in", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Indian Institute of Technology Patna", "aff_unique_dep": "", "aff_unique_url": "https://www.iitp.ac.in", "aff_unique_abbr": "IIT Patna", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Patna", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "India" }, { "id": "7UvOkmrB8V", "title": "Approximating CKY with Transformers", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "We investigate the ability of transformer models to approximate the CKY algorithm, using them to directly predict a sentence's parse and thus avoid the CKY algorithm's cubic dependence on sentence length. We find that on standard constituency parsing benchmarks this approach achieves competitive or better performance than comparable parsers that make use of CKY, while being faster. We also evaluate the viability of this approach for parsing under \\textit{random} PCFGs. Here we find that performance declines as the grammar becomes more ambiguous, suggesting that the transformer is not fully capturing the CKY computation. 
However, we also find that incorporating additional inductive bias is helpful, and we propose a novel approach that makes use of gradients with respect to chart representations in predicting the parse, in analogy with the CKY algorithm being a subgradient of a partition function variant with respect to the chart.", "keywords": "transformer;algorithmic reasoning;dynamic programming;constituency parsing", "primary_area": "", "supplementary_material": "", "author": "Ghazal Khalighinejad;Ollie Liu;Sam Wiseman", "authorids": "~Ghazal_Khalighinejad1;~Ollie_Liu1;~Sam_Wiseman1", "gender": "F;M;M", "homepage": "https://ghazalkhalighinejad.github.io/;https://ollieliu.com;https://swiseman.github.io", "dblp": "280/3641.html;;149/1260", "google_scholar": "FlU9aQkAAAAJ;https://scholar.google.com/citations?view_op=list_works;SDavuPAAAAAJ", "or_profile": "~Ghazal_Khalighinejad1;~Ollie_Liu1;~Sam_Wiseman1", "aff": "Department of Computer Science, Duke University;University of Southern California;Department of Computer Science, Duke University", "aff_domain": "cs.duke.edu;usc.edu;cs.duke.edu", "position": "PhD student;PhD;Assistant Professor", "bibtex": "@inproceedings{\nkhalighinejad2023approximating,\ntitle={Approximating {CKY} with Transformers},\nauthor={Ghazal Khalighinejad and Ollie Liu and Sam Wiseman},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=7UvOkmrB8V}\n}", "github": "", "project": "", "reviewers": "e4gq;LCCU;Fb95", "site": "https://openreview.net/forum?id=7UvOkmrB8V", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;3", "excitement": "3;3;3", "reproducibility": "4;4;3", "correctness": "4;4;3", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": ";oliu/;", "aff_unique_index": "0;1;0", "aff_unique_norm": "Duke University;University of Southern California", "aff_unique_dep": "Department of Computer Science;", "aff_unique_url": "https://www.duke.edu;https://www.usc.edu", "aff_unique_abbr": "Duke;USC", "aff_campus_unique_index": "1", "aff_campus_unique": ";Los Angeles", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "7YluNq3HQQ", "title": "BYOC: Personalized Few-Shot Classification with Co-Authored Class Descriptions", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Text classification is a well-studied and versatile building block for many NLP applications. Yet, existing approaches require either large annotated corpora to train a model with or, when using large language models as a base, require carefully crafting the prompt as well as using a long context that can fit many examples. As a result, it is not possible for end-users to build classifiers for themselves. \n\nTo address this issue, we propose a novel approach to few-shot text classification using an LLM. Rather than few-shot examples, the LLM is prompted with descriptions of the salient features of each class. These descriptions are coauthored by the user and the LLM interactively: while the user annotates each few-shot example, the LLM asks relevant questions that the user answers. 
Examples, questions, and answers are summarized to form the classification prompt.\n\nOur experiments show that our approach yields high accuracy classifiers, within 79% of the performance of models trained with significantly larger datasets while using only 1% of their training sets. Additionally, in a study with 30 participants, we show that end-users are able to build classifiers to suit their specific needs. The personalized classifiers show an average accuracy of 90%, which is 15% higher than the state-of-the-art approach.", "keywords": "text classification;large language models;prompt engineering;few-shot classification;user study;personalization", "primary_area": "", "supplementary_material": "", "author": "Arth Bohra;Govert Verkes;Artem Harutyunyan;Pascal Weinberger;Giovanni Campagna", "authorids": "~Arth_Bohra2;~Govert_Verkes1;~Artem_Harutyunyan1;~Pascal_Weinberger1;~Giovanni_Campagna1", "gender": "M;;;M;M", "homepage": ";;;;https://gcampax.com", "dblp": ";;;;167/5968", "google_scholar": ";;p5ax1soAAAAJ;;fOioY3wAAAAJ", "or_profile": "~Arth_Bohra2;~Govert_Verkes1;~Artem_Harutyunyan1;~Pascal_Weinberger1;~Giovanni_Campagna1", "aff": "Bardeen;;Bardeen;Bardeen.ai;Bardeen, Inc.", "aff_domain": "bardeen.ai;;bardeen.ai;bardeen.ai;bardeen.ai", "position": "Intern;;Researcher;CEO;Researcher", "bibtex": "@inproceedings{\nbohra2023byoc,\ntitle={{BYOC}: Personalized Few-Shot Classification with Co-Authored Class Descriptions},\nauthor={Arth Bohra and Govert Verkes and Artem Harutyunyan and Pascal Weinberger and Giovanni Campagna},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=7YluNq3HQQ}\n}", "github": "", "project": "", "reviewers": "cNMD;ixK3;JyXx", "site": "https://openreview.net/forum?id=7YluNq3HQQ", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;4", "excitement": "4;3;3", "reproducibility": "3;4;3", "correctness": "4;3;3", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.3333333333333335, "replies_avg": 10, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;", "linkedin": "arthbohra/;;;https://linkedin.com/in/PascalWeinberger;", "aff_unique_index": "0;0;1;2", "aff_unique_norm": "Bardeen;Bardeen.ai;Bardeen, Inc.", "aff_unique_dep": ";;", "aff_unique_url": ";https://www.bardeen.ai;", "aff_unique_abbr": ";Bardeen.ai;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "1;1", "aff_country_unique": ";United States" }, { "id": "7Z1F0h7gWq", "title": "Learn and Consolidate: Continual Adaptation for Zero-Shot and Multilingual Neural Machine Translation", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Although existing multilingual neural machine translation (MNMT) models have demonstrated remarkable performance to handle multiple translation directions in a single model and achieved zero-shot translation between language pairs unseen in training, they still suffer from relatively poor translation qualities for some language pairs. A practical scenario is that how to continually update MNMT models for both supervised and zero-shot translations when limited new data arrives. To this end, we propose a two-stage approach that encourages original models to acquire language-agnostic multilingual representations from new data, and preserves the model architecture without introducing parameters. 
Experimental results and further analysis demonstrate that our method can efficiently improve performance of existing MNMT models in translation directions where they are initially weak, and mitigates the degeneration in the original well-performing translation directions, offering flexibility in the real-world scenario.", "keywords": "Multilingual Neural Machine Translation;Zero-shot Machine Translation;Continual Learning", "primary_area": "", "supplementary_material": "", "author": "Kaiyu Huang;Peng Li;Junpeng Liu;Maosong Sun;Yang Liu", "authorids": "~Kaiyu_Huang1;~Peng_Li2;~Junpeng_Liu1;~Maosong_Sun1;~Yang_Liu19", "gender": "M;M;M;M;M", "homepage": "https://kaiyuhwang.github.io/;http://www.lpeng.net/;;https://www.cs.tsinghua.edu.cn/csen/info/1312/4394.htm;http://nlp.csai.tsinghua.edu.cn/~ly/", "dblp": "191/2871;83/6353-30;125/9435;95/3291-1;51/3710-5", "google_scholar": "qAp-hS4AAAAJ;hgYzkOQAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com.tw/citations?user=zIgT0HMAAAAJ;https://scholar.google.com.hk/citations?user=lVhoKNcAAAAJ", "or_profile": "~Kaiyu_Huang1;~Peng_Li2;~Junpeng_Liu1;~Maosong_Sun1;~Yang_Liu19", "aff": "Tsinghua University;Tsinghua University;;Tsinghua University;Tsinghua University", "aff_domain": "tsinghua.edu.cn;tsinghua.edu.cn;;tsinghua.edu.cn;tsinghua.edu.cn", "position": "Postdoc;Associate Professor;;Full Professor;Professor", "bibtex": "@inproceedings{\nhuang2023learn,\ntitle={Learn and Consolidate: Continual Adaptation for Zero-Shot and Multilingual Neural Machine Translation},\nauthor={Kaiyu Huang and Peng Li and Junpeng Liu and Maosong Sun and Yang Liu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=7Z1F0h7gWq}\n}", "github": "", "project": "", "reviewers": "ZXCV;3Rbs;HGki", "site": "https://openreview.net/forum?id=7Z1F0h7gWq", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;4;3", "excitement": "3;3;4", "reproducibility": "4;3;4", "correctness": "2;2;4", "rating_avg": 5.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 2.6666666666666665, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0001-6779-1810;0000-0003-1374-5979;;;0000-0002-3087-242X", "linkedin": ";;;;", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Tsinghua University", "aff_unique_dep": "", "aff_unique_url": "https://www.tsinghua.edu.cn", "aff_unique_abbr": "THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "7cXoueVCoL", "title": "CodeBERTScore: Evaluating Code Generation with Pretrained Models of Code", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Since the rise of neural natural-language-to-code models (NL$\\rightarrow$Code) that can generate long expressions and statements rather than a single next-token, one of the major problems has been reliably evaluating their generated output. \nIn this paper, we propose CodeBERTScore: an evaluation metric for code generation, which builds on BERTScore (Zhang et al., 2020).\nInstead of encoding only the generated tokens as in BERTScore, CodeBERTScore also encodes the natural language input preceding the generated code, thus modeling the consistency between the generated code and its given natural language context as well. 
We perform an extensive evaluation of CodeBERTScore across four programming languages. We find that CodeBERTScore achieves a higher correlation with human preference and with functional correctness than all existing metrics. That is, generated code that receives a higher score by CodeBERTScore is more likely to be preferred by humans, as well as to function correctly when executed.\nWe release five language-specific pretrained models to use with our publicly available code.\nOur language-specific models have been downloaded more than **1,000,000** times from the Huggingface Hub. Our code and data are available at https://github.com/neulab/code-bert-score", "keywords": "nl2code;code generation;code;evaluation;codebert;bertscore", "primary_area": "", "supplementary_material": "", "author": "Shuyan Zhou;Uri Alon;Sumit Agarwal;Graham Neubig", "authorids": "~Shuyan_Zhou1;~Uri_Alon1;~Sumit_Agarwal2;~Graham_Neubig1", "gender": "Non-Binary;M;M;M", "homepage": "https://shuyanzhou.github.io/;https://urialon.ml/;;http://phontron.com", "dblp": ";40/2257-2;134/6808;03/8155", "google_scholar": "t6YzEpgAAAAJ;https://scholar.google.co.il/citations?user=QBn7vq8AAAAJ;FhP8jyIAAAAJ;wlosgkoAAAAJ", "or_profile": "~Shuyan_Zhou1;~Uri_Alon1;~Sumit_Agarwal2;~Graham_Neubig1", "aff": "Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University", "aff_domain": "cs.cmu.edu;cmu.edu;andrew.cmu.edu;cmu.edu", "position": "PhD student;Postdoc;MS student;Associate Professor", "bibtex": "@inproceedings{\nzhou2023codebertscore,\ntitle={Code{BERTS}core: Evaluating Code Generation with Pretrained Models of Code},\nauthor={Shuyan Zhou and Uri Alon and Sumit Agarwal and Graham Neubig},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=7cXoueVCoL}\n}", "github": "", "project": "", "reviewers": "Q9Kk;HHa9;29mb", "site": "https://openreview.net/forum?id=7cXoueVCoL", "pdf_size": 0, "rating": "", "confidence": "4;4;5", "excitement": "2;4;5", "reproducibility": "4;5;5", "correctness": "2;4;4", "rating_avg": 0, "confidence_avg": 4.333333333333333, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.666666666666667, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0, "corr_rating_correctness": 0, "orcid": ";;;", "linkedin": ";https://linkedin.com/in/urialon1/;;", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Carnegie Mellon University", "aff_unique_dep": "", "aff_unique_url": "https://www.cmu.edu", "aff_unique_abbr": "CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "7fdIbXjRSp", "title": "Thorny Roses: Investigating the Dual Use Dilemma in Natural Language Processing", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Dual use, the intentional, harmful reuse of technology and scientific artefacts, is an ill-defined problem within the context of Natural Language Processing (NLP). As large language models (LLMs) have advanced in their capabilities and become more accessible, the risk of their intentional misuse becomes more prevalent. To prevent such intentional malicious use, it is necessary for NLP researchers and practitioners to understand and mitigate the risks of their research. Hence, we present an NLP-specific definition of dual use informed by researchers and practitioners in the field. 
Further, we propose a checklist focusing on dual-use in NLP, that can be integrated into existing conference ethics-frameworks. The definition and checklist are created based on a survey of NLP researchers and practitioners.", "keywords": "dual use;ai ethics;checklist;harms;survey", "primary_area": "", "supplementary_material": "", "author": "Lucie-Aim\u00e9e Kaffee;Arnav Arora;Zeerak Talat;Isabelle Augenstein", "authorids": "~Lucie-Aim\u00e9e_Kaffee1;~Arnav_Arora1;~Zeerak_Talat1;~Isabelle_Augenstein1", "gender": "F;;F;", "homepage": "https://luciekaffee.github.io/;;http://isabelleaugenstein.github.io/;https://zeerak.org", "dblp": "204/9536;;93/11424.html;305/7414", "google_scholar": "xiuGTq0AAAAJ;EQUUUUoAAAAJ;https://scholar.google.co.uk/citations?user=DjJp0dcAAAAJ;3M3WdvkAAAAJ", "or_profile": "~Lucie-Aim\u00e9e_Kaffee1;~Arnav_Arora1;~Isabelle_Augenstein1;~Zeerak_Waseem1", "aff": "Copenhagen University;University of Copenhagen;University of Copenhagen;Mohamed bin Zayed University of Artificial Intelligence", "aff_domain": "ku.dk;diku.dk;ku.dk;mbzuai.ac.ae", "position": "Postdoc;PhD student;Full Professor;Postdoc", "bibtex": "@inproceedings{\nkaffee2023thorny,\ntitle={Thorny Roses: Investigating the Dual Use Dilemma in Natural Language Processing},\nauthor={Lucie-Aim{\\'e}e Kaffee and Arnav Arora and Zeerak Talat and Isabelle Augenstein},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=7fdIbXjRSp}\n}", "github": "", "project": "", "reviewers": "chMo;AsLm;mJ7P", "site": "https://openreview.net/forum?id=7fdIbXjRSp", "pdf_size": 0, "rating": "4;4;4", "confidence": "5;3;3", "excitement": "3;4;3", "reproducibility": "0;0;4", "correctness": "2;3;3", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 1.3333333333333333, "correctness_avg": 2.6666666666666665, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0003-1562-7909;", "linkedin": ";;isabelle-augenstein-82436b7a/;", "aff_unique_index": "0;0;0;1", "aff_unique_norm": "University of Copenhagen;Mohamed bin Zayed University of Artificial Intelligence", "aff_unique_dep": ";", "aff_unique_url": "https://www.ku.dk;https://mbzuai.ac.ae", "aff_unique_abbr": "UCPH;MBZUAI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1", "aff_country_unique": "Denmark;United Arab Emirates" }, { "id": "7gIhLGqyph", "title": "Are Structural Concepts Universal in Transformer Language Models? Towards Interpretable Cross-Lingual Generalization", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Large language models (LLMs) have exhibited considerable cross-lingual generalization abilities, whereby they implicitly transfer knowledge across languages. However, the transfer is not equally successful for all languages, especially for low-resource ones, which poses an ongoing challenge. It is unclear whether we have reached the limits of implicit cross-lingual generalization and if explicit knowledge transfer is viable. In this paper, we investigate the potential for explicitly aligning conceptual correspondence between languages to enhance cross-lingual generalization. Using the syntactic aspect of language as a testbed, our analyses of 43 languages reveal a high degree of alignability among the spaces of structural concepts within each language for both encoder-only and decoder-only LLMs. 
We then propose a meta-learning-based method to learn to align conceptual spaces of different languages, which facilitates zero-shot and few-shot generalization in concept classification and also offers insights into the cross-lingual in-context learning phenomenon. Experiments on syntactic analysis tasks show that our approach achieves competitive results with state-of-the-art methods and narrows the performance gap between languages, particularly benefiting those with limited resources.", "keywords": "cross-lingual generalization;syntax;multilinguality;language models", "primary_area": "", "supplementary_material": "", "author": "Ningyu Xu;Qi Zhang;Jingting Ye;Menghan Zhang;Xuanjing Huang", "authorids": "~Ningyu_Xu1;~Qi_Zhang8;~Jingting_Ye1;~Menghan_Zhang1;~Xuanjing_Huang1", "gender": "F;M;;;F", "homepage": ";http://qizhang.info;https://www.eva.mpg.de/linguistic-and-cultural-evolution/staff/jingting-ye/;;https://xuanjing-huang.github.io/", "dblp": "336/7014;52/323-1;336/7632;;05/6735-1", "google_scholar": "ROKR004AAAAJ;XfqR3yYAAAAJ;NO0tprQAAAAJ;7U5KadkAAAAJ;RGsMgZA4H78C", "or_profile": "~Ningyu_Xu1;~Qi_Zhang8;~Jingting_Ye1;~Menghan_Zhang1;~Xuanjing_Huang1", "aff": "Fudan University;Fudan University;Fudan University;Fudan University;Fudan University", "aff_domain": "fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn", "position": "PhD student;Full Professor;Associate Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nxu2023are,\ntitle={Are Structural Concepts Universal in Transformer Language Models? Towards Interpretable Cross-Lingual Generalization},\nauthor={Ningyu Xu and Qi Zhang and Jingting Ye and Menghan Zhang and Xuanjing Huang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=7gIhLGqyph}\n}", "github": "", "project": "", "reviewers": "McsH;TY8m;m4pp", "site": "https://openreview.net/forum?id=7gIhLGqyph", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;2;4", "excitement": "3;4;3", "reproducibility": "4;4;4", "correctness": "3;3;4", "rating_avg": 4.0, "confidence_avg": 3.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0000-0002-3505-862X;0000-0001-9197-9426", "linkedin": ";;;;", "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Fudan University", "aff_unique_dep": "", "aff_unique_url": "https://www.fudan.edu.cn", "aff_unique_abbr": "Fudan", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "7hSVLwNbWT", "title": "Coverage-based Example Selection for In-Context Learning", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "In-context learning (ICL), the ability of large language models to perform novel tasks by conditioning on a prompt with a few task examples, requires these examples to be informative about the test instance. The standard approach of independently ranking and selecting the most similar examples selects redundant examples while omitting important information. In this work, we show that BERTScore-Recall (BSR) selects better examples that demonstrate more of the salient aspects, e.g. reasoning patterns, of the test input. We further extend BSR and many standard metrics to easily optimizable set-level metrics, giving still better coverage of those salient aspects. 
On 15 datasets spanning 6 tasks and with 7 diverse LLMs, we show that (1) BSR is the superior metric for in-context example selection across the board, and (2) for compositional tasks, set selection using Set-BSR outperforms independent ranking by up to 17 points on average and, despite being training-free, surpasses methods that leverage task or LLM-specific training.", "keywords": "In-Context Learning;Demonstration Selection;Large Language Models;Prompting;Compositional Generalization;Semantic Parsing", "primary_area": "", "supplementary_material": "", "author": "Shivanshu Gupta;Matt Gardner;Sameer Singh", "authorids": "~Shivanshu_Gupta2;~Matt_Gardner1;~Sameer_Singh1", "gender": "M;;M", "homepage": "https://shivanshu-gupta.github.io;;http://sameersingh.org", "dblp": "302/4731;00/8046;13/3568-1", "google_scholar": "OtlUDs8AAAAJ;SfKdzrUAAAAJ;-hGZC54AAAAJ", "or_profile": "~Shivanshu_Gupta2;~Matt_Gardner1;~Sameer_Singh1", "aff": "University of California, Irvine;Microsoft;Allen Institute for Artificial Intelligence", "aff_domain": "uci.edu;microsoft.com;allenai.org", "position": "PhD student;Researcher;Allen AI Fellow", "bibtex": "@inproceedings{\ngupta2023coveragebased,\ntitle={Coverage-based Example Selection for In-Context Learning},\nauthor={Shivanshu Gupta and Matt Gardner and Sameer Singh},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=7hSVLwNbWT}\n}", "github": "", "project": "", "reviewers": "cBG5;t4QT;bmFi;YWop", "site": "https://openreview.net/forum?id=7hSVLwNbWT", "pdf_size": 0, "rating": "5;5;5;5", "confidence": "4;3;4;4", "excitement": "3;3;4;3", "reproducibility": "4;4;3;3", "correctness": "3;3;3;3", "rating_avg": 5.0, "confidence_avg": 3.75, "excitement_avg": 3.25, "reproducibility_avg": 3.5, "correctness_avg": 3.0, "replies_avg": 14, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0003-0621-6323", "linkedin": "shivanshu-gupta1995/;;sameersingh/", "aff_unique_index": "0;1;2", "aff_unique_norm": "University of California, Irvine;Microsoft;Allen Institute for Artificial Intelligence", "aff_unique_dep": ";Microsoft Corporation;", "aff_unique_url": "https://www.uci.edu;https://www.microsoft.com;https://allenai.org", "aff_unique_abbr": "UCI;Microsoft;AI2", "aff_campus_unique_index": "0", "aff_campus_unique": "Irvine;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "7jYZd05yjJ", "title": "ClusterLLM: Large Language Models as a Guide for Text Clustering", "track": "main", "status": "Long Main", "tldr": "", "abstract": "We introduce ClusterLLM, a novel text clustering framework that leverages feedback from an instruction-tuned large language model, such as ChatGPT. Compared with traditional unsupervised methods that builds upon \"small\" embedders, ClusterLLM exhibits two intriguing advantages: (1) it enjoys the emergent capability of LLM even if its embeddings are inaccessible; and (2) it understands the user's preference on clustering through textual instruction and/or a few annotated data. First, we prompt ChatGPT for insights on clustering perspective by constructing hard triplet questions $<$does A better correspond to B than C$>$, where A, B and C are similar data points that belong to different clusters according to small embedder. We empirically show that this strategy is both effective for fine-tuning small embedder and cost-efficient to query ChatGPT. 
Second, we prompt ChatGPT for helps on clustering granularity by carefully designed pairwise questions $<$do A and B belong to the same category$>$, and tune the granularity from cluster hierarchies that is the most consistent with the ChatGPT answers. Extensive experiments on $14$ datasets show that ClusterLLM consistently improves clustering quality, at an average cost of $\\sim$\\$0.6 per dataset.", "keywords": "text clustering;large language model;sentence relation;entropy-based sampling", "primary_area": "", "supplementary_material": "", "author": "Yuwei Zhang;Zihan Wang;Jingbo Shang", "authorids": "~Yuwei_Zhang1;~Zihan_Wang1;~Jingbo_Shang2", "gender": "M;M;M", "homepage": "https://zhang-yu-wei.github.io/;https://zihanwangki.github.io/;https://shangjingbo1226.github.io/", "dblp": "95/8351-1;152/5077-1;151/3145.html", "google_scholar": "nQyS0asAAAAJ;6UWtYZQAAAAJ;0SkFI4MAAAAJ", "or_profile": "~Yuwei_Zhang1;~Zihan_Wang1;~Jingbo_Shang2", "aff": "University of California, San Diego;University of California, San Diego;University of California, San Diego", "aff_domain": "ucsd.edu;ucsd.edu;ucsd.edu", "position": "MS student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nzhang2023clusterllm,\ntitle={Cluster{LLM}: Large Language Models as a Guide for Text Clustering},\nauthor={Yuwei Zhang and Zihan Wang and Jingbo Shang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=7jYZd05yjJ}\n}", "github": "", "project": "", "reviewers": "odh6;vYLX;s3g2", "site": "https://openreview.net/forum?id=7jYZd05yjJ", "pdf_size": 0, "rating": "4;4;4", "confidence": "2;3;3", "excitement": "4;3;4", "reproducibility": "3;4;4", "correctness": "4;4;4", "rating_avg": 4.0, "confidence_avg": 2.6666666666666665, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0001-6910-8130;;", "linkedin": "yuwei-zhang-38a3281a3/;;", "aff_unique_index": "0;0;0", "aff_unique_norm": "University of California, San Diego", "aff_unique_dep": "", "aff_unique_url": "https://www.ucsd.edu", "aff_unique_abbr": "UCSD", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "San Diego", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "7okuG5JhaM", "title": "Can ChatGPT Perform Reasoning Using the IRAC Method in Analyzing Legal Scenarios Like a Lawyer?", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Large Language Models (LLMs), such as ChatGPT, have drawn a lot of attentions recently in the legal domain due to its emergent ability to tackle a variety of legal tasks. However, it is still unknown if LLMs are able to analyze a legal case and perform reasoning in the same manner as lawyers. Therefore, we constructed a novel corpus consisting of scenarios pertain to Contract Acts Malaysia and Australian Social Act for Dependent Child. ChatGPT is applied to perform analysis on the corpus using the IRAC method, which is a framework widely used by legal professionals for organizing legal analysis. Each scenario in the corpus is annotated with a complete IRAC analysis in a semi-structured format so that both machines and legal professionals are able to interpret and understand the annotations. 
In addition, we conducted the first empirical assessment of ChatGPT for IRAC analysis in order to understand how well it aligns with the analysis of legal professionals. Our experimental results shed lights on possible future research directions to improve alignments between LLMs and legal experts in terms of legal reasoning.", "keywords": "Legal Reasoning;IRAC method;Natural Language Processing;Generative Language Models;In-context Learning;Question Decomposition", "primary_area": "", "supplementary_material": "", "author": "Xiaoxi Kang;Lizhen Qu;Lay-Ki Soon;Adnan Trakic;Terry Yue Zhuo;Patrick Charles Emerton;Genevieve Grant", "authorids": "~Xiaoxi_Kang1;~Lizhen_Qu2;~Lay-Ki_Soon1;~Adnan_Trakic1;~Terry_Yue_Zhuo1;~Patrick_Charles_Emerton1;~Genevieve_Grant1", "gender": "F;M;F;M;M;F;M", "homepage": ";https://research.monash.edu/en/persons/lizhen-qu;https://research.monash.edu/en/persons/soon-lay-ki;;https://www.deakin.edu.au/about-deakin/people/patrick-emerton;https://research.monash.edu/en/persons/genevieve-grant;http://terryyz.github.io/", "dblp": ";58/3601;47/4141;;;;", "google_scholar": "https://scholar.google.com/citations?hl=en;https://scholar.google.com.au/citations?user=cHXZgHUAAAAJ;https://scholar.google.com.my/citations?user=SiVW3HkAAAAJ;iNZiSKIAAAAJ;kW4g4CAAAAAJ;;https://scholar.google.com.au/citations?hl=en", "or_profile": "~Xiaoxi_Kang1;~Lizhen_Qu2;~Lay-Ki_Soon1;~Adnan_Trakic1;~Patrick_Charles_Emerton1;~Genevieve_Grant1;~Terry_Zhuo1", "aff": "Monash University, Malaysia Campus;Monash University;Monash University;Monash University;Deakin University;Monash University;Sea AI Lab", "aff_domain": "monash.edu.my;monash.edu.au;monash.edu;monash.edu;deakin.edu.au;monash.edu;sea.com", "position": "PhD student;Lecturer;Associate Professor;Associate Professor;Associate Professor;Associate Professor;Researcher", "bibtex": "@inproceedings{\nkang2023can,\ntitle={Can Chat{GPT} Perform Reasoning Using the {IRAC} Method in Analyzing Legal Scenarios Like a Lawyer?},\nauthor={Xiaoxi Kang and Lizhen Qu and Lay-Ki Soon and Adnan Trakic and Terry Yue Zhuo and Patrick Charles Emerton and Genevieve Grant},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=7okuG5JhaM}\n}", "github": "", "project": "", "reviewers": "Vapu;623p;uuNg", "site": "https://openreview.net/forum?id=7okuG5JhaM", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;5;4", "excitement": "3;4;3", "reproducibility": "1;4;0", "correctness": "2;5;2", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 1.6666666666666667, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0003-3869-4741;0000-0002-7764-431X;0000-0002-8072-242X;;0000-0001-5157-2437;;0000-0002-5760-5188", "linkedin": "xiaoxi-kang-4718617b/;lizhen-qu-50017717/;lay-ki-soon-91576122/;;;;", "aff_unique_index": "0;0;0;0;1;0;2", "aff_unique_norm": "Monash University;Deakin University;Sea AI Lab", "aff_unique_dep": ";;", "aff_unique_url": "https://www.monash.edu.my;https://www.deakin.edu.au;", "aff_unique_abbr": "Monash;Deakin;", "aff_campus_unique_index": "0", "aff_campus_unique": "Malaysia;", "aff_country_unique_index": "0;1;1;1;1;1", "aff_country_unique": "Malaysia;Australia;" }, { "id": "7qCuicCunf", "title": "Learning to love diligent trolls: Accounting for rater effects in the dialogue safety task", "track": "main", "status": "Short Findings", "tldr": "", 
"abstract": "Chatbots have the risk of generating offensive utterances, which must be avoided.\nPost-deployment, one way for a chatbot to continuously improve is to source utterance/label pairs from feedback by live users.\nHowever, among users are trolls, who provide training examples with incorrect labels.\nTo de-troll training data, previous work removed training examples that have high user-aggregated cross-validation (CV) error.\nHowever, CV is expensive; and in a coordinated attack, CV may be overwhelmed by trolls in number and in consistency among themselves.\nIn the present work, I address both limitations by proposing a solution inspired by methodology in automated essay scoring (AES):\nhave multiple users rate each utterance, then perform latent class analysis (LCA) to infer correct labels.\nAs it does not require GPU computations, LCA is inexpensive.\nIn experiments, I found that the AES-like solution can infer training labels with high accuracy when trolls are consistent, even when trolls are the majority.", "keywords": "chatbots;trolls;safety;automated essay scoring;latent class analysis", "primary_area": "", "supplementary_material": "", "author": "Michael John Ilagan", "authorids": "~Michael_John_Ilagan1", "gender": "M", "homepage": "", "dblp": "", "google_scholar": "", "or_profile": "~Michael_John_Ilagan1", "aff": "McGill University, McGill University", "aff_domain": "mail.mcgill.ca", "position": "PhD student", "bibtex": "@inproceedings{\nilagan2023learning,\ntitle={Learning to love diligent trolls: Accounting for rater effects in the dialogue safety task},\nauthor={Michael John Ilagan},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=7qCuicCunf}\n}", "github": "", "project": "", "reviewers": "UqMy;Ysnq;VSeF;U2Js;5UUg", "site": "https://openreview.net/forum?id=7qCuicCunf", "pdf_size": 0, "rating": "3;3;3;3;3", "confidence": "2;3;4;3;4", "excitement": "4;3;3;2;3", "reproducibility": "4;4;3;3;5", "correctness": "4;3;2;2;4", "rating_avg": 3.0, "confidence_avg": 3.2, "excitement_avg": 3.0, "reproducibility_avg": 3.8, "correctness_avg": 3.0, "replies_avg": 15, "authors#_avg": 1, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0001-6340-9346", "linkedin": "", "aff_unique_index": "0", "aff_unique_norm": "McGill University", "aff_unique_dep": "", "aff_unique_url": "https://www.mcgill.ca", "aff_unique_abbr": "McGill", "aff_country_unique_index": "0", "aff_country_unique": "Canada" }, { "id": "7rjkSqMJ5n", "title": "Target-Agnostic Gender-Aware Contrastive Learning for Mitigating Bias in Multilingual Machine Translation", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Gender bias is a significant issue in machine translation, leading to ongoing research efforts in developing bias mitigation techniques. However, most works focus on debiasing bilingual models without much consideration for multilingual systems. In this paper, we specifically target the gender bias issue of multilingual machine translation models for unambiguous cases where there is a single correct translation, and propose a bias mitigation method based on a novel approach. Specifically, we propose Gender-Aware Contrastive Learning, GACL, which encodes contextual gender information into the representations of non-explicit gender words. Our method is target language-agnostic and is applicable to pre-trained multilingual machine translation models via fine-tuning. 
Through multilingual evaluation, we show that our approach improves gender accuracy by a wide margin without hampering translation performance. We also observe that incorporated gender information transfers and benefits other target languages regarding gender accuracy. Finally, we demonstrate that our method is applicable and beneficial to models of various sizes.", "keywords": "machine translation;bias mitigation;contrastive learning", "primary_area": "", "supplementary_material": "", "author": "Minwoo Lee;Hyukhun Koh;Kang-il Lee;Dongdong Zhang;Minsung Kim;Kyomin Jung", "authorids": "~Minwoo_Lee2;~Hyukhun_Koh1;~Kang-il_Lee1;~Dongdong_Zhang4;~Minsung_Kim1;~Kyomin_Jung1", "gender": "M;Not Specified;M;M;M;M", "homepage": ";https://hyukhunkoh-ai.github.io/;;https://www.microsoft.com/en-us/research/people/dozhang/;https://github.com/kms0805;http://milab.snu.ac.kr/kjung/index.html", "dblp": ";344/0846;304/2031-1;02/621-1.html;;48/3867", "google_scholar": ";;https://scholar.google.co.kr/citations?user=-YroyxsAAAAJ;w2qu71oAAAAJ;;https://scholar.google.co.kr/citations?user=u3uMl4MAAAAJ", "or_profile": "~Minwoo_Lee2;~Hyukhun_Koh1;~Kang-il_Lee1;~Dongdong_Zhang4;~Minsung_Kim1;~Kyomin_Jung1", "aff": "Seoul National University;;Seoul National University;Microsoft Research Asia;Seoul National University;Seoul National University", "aff_domain": "snu.ac.kr;;snu.ac.kr;microsoft.com;snu.ac.kr;snu.ac.kr", "position": "PhD student;;PhD student;Researcher;MS student;Full Professor", "bibtex": "@inproceedings{\nlee2023targetagnostic,\ntitle={Target-Agnostic Gender-Aware Contrastive Learning for Mitigating Bias in Multilingual Machine Translation},\nauthor={Minwoo Lee and Hyukhun Koh and Kang-il Lee and Dongdong Zhang and Minsung Kim and Kyomin Jung},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=7rjkSqMJ5n}\n}", "github": "", "project": "", "reviewers": "KfGt;3Bkt;fQoU", "site": "https://openreview.net/forum?id=7rjkSqMJ5n", "pdf_size": 0, "rating": "5;5;5", "confidence": "3;4;4", "excitement": "4;4;4", "reproducibility": "3;3;3", "correctness": "4;4;3", "rating_avg": 5.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 4.0, "reproducibility_avg": 3.0, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;", "linkedin": ";https://www.linkedin.com/hyukhun-koh-593283283;;;;", "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "Seoul National University;Microsoft", "aff_unique_dep": ";Research", "aff_unique_url": "https://www.snu.ac.kr;https://www.microsoft.com/en-us/research/group/asia", "aff_unique_abbr": "SNU;MSR Asia", "aff_campus_unique_index": "1", "aff_campus_unique": ";Asia", "aff_country_unique_index": "0;0;1;0;0", "aff_country_unique": "South Korea;China" }, { "id": "7s8KOmvdJc", "title": "InheritSumm: A General, Versatile and Compact Summarizer by Distilling from GPT", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "While large models such as GPT-3 demonstrate exceptional performance in zeroshot and fewshot summarization tasks, their extensive serving and fine-tuning costs hinder their utilization in various applications. Conversely, previous studies have found that although automatic metrics tend to favor smaller fine-tuned models, the quality of the summaries they generate is inferior to that of larger models like GPT-3 when assessed by human evaluators. 
To address this issue, we propose InheritSumm, a versatile and compact summarization model derived from GPT-3.5 through distillation. InheritSumm not only exhibits comparable zeroshot and fewshot summarization capabilities to GPT-3.5 but is also sufficiently compact for fine-tuning purposes. Experimental results demonstrate that InheritSumm achieves similar or superior performance to GPT-3.5 in zeroshot and fewshot settings. Furthermore, it outperforms the previously established best small models in both prefix-tuning and full-data fine-tuning scenarios.", "keywords": "summarization;distillation;zero-shot;few-shot;large language model", "primary_area": "", "supplementary_material": "", "author": "Yichong Xu;Ruochen Xu;Dan Iter;Yang Liu;Shuohang Wang;Chenguang Zhu;Michael Zeng", "authorids": "~Yichong_Xu1;~Ruochen_Xu2;~Dan_Iter1;~Yang_Liu50;~Shuohang_Wang1;~Chenguang_Zhu1;~Michael_Zeng1", "gender": "M;M;Not Specified;M;M;M;M", "homepage": "http://xycking.wixsite.com/yichongxu;https://xrc10.github.io/;https://daniter-cu.github.io/;https://nlp-yang.github.io/;;;https://www.microsoft.com/en-us/research/people/nzeng/", "dblp": "154/6421;188/3515;63/10689.html;;173/5469.html;48/7536-1.html;232/1866-1.html", "google_scholar": "sYza2XwAAAAJ;HTp5S00AAAAJ;bg8RrSkAAAAJ;HxTr-CtMdrsC;mN-IO6wAAAAJ;1b2kKWoAAAAJ;", "or_profile": "~Yichong_Xu1;~Ruochen_Xu2;~Dan_Iter1;~Yang_Liu50;~Shuohang_Wang1;~Chenguang_Zhu1;~Michael_Zeng1", "aff": "Microsoft;Microsoft Research;Microsoft;Microsoft;Microsoft;Zoom;Microsoft", "aff_domain": "microsoft.com;research.microsoft.com;microsoft.com;microsoft.com;microsoft.com;zoom.us;microsoft.com", "position": "Senior Researcher;Researcher;Researcher;Researcher;Researcher;Principal Researcher;Vice President Research Manager", "bibtex": "@inproceedings{\nxu2023inheritsumm,\ntitle={InheritSumm: A General, Versatile and Compact Summarizer by Distilling from {GPT}},\nauthor={Yichong Xu and Ruochen Xu and Dan Iter and Yang Liu and Shuohang Wang and Chenguang Zhu and Michael Zeng},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=7s8KOmvdJc}\n}", "github": "", "project": "", "reviewers": "UWoN;mfxh;FFF7", "site": "https://openreview.net/forum?id=7s8KOmvdJc", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;3", "excitement": "3;4;3", "reproducibility": "2;3;3", "correctness": "3;4;3", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 2.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;;", "linkedin": ";ruochenx/;daniter;;;;michaelnanshanzeng/", "aff_unique_index": "0;0;0;0;0;1;0", "aff_unique_norm": "Microsoft;Zoom Video Communications Inc.", "aff_unique_dep": "Microsoft Corporation;", "aff_unique_url": "https://www.microsoft.com;https://zoom.us", "aff_unique_abbr": "Microsoft;Zoom", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "7umLwqBbvw", "title": "A Comprehensive Evaluation of Tool-Assisted Generation Strategies", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "A growing area of research investigates augmenting language models with tools (e.g., search engines, calculators) to overcome their shortcomings (e.g., missing or incorrect knowledge, incorrect logical inferences). 
Various few-shot tool-usage strategies have been proposed. However, there is no systematic and fair comparison across different strategies, or between these strategies and strong baselines that do not leverage tools. We conduct an extensive empirical analysis, finding that (1) across various datasets, example difficulty levels, and models, strong no-tool baselines are competitive to tool-assisted strategies, implying that effectively using tools with in-context demonstrations is a difficult unsolved problem; (2) for knowledge-retrieval tasks, strategies that *refine* incorrect outputs with tools outperform strategies that retrieve relevant information *ahead of* or *during generation*; (3) tool-assisted strategies are expensive in the number of tokens they require to work---incurring additional costs by orders of magnitude---which does not translate into significant improvement in performance. Overall, our findings suggest that few-shot tool integration is still an open challenge, emphasizing the need for comprehensive evaluations of future strategies to accurately assess their *benefits* and *costs*.", "keywords": "tool-assisted;tool-augmented;tool usage;large language models;LLMs;few-shot", "primary_area": "", "supplementary_material": "", "author": "Alon Jacovi;Avi Caciularu;Jonathan Herzig;Roee Aharoni;Bernd Bohnet;Mor Geva", "authorids": "~Alon_Jacovi1;~Avi_Caciularu1;~Jonathan_Herzig2;~Roee_Aharoni1;~Bernd_Bohnet1;~Mor_Geva1", "gender": "M;M;M;M;M;F", "homepage": "https://alonjacovi.github.io/;http://aviclu.github.io/;https://jonathanherzig.github.io/;http://www.roeeaharoni.com;;https://mega002.github.io/", "dblp": "218/5900;https://dblp.uni-trier.de/pid/207/8509;133/3687.html;148/9506;59/4391;203/9159", "google_scholar": "cX9TtloAAAAJ;https://scholar.google.co.il/citations?user=fPG_0aQAAAAJ;https://scholar.google.co.il/citations?view_op=list_works;https://scholar.google.co.il/citations?user=wV0mHWgAAAAJ;https://scholar.google.co.uk/citations?user=IzqMoZMAAAAJ;https://scholar.google.co.il/citations?user=GxpQbSkAAAAJ", "or_profile": "~Alon_Jacovi1;~Avi_Caciularu1;~Jonathan_Herzig2;~Roee_Aharoni1;~Bernd_Bohnet1;~Mor_Geva1", "aff": "Bar Ilan University;Google;Research, Google;Google;Google;Google DeepMind", "aff_domain": "biu.ac.il;google.com;research.google.com;google.com;google.com;google.com", "position": "PhD student;Researcher;Researcher;Researcher;Researcher;Postdoc", "bibtex": "@inproceedings{\njacovi2023a,\ntitle={A Comprehensive Evaluation of Tool-Assisted Generation Strategies},\nauthor={Alon Jacovi and Avi Caciularu and Jonathan Herzig and Roee Aharoni and Bernd Bohnet and Mor Geva},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=7umLwqBbvw}\n}", "github": "", "project": "", "reviewers": "2Ti3;uECc;YEbL", "site": "https://openreview.net/forum?id=7umLwqBbvw", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;3;3", "excitement": "3;4;3", "reproducibility": "3;2;4", "correctness": "3;4;3", "rating_avg": 4.0, "confidence_avg": 3.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;", "linkedin": ";avicaciularu/;;roeeaharoni;;morgeva/", "aff_unique_index": "0;1;1;1;1;1", "aff_unique_norm": "Bar-Ilan University;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.biu.ac.il;https://www.google.com", 
"aff_unique_abbr": "BIU;Google", "aff_campus_unique_index": "1;1;1;1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;1;1;1;1;2", "aff_country_unique": "Israel;United States;United Kingdom" }, { "id": "7vR0fWRwTX", "title": "Exploring Discourse Structure in Document-level Machine Translation", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Neural machine translation has achieved great success in the past few years with the help of transformer architectures and large-scale bilingual corpora. However, when the source text gradually grows into an entire document, the performance of current methods for document-level machine translation (DocMT) is less satisfactory. Although the context is beneficial to the translation in general, it is difficult for traditional methods to utilize such long-range information. Previous studies on DocMT have concentrated on extra contents such as multiple surrounding sentences and input instances divided by a fixed length. We suppose that they ignore the structure inside the source text, which leads to under-utilization of the context. In this paper, we present a more sound paragraph-to-paragraph translation mode and explore whether discourse structure can improve DocMT. We introduce several methods from different perspectives, among which our RST-Att model with a multi-granularity attention mechanism based on the RST parsing tree works best. The experiments show that our method indeed utilizes discourse information and performs better than previous work.", "keywords": "Document-level Machine Translation;Discourse Structure;RST Parsing", "primary_area": "", "supplementary_material": "", "author": "Xinyu Hu;Xiaojun Wan", "authorids": "~Xinyu_Hu1;~Xiaojun_Wan1", "gender": ";M", "homepage": ";https://wanxiaojun.github.io", "dblp": ";07/1521", "google_scholar": ";lTTeBdkAAAAJ", "or_profile": "~Xinyu_Hu1;~Xiaojun_Wan1", "aff": ";Peking University", "aff_domain": ";pku.edu.cn", "position": ";Full Professor", "bibtex": "@inproceedings{\nhu2023exploring,\ntitle={Exploring Discourse Structure in Document-level Machine Translation},\nauthor={Xinyu Hu and Xiaojun Wan},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=7vR0fWRwTX}\n}", "github": "", "project": "", "reviewers": "JdmR;EkAb;JzyP;2yPU", "site": "https://openreview.net/forum?id=7vR0fWRwTX", "pdf_size": 0, "rating": "4;4;4;4", "confidence": "3;4;3;4", "excitement": "3;4;3;3", "reproducibility": "4;4;2;4", "correctness": "3;4;3;4", "rating_avg": 4.0, "confidence_avg": 3.5, "excitement_avg": 3.25, "reproducibility_avg": 3.5, "correctness_avg": 3.5, "replies_avg": 14, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";", "linkedin": ";", "aff_unique_index": "0", "aff_unique_norm": "Peking University", "aff_unique_dep": "", "aff_unique_url": "http://www.pku.edu.cn", "aff_unique_abbr": "Peking U", "aff_country_unique_index": "0", "aff_country_unique": "China" }, { "id": "7wJhlDMNH7", "title": "Can We Edit Multimodal Large Language Models?", "track": "main", "status": "Long Main", "tldr": "", "abstract": "In this paper, we focus on editing multimodal Large Language Models (LLMs). Compared to editing single-modal LLMs, multimodal model editing is more challenging, which demands a higher level of scrutiny and careful consideration in the editing process. 
To facilitate research in this area, we construct a new benchmark, dubbed MMEdit, for editing multimodal LLMs and establishing a suite of innovative metrics for evaluation. We conduct comprehensive experiments involving various model editing baselines and analyze the impact of editing different components for multimodal LLMs. Empirically, we notice that previous baselines can implement editing multimodal LLMs to some extent, but the effect is still barely satisfactory, indicating the potential difficulty of this task. We hope that our work can provide the NLP community with insights.", "keywords": "Model Editing;Multimodal Language Models;Large Language Models", "primary_area": "", "supplementary_material": "", "author": "Siyuan Cheng;Bozhong Tian;Qingbin Liu;Xi Chen;Yongheng Wang;Huajun Chen;Ningyu Zhang", "authorids": "~Siyuan_Cheng2;~Bozhong_Tian1;~Qingbin_Liu1;~Xi_Chen21;~Yongheng_Wang1;~Huajun_Chen1;~Ningyu_Zhang1", "gender": "M;M;M;M;M;M;M", "homepage": "https://github.com/cheng-simian;https://github.com/tbozhong;https://scholar.google.com.hk/citations?user=FGxyOtYAAAAJ&hl=zh-CN;;;;https://person.zju.edu.cn/en/ningyu", "dblp": ";338/5451;137/6023.html;;34/6716.html;94/5089;139/4181-1.html", "google_scholar": ";Sj9kUscAAAAJ;https://scholar.google.com.hk/citations?user=FGxyOtYAAAAJ;https://scholar.google.com/citations?hl=zh-CN;LTZcz5cAAAAJ;;xQDOPvsAAAAJ", "or_profile": "~Siyuan_Cheng2;~Bozhong_Tian1;~Qingbin_Liu1;~Xi_Chen21;~Yongheng_Wang1;~Huajun_Chen1;~Ningyu_Zhang1", "aff": "Zhejiang University;Zhejiang University;Tencent;Tencent Content and Platform Group;Zhejiang lab;Zhejiang University;Zhejiang University", "aff_domain": "zju.edu.cn;zju.edu.cn;tencent.com;tencent.com;zhejianglab.com;zju.edu.cn;zju.edu.cn", "position": "MS student;MS student;Researcher;Researcher;Researcher;Full Professor;Associate Professor", "bibtex": "@inproceedings{\ncheng2023can,\ntitle={Can We Edit Multimodal Large Language Models?},\nauthor={Siyuan Cheng and Bozhong Tian and Qingbin Liu and Xi Chen and Yongheng Wang and Huajun Chen and Ningyu Zhang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=7wJhlDMNH7}\n}", "github": "", "project": "", "reviewers": "hZHi;ysoz;sCgZ", "site": "https://openreview.net/forum?id=7wJhlDMNH7", "pdf_size": 0, "rating": "4;4;4", "confidence": "1;4;3", "excitement": "3;3;3", "reproducibility": "4;4;4", "correctness": "4;4;4", "rating_avg": 4.0, "confidence_avg": 2.6666666666666665, "excitement_avg": 3.0, "reproducibility_avg": 4.0, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;;0000-0002-1970-0678", "linkedin": ";;;;yongheng-wang-34615a277/;;ningyuzhang/", "aff_unique_index": "0;0;1;1;0;0;0", "aff_unique_norm": "Zhejiang University;Tencent", "aff_unique_dep": ";Tencent Holdings Limited", "aff_unique_url": "https://www.zju.edu.cn;https://www.tencent.com", "aff_unique_abbr": "ZJU;Tencent", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "80ZDEuEJVC", "title": "A Parallel Corpus for Vietnamese Central-Northern Dialect Text Transfer", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "The Vietnamese language embodies dialectal variants closely attached to the nation's three macro-regions: the Northern, Central and Southern regions. 
As the northern dialect forms the basis of the standard language, it's considered the prestige dialect. While the northern dialect differs from the remaining two in certain aspects, it almost shares an identical lexicon with the southern dialect, making the textual attributes nearly interchangeable. In contrast, the central dialect possesses a number of unique vocabularies and is less mutually intelligible to the standard dialect. Through preliminary experiments, we observe that current NLP models do not possess understandings of the Vietnamese central dialect text, which most likely originates from the lack of resources. To facilitate research on this domain, we introduce a new parallel corpus for Vietnamese central-northern dialect text transfer. Via exhaustive benchmarking, we discover monolingual language models' superiority over their multilingual counterparts on the dialect transfer task. We further demonstrate that fine-tuned transfer models can seamlessly improve the performance of existing NLP systems on the central dialect domain with dedicated results in translation and text-image retrieval tasks.", "keywords": "Vietnamese;Dialect;Text Style Transfer", "primary_area": "", "supplementary_material": "", "author": "Thang Le;Anh Tuan Luu", "authorids": "~Thang_Le2;~Anh_Tuan_Luu2", "gender": ";M", "homepage": ";https://tuanluu.github.io/", "dblp": ";81/8329.html", "google_scholar": ";https://scholar.google.com.sg/citations?hl=en", "or_profile": "~Thang_Le2;~Anh_Tuan_Luu2", "aff": ";Nanyang Technological University", "aff_domain": ";ntu.edu.sg", "position": ";Assistant Professor", "bibtex": "@inproceedings{\nle2023a,\ntitle={A Parallel Corpus for Vietnamese Central-Northern Dialect Text Transfer},\nauthor={Thang Le and Anh Tuan Luu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=80ZDEuEJVC}\n}", "github": "", "project": "", "reviewers": "sxaD;64HP;rL5s", "site": "https://openreview.net/forum?id=80ZDEuEJVC", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;3;3", "excitement": "4;3;3", "reproducibility": "4;4;4", "correctness": "3;3;4", "rating_avg": 4.0, "confidence_avg": 3.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";", "linkedin": ";", "aff_unique_index": "0", "aff_unique_norm": "Nanyang Technological University", "aff_unique_dep": "", "aff_unique_url": "https://www.ntu.edu.sg", "aff_unique_abbr": "NTU", "aff_country_unique_index": "0", "aff_country_unique": "Singapore" }, { "id": "83m634EuTW", "title": "Re-Examining Summarization Evaluation across Multiple Quality Criteria", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "The common practice for assessing automatic evaluation metrics is to measure the correlation between their induced system rankings and those obtained by reliable human evaluation, where a higher correlation indicates a better metric. \nYet, an intricate setting arises when an NLP task is evaluated by multiple Quality Criteria (QCs), like for text summarization where prominent criteria including relevance, consistency, fluency and coherence.\nIn this paper, we challenge the soundness of this methodology when multiple QCs are involved, concretely for the summarization case. 
\nFirst, we show that the allegedly best metrics for certain QCs actually do not perform well, failing to detect even drastic summary corruptions with respect to the considered QC.\nTo explain this, we show that some of the high correlations obtained in the multi-QC setup are spurious.\nFinally, we propose a procedure that may help detecting this effect.\nOverall, our findings highlight the need for further investigating metric evaluation methodologies for the multiple-QC case.", "keywords": "summarization;evaluation;summarization evaluation;confounding variable;spurious correlation;confounding factor", "primary_area": "", "supplementary_material": "", "author": "Ori Ernst;Ori Shapira;Ido Dagan;Ran Levy", "authorids": "~Ori_Ernst1;~Ori_Shapira1;~Ido_Dagan1;~Ran_Levy1", "gender": "M;;M;", "homepage": ";https://orishapira.wordpress.com/;http://u.cs.biu.ac.il/~dagan/;", "dblp": "217/3552;205/9013;95/284;146/3128-1", "google_scholar": ";s7djZnUAAAAJ;https://scholar.google.com.tw/citations?user=YzGAGtoAAAAJ;6HcOMAgAAAAJ", "or_profile": "~Ori_Ernst1;~Ori_Shapira1;~Ido_Dagan1;~Ran_Levy1", "aff": "Bar-Ilan University;Amazon;Bar-Ilan University;Amazon", "aff_domain": "biu.ac.il;amazon.com;biu.ac.il;amazon.com", "position": "PhD student;Researcher;Full Professor;Researcher", "bibtex": "@inproceedings{\nernst2023reexamining,\ntitle={Re-Examining Summarization Evaluation across Multiple Quality Criteria},\nauthor={Ori Ernst and Ori Shapira and Ido Dagan and Ran Levy},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=83m634EuTW}\n}", "github": "", "project": "", "reviewers": "rPrT;jPgs;bKiH", "site": "https://openreview.net/forum?id=83m634EuTW", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;4;3", "excitement": "4;2;3", "reproducibility": "3;4;3", "correctness": "4;2;4", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.0, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.3333333333333335, "replies_avg": 10, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0009-0008-7352-2586", "linkedin": ";;;", "aff_unique_index": "0;1;0;1", "aff_unique_norm": "Bar-Ilan University;Amazon", "aff_unique_dep": ";Amazon.com, Inc.", "aff_unique_url": "https://www.biu.ac.il;https://www.amazon.com", "aff_unique_abbr": "BIU;Amazon", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;1", "aff_country_unique": "Israel;United States" }, { "id": "855dPxyaex", "title": "Finding Authentic Counterhate Arguments: A Case Study with Public Figures", "track": "main", "status": "Long Main", "tldr": "", "abstract": "We explore authentic counterhate arguments for online hateful content toward individuals. Previous efforts are limited to counterhate to fight against hateful content toward groups. Thus, we present a corpus of 54,816 hateful tweet-paragraph pairs, where the paragraphs are candidate counterhate arguments. The counterhate arguments are retrieved from 2,500 online articles from multiple sources. We propose a methodology that assures the authenticity of the counter argument and its specificity to the individual of interest. We show that finding arguments in online articles is an efficient alternative to counterhate generation approaches that may hallucinate unsupported arguments. We also present linguistic insights on the language used in counterhate arguments. Experimental results show promising results. 
It is more challenging, however, to identify counterhate arguments for hateful content toward individuals not included in the training set.", "keywords": "Hate Speech;Counterhate;Social Media", "primary_area": "", "supplementary_material": "", "author": "Abdullah Albanyan;Ahmed Hassan;Eduardo Blanco", "authorids": "~Abdullah_Albanyan2;~Ahmed_Hassan5;~Eduardo_Blanco1", "gender": "M;M;M", "homepage": "https://www.albanyan.com;;https://eduardoblanco.github.io/", "dblp": ";;32/369-2", "google_scholar": "WH3t6VwAAAAJ;https://scholar.google.com/citations?hl=en;AqGa3-MAAAAJ", "or_profile": "~Abdullah_Albanyan2;~Ahmed_Hassan5;~Eduardo_Blanco1", "aff": "University of North Texas;;University of Arizona", "aff_domain": "unt.edu;;arizona.edu", "position": "PhD student;;Associate Professor", "bibtex": "@inproceedings{\nalbanyan2023finding,\ntitle={Finding Authentic Counterhate Arguments: A Case Study with Public Figures},\nauthor={Abdullah Albanyan and Ahmed Hassan and Eduardo Blanco},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=855dPxyaex}\n}", "github": "", "project": "", "reviewers": "xM6k;pTGV;bvb9", "site": "https://openreview.net/forum?id=855dPxyaex", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;5;4", "excitement": "3;4;4", "reproducibility": "4;3;4", "correctness": "4;3;3", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-2371-9400;;", "linkedin": "abdullah-albanyan-ph-d-23129858/;ahmedhassan19/;", "aff_unique_index": "0;1", "aff_unique_norm": "University of North Texas;University of Arizona", "aff_unique_dep": ";", "aff_unique_url": "https://www.unt.edu;https://www.arizona.edu", "aff_unique_abbr": "UNT;UA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "8752c2KVwd", "title": "Dialect-to-Standard Normalization: A Large-Scale Multilingual Evaluation", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Text normalization methods have been commonly applied to historical language or user-generated content, but less often to dialectal transcriptions. In this paper, we introduce dialect-to-standard normalization \u2013 i.e., mapping phonetic transcriptions from different dialects to the orthographic norm of the standard variety \u2013 as a distinct sentence-level character transduction task and provide a large-scale analysis of dialect-to-standard normalization methods. To this end, we compile a multilingual dataset covering four languages: Finnish, Norwegian, Swiss German and Slovene. For the two biggest corpora, we provide three different data splits corresponding to different use cases for automatic normalization. We evaluate the most successful sequence-to-sequence model architectures proposed for text normalization tasks using different tokenization approaches and context sizes. We find that a character-level Transformer trained on sliding windows of three words works best for Finnish, Swiss German and Slovene, whereas the pre-trained byT5 model using full sentences obtains the best results for Norwegian. 
Finally, we perform an error analysis to evaluate the effect of different data splits on model performance.", "keywords": "text normalization;multilingual evaluation;multilingual datasets;linguistic variation;dialects and language varieties;Finnish;Norwegian;Slovene;Swiss German", "primary_area": "", "supplementary_material": "", "author": "Olli Kuparinen;Aleksandra Mileti\u0107;Yves Scherrer", "authorids": "~Olli_Kuparinen1;~Aleksandra_Mileti\u01071;~Yves_Scherrer1", "gender": "M;;M", "homepage": "https://okuparinen.github.io;;https://blogs.helsinki.fi/yvesscherrer/", "dblp": ";;65/6671", "google_scholar": "https://scholar.google.com/citations?hl=en;;SyQu88MAAAAJ", "or_profile": "~Olli_Kuparinen1;~Aleksandra_Mileti\u01071;~Yves_Scherrer1", "aff": "University of Helsinki;;University of Helsinki", "aff_domain": "helsinki.fi;;helsinki.fi", "position": "Postdoc;;Lecturer", "bibtex": "@inproceedings{\nkuparinen2023dialecttostandard,\ntitle={Dialect-to-Standard Normalization: A Large-Scale Multilingual Evaluation},\nauthor={Olli Kuparinen and Aleksandra Mileti{\\'c} and Yves Scherrer},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=8752c2KVwd}\n}", "github": "", "project": "", "reviewers": "xDQG;cje3;fuGZ", "site": "https://openreview.net/forum?id=8752c2KVwd", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;4;3", "excitement": "4;2;4", "reproducibility": "5;4;4", "correctness": "4;2;4", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.333333333333333, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0001-9468-7111;;", "linkedin": ";;", "aff_unique_index": "0;0", "aff_unique_norm": "University of Helsinki", "aff_unique_dep": "", "aff_unique_url": "https://www.helsinki.fi", "aff_unique_abbr": "UH", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Finland" }, { "id": "87WEkTIVSh", "title": "Multilingual Pixel Representations for Translation and Effective Cross-lingual Transfer", "track": "main", "status": "Long Main", "tldr": "", "abstract": "We introduce and demonstrate how to effectively train multilingual machine translation models with pixel representations. We experiment with two different data settings with a variety of language and script coverage, demonstrating improved performance compared to subword embeddings. We explore various properties of pixel representations such as parameter sharing within and across scripts to better understand where they lead to positive transfer. We observe that these properties not only enable seamless cross-lingual transfer to unseen scripts, but make pixel representations more data-efficient than alternatives such as vocabulary expansion. 
We hope this work contributes to more extensible multilingual models for all languages and scripts.", "keywords": "machine translation;pixel representations;multilinguality;cross-lingual transfer;unseen scripts", "primary_area": "", "supplementary_material": "", "author": "Elizabeth Salesky;Neha Verma;Philipp Koehn;Matt Post", "authorids": "~Elizabeth_Salesky1;~Neha_Verma1;~Philipp_Koehn2;~Matt_Post1", "gender": ";F;M;M", "homepage": "https://esalesky.github.io;https://nverma1.github.io/;http://www.cs.jhu.edu/~phi/;http://cs.jhu.edu/~post/", "dblp": "184/8920;12/11473-1;84/4538.html;51/8151", "google_scholar": "9I7TjgMAAAAJ;Lxbdj6IAAAAJ;OsIZgIYAAAAJ;4w7LhxsAAAAJ", "or_profile": "~Elizabeth_Salesky1;~Neha_Verma1;~Philipp_Koehn2;~Matt_Post1", "aff": "Johns Hopkins University;Meta;Johns Hopkins University;Johns Hopkins University", "aff_domain": "jhu.edu;meta.com;jhu.edu;jhu.edu", "position": "PhD student;Intern;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nsalesky2023multilingual,\ntitle={Multilingual Pixel Representations for Translation and Effective Cross-lingual Transfer},\nauthor={Elizabeth Salesky and Neha Verma and Philipp Koehn and Matt Post},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=87WEkTIVSh}\n}", "github": "", "project": "", "reviewers": "mFpr;V6mX;rKto", "site": "https://openreview.net/forum?id=87WEkTIVSh", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;2", "excitement": "4;4;4", "reproducibility": "4;5;5", "correctness": "4;4;4", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 4.0, "reproducibility_avg": 4.666666666666667, "correctness_avg": 4.0, "replies_avg": 10, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0001-6765-1447;;0000-0003-1565-064X;0000-0002-1297-6794", "linkedin": "elizabeth-salesky;;philipp-koehn-bbb8024/;matt-post-82a05769/", "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Johns Hopkins University;Meta", "aff_unique_dep": ";Meta Platforms, Inc.", "aff_unique_url": "https://www.jhu.edu;https://meta.com", "aff_unique_abbr": "JHU;Meta", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "8851TT2R0l", "title": "The Benefits of Label-Description Training for Zero-Shot Text Classification", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Pretrained language models have improved zero-shot text classification by allowing the transfer of semantic knowledge from the training data in order to classify among specific label sets in downstream tasks. We propose a simple way to further improve zero-shot accuracies with minimal effort. We curate small finetuning datasets intended to describe the labels for a task. Unlike typical finetuning data, which has texts annotated with labels, our data simply describes the labels in language, e.g., using a few related terms, dictionary/encyclopedia entries, and short templates. Across a range of topic and sentiment datasets, our method is more accurate than zero-shot by 17-19% absolute. It is also more robust to choices required for zero-shot classification, such as patterns for prompting the model to classify and mappings from labels to tokens in the model's vocabulary. 
Furthermore, since our data merely describes the labels but does not use input texts, finetuning on it yields a model that performs strongly on multiple text domains for a given label set, even improving over few-shot out-of-domain classification in multiple settings.", "keywords": "zero-shot;text classification;label description", "primary_area": "", "supplementary_material": "", "author": "Lingyu Gao;Debanjan Ghosh;Kevin Gimpel", "authorids": "~Lingyu_Gao1;~Debanjan_Ghosh2;~Kevin_Gimpel1", "gender": ";M;M", "homepage": ";https://debanjanghosh.github.io;http://ttic.uchicago.edu/~kgimpel/index.html", "dblp": ";27/982.html;47/1252", "google_scholar": ";Sc_vp7gAAAAJ;http://scholar.google.com/citations?user=kDHs7DYAAAAJ", "or_profile": "~Lingyu_Gao1;~Debanjan_Ghosh2;~Kevin_Gimpel1", "aff": ";Educational Testing Service;Toyota Technological Institute at Chicago", "aff_domain": ";ets.org;ttic.edu", "position": ";Researcher;Assistant Professor", "bibtex": "@inproceedings{\ngao2023the,\ntitle={The Benefits of Label-Description Training for Zero-Shot Text Classification},\nauthor={Lingyu Gao and Debanjan Ghosh and Kevin Gimpel},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=8851TT2R0l}\n}", "github": "", "project": "", "reviewers": "ZfZP;S9DN;RAHH", "site": "https://openreview.net/forum?id=8851TT2R0l", "pdf_size": 0, "rating": "5;5;5", "confidence": "5;4;5", "excitement": "4;4;2", "reproducibility": "5;5;4", "correctness": "4;4;3", "rating_avg": 5.0, "confidence_avg": 4.666666666666667, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.666666666666667, "correctness_avg": 3.6666666666666665, "replies_avg": 10, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": ";;", "aff_unique_index": "0;1", "aff_unique_norm": "Educational Testing Service;Toyota Technological Institute at Chicago", "aff_unique_dep": ";", "aff_unique_url": "https://www.ets.org;https://www.tti-chicago.org", "aff_unique_abbr": "ETS;TTI Chicago", "aff_campus_unique_index": "1", "aff_campus_unique": ";Chicago", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "8AKBcTXEd3", "title": "Unifying Discrete and Continuous Representations for Unsupervised Paraphrase Generation", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Unsupervised paraphrase generation is a challenging task that benefits a variety of downstream NLP applications.\nCurrent unsupervised methods for paraphrase generation typically employ round-trip translation or denoising, which require translation corpus and result in paraphrases overly similar to the original sentences in surface structure.\nMost of these methods lack explicit control over the similarity between the original and generated sentences, and the entities are also less correctly kept.\nTo obviate the reliance on translation data and prompt greater variations in surface structure, we propose a self-supervised pseudo-data construction method that generates diverse pseudo-paraphrases in distinct surface structures for a given sentence.\nTo control the similarity and generate accurate entities, we propose an unsupervised paraphrasing model that encodes the sentence meaning and the entities with discrete and continuous variables, respectively.\nThe similarity can be controlled by sampling discrete variables and the entities are kept substantially accurate due to the specific modeling of entities using 
continuous variables.\nExperimental results on two benchmark datasets demonstrate the advantages of our pseudo-data construction method compared to round-trip translation, and the superiority of our paraphrasing model over the state-of-the-art unsupervised methods.", "keywords": "unsupervised paraphrase generation;discrete variables;VQ-VAE;entity", "primary_area": "", "supplementary_material": "", "author": "Mingfeng Xue;Dayiheng Liu;Wenqiang Lei;Jie Fu;Jian Lan;Mei Li;Baosong Yang;Jun Xie;Yidan Zhang;Dezhong Peng;Jiancheng Lv", "authorids": "~Mingfeng_Xue1;~Dayiheng_Liu1;~Wenqiang_Lei1;~Jie_Fu2;~Jian_Lan1;~Mei_Li3;~Baosong_Yang1;~Jun_Xie9;~Yidan_Zhang2;~Dezhong_Peng1;~Jiancheng_Lv2", "gender": "M;M;M;;F;M;F;M;M;M;Not Specified", "homepage": ";https://dayihengliu.github.io/;https://sites.google.com/view/wenqianghome/home;https://github.com/dicalab-scu;;https://baosongyang.site/;;https://cs.scu.edu.cn/info/1249/10284.htm;https://cs.scu.edu.cn/info/1303/13767.htm;https://bigaidream.github.io/;", "dblp": ";https://dblp.uni-trier.de/pers/hd/l/Liu:Dayiheng;167/9604;;;203/8245;;;;;", "google_scholar": ";pPLQrX4AAAAJ;https://scholar.google.com.hk/citations?user=qexdxuEAAAAJ;https://scholar.google.de/citations?user=QD7lzBsAAAAJ;;https://scholar.google.com.tw/citations?user=fXsHJXkAAAAJ;;0gupif8AAAAJ;https://scholar.google.com/citations?hl=zh-CN;66osleIAAAAJ;YjuM2GsAAAAJ", "or_profile": "~Mingfeng_Xue1;~Dayiheng_Liu1;~Wenqiang_Lei1;~Jian_Lan1;~Mei_Li3;~Baosong_Yang1;~Yidan_Zhang2;~Dezhong_Peng1;~Jiancheng_Lv2;~Jie_Fu1;~jun_xie5", "aff": "Sichuan University;Alibaba Group;Sichuan University;Sichuan University;;Alibaba Group;Sichuan University;Sichuan University;Sichuan University;Beijing Academy of Artificial Intelligence;Alibaba DAMO Academy", "aff_domain": "scu.edu.cn;alibaba-inc.com;scu.edu.cn;scu.edu.cn;;alibaba-inc.com;scu.edu.cn;scu.edu.cn;scu.edu.cn;baai.ac.cn;alibaba-inc.com", "position": "PhD student;Researcher;Full Professor;MS student;;Researcher;PhD student;Full Professor;Full Professor;Researcher;Principal Researcher", "bibtex": "@inproceedings{\nxue2023unifying,\ntitle={Unifying Discrete and Continuous Representations for Unsupervised Paraphrase Generation},\nauthor={Mingfeng Xue and Dayiheng Liu and Wenqiang Lei and Jie Fu and Jian Lan and Mei Li and Baosong Yang and Jun Xie and Yidan Zhang and Dezhong Peng and Jiancheng Lv},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=8AKBcTXEd3}\n}", "github": "", "project": "", "reviewers": "Lw6x;PrBd;PEo6", "site": "https://openreview.net/forum?id=8AKBcTXEd3", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;1", "excitement": "3;4;5", "reproducibility": "3;4;5", "correctness": "3;5;5", "rating_avg": 4.0, "confidence_avg": 3.0, "excitement_avg": 4.0, "reproducibility_avg": 4.0, "correctness_avg": 4.333333333333333, "replies_avg": 9, "authors#_avg": 11, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-8755-8941;;;0000-0003-2313-7969;;0000-0002-0440-2117;;;0000-0002-4494-843X;", "linkedin": ";;;;;;;;;;", "aff_unique_index": "0;1;0;0;1;0;0;0;2;1", "aff_unique_norm": "Sichuan University;Alibaba Group;Beijing Academy of Artificial Intelligence", "aff_unique_dep": ";;", "aff_unique_url": "https://www.scu.edu.cn;https://www.alibaba.com;https://www.baaic.cn", "aff_unique_abbr": "SCU;Alibaba;BAAI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0", "aff_country_unique": 
"China" }, { "id": "8B9mL26NDT", "title": "On the Impact of Cross-Domain Data on German Language Models", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Traditionally, large language models have been either trained on general web crawls or domain-specific data. However, recent successes of generative large language models, have shed light on the benefits of cross-domain datasets. To examine the significance of prioritizing data diversity over quality, we present a German dataset comprising texts from five domains, along with another dataset aimed at containing high-quality data. Through training a series of models ranging between 122M and 750M parameters on both datasets, we conduct a comprehensive benchmark on multiple downstream tasks. Our findings demonstrate that the models trained on the cross-domain dataset outperform those trained on quality data alone, leading to improvements up to 4.45% over the previous state-of-the-art.", "keywords": "large language models;cross-domain datasets;data diversity;data quality;benchmark", "primary_area": "", "supplementary_material": "", "author": "Amin Dada;Aokun Chen;Cheng Peng;Kaleb E Smith;Ahmad Idrissi-Yaghir;Constantin Marc Seibold;Jianning Li;Lars Heiliger;Christoph M. Friedrich;Daniel Truhn;Jan Egger;Jiang Bian;Jens Kleesiek;Yonghui Wu", "authorids": "~Amin_Dada1;~Aokun_Chen1;~Cheng_Peng7;~Kaleb_E_Smith1;~Ahmad_Idrissi-Yaghir1;~Constantin_Marc_Seibold1;~Jianning_Li1;~Lars_Heiliger1;~Christoph_M._Friedrich1;~Daniel_Truhn1;~Jan_Egger1;~Jiang_Bian2;~Jens_Kleesiek1;~Yonghui_Wu3", "gender": "M;M;M;M;M;M;M;;M;M;;M;;M", "homepage": "https://mml.ikim.nrw/;https://scholar.google.com/citations?user=r5d5y_oAAAAJ&hl=en;;;;https://cvhci.anthropomatik.kit.edu/people_1853.php;https://jianningli.me/;;https://www.fh-dortmund.de/friedrich/;https://www.truhn.ai;http://www.janegger.de/;https://jiangbian.me/;;https://hobi.med.ufl.edu/profile/wu-yonghui/", "dblp": ";;;;;246/4779;;;38/2820;59/5522;;09/851-1;63/7927;", "google_scholar": "https://scholar.google.com/citations?hl=en;;wq39Rt4AAAAJ;pO0jk84AAAAJ;;rSuG-f4AAAAJ;qPPTM_AAAAAJ;;https://scholar.google.de/citations?user=cBJs78QAAAAJ;https://scholar.google.de/citations?user=dlbH2gMAAAAJ;;ysr--voAAAAJ;Vly6hRQAAAAJ;bMWMlk4AAAAJ", "or_profile": "~Amin_Dada1;~Aokun_Chen1;~Cheng_Peng7;~Kaleb_E_Smith1;~Ahmad_Idrissi-Yaghir1;~Constantin_Marc_Seibold1;~Jianning_Li1;~Lars_Heiliger1;~Christoph_M._Friedrich1;~Daniel_Truhn1;~Jan_Egger1;~Jiang_Bian2;~Jens_Kleesiek1;~Yonghui_Wu3", "aff": "Essen University Hospital;University of Florida;University of Florida;NVIDIA;Fachhochschule Dortmund;University Medicine Essen; Institute for Artificial Intelligence in Medicine;;Fachhochschule Dortmund;University Hospital Aachen;IKIM;University of Florida;Institute for AI in Medicine (IKIM), University Medicine Essen;University of Florida", "aff_domain": "uk-essen.de;ufl.edu;ufl.edu;nvidia.com;fh-dortmund.de;uk-essen.de;uk-essen.de;;fh-dortmund.de;ukaachen.de;uk-essen.de;ufl.edu;uk-essen.de;ufl.edu", "position": "PhD student;Researcher;Postdoc;Researcher;PhD student;Postdoc;Researcher;;Full Professor;Researcher;Professor;Full Professor;Full Professor;Associate Professor", "bibtex": "@inproceedings{\ndada2023on,\ntitle={On the Impact of Cross-Domain Data on German Language Models},\nauthor={Amin Dada and Aokun Chen and Cheng Peng and Kaleb E Smith and Ahmad Idrissi-Yaghir and Constantin Marc Seibold and Jianning Li and Lars Heiliger and Christoph M. 
Friedrich and Daniel Truhn and Jan Egger and Jiang Bian and Jens Kleesiek and Yonghui Wu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=8B9mL26NDT}\n}", "github": "", "project": "", "reviewers": "BK21;Rbed;oKki", "site": "https://openreview.net/forum?id=8B9mL26NDT", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;4;3", "excitement": "2;3;3", "reproducibility": "5;4;4", "correctness": "3;3;3", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 2.6666666666666665, "reproducibility_avg": 4.333333333333333, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 14, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;0000-0003-1507-9690;;;;0000-0001-7906-0038;0000-0002-9605-0728;;0000-0002-2238-5429;0000-0001-8686-0682;", "linkedin": ";;;kaleb-smith-ph-d-8977199a/;;;;;;daniel-truhn-03a315287/;;;;", "aff_unique_index": "0;1;1;2;3;4;5;3;6;7;1;8;1", "aff_unique_norm": "Essen University Hospital;University of Florida;NVIDIA;Fachhochschule Dortmund;University of Duisburg-Essen;Institute for Artificial Intelligence in Medicine;RWTH Aachen University Hospital;Institute of Knowledge Engineering and Management;University Medicine Essen", "aff_unique_dep": ";;NVIDIA Corporation;;University Medicine;;Hospital;;Institute for AI in Medicine (IKIM)", "aff_unique_url": "https://www.essen.de/en/university-hospital;https://www.ufl.edu;https://www.nvidia.com;https://www.fh-dortmund.de;https://www.uni-due.de;;https://www.klinikum-aachen.de;http://www.ikim.edu.my/;https://www.essen.de/", "aff_unique_abbr": ";UF;NVIDIA;FH Dortmund;UDE;;UKA;IKIM;", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Essen;Aachen", "aff_country_unique_index": "0;1;1;1;0;0;1;0;0;2;1;0;1", "aff_country_unique": "Germany;United States;Malaysia" }, { "id": "8CQ0DUuSAK", "title": "Clustering Pseudo Language Family in Multilingual Translation Models with Fisher Information Matrix", "track": "main", "status": "Short Main", "tldr": "", "abstract": "In multilingual translation research, the comprehension and utilization of language families are of paramount importance. Nevertheless, clustering languages based solely on their ancestral families can yield suboptimal results due to variations in the datasets employed during the model's training phase. To mitigate this challenge, we introduce an innovative method that leverages the fisher information matrix (FIM) to cluster language families, anchored on the multilingual translation model's characteristics. We hypothesize that language pairs with similar effects on model parameters exhibit a considerable degree of linguistic congruence and should thus be grouped cohesively. This concept has led us to define pseudo language families. We provide an in-depth discussion regarding the inception and application of these pseudo language families. Empirical evaluations reveal that employing these pseudo language families enhances performance over conventional language families in adapting a multilingual translation model to unfamiliar language pairs. The proposed methodology may also be extended to scenarios requiring language similarity measurements. 
The source code and associated scripts can be accessed at https://github.com/ecoli-hit/PseudoFamily.", "keywords": "Multilingual Translation;Low-resource", "primary_area": "", "supplementary_material": "", "author": "Xinyu Ma;Xuebo Liu;Min Zhang", "authorids": "~Xinyu_Ma5;~Xuebo_Liu1;~Min_Zhang9", "gender": "M;M;M", "homepage": "https://sunbowliu.github.io/;https://zhangmin-nlp-ai.github.io/;https://www.linkedin.com/in/%E9%A9%AC-%E6%96%B0%E7%BE%BD-741600257/", "dblp": "166/0029-2;83/5342-5;", "google_scholar": "XkDl9aoAAAAJ;https://scholar.google.com/citations?hl=zh-CN;", "or_profile": "~Xuebo_Liu1;~Min_Zhang9;~xinyu_ma1", "aff": "Harbin Institute of Technolgy, Shenzhen;Harbin Institute of Technology, Shenzhen;Harbin Institute of Technology,Shenzhen", "aff_domain": "hit.edu.cn;hit.edu.cn;hit.edu.cn", "position": "Assistant Professor;Full Professor;Undergrad student", "bibtex": "@inproceedings{\nma2023clustering,\ntitle={Clustering Pseudo Language Family in Multilingual Translation Models with Fisher Information Matrix},\nauthor={Xinyu Ma and Xuebo Liu and Min Zhang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=8CQ0DUuSAK}\n}", "github": "", "project": "", "reviewers": "kKvr;yzL1;BkKF", "site": "https://openreview.net/forum?id=8CQ0DUuSAK", "pdf_size": 0, "rating": "4;4;4", "confidence": "2;3;4", "excitement": "3;3;4", "reproducibility": "4;3;4", "correctness": "3;3;3", "rating_avg": 4.0, "confidence_avg": 3.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.0, "replies_avg": 10, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": "xuebo-liu-47877b195/;;%E9%A9%AC-%E6%96%B0%E7%BE%BD-741600257/", "aff_unique_index": "0;0;0", "aff_unique_norm": "Harbin Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "http://en.hhit.edu.cn/", "aff_unique_abbr": "HIT", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Shenzhen", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "8DKrruapZ5", "title": "Boosting Prompt-Based Self-Training With Mapping-Free Automatic Verbalizer for Multi-Class Classification", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Recently, prompt-based fine-tuning has garnered considerable interest as a core technique for few-shot text classification task. This approach reformulates the fine-tuning objective to align with the Masked Language Modeling (MLM) objective. Leveraging unlabeled data, prompt-based self-training has shown greater effectiveness in binary and three-class classification. However, prompt-based self-training for multi-class classification has not been adequately investigated, despite its significant applicability to real-world scenarios. Moreover, extending current methods to multi-class classification suffers from the verbalizer that extracts the predicted value of manually pre-defined single label word for each class from MLM predictions. Consequently, we introduce a novel, efficient verbalizer structure, named Mapping-free Automatic Verbalizer (MAV). Comprising two fully connected layers, MAV serves as a trainable verbalizer that automatically extracts the requisite word features for classification by capitalizing on all available information from MLM predictions. 
Experimental results on five multi-class classification datasets indicate MAV's superior self-training efficacy.", "keywords": "Prompt-based Learning;Prompt-based fine-tuning;Prompt-based self-training;Verbalizer;Label Word Mapping", "primary_area": "", "supplementary_material": "", "author": "Yookyung Kho;Jaehee Kim;Pilsung Kang", "authorids": "~Yookyung_Kho1;~Jaehee_Kim1;~Pilsung_Kang1", "gender": "F;M;M", "homepage": "https://github.com/yookyungkho;https://github.com/KimJaehee0725;", "dblp": ";92/5260;65/3604", "google_scholar": ";Iwp52qoAAAAJ;I2pcWZIAAAAJ", "or_profile": "~Yookyung_Kho1;~Jaehee_Kim1;~Pilsung_Kang1", "aff": "Korea University;Korea University;Korea University", "aff_domain": "korea.ac.kr;korea.ac.kr;korea.ac.kr", "position": "MS student;MS student;Full Professor", "bibtex": "@inproceedings{\nkho2023boosting,\ntitle={Boosting Prompt-Based Self-Training With Mapping-Free Automatic Verbalizer for Multi-Class Classification},\nauthor={Yookyung Kho and Jaehee Kim and Pilsung Kang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=8DKrruapZ5}\n}", "github": "", "project": "", "reviewers": "jFV1;iK6f;jSyo", "site": "https://openreview.net/forum?id=8DKrruapZ5", "pdf_size": 0, "rating": "2;2;2", "confidence": "4;4;2", "excitement": "4;3;4", "reproducibility": "4;5;2", "correctness": "3;3;4", "rating_avg": 2.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 10, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": ";;", "aff_unique_index": "0;0;0", "aff_unique_norm": "Korea University", "aff_unique_dep": "", "aff_unique_url": "https://www.korea.ac.kr", "aff_unique_abbr": "KU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "South Korea" }, { "id": "8ElstW3DUT", "title": "DADA: Dialect Adaptation via Dynamic Aggregation of Linguistic Rules", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Existing large language models (LLMs) that mainly focus on Standard American English (SAE) often lead to significantly worse performance when being applied to other English dialects. While existing mitigations tackle discrepancies for individual target dialects, they assume access to high-accuracy dialect identification systems. The boundaries between dialects are inherently flexible, making it difficult to categorize language into discrete predefined categories. In this paper, we propose DADA (Dialect Adaptation via Dynamic Aggregation), a modular approach to imbue SAE-trained models with multi-dialectal robustness by composing adapters which handle specific linguistic features. The compositional architecture of DADA allows for both targeted adaptation to specific dialect variants and simultaneous adaptation to various dialects. 
We show that DADA is effective for both single task and instruction finetuned language models, offering an extensible and interpretable framework for adapting existing LLMs to different English dialects.", "keywords": "Dialect Adaptation; Dialect Robustness; Linguistic Diversity; Fairness; Human-Centered NLP", "primary_area": "", "supplementary_material": "", "author": "Yanchen Liu;William Barr Held;Diyi Yang", "authorids": "~Yanchen_Liu2;~William_Barr_Held1;~Diyi_Yang2", "gender": "M;M;F", "homepage": "https://liuyanchen1015.github.io/;https://williamheld.com/;https://cs.stanford.edu/~diyiy/", "dblp": ";245/8601.html;70/11145", "google_scholar": "https://scholar.google.com/citations?hl=en;SP9VJNkAAAAJ;j9jhYqQAAAAJ", "or_profile": "~Yanchen_Liu2;~William_Barr_Held1;~Diyi_Yang2", "aff": "Harvard University;Georgia Institute of Technology;Stanford University", "aff_domain": "harvard.edu;gatech.edu;stanford.edu", "position": "MS student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nliu2023dada,\ntitle={{DADA}: Dialect Adaptation via Dynamic Aggregation of Linguistic Rules},\nauthor={Yanchen Liu and William Barr Held and Diyi Yang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=8ElstW3DUT}\n}", "github": "", "project": "", "reviewers": "XXyA;KiYC;ybzc", "site": "https://openreview.net/forum?id=8ElstW3DUT", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;3;4", "excitement": "4;4;4", "reproducibility": "4;3;3", "correctness": "4;3;3", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 4.0, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": ";williambarrheld/;", "aff_unique_index": "0;1;2", "aff_unique_norm": "Harvard University;Georgia Institute of Technology;Stanford University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.harvard.edu;https://www.gatech.edu;https://www.stanford.edu", "aff_unique_abbr": "Harvard;Georgia Tech;Stanford", "aff_campus_unique_index": "1", "aff_campus_unique": ";Stanford", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "8FXeFY5487", "title": "Enhancing Scalability of Pre-trained Language Models via Efficient Parameter Sharing", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "In this paper, we propose a highly parameter-efficient approach to scaling pre-trained language models (PLMs) to a deeper model depth. \nUnlike prior work that shares all parameters or uses extra blocks, we design a more capable parameter-sharing architecture based on matrix product operator (MPO), an efficient tensor decomposition method to factorize the parameter matrix into a set of local tensors. Based on such a decomposition, we share the important local tensor across all layers for reducing the model size and meanwhile keep layer-specific tensors (also using Adapters) for enhancing the adaptation flexibility. To improve the model training, we further propose a stable initialization algorithm tailored for the MPO-based architecture. Extensive experiments have demonstrated the effectiveness of our proposed model in enhancing scalability and achieving higher performance (i.e., with fewer parameters than BERT-base, we successfully scale the model depth by a factor of 4x and even achieve 0.1 points higher than BERT-large for GLUE score). 
The code to reproduce the results of this paper can be found at https://github.com/RUCAIBox/MPOBERT-code.", "keywords": "parameter-efficient;pre-trained language models;scalability", "primary_area": "", "supplementary_material": "", "author": "Peiyu Liu;Ze-Feng Gao;Yushuo Chen;Xin Zhao;Ji-Rong Wen", "authorids": "~Peiyu_Liu1;~Ze-Feng_Gao1;~Yushuo_Chen2;~Xin_Zhao10;~Ji-Rong_Wen1", "gender": "M;M;M;M;M", "homepage": "https://peiyuliu.tech/;https://zfgao66.github.io/homepage/;;https://gsai.ruc.edu.cn/addons/teacher/index/info.html?user_id=5&ruccode=20140041&ln=cn;https://gsai.ruc.edu.cn/english/jrwen", "dblp": "85/670-2;239/5268.html;45/4693;https://dblp.uni-trier.de/pid/52/8700.html;w/JRWen", "google_scholar": "0UyGs0YAAAAJ;vB64k4IAAAAJ;mHEUwC0AAAAJ;JNhNacoAAAAJ;tbxCHJgAAAAJ", "or_profile": "~Peiyu_Liu1;~Ze-Feng_Gao1;~Yushuo_Chen2;~Xin_Zhao10;~Ji-Rong_Wen1", "aff": "Renmin University of China;Renmin University of China;Renmin University of China;Renmin University of China;Renmin University of China", "aff_domain": "ruc.edu.cn;ruc.edu.cn;ruc.edu.cn;ruc.edu.cn;ruc.edu.cn", "position": "PhD student;Postdoc;MS student;Full Professor;Full Professor", "bibtex": "@inproceedings{\nliu2023enhancing,\ntitle={Enhancing Scalability of Pre-trained Language Models via Efficient Parameter Sharing},\nauthor={Peiyu Liu and Ze-Feng Gao and Yushuo Chen and Xin Zhao and Ji-Rong Wen},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=8FXeFY5487}\n}", "github": "", "project": "", "reviewers": "M4qE;v9WE;4FdP;mWkV", "site": "https://openreview.net/forum?id=8FXeFY5487", "pdf_size": 0, "rating": "4;4;4;4", "confidence": "4;3;3;3", "excitement": "3;4;3;3", "reproducibility": "3;4;3;3", "correctness": "4;4;3;4", "rating_avg": 4.0, "confidence_avg": 3.25, "excitement_avg": 3.25, "reproducibility_avg": 3.25, "correctness_avg": 3.75, "replies_avg": 14, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-6695-8209;0000-0003-3076-9518;0000-0002-8333-6196;0000-0002-9777-9676", "linkedin": ";;;;", "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Renmin University of China", "aff_unique_dep": "", "aff_unique_url": "http://www.ruc.edu.cn", "aff_unique_abbr": "RUC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "8FgdMHbW27", "title": "Poisoning Retrieval Corpora by Injecting Adversarial Passages", "track": "main", "status": "Short Main", "tldr": "", "abstract": "Dense retrievers have achieved state-of-the-art performance in various information retrieval tasks, but to what extent can they be safely deployed in real-world applications? In this work, we propose a novel attack for dense retrieval systems in which a malicious user generates a small number of adversarial passages by perturbing discrete tokens to maximize similarity with a provided set of training queries. When these adversarial passages are inserted into a large retrieval corpus, we show that this attack is highly effective in fooling these systems to retrieve them for queries that were not seen by the attacker. More surprisingly, these adversarial passages can directly generalize to out-of-domain queries and corpora with a high success attack rate --- for instance, we find that 50 generated passages optimized on Natural Questions can mislead >94% of questions posed in financial documents or online forums. 
We also benchmark and compare a range of state-of-the-art dense retrievers, both unsupervised and supervised. Although different systems exhibit varying levels of vulnerability, we show they can all be successfully attacked by injecting up to 500 passages, a small fraction compared to a retrieval corpus of millions of passages.", "keywords": "Dense Retrieval;Corpus Poisoning;Adversarial Attack", "primary_area": "", "supplementary_material": "", "author": "Zexuan Zhong;Ziqing Huang;Alexander Wettig;Danqi Chen", "authorids": "~Zexuan_Zhong1;~Ziqing_Huang1;~Alexander_Wettig1;~Danqi_Chen1", "gender": "M;;;F", "homepage": "https://www.cs.princeton.edu/~zzhong/;;https://www.cs.princeton.edu/~awettig/;https://www.cs.princeton.edu/~danqic/", "dblp": "218/7257;;302/0235;87/7949", "google_scholar": ";;N_jSE08AAAAJ;sVR8ktkAAAAJ", "or_profile": "~Zexuan_Zhong1;~Ziqing_Huang1;~Alexander_Wettig1;~Danqi_Chen1", "aff": "Princeton University;;Princeton University;Princeton University", "aff_domain": "princeton.edu;;princeton.edu;cs.princeton.edu", "position": "PhD student;;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nzhong2023poisoning,\ntitle={Poisoning Retrieval Corpora by Injecting Adversarial Passages},\nauthor={Zexuan Zhong and Ziqing Huang and Alexander Wettig and Danqi Chen},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=8FgdMHbW27}\n}", "github": "", "project": "", "reviewers": "eoXC;CaDC;oe2c", "site": "https://openreview.net/forum?id=8FgdMHbW27", "pdf_size": 0, "rating": "3;3;3", "confidence": "5;4;4", "excitement": "3;3;4", "reproducibility": "4;5;4", "correctness": "4;3;4", "rating_avg": 3.0, "confidence_avg": 4.333333333333333, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.333333333333333, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;", "linkedin": ";;alexander-wettig/;", "aff_unique_index": "0;0;0", "aff_unique_norm": "Princeton University", "aff_unique_dep": "", "aff_unique_url": "https://www.princeton.edu", "aff_unique_abbr": "Princeton", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "8IrFLWRvuW", "title": "InfoDiffusion: Information Entropy Aware Diffusion Process for Non-Autoregressive Text Generation", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Diffusion models have garnered considerable interest in the field of text generation. Several studies have explored text diffusion models with different structures and applied them to various tasks, including named entity recognition and summarization. However, there exists a notable disparity between the \"easy-first\" text generation process of current diffusion models and the \"keyword-first\" natural text generation process of humans, which has received limited attention. To bridge this gap, we propose InfoDiffusion, a non-autoregressive text diffusion model. Our approach introduces a \"keyinfo-first\" generation strategy and incorporates a noise schedule based on the amount of text information. In addition, InfoDiffusion combines self-conditioning with a newly proposed partially noising model structure. 
Experimental results show that InfoDiffusion outperforms the baseline model in terms of generation quality and diversity, as well as exhibiting higher sampling efficiency.", "keywords": "Text generation;Diffusion model;Information entropy", "primary_area": "", "supplementary_material": "", "author": "Renzhi Wang;Jing Li;Piji Li", "authorids": "~Renzhi_Wang2;~Jing_Li18;~Piji_Li1", "gender": ";F;M", "homepage": ";http://www4.comp.polyu.edu.hk/~jing1li/;http://lipiji.com/", "dblp": "152/2466-1;181/2820-49;77/8278.html", "google_scholar": "EbG5MYsAAAAJ;jvjOLx4AAAAJ;88ZlyicAAAAJ", "or_profile": "~Renzhi_Wang2;~Jing_Li18;~Piji_Li1", "aff": "Nanjing University of Aeronautics and Astronautics;The Hong Kong Polytechnic University;Nanjing University of Aeronautics and Astronautics", "aff_domain": "nuaa.edu.cn;polyu.edu.hk;nuaa.edu.cn", "position": "Undergrad student;Assistant Professor;Professor", "bibtex": "@inproceedings{\nwang2023infodiffusion,\ntitle={InfoDiffusion: Information Entropy Aware Diffusion Process for Non-Autoregressive Text Generation},\nauthor={Renzhi Wang and Jing Li and Piji Li},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=8IrFLWRvuW}\n}", "github": "", "project": "", "reviewers": "SUds;SEBp;xSvW", "site": "https://openreview.net/forum?id=8IrFLWRvuW", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;4;4", "excitement": "3;3;4", "reproducibility": "4;3;4", "correctness": "3;3;3", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0009-0008-7448-4593;0000-0002-8044-2284;", "linkedin": ";jing-li-b815b7a5/;", "aff_unique_index": "0;1;0", "aff_unique_norm": "Nanjing University of Aeronautics and Astronautics;Hong Kong Polytechnic University", "aff_unique_dep": ";", "aff_unique_url": "http://www.nuaa.edu.cn;https://www.polyu.edu.hk", "aff_unique_abbr": "NUAA;PolyU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "8L5SA7ENI4", "title": "The Sentiment Problem: A Critical Survey towards Deconstructing Sentiment Analysis", "track": "main", "status": "Long Main", "tldr": "", "abstract": "We conduct an inquiry into the sociotechnical aspects of sentiment analysis (SA) by critically examining 189 peer-reviewed papers on their applications, models, and datasets. Our investigation stems from the recognition that SA has become an integral component of diverse sociotechnical systems, exerting influence on both social and technical users. By delving into sociological and technological literature on sentiment, we unveil distinct conceptualizations of this term in domains such as finance, government, and medicine. Our study exposes a lack of explicit definitions and frameworks for characterizing sentiment, resulting in potential challenges and biases. To tackle this issue, we propose an ethics sheet encompassing critical inquiries to guide practitioners in ensuring equitable utilization of SA. 
Our findings underscore the significance of adopting an interdisciplinary approach to defining sentiment in SA and offer a pragmatic solution for its implementation.", "keywords": "Sentiment Analysis;Natural Language Processing;Critical Survey;Ethics in NLP;Ethics based Auditing.", "primary_area": "", "supplementary_material": "", "author": "Pranav Narayanan Venkit;Mukund Srinath;Sanjana Gautam;Saranya Venkatraman;Vipul Gupta;Rebecca J. Passonneau;Shomir Wilson", "authorids": "~Pranav_Narayanan_Venkit1;~Mukund_Srinath1;~Sanjana_Gautam1;~Saranya_Venkatraman1;~Vipul_Gupta3;~Rebecca_J._Passonneau1;~Shomir_Wilson1", "gender": "M;M;F;;M;F;M", "homepage": "https://www.pranavkit.com/;https://mukundsrinath.github.io;https://www.sanjanagautam.com/;;https://vipulgupta1011.github.io/;https://sites.psu.edu/becky/;https://shomir.net/", "dblp": "287/9127;;230/5026.html;;77/1831;04/696;98/8884", "google_scholar": "oxgNwDsAAAAJ;-1l0pUIAAAAJ;mdUH-4QAAAAJ;;QWVqqnMAAAAJ;https://scholar.google.com/citations?hl=en;0GHjBSUAAAAJ", "or_profile": "~Pranav_Narayanan_Venkit1;~Mukund_Srinath1;~Sanjana_Gautam1;~Saranya_Venkatraman1;~Vipul_Gupta3;~Rebecca_J._Passonneau1;~Shomir_Wilson1", "aff": "Pennsylvania State University;Pennsylvania State University;Pennsylvania State University;;Pennsylvania State University;Pennsylvania State University;Pennsylvania State University", "aff_domain": "psu.edu;psu.edu;psu.edu;;psu.edu;psu.edu;psu.edu", "position": "PhD student;PhD student;PhD student;;PhD student;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nvenkit2023the,\ntitle={The Sentiment Problem: A Critical Survey towards Deconstructing Sentiment Analysis},\nauthor={Pranav Narayanan Venkit and Mukund Srinath and Sanjana Gautam and Saranya Venkatraman and Vipul Gupta and Rebecca J. Passonneau and Shomir Wilson},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=8L5SA7ENI4}\n}", "github": "", "project": "", "reviewers": "8ryf;HzjA;tS1k", "site": "https://openreview.net/forum?id=8L5SA7ENI4", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;4;4", "excitement": "4;4;4", "reproducibility": "5;4;0", "correctness": "4;4;5", "rating_avg": 5.0, "confidence_avg": 4.0, "excitement_avg": 4.0, "reproducibility_avg": 3.0, "correctness_avg": 4.333333333333333, "replies_avg": 11, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-5671-0461;;0000-0002-2933-304X;;;0000-0001-8626-811X;0000-0003-1235-3754", "linkedin": "pranav-venkit/;mukund-srinath/;sanjanagautam/;;vipul-gupta-70950111b/;rebecca-j-passonneau-8967091/;shomirwilson/", "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "Pennsylvania State University", "aff_unique_dep": "", "aff_unique_url": "https://www.psu.edu", "aff_unique_abbr": "PSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "8LSuy5nNmz", "title": "Multilingual Generation and Answering of Questions from Texts and Knowledge Graphs", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "The ability to bridge Question Generation (QG) and Question Answering (QA) across structured and unstructured modalities has the potential for aiding different NLP applications. One key application is in QA-based methods that have recently been shown to be useful for automatically evaluating Natural Language (NL) texts generated from Knowledge Graphs (KG). 
While methods have been proposed for QG-QA across these modalities, these efforts have been in English only; in this work, we bring multilinguality (Brazilian Portuguese and Russian) to multimodal (KG and NL) QG-QA. Using synthetic data generation and machine translation to produce QG-QA data that is aligned between graph and text, we are able to train multimodal, multi-task models that can perform multimodal QG and QA in Portuguese and Russian. We show that our approach outperforms a baseline which is derived from previous work on English and adapted to handle these two languages.", "keywords": "question generation;question answering;multilinguality;multi-modality;knowledge bases;consistency", "primary_area": "", "supplementary_material": "", "author": "Kelvin Han;Claire Gardent", "authorids": "~Kelvin_Han1;~Claire_Gardent1", "gender": ";F", "homepage": ";https://members.loria.fr/CGardent/", "dblp": ";71/6819", "google_scholar": ";gHC1paQAAAAJ", "or_profile": "~Kelvin_Han1;~Claire_Gardent1", "aff": ";CNRS", "aff_domain": ";cnrs.fr", "position": ";Principal Researcher", "bibtex": "@inproceedings{\nhan2023multilingual,\ntitle={Multilingual Generation and Answering of Questions from Texts and Knowledge Graphs},\nauthor={Kelvin Han and Claire Gardent},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=8LSuy5nNmz}\n}", "github": "", "project": "", "reviewers": "HqCQ;URPa;zbSx", "site": "https://openreview.net/forum?id=8LSuy5nNmz", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;2;2", "excitement": "4;3;4", "reproducibility": "4;3;3", "correctness": "3;3;3", "rating_avg": 4.0, "confidence_avg": 2.3333333333333335, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-3805-6662", "linkedin": ";claire-gardent-70116341/?originalSubdomain=fr", "aff_unique_index": "0", "aff_unique_norm": "Centre National de la Recherche Scientifique", "aff_unique_dep": "", "aff_unique_url": "https://www.cnrs.fr", "aff_unique_abbr": "CNRS", "aff_country_unique_index": "0", "aff_country_unique": "France" }, { "id": "8NA76tz7Jj", "title": "Data Augmentation for Code Translation with Comparable Corpora and Multiple References", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "One major challenge of translating code between programming languages is that parallel training data is often limited. To overcome this challenge, we present two data augmentation techniques, one that builds comparable corpora (i.e., code pairs with similar functionality), and another that augments existing parallel data with multiple reference translations. Specifically, we build and analyze multiple types of comparable corpora, including programs generated from natural language documentation using a code generation model. Furthermore, to reduce overfitting to a single reference translation, we automatically generate additional translation references for available parallel data and filter the translations by unit tests, which increases variation in target translations. Experiments show that our data augmentation techniques significantly improve CodeT5 for translation between Java, Python, and C++ by an average of 7.5\\% Computational Accuracy (CA@1), which verifies the correctness of translations by execution. 
The code is available at https://github.com/Veronicium/CMTrans.", "keywords": "Code translation;Machine learning for code", "primary_area": "", "supplementary_material": "", "author": "Yiqing Xie;Atharva Naik;Daniel Fried;Carolyn Rose", "authorids": "~Yiqing_Xie1;~Atharva_Naik1;~Daniel_Fried1;~Carolyn_Rose1", "gender": "Not Specified;M;M;F", "homepage": "https://veronicium.github.io;https://atharva-naik.github.io;https://dpfried.github.io/;http://www.cs.cmu.edu/~cprose/", "dblp": "147/6506;272/8869.html;117/4804;r/CarolynPensteinRose", "google_scholar": "200mJh8AAAAJ;wTTF4yYAAAAJ;sJDqACEAAAAJ;https://scholar.google.com.tw/citations?user=BMydCgcAAAAJ", "or_profile": "~Yiqing_Xie1;~Atharva_Naik1;~Daniel_Fried1;~Carolyn_Rose1", "aff": "Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University;School of Computer Science, Carnegie Mellon University", "aff_domain": "cmu.edu;andrew.cmu.edu;cmu.edu;cs.cmu.edu", "position": "PhD student;MS student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nxie2023data,\ntitle={Data Augmentation for Code Translation with Comparable Corpora and Multiple References},\nauthor={Yiqing Xie and Atharva Naik and Daniel Fried and Carolyn Rose},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=8NA76tz7Jj}\n}", "github": "", "project": "", "reviewers": "PYWi;jFgc;9cbG", "site": "https://openreview.net/forum?id=8NA76tz7Jj", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;3;5", "excitement": "4;3;2", "reproducibility": "3;3;3", "correctness": "4;4;3", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 3.0, "reproducibility_avg": 3.0, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0009-0007-1759-2259;;0000-0003-1128-5155", "linkedin": ";;;carolyn-rose-11226b23b/", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Carnegie Mellon University", "aff_unique_dep": "", "aff_unique_url": "https://www.cmu.edu", "aff_unique_abbr": "CMU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Pittsburgh", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "8NFU2kLql3", "title": "HANSEN: Human and AI Spoken Text Benchmark for Authorship Analysis", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "$\\textit{Authorship Analysis}$, also known as stylometry, has been an essential aspect of Natural Language Processing (NLP) for a long time. Likewise, the recent advancement of Large Language Models (LLMs) has made authorship analysis increasingly crucial for distinguishing between human-written and AI-generated texts. However, these authorship analysis tasks have primarily been focused on $\\textit{written texts}$, not considering $\\textit{spoken texts}$. Thus, we introduce the largest benchmark for spoken texts - ${\\sf HANSEN}$($\\underline{H}$uman $\\underline{AN}$d ai $\\underline{S}$poken t$\\underline{E}$xt be$\\underline{N}$chmark). ${\\sf HANSEN}$ encompasses meticulous curation of existing speech datasets accompanied by transcripts, alongside the creation of novel AI-generated spoken text datasets. Together, it comprises 17 human datasets, and AI-generated spoken texts created using 3 prominent LLMs: ChatGPT, PaLM2, and Vicuna13B. 
To evaluate and demonstrate the utility of ${\\sf HANSEN}$, we perform Authorship Attribution (AA) \\& Author Verification (AV) on human-spoken datasets and conducted Human vs. AI text detection using state-of-the-art (SOTA) models. \nWhile SOTA methods, such as, character n-gram or Transformer-based model, exhibit similar AA \\& AV performance in human-spoken datasets compared to written ones, there is much room for improvement in AI-generated spoken text detection. The ${\\sf HANSEN}$ benchmark is available at: https://huggingface.co/datasets/HANSEN-REPO/HANSEN", "keywords": "Authorship analysis;Spoken text;Large Language Model;AI text detection", "primary_area": "", "supplementary_material": "", "author": "Nafis Irtiza Tripto;Adaku Uchendu;Thai Le;Mattia Setzu;Fosca Giannotti;Dongwon Lee", "authorids": "~Nafis_Irtiza_Tripto1;~Adaku_Uchendu1;~Thai_Le1;~Mattia_Setzu1;~Fosca_Giannotti2;~Dongwon_Lee1", "gender": "M;F;;M;F;M", "homepage": "https://sites.google.com/view/nafis-irtiza-tripto/home;https://adauchendu.github.io/;https://lethaiq.github.io/tql3/;https://msetzu.github.io/about;https://kdd.isti.cnr.it/people/giannotti-fosca;https://pike.psu.edu/dongwon", "dblp": ";244/0488;03/9889;177/7148.html;g/FoscaGiannotti;l/DongwonLee", "google_scholar": "nQ2Hir8AAAAJ;A4be1l4AAAAJ;Fd8K7kAAAAAJ;3UL0kVUAAAAJ;PKz_a_AAAAAJ;MzL-WnEAAAAJ", "or_profile": "~Nafis_Irtiza_Tripto1;~Adaku_Uchendu1;~Thai_Le1;~Mattia_Setzu1;~Fosca_Giannotti2;~Dongwon_Lee1", "aff": "Pennsylvania State University;Pennsylvania State University;University of Mississippi;Universita' di Pisa, University of Pisa;School of Education Pisa;The Pennsylvania State University", "aff_domain": "psu.edu;psu.edu;olemiss.edu;di.unipi.it;sns.it;psu.edu", "position": "PhD student;PhD student;Assistant Professor;Researcher;Full Professor;Full Professor", "bibtex": "@inproceedings{\ntripto2023hansen,\ntitle={{HANSEN}: Human and {AI} Spoken Text Benchmark for Authorship Analysis},\nauthor={Nafis Irtiza Tripto and Adaku Uchendu and Thai Le and Mattia Setzu and Fosca Giannotti and Dongwon Lee},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=8NFU2kLql3}\n}", "github": "", "project": "", "reviewers": "sdko;Ky6M;bJGW", "site": "https://openreview.net/forum?id=8NFU2kLql3", "pdf_size": 0, "rating": "4;4;4", "confidence": "5;3;3", "excitement": "3;4;4", "reproducibility": "2;4;3", "correctness": "2;4;3", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.0, "correctness_avg": 3.0, "replies_avg": 10, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0001-9632-6870;0000-0001-8351-9999;0000-0003-3099-3835;0000-0001-8371-7629", "linkedin": "nafis-irtiza-tripto/;;;mattia-setzu-062b01158/;;", "aff_unique_index": "0;0;1;2;2;0", "aff_unique_norm": "Pennsylvania State University;University of Mississippi;University of Pisa", "aff_unique_dep": ";;", "aff_unique_url": "https://www.psu.edu;https://www.olemiss.edu;https://www.unipi.it", "aff_unique_abbr": "PSU;UM;UniPi", "aff_campus_unique_index": "1", "aff_campus_unique": ";Pisa", "aff_country_unique_index": "0;0;0;1;1;0", "aff_country_unique": "United States;Italy" }, { "id": "8PNFSDJ3md", "title": "Empower Nested Boolean Logic via Self-Supervised Curriculum Learning", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Beyond the great cognitive powers showcased by language models, it is crucial to 
scrutinize whether their reasoning capabilities stem from strong generalization or merely exposure to relevant data. As opposed to constructing increasingly complex logic, this paper probes into the boolean logic, the root capability of a logical reasoner. We find that any pre-trained language models even including large language models only behave like a random selector in the face of multi-nested boolean logic, a task that humans can handle with ease. To empower language models with this fundamental capability, this paper proposes a new self-supervised learning method Curriculum Logical Reasoning (Clr), where we augment the training data with nested boolean logic chain step-by-step, and program the training from simpler logical patterns gradually to harder ones. This new training paradigm allows language models to effectively generalize to much harder and longer-hop logic, which can hardly be learned through naive training. Furthermore, we show that boolean logic is a great foundation for improving the subsequent general logical tasks.", "keywords": "nested boolean logic;curriculum learning", "primary_area": "", "supplementary_material": "", "author": "Hongqiu Wu;Linfeng Liu;hai zhao;Min Zhang", "authorids": "~Hongqiu_Wu1;~Linfeng_Liu5;~hai_zhao1;~Min_Zhang9", "gender": "M;M;M;M", "homepage": "https://gingasan.github.io;https://github.com/Claude-Liu;http://bcmi.sjtu.edu.cn/~zhaohai/;https://zhangmin-nlp-ai.github.io/", "dblp": ";84/1540-3;25/1145-1.html;83/5342-5", "google_scholar": "https://scholar.google.com/citations?hl=en;;https://scholar.google.com.tw/citations?user=4dU5KS0AAAAJ;https://scholar.google.com/citations?hl=zh-CN", "or_profile": "~Hongqiu_Wu1;~Linfeng_Liu5;~hai_zhao1;~Min_Zhang9", "aff": "Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University;Harbin Institute of Technology, Shenzhen", "aff_domain": "sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;hit.edu.cn", "position": "PhD student;Undergrad student;Full Professor;Full Professor", "bibtex": "@inproceedings{\nwu2023empower,\ntitle={Empower Nested Boolean Logic via Self-Supervised Curriculum Learning},\nauthor={Hongqiu Wu and Linfeng Liu and hai zhao and Min Zhang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=8PNFSDJ3md}\n}", "github": "", "project": "", "reviewers": "sk6i;hbvC;ZqCq", "site": "https://openreview.net/forum?id=8PNFSDJ3md", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;4;2", "excitement": "4;4;4", "reproducibility": "4;5;4", "correctness": "3;5;3", "rating_avg": 5.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 4.0, "reproducibility_avg": 4.333333333333333, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;", "linkedin": ";;;", "aff_unique_index": "0;0;0;1", "aff_unique_norm": "Shanghai Jiao Tong University;Harbin Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.sjtu.edu.cn;http://en.hhit.edu.cn/", "aff_unique_abbr": "SJTU;HIT", "aff_campus_unique_index": "1", "aff_campus_unique": ";Shenzhen", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "8POQ904HEc", "title": "HeQ: a Large and Diverse Hebrew Reading Comprehension Benchmark", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Current benchmarks for Hebrew Natural Language Processing (NLP) focus mainly on morpho-syntactic tasks, neglecting the 
semantic dimension of language understanding. To bridge this gap, we set out to deliver a Hebrew Machine Reading Comprehension (MRC) dataset, where MRC is to be realized as extractive Question Answering.\nThe morphologically-rich nature of Hebrew poses a challenge to this endeavor: the indeterminacy and non-transparency of span boundaries in morphologically complex forms lead to annotation inconsistencies, disagreements, and flaws of standard evaluation metrics.\nTo remedy this, we devise a novel set of guidelines, a controlled crowdsourcing protocol, and revised evaluation metrics, that are suitable for the morphologically rich nature of the language.\nOur resulting benchmark, HeQ (Hebrew QA), features 30,147 diverse question-answer pairs derived from both Hebrew Wikipedia articles and Israeli tech news. Our empirical investigation reveals that standard evaluation metrics such as F1 Scores and Exact Match (EM) are not appropriate for Hebrew (and other MRLs), and we propose a relevant enhancement. In addition, our experiments show low correlation between models' performance on morpho-syntactic tasks and on MRC, which suggests that models that are designed for the former might underperform on semantic-heavy tasks. The development and exploration of HeQ illustrate some of the challenges MRLs pose in natural language understanding (NLU), fostering progression towards more and better NLU models for Hebrew and other MRLs.", "keywords": "hebrew;machine reading comprehension;question answering;dataset", "primary_area": "", "supplementary_material": "", "author": "Amir David Nissan Cohen;Hilla Merhav-Fine;Yoav Goldberg;Reut Tsarfaty", "authorids": "~Amir_David_Nissan_Cohen1;~Hilla_Merhav-Fine1;~Yoav_Goldberg1;~Reut_Tsarfaty1", "gender": "M;M;F;F", "homepage": "http://www.cs.technion.ac.il/~amirc/;https://www.cs.biu.ac.il/~yogo;;", "dblp": ";68/5296;21/3716;", "google_scholar": "KRkQizcAAAAJ;https://scholar.google.co.il/citations?user=0rskDKgAAAAJ;;", "or_profile": "~Amir_David_Nissan_Cohen1;~Yoav_Goldberg1;~Reut_Tsarfaty1;~Hilla_Merhav_Fine1", "aff": "Bar Ilan University;Allen Institute for Artificial Intelligence;Bar-Ilan University, Technion;Tel Aviv University", "aff_domain": "biu.ac.il;allenai.org;biu.ac.il;tau.ac.il", "position": "PhD student;Principal Researcher;Associate Professor;MS student", "bibtex": "@inproceedings{\ncohen2023heq,\ntitle={HeQ: a Large and Diverse Hebrew Reading Comprehension Benchmark},\nauthor={Amir David Nissan Cohen and Hilla Merhav-Fine and Yoav Goldberg and Reut Tsarfaty},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=8POQ904HEc}\n}", "github": "", "project": "", "reviewers": "ujfx;mCKP;cQ9w", "site": "https://openreview.net/forum?id=8POQ904HEc", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;3;2", "excitement": "4;3;3", "reproducibility": "4;5;4", "correctness": "3;3;4", "rating_avg": 4.0, "confidence_avg": 2.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.333333333333333, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;", "linkedin": ";;;hilla-merhav-fine-552ba216a", "aff_unique_index": "0;1;0;2", "aff_unique_norm": "Bar-Ilan University;Allen Institute for Artificial Intelligence;Tel Aviv University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.biu.ac.il;https://allenai.org;https://www.tau.ac.il", "aff_unique_abbr": 
"BIU;AI2;TAU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "Israel;United States" }, { "id": "8Rif7M7Z6A", "title": "DepNeCTI: Dependency-based Nested Compound Type Identification for Sanskrit", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Multi-component compounding is a prevalent phenomenon in Sanskrit, and understanding the implicit structure of a compound\u2019s components is crucial for deciphering its meaning. Earlier approaches in Sanskrit have focused on binary compounds and neglected the multi-component compound setting. This work introduces the novel task of nested compound type identification (NeCTI), which aims to identify nested spans of a multi-component compound and decode the implicit semantic relations between them. To the best of our knowledge, this is the first attempt in the field of lexical semantics to propose this task. \n\nWe present 2 newly annotated datasets including an out-of-domain dataset for this task. We also benchmark these datasets by exploring the efficacy of the standard problem formulations such as nested named entity recognition, constituency parsing and seq2seq, etc. We present a novel framework named DepNeCTI: Dependency-based Nested Compound Type Identifier that surpasses the performance of the best baseline with an average absolute improvement of 13.1 points F1-score in terms of Labeled Span Score (LSS) and a 5-fold enhancement in inference efficiency. In line with the previous findings in the binary Sanskrit compound identification task, context provides benefits for the NeCTI task. The codebase and datasets are publicly available at: https://github.com/yaswanth-iitkgp/DepNeCTI", "keywords": "Nested compound type identification for Sanskrit;word-level lexical semantics;newly annotated dataset;benchmarking and dependency-based novel framework.", "primary_area": "", "supplementary_material": "", "author": "Jivnesh Sandhan;Yaswanth Narsupalli;Sreevatsa Muppirala;Sriram Krishnan;Pavankumar Satuluri;Amba Kulkarni;Pawan Goyal", "authorids": "~Jivnesh_Sandhan1;~Yaswanth_Narsupalli1;~Sreevatsa_Muppirala1;~Sriram_Krishnan2;~Pavankumar_Satuluri2;~Amba_Kulkarni1;~Pawan_Goyal1", "gender": "M;M;;M;M;F;M", "homepage": "https://jivnesh.github.io;;;;https://www.cvv.ac.in/faculties/dr-pavankumar-satuluri;http://sanskrit.uohyd.ac.in/faculty/amba;http://cse.iitkgp.ac.in/~pawang/", "dblp": ";359/0633.html;;;;https://dblp.uni-trier.de/pid/78/4303.html;77/2307-2", "google_scholar": ";0xLN_4gAAAAJ;;https://scholar.google.com/citations?hl=en;8OVJdlcAAAAJ;7I_hLzQAAAAJ;https://scholar.google.com.tw/citations?user=F14FHsIAAAAJ", "or_profile": "~Jivnesh_Sandhan1;~Yaswanth_Narsupalli1;~Sreevatsa_Muppirala1;~Sriram_Krishnan2;~Pavankumar_Satuluri2;~Amba_Kulkarni1;~Pawan_Goyal1", "aff": "Indian Institute of Technology, Kanpur;IIT Kharagpur;;University of Hyderabad;;University of Hyderabad;IIT Kharagpur", "aff_domain": "iitk.ac.in;iitkgp.ac.in;;uohyd.ernet.in;;uohyd.ernet.in;cse.iitkgp.ac.in", "position": "PhD student;Undergrad student;;PhD student;;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nsandhan2023depnecti,\ntitle={DepNe{CTI}: Dependency-based Nested Compound Type Identification for Sanskrit},\nauthor={Jivnesh Sandhan and Yaswanth Narsupalli and Sreevatsa Muppirala and Sriram Krishnan and Pavankumar Satuluri and Amba Kulkarni and Pawan Goyal},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language 
Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=8Rif7M7Z6A}\n}", "github": "", "project": "", "reviewers": "8vVf;7qSR;TGub", "site": "https://openreview.net/forum?id=8Rif7M7Z6A", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;2;4", "excitement": "4;2;3", "reproducibility": "4;4;3", "correctness": "4;3;3", "rating_avg": 3.0, "confidence_avg": 3.0, "excitement_avg": 3.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0009-0001-7364-8188;;;;0000-0001-7617-3918;", "linkedin": ";narsupalli-yaswanth/;;;;amba-anantpur-rao-kulkarni-2bb8868/;", "aff_unique_index": "0;1;2;2;1", "aff_unique_norm": "Indian Institute of Technology Kanpur;Indian Institute of Technology Kharagpur;University of Hyderabad", "aff_unique_dep": ";;", "aff_unique_url": "https://www.iitk.ac.in;https://www.iitkgp.ac.in;https://www.uohyd.ac.in", "aff_unique_abbr": "IIT Kanpur;IIT KGP;UoH", "aff_campus_unique_index": "0;1;1", "aff_campus_unique": "Kanpur;Kharagpur;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "India" }, { "id": "8UWPQboDq9", "title": "Multi-Task Learning of Query Generation and Classification for Generative Conversational Question Rewriting", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "In conversational search settings, users ask questions and receive answers as part of a conversation. The ambiguity in the questions is a common challenge, which can be effectively addressed by leveraging contextual information from the conversation history. In this context, determining topic continuity and reformulating questions into well-defined queries are crucial tasks. Previous approaches have typically addressed these tasks either as a classification task in the case of topic continuity or as a text generation task for question reformulation. However, no prior work has combined both tasks to effectively identify ambiguous questions as part of a conversation. In this paper, we propose a Multi-Task Learning (MTL) approach that uses a text generation model for both question rewriting and classification. Our models, based on BART and T5, are trained to rewrite conversational questions and identify follow-up questions simultaneously. We evaluate our approach on multiple test sets and demonstrate that it outperforms single-task learning baselines on the three LIF test sets, with statistically significant improvements ranging from +3.5% to +10.5% in terms of F1 and Micro-F1 scores. 
We also show that our approach outperforms single-task question rewriting models in passage retrieval on a large OR-QuAC test set.", "keywords": "multi-task learning;follow-up question identification;conversational question rewriting;text generation model", "primary_area": "", "supplementary_material": "", "author": "Sarawoot Kongyoung;Craig MacDonald;Iadh Ounis", "authorids": "~Sarawoot_Kongyoung1;~Craig_MacDonald1;~Iadh_Ounis2", "gender": "M;M;M", "homepage": ";http://www.dcs.gla.ac.uk/~craigm/publications.shtml;https://www.gla.ac.uk/schools/computing/staff/iadhounis/", "dblp": "05/4342;02/2224;21/141", "google_scholar": ";IBjMKHQAAAAJ;rKQMXOEAAAAJ", "or_profile": "~Sarawoot_Kongyoung1;~Craig_MacDonald1;~Iadh_Ounis2", "aff": "University of Glasgow;University of Glasgow;University of Glasgow", "aff_domain": "gla.ac.uk;gla.ac.uk;glasgow.ac.uk", "position": "PhD student;Full Professor;Full Professor", "bibtex": "@inproceedings{\nkongyoung2023multitask,\ntitle={Multi-Task Learning of Query Generation and Classification for Generative Conversational Question Rewriting},\nauthor={Sarawoot Kongyoung and Craig MacDonald and Iadh Ounis},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=8UWPQboDq9}\n}", "github": "", "project": "", "reviewers": "Y11v;yEsQ;PJ74", "site": "https://openreview.net/forum?id=8UWPQboDq9", "pdf_size": 0, "rating": "2;2;2", "confidence": "5;2;4", "excitement": "3;3;3", "reproducibility": "3;5;4", "correctness": "3;4;3", "rating_avg": 2.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.0, "reproducibility_avg": 4.0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0003-3143-279X;", "linkedin": ";;", "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Glasgow", "aff_unique_dep": "", "aff_unique_url": "https://www.gla.ac.uk", "aff_unique_abbr": "Glasgow", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United Kingdom" }, { "id": "8VK9XXgFHp", "title": "A Read-and-Select Framework for Zero-shot Entity Linking", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Zero-shot entity linking (EL) aims at aligning entity mentions to unseen entities to challenge the generalization ability. Previous methods largely focus on the candidate retrieval stage and ignore the essential candidate ranking stage, which disambiguates among entities and makes the final linking prediction. In this paper, we propose a read-and-select (ReS) framework by modeling the main components of entity disambiguation, i.e., mention-entity matching and cross-entity comparison. First, for each candidate, the reading module leverages mention context to output mention-aware entity representations, enabling mention-entity matching. Then, in the selecting module, we frame the choice of candidates as a sequence labeling problem, and all candidate representations are fused together to enable cross-entity comparison. 
Our method achieves the state-of-the-art performance on the established zero-shot EL dataset ZESHEL with a 2.55\\% micro-average accuracy gain, with no need for laborious multi-phase pre-training used in most of the previous work, showing the effectiveness of both mention-entity and cross-entity interaction.", "keywords": "entity linking;zero-shot learning", "primary_area": "", "supplementary_material": "", "author": "Zhenran Xu;Yulin Chen;Baotian Hu;Min Zhang", "authorids": "~Zhenran_Xu1;~Yulin_Chen3;~Baotian_Hu1;~Min_Zhang9", "gender": ";M;M;M", "homepage": ";https://github.com/LukeChen-go;;https://zhangmin-nlp-ai.github.io/", "dblp": "322/2310;;155/1902;83/5342-5", "google_scholar": "1m5X_28AAAAJ;AtlFU3kAAAAJ;5NiJ1VoAAAAJ;https://scholar.google.com/citations?hl=zh-CN", "or_profile": "~Zhenran_Xu1;~Yulin_Chen3;~Baotian_Hu1;~Min_Zhang9", "aff": "Harbin Institute of Technology, Shenzhen;Harbin Institute of Technology(Shenzhen);Harbin Institute of Technology, Shenzhen;Harbin Institute of Technology, Shenzhen", "aff_domain": "hit.edu.cn;stu.hit.edu.cn;hhit.edu.cn;hit.edu.cn", "position": "PhD student;Undergrad student;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nxu2023a,\ntitle={A Read-and-Select Framework for Zero-shot Entity Linking},\nauthor={Zhenran Xu and Yulin Chen and Baotian Hu and Min Zhang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=8VK9XXgFHp}\n}", "github": "", "project": "", "reviewers": "WPJY;cEiX;kXSc", "site": "https://openreview.net/forum?id=8VK9XXgFHp", "pdf_size": 0, "rating": "3;3;3", "confidence": "5;4;4", "excitement": "2;3;3", "reproducibility": "4;5;4", "correctness": "1;3;3", "rating_avg": 3.0, "confidence_avg": 4.333333333333333, "excitement_avg": 2.6666666666666665, "reproducibility_avg": 4.333333333333333, "correctness_avg": 2.3333333333333335, "replies_avg": 9, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-5536-806X;;0000-0001-7490-684X;", "linkedin": "zhenran-xu/;;;", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Harbin Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "http://en.hhit.edu.cn/", "aff_unique_abbr": "HIT", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Shenzhen", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "8WXwPUBFEb", "title": "Reading Order Matters: Information Extraction from Visually-rich Documents by Token Path Prediction", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Recent advances in multimodal pre-trained models have significantly improved information extraction from visually-rich documents (VrDs), in which named entity recognition (NER) is treated as a sequence-labeling task of predicting the BIO entity tags for tokens, following the typical setting of NLP. However, BIO-tagging scheme relies on the correct order of model inputs, which is not guaranteed in real-world NER on scanned VrDs where text are recognized and arranged by OCR systems. Such reading order issue hinders the accurate marking of entities by BIO-tagging scheme, making it impossible for sequence-labeling methods to predict correct named entities. To address the reading order issue, we introduce Token Path Prediction (TPP), a simple prediction head to predict entity mentions as token sequences within documents. 
Alternative to token classification, TPP models the document layout as a complete directed graph of tokens, and predicts token paths within the graph as entities. For better evaluation of VrD-NER systems, we also propose two revised benchmark datasets of NER on scanned documents which can reflect real-world scenarios. Experiment results demonstrate the effectiveness of our method, and suggest its potential to be a universal solution to various information extraction tasks on documents.", "keywords": "visually-rich document understanding;information extraction;named entity recognition", "primary_area": "", "supplementary_material": "", "author": "Chong Zhang;Ya Guo;Yi Tu;Huan Chen;Jinyang Tang;Huijia Zhu;Qi Zhang;Tao Gui", "authorids": "~Chong_Zhang3;~Ya_Guo2;~Yi_Tu2;~Huan_Chen2;~Jinyang_Tang1;~Huijia_Zhu1;~Qi_Zhang8;~Tao_Gui1", "gender": "M;M;M;M;M;F;M;M", "homepage": ";https://homepage.com;https://homepage.com;https://www.linkedin.com/in/huan-chen-824a7b254/;https://github.com/AdolphTang;https://scholar.google.com/citations?hl=zh-CN&user=DT-cyucAAAAJ;http://qizhang.info;", "dblp": "74/3128-;;;;;50/7121;52/323-1;135/6973", "google_scholar": "Fd3aeGEAAAAJ;;https://scholar.google.com/citations?hl=zh-CN;;;https://scholar.google.com/citations?hl=zh-CN;XfqR3yYAAAAJ;", "or_profile": "~Chong_Zhang3;~Ya_Guo2;~Yi_Tu2;~Huan_Chen2;~Jinyang_Tang1;~Huijia_Zhu1;~Qi_Zhang8;~Tao_Gui1", "aff": "Fudan University;Ant Group;Ant Group;Alibaba Group;;Ant Group;Fudan University;Fudan University", "aff_domain": "fudan.edu.cn;antgroup.com;antgroup.com;antgroup.com;;antgroup.com;fudan.edu.cn;fudan.edu.cn", "position": "PhD student;Researcher;Researcher;Engineer;;Researcher;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nzhang2023reading,\ntitle={Reading Order Matters: Information Extraction from Visually-rich Documents by Token Path Prediction},\nauthor={Chong Zhang and Ya Guo and Yi Tu and Huan Chen and Jinyang Tang and Huijia Zhu and Qi Zhang and Tao Gui},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=8WXwPUBFEb}\n}", "github": "", "project": "", "reviewers": "crwh;bY5f;kJnu", "site": "https://openreview.net/forum?id=8WXwPUBFEb", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;3;4", "excitement": "5;3;4", "reproducibility": "5;3;4", "correctness": "5;3;4", "rating_avg": 5.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 4.0, "reproducibility_avg": 4.0, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 8, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0009-0002-6083-7593;0000-0002-9242-493X;0000-0002-2184-4443;0000-0002-7280-2956;;0009-0008-5784-7225;;", "linkedin": ";;;https://www.linkedin.cn/incareer/in/ACoAAD7OfdwBzQSjOAHjerACg1s5MwBzWe_YFz8;;;;", "aff_unique_index": "0;1;1;2;1;0;0", "aff_unique_norm": "Fudan University;Ant Group;Alibaba Group", "aff_unique_dep": ";;", "aff_unique_url": "https://www.fudan.edu.cn;https://www.antgroup.com;https://www.alibaba.com", "aff_unique_abbr": "Fudan;Ant Group;Alibaba", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "8Y9G7579DP", "title": "You Told Me That Joke Twice: A Systematic Investigation of Transferability and Robustness of Humor Detection Models", "track": "main", "status": "Long Main", "tldr": "", "abstract": "In this study, we focus on automatic humor detection, a highly relevant task for conversational AI. 
To date, there are several English datasets for this task, but little research on how models trained on them generalize and behave in the wild. To fill this gap, we carefully analyze existing datasets, train RoBERTa-based and Na\u00efve Bayes classifiers on each of them, and test on the rest. Training and testing on the same dataset yields good results, but the transferability of the models varies widely. Models trained on datasets with jokes from different sources show better transferability, while the amount of training data has a smaller impact. The behavior of the models on out-of-domain data is unstable, suggesting that some of the models overfit, while others learn non-specific humor characteristics. An adversarial attack shows that models trained on pun datasets are less robust. We also evaluate the sense of humor of the chatGPT and Flan-UL2 models in a zero-shot scenario. The LLMs demonstrate competitive results on humor datasets and a more stable behavior on out-of-domain data. We believe that the obtained results will facilitate the development of new datasets and evaluation methodologies in the field of computational humor. We've made all the data from the study and the trained models publicly available at https://github.com/Humor-Research/Humor-detection.", "keywords": "humor detection;evaluation;transferability;robustness", "primary_area": "", "supplementary_material": "", "author": "Alexander Baranov;Vladimir Kniazhevsky;Pavel Braslavski", "authorids": "~Alexander_Baranov1;~Vladimir_Kniazhevsky1;~Pavel_Braslavski1", "gender": "M;;", "homepage": ";;", "dblp": ";;", "google_scholar": ";;", "or_profile": "~Alexander_Baranov1;~Vladimir_Kniazhevsky1;~Pavel_Braslavski1", "aff": "Higher School of Economics;Higher School of Economics, Higher School of Economics;", "aff_domain": "hse.ru;edu.hse.ru;", "position": "PhD student;Undergrad student;", "bibtex": "@misc{\nanonymous2024you,\ntitle={You Told Me That Joke Twice: A Systematic Investigation of Transferability and Robustness of Humor Detection Models},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=8Y9G7579DP}\n}", "github": "", "project": "", "reviewers": "3afp;ff1i;JhfG", "site": "https://openreview.net/forum?id=8Y9G7579DP", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;3;5", "excitement": "4;4;3", "reproducibility": "4;4;5", "correctness": "4;5;4", "rating_avg": 5.0, "confidence_avg": 4.0, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.333333333333333, "correctness_avg": 4.333333333333333, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0001-9549-0265;0009-0009-4300-3950;", "linkedin": ";;", "aff_unique_index": "0;0", "aff_unique_norm": "Higher School of Economics", "aff_unique_dep": "", "aff_unique_url": "https://www.hse.ru", "aff_unique_abbr": "HSE", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Russian Federation" }, { "id": "8cRL5fPwUI", "title": "Time-Aware Language Modeling for Historical Text Dating", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Automatic text dating(ATD) is a challenging task since explicit temporal mentions usually do not appear in texts. Existing state-of-the-art approaches learn word representations via language models, whereas most of them ignore diachronic change of words, which may affect the efforts of text modeling. Meanwhile, few of them consider text modeling for long diachronic documents. 
In this paper, we present a time-aware language model named TALM, to learn temporal word representations by transferring language models of general domains to those of time-specific ones. We also build a hierarchical modeling approach to represent diachronic documents by encoding them with temporal word representations. Experiments on a Chinese diachronic corpus show that our model effectively captures implicit temporal information of words, and outperforms state-of-the-art approaches in historical text dating as well.", "keywords": "Text Dating;Diachronic Text Evaluation;Time-Aware Language Model;Temporal Adaption;Hierarchical Model", "primary_area": "", "supplementary_material": "", "author": "Han Ren;Hai Wang;Yajie Zhao;Yafeng Ren", "authorids": "~Han_Ren1;~Hai_Wang11;~Yajie_Zhao2;~Yafeng_Ren2", "gender": ";;;M", "homepage": ";;;", "dblp": ";;;https://dblp.uni-trier.de/pid/153/9616", "google_scholar": ";;;", "or_profile": "~Han_Ren1;~Hai_Wang11;~Yajie_Zhao2;~Yafeng_Ren2", "aff": ";;Guangdong University of Foreign Studies;", "aff_domain": ";;gdufs.edu.cn;", "position": ";;MS student;", "bibtex": "@inproceedings{\nren2023timeaware,\ntitle={Time-Aware Language Modeling for Historical Text Dating},\nauthor={Han Ren and Hai Wang and Yajie Zhao and Yafeng Ren},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=8cRL5fPwUI}\n}", "github": "", "project": "", "reviewers": "8wH2;KmTN;izi8;WaJ5", "site": "https://openreview.net/forum?id=8cRL5fPwUI", "pdf_size": 0, "rating": "4;4;4;4", "confidence": "3;4;4;4", "excitement": "2;4;4;4", "reproducibility": "3;3;3;4", "correctness": "2;4;4;4", "rating_avg": 4.0, "confidence_avg": 3.75, "excitement_avg": 3.5, "reproducibility_avg": 3.25, "correctness_avg": 3.5, "replies_avg": 14, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0009-0000-6253-2289;", "linkedin": ";;;", "aff_unique_index": "0", "aff_unique_norm": "Guangdong University of Foreign Studies", "aff_unique_dep": "", "aff_unique_url": "http://www.gdufs.edu.cn", "aff_unique_abbr": "GDUFS", "aff_country_unique_index": "0", "aff_country_unique": "China" }, { "id": "8e9aFrksRq", "title": "Semantic Decomposition of Question and SQL for Text-to-SQL Parsing", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Text-to-SQL semantic parsing faces challenges in generalizing to cross-domain and complex queries. Recent research has employed a question decomposition strategy to enhance the parsing of complex SQL queries.However, this strategy encounters two major obstacles: (1) existing datasets lack question decomposition; \n(2) due to the syntactic complexity of SQL, most complex queries cannot be disentangled into sub-queries that can be readily recomposed.\n\nTo address these challenges, we propose a new modular Query Plan Language (QPL) that systematically decomposes SQL queries into simple and regular sub-queries. We develop a translator from SQL to QPL by leveraging analysis of SQL server query optimization plans, and we augment the Spider dataset with QPL programs. 
\n\nExperimental results demonstrate that the modular nature of QPL benefits existing semantic-parsing architectures, \nand training text-to-QPL parsers is more effective than text-to-SQL parsing for semantically equivalent queries.\n\nThe QPL approach offers two additional advantages: (1) QPL programs can be paraphrased as simple questions, which allows us to create a dataset of (complex question, decomposed questions). Training on this dataset, we obtain a Question Decomposer for data retrieval that is sensitive to database schemas. \n(2) QPL is more accessible to non-experts for complex queries, leading to more interpretable output from the semantic parser.", "keywords": "semantic parsing;text-to-sql;question decomposition;compositionality", "primary_area": "", "supplementary_material": "", "author": "Ben Eyal;Moran Mahabi;Ophir Haroche;Amir Bachar;Michael Elhadad", "authorids": "~Ben_Eyal1;~Moran_Mahabi1;~Ophir_Haroche1;~Amir_Bachar1;~Michael_Elhadad1", "gender": "M;;M;;M", "homepage": ";;https://github.com/ophirbh;;https://www.cs.bgu.ac.il/~elhadad/", "dblp": ";;;;69/1744", "google_scholar": ";;;;https://scholar.google.com.tw/citations?user=Is0pLz0AAAAJ", "or_profile": "~Ben_Eyal1;~Moran_Mahabi1;~Ophir_Haroche1;~Amir_Bachar1;~Michael_Elhadad1", "aff": "Ben-Gurion University of the Negev;;Bar-Ilan University;Ben-Gurion University of the Negev;Ben Gurion University of the Negev", "aff_domain": "cs.bgu.ac.il;;biu.ac.il;bgu.ac.il;bgu.ac.il", "position": "PhD student;;MS student;MS student;Full Professor", "bibtex": "@inproceedings{\neyal2023semantic,\ntitle={Semantic Decomposition of Question and {SQL} for Text-to-{SQL} Parsing},\nauthor={Ben Eyal and Moran Mahabi and Ophir Haroche and Amir Bachar and Michael Elhadad},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=8e9aFrksRq}\n}", "github": "", "project": "", "reviewers": "zf7K;wWqA;fkDx", "site": "https://openreview.net/forum?id=8e9aFrksRq", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;3", "excitement": "3;4;3", "reproducibility": "3;5;4", "correctness": "3;4;3", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;0000-0002-5629-2351", "linkedin": "beneyal/;;;amirbachar;", "aff_unique_index": "0;1;0;2", "aff_unique_norm": "Ben-Gurion University of the Negev;Bar-Ilan University;Ben Gurion University of the Negev", "aff_unique_dep": ";;", "aff_unique_url": "https://www.bgu.ac.il;https://www.biu.ac.il;https://www.bgu.ac.il", "aff_unique_abbr": "BGU;BIU;BGU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Israel" }, { "id": "8gYRHspcxK", "title": "Aligning Large Language Models through Synthetic Feedback", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Aligning large language models (LLMs) to human values has become increasingly important as it enables sophisticated steering of LLMs. However, it requires significant human demonstrations and feedback or distillation from proprietary LLMs such as ChatGPT.\nIn this work, we propose a novel alignment learning framework with synthetic feedback not dependent on extensive human annotations and proprietary LLMs. 
First, we perform reward modeling (RM) with synthetic feedback by contrasting responses from vanilla LLMs with various sizes and prompts. Then, we use the RM to simulate high-quality demonstrations to train a supervised policy and further optimize the model with reinforcement learning. Our resulting model, Aligned Language Model with Synthetic Training dataset (ALMoST), outperforms recent open-sourced models, which are trained on the outputs of InstructGPT or human-annotated demonstrations, in alignment benchmarks. In human evaluation, our model is preferred to Alpaca and Dolly-v2, 55.0% and 58.5% of the time, respectively. Further analyses demonstrate the efficacy and importance of synthetic feedback in our framework.", "keywords": "Alignment Learning;Large Language Models", "primary_area": "", "supplementary_material": "", "author": "Sungdong Kim;Sanghwan Bae;Jamin Shin;Soyoung Kang;Donghyun Kwak;Kang Min Yoo;Minjoon Seo", "authorids": "~Sungdong_Kim1;~Sanghwan_Bae1;~Jamin_Shin1;~Soyoung_Kang1;~Donghyun_Kwak1;~Kang_Min_Yoo2;~Minjoon_Seo1", "gender": ";M;M;;M;M;M", "homepage": ";https://www.notion.so/Baaesh-8a26997d08a746b89daefbb53199c77f;https://jayshin.xyz;;https://newsight.tistory.com/;;https://seominjoon.github.io", "dblp": "118/1568;227/3411;225/5387;94/9279;265/6288;163/5657;149/1367", "google_scholar": "xKrSnDoAAAAJ;twQYpokAAAAJ;GuBHIwsAAAAJ;;MROzd8gAAAAJ;BqaWtH8AAAAJ;zYze5fIAAAAJ", "or_profile": "~Sungdong_Kim1;~Sanghwan_Bae1;~Jamin_Shin1;~Soyoung_Kang1;~Donghyun_Kwak1;~Kang_Min_Yoo2;~Minjoon_Seo1", "aff": "NAVER;NAVER;NAVER;NAVER Cloud;NAVER;NAVER;Twelve Labs", "aff_domain": "navercorp.com;navercorp.com;navercorp.com;navercorp.com;navercorp.com;navercorp.com;twelvelabs.io", "position": "Researcher;Researcher;Research Scientist;Researcher;Researcher;Researcher;Chief Scientist", "bibtex": "@inproceedings{\nkim2023aligning,\ntitle={Aligning Large Language Models through Synthetic Feedback},\nauthor={Sungdong Kim and Sanghwan Bae and Jamin Shin and Soyoung Kang and Donghyun Kwak and Kang Min Yoo and Minjoon Seo},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=8gYRHspcxK}\n}", "github": "", "project": "", "reviewers": "Zsnr;Pu1N;NN7k", "site": "https://openreview.net/forum?id=8gYRHspcxK", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;3", "excitement": "4;4;3", "reproducibility": "4;3;3", "correctness": "4;4;3", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;;", "linkedin": ";baaesh;jayshin94/;;donghyun-kwak/;;minjoon-seo/", "aff_unique_index": "0;0;0;0;0;0;1", "aff_unique_norm": "NAVER Corporation;Twelve Labs", "aff_unique_dep": ";", "aff_unique_url": "https://www.naver.com;https://twelvelabs.com", "aff_unique_abbr": "NAVER;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;1", "aff_country_unique": "South Korea;United States" }, { "id": "8iB0FJmOfV", "title": "q2d: Turning Questions into Dialogs to Teach Models How to Search", "track": "main", "status": "Long Main", "tldr": "", "abstract": "One of the exciting capabilities of recent language models for dialog is their ability to independently search for relevant information to ground a given dialog response. 
However, obtaining training data to teach models how to issue search queries is time and resource consuming.\nIn this work, we propose $q2d$: an automatic data generation pipeline that generates information-seeking dialogs from questions. We prompt a large language model (PaLM) to create conversational versions of question answering datasets, and use it to improve query generation models that communicate with external search APIs to ground dialog responses. Unlike previous approaches which relied on human written dialogs with search queries, our method allows to automatically generate query-based grounded dialogs with better control and scale.\nOur experiments demonstrate that: (1) For query generation on the QReCC dataset, models trained on our synthetically-generated data achieve 90%-97% of the performance of models trained on the human-generated data; (2) We can successfully generate data for training dialog models in new domains without any existing dialog data as demonstrated on the multi-hop MuSiQue and Bamboogle QA datasets. (3) We perform a thorough analysis of the generated dialogs showing that humans find them of high quality and struggle to distinguish them from human-written dialogs.", "keywords": "Large language models;dialog generation;query generation;external search API;synthetic training data;QReCC dataset;information-seeking dialogs;q2d;data generation pipeline;synthetic dialogs;human-generated dialogs;grounded responses;anaphora;outdated information;hallucinations;factually consistent responses;multi-hop QA;PaLM", "primary_area": "", "supplementary_material": "", "author": "Yonatan Bitton;Shlomi Cohen-Ganor;Ido Hakimi;Yoad Lewenberg;Roee Aharoni;Enav Weinreb", "authorids": "~Yonatan_Bitton1;~Shlomi_Cohen-Ganor1;~Ido_Hakimi1;~Yoad_Lewenberg1;~Roee_Aharoni1;~Enav_Weinreb1", "gender": "M;M;;;M;M", "homepage": "https://yonatanbitton.github.io/;;;;http://www.roeeaharoni.com;https://www.linkedin.com/in/enav-weinreb-2137a611/", "dblp": "277/7042;;245/9227;131/5357;148/9506;", "google_scholar": "P9Fpf4sAAAAJ;;https://scholar.google.com/citations?hl=en;wPz9NscAAAAJ;https://scholar.google.co.il/citations?user=wV0mHWgAAAAJ;", "or_profile": "~Yonatan_Bitton1;~Shlomi_Cohen-Ganor1;~Ido_Hakimi1;~Yoad_Lewenberg1;~Roee_Aharoni1;~Enav_Weinreb1", "aff": "Hebrew University of Jerusalem;Google;Google;Research, Google;Google;", "aff_domain": "huji.ac.il;google.com;google.com;research.google.com;google.com;", "position": "PhD student;Researcher;Researcher;Researcher;Researcher;", "bibtex": "@inproceedings{\nbitton2023qd,\ntitle={q2d: Turning Questions into Dialogs to Teach Models How to Search},\nauthor={Yonatan Bitton and Shlomi Cohen-Ganor and Ido Hakimi and Yoad Lewenberg and Roee Aharoni and Enav Weinreb},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=8iB0FJmOfV}\n}", "github": "", "project": "", "reviewers": "exvd;5Dsm;GsVR", "site": "https://openreview.net/forum?id=8iB0FJmOfV", "pdf_size": 0, "rating": "5;5;5", "confidence": "5;4;4", "excitement": "4;4;4", "reproducibility": "5;4;4", "correctness": "4;3;4", "rating_avg": 5.0, "confidence_avg": 4.333333333333333, "excitement_avg": 4.0, "reproducibility_avg": 4.333333333333333, "correctness_avg": 3.6666666666666665, "replies_avg": 10, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;", "linkedin": "yonatanbitton/;shlomi-cohen-ganor-2931a38/;idohakimi/;;roeeaharoni;", "aff_unique_index": "0;1;1;1;1", 
"aff_unique_norm": "Hebrew University of Jerusalem;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.huji.ac.il;https://www.google.com", "aff_unique_abbr": "HUJI;Google", "aff_campus_unique_index": "0;1;1;1;1", "aff_campus_unique": "Jerusalem;Mountain View", "aff_country_unique_index": "0;1;1;1;1", "aff_country_unique": "Israel;United States" }, { "id": "8l2m7jctGv", "title": "Focus on the Core: Efficient Attention via Pruned Token Compression for Document Classification", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Transformer-based models have achieved dominant performance in numerous NLP tasks. Despite their remarkable successes, pre-trained transformers such as BERT suffer from a computationally expensive self-attention mechanism that interacts with all tokens, including the ones unfavorable to classification performance. To overcome these challenges, we propose integrating two strategies: token pruning and token combining. Token pruning eliminates less important tokens in the attention mechanism\u2019s key and value as they pass through the layers. Additionally, we adopt fuzzy logic to handle uncertainty and alleviate potential mispruning risks arising from an imbalanced distribution of each token\u2019s importance. Token combining, on the other hand, condenses input sequences into smaller sizes in order to further compress the model. By integrating these two approaches, we not only improve the model\u2019s performance but also reduce its computational demands. Experiments with various datasets demonstrate superior performance compared to baseline models, especially with the best improvement over the existing BERT model, achieving +5%p in accuracy and +5.6%p in F1 score. Additionally, memory cost is reduced to 0.61x, and a speedup of 1.64x is achieved.", "keywords": "document classification;pre-trained transformer;attention;token pruning;token combining", "primary_area": "", "supplementary_material": "", "author": "JungMin Yun;MiHyeon Kim;YoungBin Kim", "authorids": "~JungMin_Yun3;~MiHyeon_Kim3;~YoungBin_Kim1", "gender": ";F;M", "homepage": "https://github.com/Jungmin-YUN-0;https://www.notion.so/cauiipl/IIPL-3f2c561d0e874f80bd1d9ec15cb4668a?pvs=4;https://sites.google.com/view/iiplcau/", "dblp": "359/4117.html;362/8471;89/8603.html", "google_scholar": "p582kbkAAAAJ;xrfzPfIAAAAJ;https://scholar.google.co.kr/citations?user=If6P518AAAAJ", "or_profile": "~JungMin_Yun3;~MiHyeon_Kim3;~YoungBin_Kim1", "aff": "Chung-Ang University;Chung-Ang University;Chung-Ang University", "aff_domain": "cau.ac.kr;cau.ac.kr;cau.ac.kr", "position": "MS student;MS student;Assistant Professor", "bibtex": "@inproceedings{\nyun2023focus,\ntitle={Focus on the Core: Efficient Attention via Pruned Token Compression for Document Classification},\nauthor={JungMin Yun and MiHyeon Kim and YoungBin Kim},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=8l2m7jctGv}\n}", "github": "", "project": "", "reviewers": "fmo5;HEpY;6yQm", "site": "https://openreview.net/forum?id=8l2m7jctGv", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;4", "excitement": "3;2;3", "reproducibility": "3;3;3", "correctness": "3;2;3", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 2.6666666666666665, "reproducibility_avg": 3.0, "correctness_avg": 2.6666666666666665, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0001-6868-286X;;", 
"linkedin": ";%EB%AF%B8%ED%98%84-%EA%B9%80-992b06315/;", "aff_unique_index": "0;0;0", "aff_unique_norm": "Chung-Ang University", "aff_unique_dep": "", "aff_unique_url": "http://www.cau.ac.kr", "aff_unique_abbr": "CAU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "South Korea" }, { "id": "8mJujVetQv", "title": "Less than One-shot: Named Entity Recognition via Extremely Weak Supervision", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "We study the named entity recognition (NER) problem under the extremely weak supervision (XWS) setting, where only one example entity per type is given in a context-free way. \nWhile one can see that XWS is \\emph{lighter than one-shot} in terms of the amount of supervision,\nwe propose a novel method X-NER that can outperform the state-of-the-art one-shot NER methods.\nWe first mine entity spans that are similar to the example entities from an unlabelled training corpus.\nInstead of utilizing entity span representations from language models, we find it more effective to compare the context distributions before and after the span is replaced by the entity example.\nWe then leverage the top-ranked spans as pseudo-labels to train an NER tagger.\nExtensive experiments and analyses on 4 NER datasets show the superior end-to-end NER performance of X-NER, outperforming the state-of-the-art few-shot methods with 1-shot supervision and ChatGPT annotations significantly.\nFinally, our X-NER possesses several notable properties, such as inheriting the cross-lingual abilities of the underlying language models.", "keywords": "extremely weak supervison;few-shot learning;named entity extraction", "primary_area": "", "supplementary_material": "", "author": "Letian Peng;Zihan Wang;Jingbo Shang", "authorids": "~Letian_Peng1;~Zihan_Wang1;~Jingbo_Shang2", "gender": "M;M;M", "homepage": "https://komeijiforce.github.io/;https://zihanwangki.github.io/;https://shangjingbo1226.github.io/", "dblp": "303/0630;152/5077-1;151/3145.html", "google_scholar": "vht13WkAAAAJ;6UWtYZQAAAAJ;0SkFI4MAAAAJ", "or_profile": "~Letian_Peng1;~Zihan_Wang1;~Jingbo_Shang2", "aff": "University of California, San Diego;University of California, San Diego;University of California, San Diego", "aff_domain": "ucsd.edu;ucsd.edu;ucsd.edu", "position": "PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\npeng2023less,\ntitle={Less than One-shot: Named Entity Recognition via Extremely Weak Supervision},\nauthor={Letian Peng and Zihan Wang and Jingbo Shang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=8mJujVetQv}\n}", "github": "", "project": "", "reviewers": "anZR;o4z4;c5pW", "site": "https://openreview.net/forum?id=8mJujVetQv", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;3", "excitement": "3;4;3", "reproducibility": "3;4;4", "correctness": "3;4;3", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": ";;", "aff_unique_index": "0;0;0", "aff_unique_norm": "University of California, San Diego", "aff_unique_dep": "", "aff_unique_url": "https://www.ucsd.edu", "aff_unique_abbr": "UCSD", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "San Diego", 
"aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "8oy8hUeem9", "title": "InstOptima: Evolutionary Multi-objective Instruction Optimization via Large Language Model-based Instruction Operators", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "Instruction-based language modeling has received significant attention in pretrained language models. However, the efficiency of instruction engineering remains low and hinders the development of instruction studies. Recent studies have focused on automating instruction generation, but they primarily aim to improve performance without considering other crucial objectives that impact instruction quality, such as instruction length and perplexity. Therefore, we propose a novel approach (i.e., InstOptima) that treats instruction generation as an evolutionary multi-objective optimization problem. In contrast to text edition-based methods, our approach utilizes a large language model (LLM) to simulate instruction operators, including mutation and crossover. Furthermore, we introduce an objective-guided mechanism for these operators, allowing the LLM to comprehend the objectives and enhance the quality of the generated instructions. Experimental results demonstrate improved fine-tuning performance and the generation of a diverse set of high-quality instructions.", "keywords": "instruction optimization;automated instruction generation;evolutionary multi-objective optimization;language model-based operators", "primary_area": "", "supplementary_material": "", "author": "Heng Yang;Ke Li", "authorids": "~Heng_Yang6;~Ke_Li5", "gender": "M;M", "homepage": "https://colalab.ai/;https://yangheng95.github.io", "dblp": "75/6627-1.html;83/415-8", "google_scholar": "https://scholar.google.co.uk/citations?user=lUFU8KsAAAAJ;https://scholar.google.com/citations?hl=en", "or_profile": "~Ke_Li5;~HENG_YANG5", "aff": "University of Exeter;University of Exeter", "aff_domain": "exeter.ac.uk;exeter.ac.uk", "position": "Associate Professor;PhD student", "bibtex": "@inproceedings{\nyang2023instoptima,\ntitle={InstOptima: Evolutionary Multi-objective Instruction Optimization via Large Language Model-based Instruction Operators},\nauthor={Heng Yang and Ke Li},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=8oy8hUeem9}\n}", "github": "", "project": "", "reviewers": "YhoU;RjTW;WrSn", "site": "https://openreview.net/forum?id=8oy8hUeem9", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;3;4", "excitement": "3;3;3", "reproducibility": "3;4;4", "correctness": "3;3;4", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0001-7200-4244;0000-0002-6831-196X", "linkedin": "ke-li-29423226/;heng-yang-3b6278243/", "aff_unique_index": "0;0", "aff_unique_norm": "University of Exeter", "aff_unique_dep": "", "aff_unique_url": "https://www.exeter.ac.uk", "aff_unique_abbr": "Exeter", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United Kingdom" }, { "id": "8uSB79mZks", "title": "Relation-Aware Question Answering for Heterogeneous Knowledge Graphs", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Multi-hop Knowledge Base Question Answering(KBQA) 
aims to find the answer entity in a knowledge graph (KG), which requires multiple steps of reasoning. Existing retrieval-based approaches solve this task by concentrating on the specific relation\nat different hops and predicting the intermediate entity within the reasoning path. \nHowever, these models fail to utilize information from head-tail entities and the semantic connection between relations to enhance the current relation representation, which undermines the information capturing of relations in KGs. To address this issue, we construct a \\textbf{dual relation graph} where each node denotes a relation in the original KG (\\textbf{primal entity graph}) and edges are constructed between relations sharing same head or tail entities. Then we iteratively do primal entity graph reasoning, dual relation graph information propagation, and interaction between these two graphs. In this way, the interaction between entity and relation is enhanced, and we derive better entity and relation representations. Experiments on two public datasets, WebQSP and CWQ, show that our approach achieves a significant performance gain over the prior state-of-the-art.", "keywords": "Question Answering;Knowledge Graph;Heterogeneous", "primary_area": "", "supplementary_material": "", "author": "Haowei Du;Quzhe Huang;Chen Li;Chen Zhang;Yang Li;Dongyan Zhao", "authorids": "~Haowei_Du1;~Quzhe_Huang1;~Chen_Li37;~Chen_Zhang10;~Yang_Li45;~Dongyan_Zhao2", "gender": "M;;M;M;M;M", "homepage": ";https://andrewzhe.github.io/;https://github.com/ChenLi09;https://luciusssss.github.io/;;https://www.wict.pku.edu.cn/zhaodongyan/en/", "dblp": "303/7899.html;278/1884;;94/4084-19;;63/1870", "google_scholar": "uu9HarwAAAAJ;https://scholar.google.com/citations?hl=en;7-eyqnoAAAAJ;NSFlB88AAAAJ;AeCTbv8AAAAJ;lhR8-68AAAAJ", "or_profile": "~Haowei_Du1;~Quzhe_Huang1;~Chen_Li37;~Chen_Zhang10;~Yang_Li45;~Dongyan_Zhao2", "aff": "Peking University;Peking University;Ant Group;Peking University;Alibaba Group;Peking University", "aff_domain": "pku.edu.cn;pku.edu.cn;antgroup.com;pku.edu.cn;alibaba-inc.com;pku.edu.cn", "position": "PhD student;PhD student;Researcher;PhD student;Researcher;Full Professor", "bibtex": "@inproceedings{\ndu2023relationaware,\ntitle={Relation-Aware Question Answering for Heterogeneous Knowledge Graphs},\nauthor={Haowei Du and Quzhe Huang and Chen Li and Chen Zhang and Yang Li and Dongyan Zhao},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=8uSB79mZks}\n}", "github": "", "project": "", "reviewers": "qVC6;h3Ex;ngck", "site": "https://openreview.net/forum?id=8uSB79mZks", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;3", "excitement": "4;3;3", "reproducibility": "4;3;3", "correctness": "4;3;3", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.3333333333333335, "replies_avg": 10, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0001-7683-0279;;;0000-0001-5842-0516;;", "linkedin": ";;;;;", "aff_unique_index": "0;0;1;0;2;0", "aff_unique_norm": "Peking University;Ant Group;Alibaba Group", "aff_unique_dep": ";;", "aff_unique_url": "http://www.pku.edu.cn;https://www.antgroup.com;https://www.alibaba.com", "aff_unique_abbr": "Peking U;Ant Group;Alibaba", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" 
}, { "id": "8xyd9i1XLb", "title": "MoPe: Model Perturbation based Privacy Attacks on Language Models", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Recent work has shown that Large Language Models (LLMs) can unintentionally leak sensitive information present in their training data. In this paper, we present Model Perturbations (MoPe), a new method to identify with high confidence if a given text is in the training data of a pre-trained language model, given white-box access to the models parameters. MoPe adds noise to the model in parameter space and measures the drop in log-likelihood at a given point $x$, a statistic we show approximates the trace of the Hessian matrix with respect to model parameters. Across language models ranging from $70$M to $12$B parameters, we show that MoPe is more effective than existing loss-based attacks and recently proposed perturbation-based methods. We also examine the role of training point order and model size in attack success, and empirically demonstrate that MoPe accurately approximate the trace of the Hessian in practice. Our results show that the loss of a point alone is insufficient to determine extractability---there are training points we can recover using our method that have average loss. This casts some doubt on prior works that use the loss of a point as evidence of memorization or unlearning.", "keywords": "large language model;membership inference;data extraction", "primary_area": "", "supplementary_material": "", "author": "Marvin Li;Jason Wang;Jeffrey George Wang;Seth Neel", "authorids": "~Marvin_Li1;~Jason_Wang3;~Jeffrey_George_Wang1;~Seth_Neel2", "gender": "M;M;Not Specified;M", "homepage": "https://marvinfli.com;https://www.jasonwang.app/;;https://sethneel.com", "dblp": ";;371/4465.html;188/6406", "google_scholar": "NhMTzpsAAAAJ;https://scholar.google.com/citations?hl=en;gF8ZG3cAAAAJ;", "or_profile": "~Marvin_Li1;~Jason_Wang3;~Jeffrey_George_Wang1;~Seth_Neel1", "aff": "Harvard University;Harvard University;Harvard University;Harvard University", "aff_domain": "harvard.edu;harvard.edu;harvard.edu;harvard.edu", "position": "Undergrad student;Undergrad student;Undergrad student;Assistant Professor", "bibtex": "@inproceedings{\nli2023mope,\ntitle={MoPe: Model Perturbation based Privacy Attacks on Language Models},\nauthor={Marvin Li and Jason Wang and Jeffrey George Wang and Seth Neel},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=8xyd9i1XLb}\n}", "github": "", "project": "", "reviewers": "Dn8Y;EQtA;sLT5", "site": "https://openreview.net/forum?id=8xyd9i1XLb", "pdf_size": 0, "rating": "5;5;5", "confidence": "3;4;3", "excitement": "3;4;4", "reproducibility": "3;4;4", "correctness": "3;4;3", "rating_avg": 5.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0002-5707-5113;", "linkedin": ";jasonwang292/;jeffreygwang/;", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Harvard University", "aff_unique_dep": "", "aff_unique_url": "https://www.harvard.edu", "aff_unique_abbr": "Harvard", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "8zQ77tPTMR", "title": "Consistency is Key: On Data-Efficient Modality Transfer in 
Speech Translation", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "End-to-end approaches have shown promising results for speech translation (ST), but they suffer from its data scarcity compared to machine translation (MT). To address this, progressive training has become a common practice, of using external MT data during the fine-tuning phase. Despite of its prevalence and computational overhead, its validity is not extensively corroborated yet. This paper conducts an empirical investigation and finds that progressive training is ineffective. We identify learning-forgetting trade-off as a critical obstacle, then hypothesize and verify that consistency learning (CL) breaks the dilemma of learning-forgetting. The proposed method, which combines knowledge distillation (KD) and CL, outperforms the previous methods on MuST-C dataset even without additional data, and our proposed consistency-informed KD achieves additional improvements against KD+CL. Code and models are availble at https://github.com/hjlee1371/consistency-s2tt.", "keywords": "Speech Translation;Cross-modal Transfer;Efficient Training", "primary_area": "", "supplementary_material": "", "author": "Hojin Lee;Changmin Lee;seung-won hwang", "authorids": "~Hojin_Lee1;~Changmin_Lee1;~seung-won_hwang2", "gender": ";M;", "homepage": ";;http://seungwonh.github.io", "dblp": ";;h/SeungwonHwang", "google_scholar": ";YMai6mEAAAAJ;63bBmc3mYrAC", "or_profile": "~Hojin_Lee1;~Changmin_Lee1;~seung-won_hwang2", "aff": "Kakao Enterprise;Kakao Enterprise;Seoul National University", "aff_domain": "kakaoenterprise.com;kakaoenterprise.com;snu.ac.kr", "position": "Researcher;Researcher;Full Professor", "bibtex": "@inproceedings{\nlee2023consistency,\ntitle={Consistency is Key: On Data-Efficient Modality Transfer in Speech Translation},\nauthor={Hojin Lee and Changmin Lee and seung-won hwang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=8zQ77tPTMR}\n}", "github": "", "project": "", "reviewers": "8RCh;muUy;zMRf;mLFp", "site": "https://openreview.net/forum?id=8zQ77tPTMR", "pdf_size": 0, "rating": "3;3;3;3", "confidence": "4;2;4;3", "excitement": "3;3;3;3", "reproducibility": "3;3;4;3", "correctness": "3;3;3;3", "rating_avg": 3.0, "confidence_avg": 3.25, "excitement_avg": 3.0, "reproducibility_avg": 3.25, "correctness_avg": 3.0, "replies_avg": 14, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": ";changmin-lee-7a9b60259;", "aff_unique_index": "0;0;1", "aff_unique_norm": "Kakao Enterprise;Seoul National University", "aff_unique_dep": ";", "aff_unique_url": "https://enterprise.kakao.com;https://www.snu.ac.kr", "aff_unique_abbr": "Kakao Enterprise;SNU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "South Korea" }, { "id": "99msyVXHEq", "title": "CLAIR: Evaluating Image Captions with Large Language Models", "track": "main", "status": "Short Main", "tldr": "", "abstract": "The evaluation of machine-generated image captions poses an interesting yet persistent challenge. Effective evaluation measures must consider numerous dimensions of similarity, including semantic relevance, visual structure, object interactions, caption diversity, and specificity. Existing highly-engineered measures attempt to capture specific aspects, but fall short in providing a holistic score that aligns closely with human judgments. 
Here, we propose CLAIR, a novel method that leverages the zero-shot language modeling capabilities of large language models (LLMs) to evaluate candidate captions. In our evaluations, CLAIR demonstrates a stronger correlation with human judgments of caption quality compared to existing measures. Notably, on Flickr8K-Expert, CLAIR achieves relative correlation improvements over SPICE of 39.6% and over image-augmented methods such as RefCLIP-S of 18.3%. Moreover, CLAIR provides noisily interpretable results by allowing the language model to identify the underlying reasoning behind its assigned score.", "keywords": "Image Captioning;Evaluation;Metrics;Large Language Models", "primary_area": "", "supplementary_material": "", "author": "David Chan;Suzanne Petryk;Joseph E. Gonzalez;Trevor Darrell;John Canny", "authorids": "~David_Chan3;~Suzanne_Petryk1;~Joseph_E._Gonzalez1;~Trevor_Darrell2;~John_Canny1", "gender": "M;F;M;M;M", "homepage": "https://people.eecs.berkeley.edu/~davidchan/;https://suziepetryk.com;http://eecs.berkeley.edu/~jegonzal;http://www.cs.berkeley.edu/~jfc/;https://people.eecs.berkeley.edu/~trevor/", "dblp": "80/9659;262/3936;61/8262;;d/TrevorDarrell", "google_scholar": "qa4M89wAAAAJ;nSpXpqMAAAAJ;https://scholar.google.com.tw/citations?user=gM2WW9UAAAAJ;https://scholar.google.com.tw/citations?user=LAv0HTEAAAAJ;https://scholar.google.com.tw/citations?user=bh-uRFMAAAAJ", "or_profile": "~David_Chan3;~Suzanne_Petryk1;~Joseph_E._Gonzalez1;~John_Canny1;~trevor_darrell1", "aff": "University of California, Berkeley;University of California, Berkeley;University of California, Berkeley;University of California, Berkeley;Electrical Engineering & Computer Science Department", "aff_domain": "berkeley.edu;berkeley.edu;berkeley.edu;berkeley.edu;eecs.berkeley.edu", "position": "PhD student;PhD student;Associate Professor;Full Professor;Professor", "bibtex": "@inproceedings{\nchan2023clair,\ntitle={{CLAIR}: Evaluating Image Captions with Large Language Models},\nauthor={David Chan and Suzanne Petryk and Joseph E. 
Gonzalez and Trevor Darrell and John Canny},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=99msyVXHEq}\n}", "github": "", "project": "", "reviewers": "Cmgu;2NfD;kf2G;kPSe", "site": "https://openreview.net/forum?id=99msyVXHEq", "pdf_size": 0, "rating": "4;4;4;4", "confidence": "4;4;4;3", "excitement": "3;3;3;3", "reproducibility": "2;4;4;1", "correctness": "3;3;4;3", "rating_avg": 4.0, "confidence_avg": 3.75, "excitement_avg": 3.0, "reproducibility_avg": 2.75, "correctness_avg": 3.25, "replies_avg": 14, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0003-2921-956X;;", "linkedin": ";suzanne-petryk/;;;", "aff_unique_index": "0;0;0;0;1", "aff_unique_norm": "University of California, Berkeley;Electrical Engineering & Computer Science Department", "aff_unique_dep": ";Electrical Engineering & Computer Science", "aff_unique_url": "https://www.berkeley.edu;", "aff_unique_abbr": "UC Berkeley;", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Berkeley;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States;" }, { "id": "9Ax0pyaLgh", "title": "Cross-modality Data Augmentation for End-to-End Sign Language Translation", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "End-to-end sign language translation (SLT) aims to directly convert sign language videos into spoken language texts without intermediate representations. It has been challenging due to the data scarcity of labeled data and the modality gap between sign videos and texts. To tackle these challenges, we propose a novel Cross-modality Data Augmentation (XmDA) framework to transfer the powerful gloss-to-text translation capabilities to end-to-end sign language translation (i.e., video-to-text). Specifically, XmDA consists of two key components: cross-modality mix-up and cross-modality knowledge distillation. The former one explicitly encourages the alignment between sign video features and gloss embeddings to bridge the modality gap. The latter one utilizes the generation knowledge from gloss-to-text teacher models to guide the spoken language text generation. Experimental results on two widely used SLT datasets, i.e., PHOENIX-2014T and CSL-Daily, demonstrate that the proposed XmDA framework significantly and consistently outperforms the baseline models. 
Extensive analyses confirm our claim that XmDA enhances end-to-end sign language translation by reducing the representation distance between sign videos and glosses, as well as improving the translation of low-frequency words and long sentences.", "keywords": "Sign Language Translation;Cross Modality;Data Augmentation", "primary_area": "", "supplementary_material": "", "author": "Jinhui Ye;Wenxiang Jiao;Xing Wang;Zhaopeng Tu;Hui Xiong", "authorids": "~Jinhui_Ye1;~Wenxiang_Jiao1;~Xing_Wang1;~Zhaopeng_Tu1;~Hui_Xiong1", "gender": "M;M;M;M;M", "homepage": "https://jhuiye.com;https://wxjiao.github.io/;http://xingwang4nlp.com/;http://www.zptu.net;https://www.hkust-gz.edu.cn/people/hui-xiong/", "dblp": "254/8172;239/4883;02/3674-7;71/9281;262/1686-1.html", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;CvtODukAAAAJ;6AqRKa0AAAAJ;IvE2zRgAAAAJ;cVDF1tkAAAAJ", "or_profile": "~Jinhui_Ye1;~Wenxiang_Jiao1;~Xing_Wang1;~Zhaopeng_Tu1;~Hui_Xiong1", "aff": "Hong Kong University of Science and Technology(Guangzhou));Tencent AI Lab;Tencent AI Lab;Tencent AI Lab;Hong Kong University of Science and Technology (Guangzhou)", "aff_domain": "hkust.edu;tencent.com;tencent.com;tencent.com;hkust.edu", "position": "MS student;Researcher;Researcher;Principal Researcher;Full Professor", "bibtex": "@inproceedings{\nye2023crossmodality,\ntitle={Cross-modality Data Augmentation for End-to-End Sign Language Translation},\nauthor={Jinhui Ye and Wenxiang Jiao and Xing Wang and Zhaopeng Tu and Hui Xiong},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=9Ax0pyaLgh}\n}", "github": "", "project": "", "reviewers": "KVjs;5AGz;VzGU", "site": "https://openreview.net/forum?id=9Ax0pyaLgh", "pdf_size": 0, "rating": "3;3;3", "confidence": "5;3;4", "excitement": "3;3;4", "reproducibility": "4;4;3", "correctness": "4;4;4", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0002-0737-9653;;0000-0001-6016-6465", "linkedin": ";;;tuzhaopeng;", "aff_unique_index": "0;1;1;1;0", "aff_unique_norm": "Hong Kong University of Science and Technology;Tencent", "aff_unique_dep": ";Tencent AI Lab", "aff_unique_url": "https://www.ust.hk;https://ai.tencent.com", "aff_unique_abbr": "HKUST;Tencent AI Lab", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "9BuTdxSfIO", "title": "kNN-CM: A Non-parametric Inference-Phase Adaptation of Parametric Text Classifiers", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Semi-parametric models exhibit the properties of both parametric and non-parametric modeling and have been shown to be effective in the next-word prediction language modeling task. However, there is a lack of studies on the text-discriminating properties of such models. We propose an inference-phase approach---\\textit{k}-Nearest Neighbor Classification Model (\\textit{k}NN-CM)---that enhances the capacity of a pre-trained parametric text classifier by incorporating a simple neighborhood search through the representation space of (memorized) training samples. 
The final class prediction of \\textit{k}NN-CM is based on the convex combination of probabilities obtained from \\textit{k}NN search and prediction of the classifier. Our experiments show consistent performance improvements on eight SuperGLUE tasks, three adversarial natural language inference (ANLI) datasets, 11 question-answering (QA) datasets, and two sentiment classification datasets.", "keywords": "nearest neighbors;text classification;semi-parametric models;non-parametric models;kNN-CM;kNN-LM", "primary_area": "", "supplementary_material": "", "author": "Rishabh Bhardwaj;Yingting Li;Navonil Majumder;Bo Cheng;Soujanya Poria", "authorids": "~Rishabh_Bhardwaj1;~Yingting_Li1;~Navonil_Majumder1;~Bo_Cheng3;~Soujanya_Poria1", "gender": "M;F;M;M;M", "homepage": "https://www.rishabh.ai/;;;https://scs.bupt.edu.cn/info/1292/2714.htm;https://soujanyaporia.github.io", "dblp": "245/1413.html;234/7775;198/3608;05/2700-1;116/4904", "google_scholar": "nomHn1sAAAAJ;;jPfEvuQAAAAJ;https://scholar.google.com.hk/citations?hl=zh-CN;https://scholar.google.co.in/citations?user=oS6gRc4AAAAJ", "or_profile": "~Rishabh_Bhardwaj1;~Yingting_Li1;~Navonil_Majumder1;~Bo_Cheng3;~Soujanya_Poria1", "aff": "Singapore University of Technology and Design;Beijing University of Posts and Telecommunications;Singapore University of Technology and Design;Beijing University of Posts and Telecommunications;Singapore University of Technology and Design", "aff_domain": "sutd.edu.sg;bupt.edu.cn;sutd.edu.sg;bupt.edu.cn;sutd.edu.sg", "position": "PhD student;PhD student;Researcher;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nbhardwaj2023knncm,\ntitle={k{NN}-{CM}: A Non-parametric Inference-Phase Adaptation of Parametric Text Classifiers},\nauthor={Rishabh Bhardwaj and Yingting Li and Navonil Majumder and Bo Cheng and Soujanya Poria},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=9BuTdxSfIO}\n}", "github": "", "project": "", "reviewers": "fnKr;ifUU;rD6m", "site": "https://openreview.net/forum?id=9BuTdxSfIO", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;4", "excitement": "3;2;3", "reproducibility": "4;4;4", "correctness": "3;3;3", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 2.6666666666666665, "reproducibility_avg": 4.0, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0003-3833-4754;;;;", "linkedin": "rishabh-bhardwaj-nlp/;yingting-li-a598481b8?lipi=urn%3Ali%3Apage%3Ad_flagship3_profile_view_base_contact_details%3Bn%2FyqyydjQC%2Bulg0GK%2Bhq%2FA%3D%3D;;;", "aff_unique_index": "0;1;0;1;0", "aff_unique_norm": "Singapore University of Technology and Design;Beijing University of Posts and Telecommunications", "aff_unique_dep": ";", "aff_unique_url": "https://www.sutd.edu.sg;http://www.bupt.edu.cn/", "aff_unique_abbr": "SUTD;BUPT", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Beijing", "aff_country_unique_index": "0;1;0;1;0", "aff_country_unique": "Singapore;China" }, { "id": "9EYS2EEqFq", "title": "CAR: Conceptualization-Augmented Reasoner for Zero-Shot Commonsense Question Answering", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "The task of zero-shot commonsense question answering evaluates models on their capacity to reason about general scenarios beyond\nthose presented in specific datasets. 
Existing approaches for tackling this task leverage external knowledge from CommonSense Knowledge Bases (CSKBs) by pre-training the model on synthetic QA pairs constructed from CSKBs. In these approaches, negative examples (distractors) are formulated by randomly sampling from CSKBs using fairly primitive keyword constraints. However, two bottlenecks limit these approaches: the inherent incompleteness of CSKBs limits the semantic coverage of synthetic QA pairs, and the lack of human annotations makes the sampled negative examples potentially uninformative and contradictory. \n\nTo tackle these limitations above, we propose Conceptualization-Augmented Reasoner (CAR), a zero-shot commonsense question-answering framework that fully leverages the power of conceptualization. Specifically, CAR abstracts a commonsense knowledge triple to many higher-level instances, which increases the coverage of the CSKB and expands the ground-truth answer space, reducing the likelihood of selecting false negative distractors. Extensive experiments demonstrate that CAR more robustly generalizes to answering questions about zero-shot \ncommonsense scenarios than existing methods, including large language models, such as GPT3.5 and ChatGPT. Our code, data, and model checkpoints are available at https://github.com/HKUST-KnowComp/CAR.", "keywords": "commonsense reasoning;conceptualization;zero shot;question answering", "primary_area": "", "supplementary_material": "", "author": "Weiqi Wang;Tianqing Fang;Wenxuan Ding;Baixuan Xu;Xin Liu;Yangqiu Song;Antoine Bosselut", "authorids": "~Weiqi_Wang1;~Tianqing_Fang1;~Wenxuan_Ding1;~Baixuan_Xu1;~Xin_Liu9;~Yangqiu_Song1;~Antoine_Bosselut1", "gender": "M;M;F;M;M;M;M", "homepage": "https://mighty-weaver.github.io/;http://fangtq.com/;https://wenwen-d.github.io/;https://tonyxu12138.github.io/;https://www.cse.ust.hk/~xliucr/;https://www.cse.ust.hk/~yqsong/;https://atcbosselut.github.io/", "dblp": "51/5775-1;283/4921;36/1339-1;187/0059.html;76/1820-39.html;86/2159;184/3742", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com.hk/citations?user=Tb3rc34AAAAJ;GyHBjwQAAAAJ;YhWGUKUAAAAJ;https://scholar.google.com.hk/citations?user=WvC4upQAAAAJ;MdQZ-q8AAAAJ;XD9hkJwAAAAJ", "or_profile": "~Weiqi_Wang1;~Tianqing_Fang1;~Wenxuan_Ding1;~Baixuan_Xu1;~Xin_Liu9;~Yangqiu_Song1;~Antoine_Bosselut1", "aff": "Hong Kong University of Science and Technology;Hong Kong University of Science and Technology;Hong Kong University of Science and Technology;Hong Kong University of Science and Technology;Hong Kong University of Science and Technology;Hong Kong University of Science and Technology;Swiss Federal Institute of Technology Lausanne", "aff_domain": "ust.hk;ust.hk;ust.hk;ust.hk;ust.hk;ust.hk;epfl.ch", "position": "PhD student;PhD student;Undergrad student;Undergrad student;PhD student;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\nwang2023car,\ntitle={{CAR}: Conceptualization-Augmented Reasoner for Zero-Shot Commonsense Question Answering},\nauthor={Weiqi Wang and Tianqing Fang and Wenxuan Ding and Baixuan Xu and Xin Liu and Yangqiu Song and Antoine Bosselut},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=9EYS2EEqFq}\n}", "github": "", "project": "", "reviewers": "hZ2z;NWmS;qJZg", "site": "https://openreview.net/forum?id=9EYS2EEqFq", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;3;5", "excitement": "4;3;3", "reproducibility": "5;4;4", 
"correctness": "4;4;4", "rating_avg": 5.0, "confidence_avg": 4.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.333333333333333, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-1617-9805;;;0000-0001-8175-7598;0000-0001-9610-9526;0000-0002-7818-6090;", "linkedin": "weiqi-wang-a49b5019a/;;wenxuan-ding-0b299923b/;;xin-liu-179830143;yqsong/;", "aff_unique_index": "0;0;0;0;0;0;1", "aff_unique_norm": "Hong Kong University of Science and Technology;Swiss Federal Institute of Technology Lausanne", "aff_unique_dep": ";", "aff_unique_url": "https://www.ust.hk;https://www.epfl.ch", "aff_unique_abbr": "HKUST;EPFL", "aff_campus_unique_index": "0;0;0;0;0;0;1", "aff_campus_unique": "Hong Kong SAR;Lausanne", "aff_country_unique_index": "0;0;0;0;0;0;1", "aff_country_unique": "China;Switzerland" }, { "id": "9EYaUfyRYk", "title": "Beyond Testers\u2019 Biases: Guiding Model Testing with Knowledge Bases using LLMs", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Current model testing work has mostly focused on creating test cases.\nIdentifying what to test is a step that is largely ignored and poorly supported.\nWe propose Weaver, an interactive tool that supports requirements elicitation for guiding model testing.\nWeaver uses large language models to generate knowledge bases and recommends concepts from them interactively, allowing testers to elicit requirements for further testing.\nWeaver provides rich external knowledge to testers and encourages testers to systematically explore diverse concepts beyond their own biases. \nIn a user study, we show that both NLP experts and non-experts identified more, as well as more diverse concepts worth testing when using Weaver.\nCollectively, they found more than 200 failing test cases for stance detection with zero-shot ChatGPT.\nOur case studies further show that Weaver can help practitioners test models in real-world settings,\nwhere developers define more nuanced application scenarios (e.g., code understanding and transcript summarization) using LLMs.", "keywords": "model testing;knowledge base;large language models;human-centered NLP", "primary_area": "", "supplementary_material": "", "author": "Chenyang Yang;Rishabh Rustogi;Rachel Brower-Sinning;Grace Lewis;Christian Kaestner;Tongshuang Wu", "authorids": "~Chenyang_Yang1;~Rishabh_Rustogi1;~Rachel_Brower-Sinning1;~Grace_Lewis1;~Christian_Kaestner1;~Tongshuang_Wu1", "gender": "M;M;F;F;;F", "homepage": "https://www.cs.cmu.edu/~cyang3/;;;https://www.sei.cmu.edu/staff/glewis;;http://cs.cmu.edu/~sherryw", "dblp": "32/2760-2.html;;;51/1171.html;;179/3791", "google_scholar": "E5gadxYAAAAJ;;;uwshWMoAAAAJ;;CeQd_DsAAAAJ", "or_profile": "~Chenyang_Yang1;~Rishabh_Rustogi1;~Rachel_Brower-Sinning1;~Grace_Lewis1;~Christian_Kaestner1;~Tongshuang_Wu1", "aff": "Carnegie Mellon University;;Carnegie Mellon University;Carnegie Mellon Software Engineering Institute;;School of Computer Science, Carnegie Mellon University", "aff_domain": "cmu.edu;;cmu.edu;cmu.edu;;cs.cmu.edu", "position": "PhD student;;Researcher;Principal Researcher;;Assistant Professor", "bibtex": "@inproceedings{\nyang2023beyond,\ntitle={Beyond Testers{\\textquoteright} Biases: Guiding Model Testing with Knowledge Bases using {LLM}s},\nauthor={Chenyang Yang and Rishabh Rustogi and Rachel Brower-Sinning and Grace Lewis and Christian Kaestner and Tongshuang Wu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language 
Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=9EYaUfyRYk}\n}", "github": "", "project": "", "reviewers": "Mdj9;hoyj;rtMG", "site": "https://openreview.net/forum?id=9EYaUfyRYk", "pdf_size": 0, "rating": "3;3;3", "confidence": "5;5;4", "excitement": "4;2;3", "reproducibility": "1;4;2", "correctness": "4;3;3", "rating_avg": 3.0, "confidence_avg": 4.666666666666667, "excitement_avg": 3.0, "reproducibility_avg": 2.3333333333333335, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0000-0001-9128-9863;;", "linkedin": ";rishabh-rustogi/;;gracelewis/;;", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Carnegie Mellon University", "aff_unique_dep": "", "aff_unique_url": "https://www.cmu.edu", "aff_unique_abbr": "CMU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Pittsburgh", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "9F6h0oIYsP", "title": "Leveraging Contrastive Learning and Knowledge Distillation for Incomplete Modality Rumor Detection", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Rumors spread rapidly through online social microblogs at a relatively low cost, causing substantial economic losses and negative consequences in our daily lives. Existing rumor detection models often neglect the underlying semantic coherence between text and image components in multimodal posts, as well as the challenges posed by incomplete modalities in single modal posts, such as missing text or images. This paper presents CLKD-IMRD, a novel framework for Incomplete Modality Rumor Detection. CLKD-IMRD employs Contrastive Learning and Knowledge Distillation to capture the semantic consistency between text and image pairs, while also enhancing model generalization to incomplete modalities within individual posts. 
Extensive experimental results demonstrate that our CLKD-IMRD outperforms state-of-the-art methods on two English and two Chinese benchmark datasets for rumor detection in social media.", "keywords": "Rumor detection;Contrastive learning;Knowledge distillation;Incomplete modality", "primary_area": "", "supplementary_material": "", "author": "Fan Xu;Pinyun Fu;Qi Huang;Bowei Zou;AiTi Aw;Mingwen Wang", "authorids": "~Fan_Xu6;~Pinyun_Fu1;~Qi_Huang1;~Bowei_Zou1;~AiTi_Aw1;~Mingwen_Wang1", "gender": ";;M;M;;", "homepage": "https://www.semanticscholar.org/author/Fan-Xu/38612664;;;;;https://www.semanticscholar.org/author/Mingwen-Wang/145554569", "dblp": ";;;136/9191;;", "google_scholar": ";;https://scholar.google.com.hk/citations?user=adq6F1QAAAAJ;https://scholar.google.com/citations?hl=zh-CN;;", "or_profile": "~Fan_Xu6;~Pinyun_Fu1;~Qi_Huang1;~Bowei_Zou1;~AiTi_Aw1;~Mingwen_Wang1", "aff": "Jiangxi Normal University;;Jiangxi Normal University;A*STAR;;Jiangxi Normal University", "aff_domain": "jxnu.edu.cn;;jxnu.edu.cn;a-star.edu.sg;;jxnu.edu.cn", "position": "Associate Professor;;Lecturer;Researcher;;Full Professor", "bibtex": "@inproceedings{\nxu2023leveraging,\ntitle={Leveraging Contrastive Learning and Knowledge Distillation for Incomplete Modality Rumor Detection},\nauthor={Fan Xu and Pinyun Fu and Qi Huang and Bowei Zou and AiTi Aw and Mingwen Wang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=9F6h0oIYsP}\n}", "github": "", "project": "", "reviewers": "4H7e;Ytxq;DUWX", "site": "https://openreview.net/forum?id=9F6h0oIYsP", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;4;3", "excitement": "3;3;3", "reproducibility": "3;4;4", "correctness": "2;3;3", "rating_avg": 3.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 2.6666666666666665, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;", "linkedin": ";;;;;", "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Jiangxi Normal University;Agency for Science, Technology and Research", "aff_unique_dep": ";", "aff_unique_url": "http://www.jxnu.edu.cn;https://www.a-star.edu.sg", "aff_unique_abbr": "JXNU;A*STAR", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "China;Singapore" }, { "id": "9GxP2Kw8IC", "title": "Synthesize, if you do not have: Effective Synthetic Dataset Creation Strategies for Self-Supervised Opinion Summarization in E-commerce", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "In e-commerce, opinion summarization is the process of condensing the opinions presented in product reviews. However, the absence of\nlarge amounts of supervised datasets presents challenges in generating both aspect-specific and general opinion summaries. Existing\napproaches have attempted to address these challenges through synthetic dataset creation (SDC). However, general opinion summarization models struggle to generate summaries faithful to the input reviews whereas aspect-specific opinion summarization models are limited due to their reliance on human-specified aspects and seed words. To address this, we propose SDC strategies tailored for general and aspect-specific opinion summarization. We experimented on three e-commerce test sets: Oposum+, Amazon, and Flipkart. 
For general opinion summarization, a pre-trained language model (PLM) fine-tuned on our general synthetic dataset surpasses the SOTA on average by 2.3 R1 points. Faithfulness evaluation metrics and human evaluations indicate that our model-generated summaries are more faithful to the input compared to others. For aspect-specific opinion summarization, a PLM fine-tuned on our aspect-specific synthetic dataset surpasses SOTA by \u223c 1 R1 point without the aid of any human-specified aspects or seed words.", "keywords": "opinion summarization;summarization;ecommerce;nlp", "primary_area": "", "supplementary_material": "", "author": "Tejpalsingh Siledar;Suman Banerjee;Amey Patil;Sudhanshu Shekhar Singh;Muthusamy Chelliah;Nikesh Garera;Pushpak Bhattacharyya", "authorids": "~Tejpalsingh_Siledar2;~Suman_Banerjee4;~Amey_Patil1;~Sudhanshu_Shekhar_Singh1;~Muthusamy_Chelliah1;~Nikesh_Garera1;~Pushpak_Bhattacharyya1", "gender": "M;M;M;M;M;M;M", "homepage": ";;;;;;https://www.cse.iitb.ac.in/~pb/", "dblp": ";345/5455-3;;;70/3304;76/322;p/PushpakBhattacharyya", "google_scholar": "-vOma0QAAAAJ;;;;;https://scholar.google.com/scholar?q=nikesh+garera;https://scholar.google.com.tw/citations?user=vvg-pAkAAAAJ", "or_profile": "~Tejpalsingh_Siledar2;~Suman_Banerjee4;~Amey_Patil1;~Sudhanshu_Shekhar_Singh1;~Muthusamy_Chelliah1;~Nikesh_Garera1;~Pushpak_Bhattacharyya1", "aff": "Indian Institute of Technology, Bombay;Flipkart;;;Flipkart;Flipkart ;Indian Institute of Technology, Bombay, Dhirubhai Ambani Institute Of Information and Communication Technology", "aff_domain": "iitb.ac.in;flipkart.com;;;flipkart.com;flipkart.com;iitb.ac.in", "position": "MS student;Data Scientist;;;Principal Researcher;Principal Data Scientist;Full Professor", "bibtex": "@inproceedings{\nsiledar2023synthesize,\ntitle={Synthesize, if you do not have: Effective Synthetic Dataset Creation Strategies for Self-Supervised Opinion Summarization in E-commerce},\nauthor={Tejpalsingh Siledar and Suman Banerjee and Amey Patil and Sudhanshu Shekhar Singh and Muthusamy Chelliah and Nikesh Garera and Pushpak Bhattacharyya},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=9GxP2Kw8IC}\n}", "github": "", "project": "", "reviewers": "GYEt;7Tmj;UMd3", "site": "https://openreview.net/forum?id=9GxP2Kw8IC", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;4;4", "excitement": "3;3;3", "reproducibility": "2;4;1", "correctness": "3;4;3", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.0, "reproducibility_avg": 2.3333333333333335, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0003-4474-6851;;;;;;", "linkedin": "tjsiledar;;amey-patil-692245128/;sudhanshu-shekhar-singh-85402515b/;muthusamy-chelliah-28b3b0/;https://in.linkedin.com/in/ngarera;pushpakbh/?originalSubdomain=in", "aff_unique_index": "0;1;1;1;2", "aff_unique_norm": "Indian Institute of Technology Bombay;Flipkart;Indian Institute of Technology, Bombay", "aff_unique_dep": ";;", "aff_unique_url": "https://www.iitb.ac.in;https://www.flipkart.com;https://www.iitb.ac.in", "aff_unique_abbr": "IIT Bombay;Flipkart;IIT Bombay", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Bombay;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "India" }, { "id": "9HbJGoe4a8", "title": "Sound of Story: Multi-modal Storytelling with Audio", "track": "main", "status": "Long Findings", 
"tldr": "", "abstract": "Storytelling is multi-modal in the real world. When one tells a story, one may use all of the visualizations and sounds along with the story itself. However, prior studies on storytelling datasets and tasks have paid little attention to sound even though sound also conveys meaningful semantics of the story. Therefore, we propose to extend story understanding and telling areas by establishing a new component called background sound which is story context-based audio without any linguistic information. For this purpose, we introduce a new dataset, called Sound of Story (SoS), which has paired image and text sequences with corresponding sound or background music for a story. \nTo the best of our knowledge, this is the largest well-curated dataset for storytelling with sound.\nOur SoS dataset consists of 27,354 stories with 19.6 images per story and 984 hours of speech-decoupled audio such as background music and other sounds. As benchmark tasks for storytelling with sound and the dataset, we propose retrieval tasks between modalities, and audio generation tasks from image-text sequences, introducing strong baselines for them. We believe the proposed dataset and tasks may shed light on the multi-modal understanding of storytelling in terms of sound.", "keywords": "Multi-modal;Story understanding;Audio-video retrieval;Audio generation", "primary_area": "", "supplementary_material": "", "author": "Jaeyeon BAE;SEOKHOON JEONG;Seokun Kang;Namgi Han;Jae-Yon Lee;Hyounghun Kim;Taehwan Kim", "authorids": "~Jaeyeon_BAE1;~SEOKHOON_JEONG1;~Seokun_Kang2;~Namgi_Han2;~Jae-Yon_Lee1;~Hyounghun_Kim1;~Taehwan_Kim1", "gender": "M;;M;;M;M;M", "homepage": "https://jeje910.github.io/;;;;https://hyounghk.github.io/;http://ttic.uchicago.edu/~taehwan;", "dblp": ";;144/2783.html;;228/9951;86/3976;359/5988", "google_scholar": ";;;;https://scholar.google.com/citations?hl=en;5dGWexcAAAAJ;z5cuaKEAAAAJ", "or_profile": "~Jaeyeon_BAE1;~SEOKHOON_JEONG1;~Namgi_Han2;~Jae-Yon_Lee1;~Hyounghun_Kim1;~Taehwan_Kim1;~SEOKUN_KANG1", "aff": "Ulsan National Institute of Science and Technology;Ulsan National Institute of Science and Technology;Ulsan National Institute of Science and Technology;Ulsan National Institute of Science and Technology;Ulsan National Institute of Science and Technology;Ulsan National Institute of Science and Technology;Ulsan National Institute of Science and Technology", "aff_domain": "unist.ac.kr;unist.ac.kr;unist.ac.kr;unist.ac.kr;unist.ac.kr;unist.ac.kr;unist.ac.kr", "position": "MS student;Undergrad student;Postdoc;Associate Professor;Assistant Professor;Assistant Professor;MS student", "bibtex": "@inproceedings{\nbae2023sound,\ntitle={Sound of Story: Multi-modal Storytelling with Audio},\nauthor={Jaeyeon BAE and SEOKHOON JEONG and Seokun Kang and Namgi Han and Jae-Yon Lee and Hyounghun Kim and Taehwan Kim},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=9HbJGoe4a8}\n}", "github": "", "project": "", "reviewers": "s1GB;sX7X;Lmj8", "site": "https://openreview.net/forum?id=9HbJGoe4a8", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;3;4", "excitement": "4;3;3", "reproducibility": "5;4;4", "correctness": "4;3;3", "rating_avg": 3.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.333333333333333, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": 
";;;0000-0002-8854-500X;;0000-0002-6571-4632;0000-0003-4243-8214", "linkedin": ";seokhoon-jeong-56b1ba276/;;;;;seokun-kang-6b548a283/", "aff_unique_index": "0;0;0;0;0;0;0", "aff_unique_norm": "Ulsan National Institute of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.unist.ac.kr", "aff_unique_abbr": "UNIST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "South Korea" }, { "id": "9HjxuDwTNG", "title": "Towards a Unified Conversational Recommendation System: Multi-task Learning via Contextualized Knowledge Distillation", "track": "main", "status": "Long Main", "tldr": "", "abstract": "In Conversational Recommendation System (CRS), an agent is asked to recommend a set of items to users within natural language conversations. To address the need for both conversational capability and personalized recommendations, prior works have utilized separate recommendation and dialogue modules. However, such approach inevitably results in a discrepancy between recommendation results and generated responses. To bridge the gap, we propose a multi-task learning for a unified CRS, where a single model jointly learns both tasks via Contextualized Knowledge Distillation (ConKD). We introduce two versions of ConKD: hard gate and soft gate. The former selectively gates between two task-specific teachers, while the latter integrates knowledge from both teachers. Our gates are computed on-the-fly in a context-specific manner, facilitating flexible integration of relevant knowledge. Extensive experiments demonstrate that our single model significantly improves recommendation performance while enhancing fluency, and achieves comparable results in terms of diversity.", "keywords": "Recommendation;Multi-task learning", "primary_area": "", "supplementary_material": "", "author": "Yeongseo Jung;Eunseo Jung;Lei Chen", "authorids": "~Yeongseo_Jung1;~Eunseo_Jung1;~Lei_Chen7", "gender": "F;;M", "homepage": ";;http://www.cs.ust.hk/~leichen/", "dblp": ";;c/LeiChen0002", "google_scholar": ";;gtglwgYAAAAJ", "or_profile": "~Yeongseo_Jung1;~Eunseo_Jung1;~Lei_Chen7", "aff": "Department of Computer Science and Engineering, Hong Kong University of Science and Technology;;Hong Kong University of Science and Technology", "aff_domain": "cse.ust.hk;;hkust.edu", "position": "PhD student;;Full Professor", "bibtex": "@inproceedings{\njung2023towards,\ntitle={Towards a Unified Conversational Recommendation System: Multi-task Learning via Contextualized Knowledge Distillation},\nauthor={Yeongseo Jung and Eunseo Jung and Lei Chen},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=9HjxuDwTNG}\n}", "github": "", "project": "", "reviewers": "mGM1;nTHZ;hs9f", "site": "https://openreview.net/forum?id=9HjxuDwTNG", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;4", "excitement": "3;3;3", "reproducibility": "4;3;3", "correctness": "3;3;3", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.0, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-8373-6472;;0000-0002-8257-5806", "linkedin": ";;", "aff_unique_index": "0;0", "aff_unique_norm": "Hong Kong University of Science and Technology", "aff_unique_dep": "Department of Computer Science and Engineering", "aff_unique_url": "https://www.ust.hk", 
"aff_unique_abbr": "HKUST", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Hong Kong SAR", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "id": "9K1urVN7ti", "title": "DueT: Image-Text Contrastive Transfer Learning with Dual-adapter Tuning", "track": "main", "status": "Long Main", "tldr": "", "abstract": "This paper presents DueT, a novel transfer learning method for vision and language models built by contrastive learning.\nIn DueT, adapters are inserted into the image and text encoders, which have been initialized using models pre-trained on uni-modal corpora and then frozen.\nBy training only these adapters, DueT enables efficient learning with a reduced number of trainable parameters.\nMoreover, unlike traditional adapters, those in DueT are equipped with a gating mechanism, enabling effective transfer and connection of knowledge acquired from pre-trained uni-modal encoders while preventing catastrophic forgetting.\nWe report that DueT outperformed simple fine-tuning, the conventional method fixing only the image encoder and training only the text encoder, and the LoRA-based adapter method in accuracy and parameter efficiency for 0-shot image and text retrieval in both English and Japanese domains.", "keywords": "Vision and Language", "primary_area": "", "supplementary_material": "", "author": "Taku Hasegawa;Kyosuke Nishida;Koki Maeda;Kuniko Saito", "authorids": "~Taku_Hasegawa1;~Kyosuke_Nishida2;~Koki_Maeda1;~Kuniko_Saito1", "gender": "M;M;M;", "homepage": ";http://www.knishida.info/;https://sites.google.com/view/silviase/english;", "dblp": "148/0922.html;01/5962;;05/59", "google_scholar": "https://scholar.google.co.jp/citations?user=8MQwGgoAAAAJ;https://scholar.google.co.jp/citations?user=sNCGB6UAAAAJ;https://scholar.google.co.jp/citations?user=TOHpU1IAAAAJ;", "or_profile": "~Taku_Hasegawa1;~Kyosuke_Nishida2;~Koki_Maeda1;~Kuniko_Saito1", "aff": "NTT corporation;NTT corporation;Tokyo Institute of Technology;NTT", "aff_domain": "ntt.co.jp;ntt.co.jp;titech.ac.jp;ntt.co.jp", "position": "Researcher;Researcher;MS student;Researcher", "bibtex": "@inproceedings{\nhasegawa2023duet,\ntitle={DueT: Image-Text Contrastive Transfer Learning with Dual-adapter Tuning},\nauthor={Taku Hasegawa and Kyosuke Nishida and Koki Maeda and Kuniko Saito},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=9K1urVN7ti}\n}", "github": "", "project": "", "reviewers": "a7vW;pT3q;Tr3S", "site": "https://openreview.net/forum?id=9K1urVN7ti", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;4;3", "excitement": "4;4;2", "reproducibility": "3;4;4", "correctness": "4;4;3", "rating_avg": 5.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.6666666666666665, "replies_avg": 10, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-8443-0651;0009-0008-0529-3152;", "linkedin": ";;;", "aff_unique_index": "0;0;1;0", "aff_unique_norm": "NTT Corporation;Tokyo Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.ntt.co.jp;https://www.titech.ac.jp", "aff_unique_abbr": "NTT;Titech", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Japan" }, { "id": "9LPJK81xy1", "title": "Conversation Chronicles: Towards Diverse Temporal and Relational Dynamics in Multi-Session Conversations", 
"track": "main", "status": "Long Main", "tldr": "", "abstract": "In the field of natural language processing, open-domain chatbots have emerged as an important research topic. However, a major limitation of existing open-domain chatbot research is its singular focus on short single-session dialogue, neglecting the potential need for understanding contextual information in multiple consecutive sessions that precede an ongoing dialogue. Among the elements that compose the context in multi-session conversation settings, the time intervals between sessions and the relationships between speakers would be particularly important. Despite their importance, current research efforts have not sufficiently addressed these dialogical components. In this paper, we introduce a new 1M multi-session dialogue dataset, called Conversation Chronicles, for implementing a long-term conversation setup in which time intervals and fine-grained speaker relationships are incorporated. Following recent works, we exploit a large language model to produce the data. The extensive human evaluation shows that dialogue episodes in Conversation Chronicles reflect those properties while maintaining coherent and consistent interactions across all the sessions. We also propose a dialogue model, called ReBot, which consists of chronological summarization and dialogue generation modules using only around 630M parameters. When trained on Conversation Chronicles, ReBot demonstrates long-term context understanding with a high human engagement score.", "keywords": "Multi-Session Dialogue;Long-Term Conversation", "primary_area": "", "supplementary_material": "", "author": "Jihyoung Jang;MinSeong Boo;Hyounghun Kim", "authorids": "~Jihyoung_Jang1;~MinSeong_Boo1;~Hyounghun_Kim1", "gender": "M;M;M", "homepage": "https://wlgud2757.github.io/;https://github.com/helisss;https://hyounghk.github.io/", "dblp": "359/3060;;228/9951", "google_scholar": "5eIZ6CoAAAAJ;;https://scholar.google.com/citations?hl=en", "or_profile": "~Jihyoung_Jang1;~MinSeong_Boo1;~Hyounghun_Kim1", "aff": "Handong Global University;Ulsan National Institute of Science and Technology;Ulsan National Institute of Science and Technology", "aff_domain": "handong.ac.kr;unist.ac.kr;unist.ac.kr", "position": "Undergrad student;Undergrad student;Assistant Professor", "bibtex": "@inproceedings{\njang2023conversation,\ntitle={Conversation Chronicles: Towards Diverse Temporal and Relational Dynamics in Multi-Session Conversations},\nauthor={Jihyoung Jang and MinSeong Boo and Hyounghun Kim},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=9LPJK81xy1}\n}", "github": "", "project": "", "reviewers": "xS68;Eydv;fauZ", "site": "https://openreview.net/forum?id=9LPJK81xy1", "pdf_size": 0, "rating": "5;5;5", "confidence": "3;3;4", "excitement": "4;3;4", "reproducibility": "4;4;3", "correctness": "4;3;4", "rating_avg": 5.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": ";;", "aff_unique_index": "0;1;1", "aff_unique_norm": "Handong Global University;Ulsan National Institute of Science and Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.handong.ac.kr;https://www.unist.ac.kr", "aff_unique_abbr": "HGU;UNIST", "aff_campus_unique_index": "", 
"aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "South Korea" }, { "id": "9NGR4GdLII", "title": "Coarse-to-Fine Dual Encoders are Better Frame Identification Learners", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Frame identification aims to find semantic frames associated with target words in a sentence. Recent researches measure the similarity or matching score between targets and candidate frames by modeling frame definitions. However, they either lack sufficient representation learning of the definitions or face challenges in efficiently selecting the most suitable frame from over 1000 candidate frames. Moreover, commonly used lexicon filtering ($lf$) to obtain candidate frames for the target may ignore out-of-vocabulary targets and cause inadequate frame modeling. In this paper, we propose CoFFTEA, a $\\underline{Co}$arse-to-$\\underline{F}$ine $\\underline{F}$rame and $\\underline{T}$arget $\\underline{E}$ncoders $\\underline{A}$rchitecture. With contrastive learning and dual encoders, CoFFTEA efficiently and effectively models the alignment between frames and targets. By employing a coarse-to-fine curriculum learning procedure, CoFFTEA gradually learns to differentiate frames with varying degrees of similarity. Experimental results demonstrate that CoFFTEA outperforms previous models by 0.93 overall scores and 1.53 R@1 without $lf$. Further analysis suggests that CoFFTEA can better model the relationships between frame and frame, as well as target and target. The code for our approach is available at https://github.com/pkunlp-icler/COFFTEA.", "keywords": "Frame Identification;Frame Semantics;Contrastive Learning;Metric Learing", "primary_area": "", "supplementary_material": "", "author": "Kaikai An;Ce Zheng;Bofei Gao;Haozhe Zhao;Baobao Chang", "authorids": "~Kaikai_An1;~Ce_Zheng2;~Bofei_Gao1;~Haozhe_Zhao1;~Baobao_Chang1", "gender": "M;M;M;M;M", "homepage": "https://github.com/kkk-an;;https://kbsdjames.github.io;;http://eecs.pku.edu.cn/EN/People/Faculty/Detail/?ID=6027", "dblp": ";99/6967;330/2755;299/7199;91/6051", "google_scholar": "6TrBRiEAAAAJ;r7qFs7UAAAAJ;;skIXywUAAAAJ;LaKNyhQAAAAJ", "or_profile": "~Kaikai_An1;~Ce_Zheng2;~Bofei_Gao1;~Haozhe_Zhao1;~Baobao_Chang1", "aff": "Peking University;Peking University;Beijing University of Posts and Telecommunications;Peking University;Peking University", "aff_domain": "pku.edu.cn;pku.edu.cn;bupt.edu.cn;pku.edu.cn;pku.edu.cn", "position": "MS student;MS student;Undergrad student;MS student;Associate Professor", "bibtex": "@inproceedings{\nan2023coarsetofine,\ntitle={Coarse-to-Fine Dual Encoders are Better Frame Identification Learners},\nauthor={Kaikai An and Ce Zheng and Bofei Gao and Haozhe Zhao and Baobao Chang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=9NGR4GdLII}\n}", "github": "", "project": "", "reviewers": "nLkN;9bh3;wf6g", "site": "https://openreview.net/forum?id=9NGR4GdLII", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;1;3", "excitement": "4;3;4", "reproducibility": "4;3;4", "correctness": "3;3;4", "rating_avg": 3.0, "confidence_avg": 2.3333333333333335, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0000-0003-0502-4426;0000-0003-2824-6750", "linkedin": ";;;;", "aff_unique_index": 
"0;0;1;0;0", "aff_unique_norm": "Peking University;Beijing University of Posts and Telecommunications", "aff_unique_dep": ";", "aff_unique_url": "http://www.pku.edu.cn;http://www.bupt.edu.cn/", "aff_unique_abbr": "Peking U;BUPT", "aff_campus_unique_index": "1", "aff_campus_unique": ";Beijing", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "9OPtgQlxVD", "title": "BioDEX: Large-Scale Biomedical Adverse Drug Event Extraction for Real-World Pharmacovigilance", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Timely and accurate extraction of Adverse Drug Events (ADE) from biomedical literature is paramount for public safety, but involves slow and costly manual labor. We set out to improve drug safety monitoring (pharmacovigilance, PV) through the use of Natural Language Processing (NLP). We introduce BioDEX, a large-scale resource for Biomedical adverse Drug Event eXtraction, rooted in the historical output of drug safety reporting in the U.S. BioDEX consists of 65k abstracts and 19k full-text biomedical papers with 256k associated document-level safety reports created by medical experts. The core features of these reports include the reported weight, age, and biological sex of a patient, a set of drugs taken by the patient, the drug dosages, the reactions experienced, and whether the reaction was life threatening. In this work, we consider the task of predicting the core information of the report given its originating paper. We estimate human performance to be 72.0% F1, whereas our best model achieves 59.1% F1 (62.3 validation), indicating significant headroom. We also begin to explore ways in which these models could help professional PV reviewers. Our code and data are available at https://github.com/KarelDO/BioDEX.", "keywords": "pharmacovigilance;datasets;healthcare;adverse drug events", "primary_area": "", "supplementary_material": "", "author": "Karel D'Oosterlinck;Fran\u00e7ois Remy;Johannes Deleu;Thomas Demeester;Chris Develder;Klim Zaporojets;Aneiss Ghodsi;Simon Ellershaw;Jack Collins;Christopher Potts", "authorids": "~Karel_D'Oosterlinck1;~Fran\u00e7ois_Remy1;~Johannes_Deleu1;~Thomas_Demeester1;~Chris_Develder1;~Klim_Zaporojets1;~Aneiss_Ghodsi1;~Simon_Ellershaw1;~Jack_Collins3;~Christopher_Potts1", "gender": ";M;;M;M;M;;M;M;M", "homepage": "https://www.kareldoosterlinck.com/;http://fremycompany.com/;;;https://users.ugent.be/~cdvelder;http://klimzaporojets.github.io/;https://www.linkedin.com/in/aneiss/;;;http://web.stanford.edu/~cgpotts/", "dblp": ";;84/7629;;74/1846;;;292/3662.html;;13/2617", "google_scholar": ";pyCw7HcAAAAJ;;;https://scholar.google.com/citations?hl=en;oFjUJvwAAAAJ;;;;3j08YoAAAAAJ", "or_profile": "~Karel_D'Oosterlinck1;~Fran\u00e7ois_Remy1;~Johannes_Deleu1;~Thomas_Demeester1;~Chris_Develder1;~Klim_Zaporojets1;~Aneiss_Ghodsi1;~Simon_Ellershaw1;~Jack_Collins3;~Christopher_Potts1", "aff": "Stanford University;Universiteit Gent;Universiteit Gent;Ghent University - imec;Universiteit Gent;Aarhus University;Parexel;University College London, University of London;;Stanford University", "aff_domain": "stanford.edu;ugent.be;ugent.be;ugent.be;ugent.be;au.dk;parexel.com;ucl.ac.uk;;stanford.edu", "position": "PhD student;PhD student;Researcher;Assistant Professor;Full Professor;Postdoc;Researcher;PhD student;;Full Professor", "bibtex": "@inproceedings{\nd'oosterlinck2023biodex,\ntitle={Bio{DEX}: Large-Scale Biomedical Adverse Drug Event Extraction for Real-World Pharmacovigilance},\nauthor={Karel D'Oosterlinck and 
Fran{\\c{c}}ois Remy and Johannes Deleu and Thomas Demeester and Chris Develder and Klim Zaporojets and Aneiss Ghodsi and Simon Ellershaw and Jack Collins and Christopher Potts},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=9OPtgQlxVD}\n}", "github": "", "project": "", "reviewers": "4Yca;aAaA;Qsyq", "site": "https://openreview.net/forum?id=9OPtgQlxVD", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;4;4", "excitement": "4;4;4", "reproducibility": "5;4;4", "correctness": "3;3;4", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 4.0, "reproducibility_avg": 4.333333333333333, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 10, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0003-4775-6975;;0000-0002-9901-5768;0000-0003-2707-4176;0000-0003-4988-978X;;;;0000-0002-7978-6055", "linkedin": "karel-doosterlinck/;;;;chris-develder-7b979336/;klim-zaporojets-9102b0a/;aneiss/;simon-ellershaw-41327b154/;jackmpcollins;", "aff_unique_index": "0;1;1;2;1;3;4;5;0", "aff_unique_norm": "Stanford University;University of Ghent;Ghent University;Aarhus University;Parexel;University College London", "aff_unique_dep": ";;imec;;;", "aff_unique_url": "https://www.stanford.edu;https://www.ugent.be/en;https://www.ugent.be/en;https://au.dk;https://www.parexel.com;https://www.ucl.ac.uk", "aff_unique_abbr": "Stanford;UGent;UGent;AU;;UCL", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Stanford;", "aff_country_unique_index": "0;1;1;1;1;2;0;3;0", "aff_country_unique": "United States;Belgium;Denmark;United Kingdom" }, { "id": "9RFBVLwiOn", "title": "SEER : A Knapsack approach to Exemplar Selection for In-Context HybridQA", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Question answering over hybrid contexts is a complex task, which requires the combination of information extracted from unstructured texts and structured tables in various ways. Recently, In-Context Learning demonstrated significant performance advances for reasoning tasks. In this paradigm, a large language model performs predictions based on a small set of supporting exemplars. The performance of In-Context Learning depends heavily on the selection procedure of the supporting exemplars, particularly in the case of HybridQA, where considering the diversity of reasoning chains and the large size of the hybrid contexts becomes crucial. In this work, we present Selection of ExEmplars for hybrid Reasoning (SEER), a novel method for selecting a set of exemplars that is both representative and diverse. The key novelty of SEER is that it formulates exemplar selection as a Knapsack Integer Linear Program. The Knapsack framework provides the flexibility to incorporate diversity constraints that prioritize exemplars with desirable attributes, and capacity constraints that ensure that the prompt size respects the provided capacity budgets. 
The effectiveness of SEER is demonstrated on FinQA and TAT-QA, two real-world benchmarks for HybridQA, where it outperforms previous exemplar selection methods.", "keywords": "Hybrid Question Answering;In-Context Learning;Integer Linear Programming", "primary_area": "", "supplementary_material": "", "author": "Jonathan Tonglet;Manon Reusens;Philipp Borchert;Bart Baesens", "authorids": "~Jonathan_Tonglet1;~Manon_Reusens1;~Philipp_Borchert1;~Bart_Baesens1", "gender": "M;F;M;M", "homepage": "https://jtonglet.github.io/;;https://icma.ieseg.fr/philipp-borchert/;https://bluecourses.com", "dblp": "358/9311;320/3006;338/1017;43/4264", "google_scholar": "QOEvEWYAAAAJ;https://scholar.google.be/citations?hl=nl;efKKfygAAAAJ;IC7ghFwAAAAJ", "or_profile": "~Jonathan_Tonglet1;~Manon_Reusens1;~Philipp_Borchert1;~Bart_Baesens1", "aff": "KU Leuven;KU Leuven;KU Leuven;KU Leuven", "aff_domain": "kuleuven.be;kuleuven.be;kuleuven.be;kuleuven.be", "position": "MS student;PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\ntonglet2023seer,\ntitle={{SEER} : A Knapsack approach to Exemplar Selection for In-Context Hybrid{QA}},\nauthor={Jonathan Tonglet and Manon Reusens and Philipp Borchert and Bart Baesens},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=9RFBVLwiOn}\n}", "github": "", "project": "", "reviewers": "Fa3v;aS63;dH6X", "site": "https://openreview.net/forum?id=9RFBVLwiOn", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;4;4", "excitement": "4;3;4", "reproducibility": "3;4;3", "correctness": "4;4;4", "rating_avg": 5.0, "confidence_avg": 4.0, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0009-0005-7576-4659;;0000-0002-5533-4281;0000-0002-5831-5668", "linkedin": "jonathan-tonglet/;;;bart-baesens-403bb83/", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Katholieke Universiteit Leuven", "aff_unique_dep": "", "aff_unique_url": "https://www.kuleuven.be", "aff_unique_abbr": "KU Leuven", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Belgium" }, { "id": "9RugvdmIBa", "title": "PARROT: Zero-Shot Narrative Reading Comprehension via Parallel Reading", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Narrative comprehension is a challenging task that requires a deep understanding of the foundational elements of narratives. Acquiring this skill requires extensive annotated data. To mitigate the burden of data annotation, we present Parrot, a zero-shot approach for narrative reading comprehension through parallel reading, which involves two parallel narratives that tell the same story. By leveraging one narrative as a source of supervision signal to guide the understanding of the other, Parrot abstracts the textual content and develops genuine narrative understanding. Evaluation conducted on two narrative comprehension benchmarks demonstrates that Parrot surpasses previous zero-shot approaches and achieves comparable performance to fully supervised models. 
The code will be available at https://github.com/zhaochaocs/Parrot.", "keywords": "narrative reading comprehension;zero-shot learning", "primary_area": "", "supplementary_material": "", "author": "Chao Zhao;Anvesh Rao Vijjini;Snigdha Chaturvedi", "authorids": "~Chao_Zhao3;~Anvesh_Rao_Vijjini2;~Snigdha_Chaturvedi2", "gender": "M;M;F", "homepage": "https://zhaochaocs.github.io/;https://nvshrao.github.io;https://sites.google.com/site/snigdhac/", "dblp": ";222/9403.html;77/8700", "google_scholar": "https://scholar.google.co.jp/citations?user=QGfqQtAAAAAJ;https://scholar.google.co.in/citations?user=Byn_NGYAAAAJ;gZD3EesAAAAJ", "or_profile": "~Chao_Zhao3;~Anvesh_Rao_Vijjini2;~Snigdha_Chaturvedi2", "aff": "University of North Carolina, Chapel Hill;Department of Computer Science, University of North Carolina, Chapel Hill;Department of Computer Science, University of North Carolina, Chapel Hill", "aff_domain": "unc.edu;cs.unc.edu;cs.unc.edu", "position": "PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nzhao2023parrot,\ntitle={{PARROT}: Zero-Shot Narrative Reading Comprehension via Parallel Reading},\nauthor={Chao Zhao and Anvesh Rao Vijjini and Snigdha Chaturvedi},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=9RugvdmIBa}\n}", "github": "", "project": "", "reviewers": "z7i5;bdc5;jhYe", "site": "https://openreview.net/forum?id=9RugvdmIBa", "pdf_size": 0, "rating": "3;3;3", "confidence": "5;4;4", "excitement": "3;3;4", "reproducibility": "3;4;4", "correctness": "3;3;3", "rating_avg": 3.0, "confidence_avg": 4.333333333333333, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": ";anvesh-rao/;", "aff_unique_index": "0;0;0", "aff_unique_norm": "University of North Carolina", "aff_unique_dep": "", "aff_unique_url": "https://www.unc.edu", "aff_unique_abbr": "UNC", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Chapel Hill", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "9S0MFwEkc3", "title": "Rethinking the Construction of Effective Metrics for Understanding the Mechanisms of Pretrained Language Models", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Pretrained language models are expected to effectively map input text to a set of vectors while preserving the inherent relationships within the text. Consequently, designing a white-box model to compute metrics that reflect the presence of specific internal relations in these vectors has become a common approach for post-hoc interpretability analysis of pretrained language models. However, achieving interpretability in white-box models and ensuring the rigor of metric computation becomes challenging when the source model lacks inherent interpretability. Therefore, in this paper, we discuss striking a balance in this trade-off and propose a novel line to constructing metrics for understanding the mechanisms of pretrained language models. We have specifically designed a family of metrics along this line of investigation, and the model used to compute these metrics is referred to as the tree topological probe. We conducted measurements on BERT-large by using these metrics. 
Based on the experimental results, we propose a speculation regarding the working mechanism of BERT-like pretrained language models, as well as a strategy for enhancing fine-tuning performance by leveraging the topological probe to improve specific submodules.", "keywords": "Metrics;Probe;Mechanisms;Pretrained Language Models", "primary_area": "", "supplementary_material": "", "author": "You Li;Jinhui Yin;Yuming Lin", "authorids": "~You_Li3;~Jinhui_Yin1;~Yuming_Lin1", "gender": "F;;M", "homepage": "https://www.guet.edu.cn/people/info/1003/2376.htm;;", "dblp": ";;42/166", "google_scholar": ";;", "or_profile": "~You_Li3;~Jinhui_Yin1;~Yuming_Lin1", "aff": ";;Guilin University of Electronic Technology", "aff_domain": ";;guet.edu.cn", "position": ";;Full Professor", "bibtex": "@inproceedings{\nli2023rethinking,\ntitle={Rethinking the Construction of Effective Metrics for Understanding the Mechanisms of Pretrained Language Models},\nauthor={You Li and Jinhui Yin and Yuming Lin},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=9S0MFwEkc3}\n}", "github": "", "project": "", "reviewers": "cpmL;RAmS;QnWV", "site": "https://openreview.net/forum?id=9S0MFwEkc3", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;3;3", "excitement": "3;3;5", "reproducibility": "3;3;5", "correctness": "3;3;4", "rating_avg": 3.0, "confidence_avg": 3.0, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": ";;", "aff_unique_index": "0", "aff_unique_norm": "Guilin University of Electronic Technology", "aff_unique_dep": "", "aff_unique_url": "http://www.gliet.edu.cn/", "aff_unique_abbr": "", "aff_country_unique_index": "0", "aff_country_unique": "China" }, { "id": "9V0M45lJAs", "title": "Towards a Better Understanding of Variations in Zero-Shot Neural Machine Translation Performance", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Multilingual Neural Machine Translation (MNMT) facilitates knowledge sharing but often suffers from poor zero-shot (ZS) translation qualities. While prior work has explored the causes of overall low zero-shot translation qualities, our work introduces a fresh perspective: the presence of significant variations in zero-shot performance. This suggests that MNMT does not uniformly exhibit poor zero-shot capability; instead, certain translation directions yield reasonable results. Through systematic experimentation, spanning 1,560 language directions across 40 languages, we identify three key factors contributing to high variations in ZS NMT performance: 1) target-side translation quality, 2) vocabulary overlap, and 3) linguistic properties. Our findings highlight that the target side translation quality is the most influential factor, with vocabulary overlap consistently impacting zero-shot capabilities. Additionally, linguistic properties, such as language family and writing system, play a role, particularly with smaller models. Furthermore, we suggest that the off-target issue is a symptom of inadequate performance, emphasizing that zero-shot translation challenges extend beyond addressing the off-target problem. 
To support future research, we release the data and models as a benchmark for the study of ZS NMT.", "keywords": "Zero-shot Machine Translation;Multilingual Machine Translation", "primary_area": "", "supplementary_material": "", "author": "Shaomu Tan;Christof Monz", "authorids": "~Shaomu_Tan1;~Christof_Monz1", "gender": "Non-Binary;M", "homepage": "https://smu-tan.github.io/;https://staff.fnwi.uva.nl/c.monz/", "dblp": "336/3005;m/ChristofMonz", "google_scholar": "KJRzX-gAAAAJ;0r3PWLQAAAAJ", "or_profile": "~Shaomu_Tan1;~Christof_Monz1", "aff": "University of Amsterdam;University of Amsterdam, University of Amsterdam", "aff_domain": "uva.nl;ivi.uva.nl", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\ntan2023towards,\ntitle={Towards a Better Understanding of Variations in Zero-Shot Neural Machine Translation Performance},\nauthor={Shaomu Tan and Christof Monz},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=9V0M45lJAs}\n}", "github": "", "project": "", "reviewers": "3cCd;2RvR;jc8P", "site": "https://openreview.net/forum?id=9V0M45lJAs", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;4", "excitement": "3;3;3", "reproducibility": "5;4;4", "correctness": "3;3;3", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 3.0, "reproducibility_avg": 4.333333333333333, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";", "linkedin": "shaomutan/;", "aff_unique_index": "0;0", "aff_unique_norm": "University of Amsterdam", "aff_unique_dep": "", "aff_unique_url": "https://www.uva.nl", "aff_unique_abbr": "UvA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Netherlands" }, { "id": "9cALtYoAEy", "title": "Vector-Quantized Prompt Learning for Paraphrase Generation", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Deep generative modeling of natural languages has achieved many successes, such as\nproducing fluent sentences and translating from one language into another. However, the development of generative modeling techniques for paraphrase generation still lags behind largely due to the challenges in addressing the complex conflicts between expression diversity and semantic preservation. This paper proposes to generate diverse and high-quality paraphrases by exploiting the pre-trained models with instance-dependent prompts. To learn generalizable prompts, we assume that the number of abstract transforming patterns of paraphrase generation (governed by prompts) is finite and usually not large. Therefore, we present vector-quantized prompts as the cues to control the generation of pre-trained models.\nExtensive experiments demonstrate that the proposed method achieves new state-of-the-art results on three benchmark datasets, including Quora, Wikianswers, and MSCOCO. 
We will release all the code upon acceptance.", "keywords": "Natural Language Processing;Natural Language Generation;Text generation;Paraphrase", "primary_area": "", "supplementary_material": "", "author": "Haotian Luo;Yixin Liu;Peidong Liu;Xianggen Liu", "authorids": "~Haotian_Luo1;~Yixin_Liu5;~Peidong_Liu4;~Xianggen_Liu1", "gender": "M;F;;M", "homepage": "https://github.com/StarDewXXX;https://github.com/lyxcode1;https://github.com/HUGHNew;", "dblp": "292/6755;;;150/5942", "google_scholar": ";;;qxNzQfQAAAAJ", "or_profile": "~Haotian_Luo1;~Yixin_Liu5;~Peidong_Liu4;~Xianggen_Liu1", "aff": "Sichuan University;Sichuan University;Sichuan University;Sichuan University", "aff_domain": "scu.edu.cn;scu.edu.cn;scu.edu.cn;scu.edu.cn", "position": "Undergrad student;MS student;Undergrad student;Associate Professor", "bibtex": "@inproceedings{\nluo2023vectorquantized,\ntitle={Vector-Quantized Prompt Learning for Paraphrase Generation},\nauthor={Haotian Luo and Yixin Liu and Peidong Liu and Xianggen Liu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=9cALtYoAEy}\n}", "github": "", "project": "", "reviewers": "CUyp;j1cc;F6VU;PqsA", "site": "https://openreview.net/forum?id=9cALtYoAEy", "pdf_size": 0, "rating": "3;3;3;3", "confidence": "3;3;3;2", "excitement": "2;4;4;3", "reproducibility": "3;3;3;3", "correctness": "3;4;3;3", "rating_avg": 3.0, "confidence_avg": 2.75, "excitement_avg": 3.25, "reproducibility_avg": 3.0, "correctness_avg": 3.25, "replies_avg": 14, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;", "linkedin": ";;;", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Sichuan University", "aff_unique_dep": "", "aff_unique_url": "https://www.scu.edu.cn", "aff_unique_abbr": "SCU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "9edEJfhOFL", "title": "Towards Unsupervised Recognition of Token-level Semantic Differences in Related Documents", "track": "main", "status": "Short Main", "tldr": "", "abstract": "Automatically highlighting words that cause semantic differences between two documents could be useful for a wide range of applications. We formulate recognizing semantic differences (RSD) as a token-level regression task and study three unsupervised approaches that rely on a masked language model. To assess the approaches, we begin with basic English sentences and gradually move to more complex, cross-lingual document pairs. Our results show that an approach based on word alignment and sentence-level contrastive learning has a robust correlation to gold labels. 
However, all unsupervised approaches still leave a large margin for improvement.", "keywords": "recognizing semantic differences;semantic similarity;multilinguality", "primary_area": "", "supplementary_material": "", "author": "Jannis Vamvas;Rico Sennrich", "authorids": "~Jannis_Vamvas1;~Rico_Sennrich1", "gender": ";M", "homepage": ";http://cl.uzh.ch/sennrich", "dblp": ";00/8341.html", "google_scholar": ";https://scholar.google.ch/citations?user=XTpJvCgAAAAJ", "or_profile": "~Jannis_Vamvas1;~Rico_Sennrich1", "aff": ";University of Zurich", "aff_domain": ";uzh.ch", "position": ";Assistant Professor", "bibtex": "@inproceedings{\nvamvas2023towards,\ntitle={Towards Unsupervised Recognition of Token-level Semantic Differences in Related Documents},\nauthor={Jannis Vamvas and Rico Sennrich},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=9edEJfhOFL}\n}", "github": "", "project": "", "reviewers": "RPjL;4d51;uPJb", "site": "https://openreview.net/forum?id=9edEJfhOFL", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;4;2", "excitement": "3;3;3", "reproducibility": "4;4;4", "correctness": "3;4;4", "rating_avg": 4.0, "confidence_avg": 3.0, "excitement_avg": 3.0, "reproducibility_avg": 4.0, "correctness_avg": 3.6666666666666665, "replies_avg": 9, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-1438-4741", "linkedin": ";", "aff_unique_index": "0", "aff_unique_norm": "University of Zurich", "aff_unique_dep": "", "aff_unique_url": "https://www.unizh.ch", "aff_unique_abbr": "UZH", "aff_country_unique_index": "0", "aff_country_unique": "Switzerland" }, { "id": "9qydIw5ux1", "title": "Ethical Reasoning over Moral Alignment: A Case and Framework for In-Context Ethical Policies in LLMs", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "In this position paper, we argue that instead of morally aligning LLMs to a specific set of ethical principles, we should infuse generic ethical reasoning capabilities into them so that they can handle value pluralism at a global scale.\nWhen provided with an ethical policy, an LLM should be capable of making decisions that are ethically consistent with the policy. We develop a framework that integrates moral dilemmas with moral principles pertaining to different formalisms of normative ethics, and at different levels of abstraction. 
Initial experiments with GPT-x models show that while GPT-4 is a nearly perfect ethical reasoner, the models still have bias towards the moral values of Western and English-speaking societies.", "keywords": "Ethics in NLP;Ethical Reasoning;Value Pluralism;Large Language Models", "primary_area": "", "supplementary_material": "", "author": "Abhinav Sukumar Rao;Aditi Khandelwal;Kumar Tanmay;Utkarsh Agarwal;Monojit Choudhury", "authorids": "~Abhinav_Sukumar_Rao1;~Aditi_Khandelwal1;~Kumar_Tanmay1;~Utkarsh_Agarwal1;~Monojit_Choudhury1", "gender": "M;F;M;M;M", "homepage": "https://abhinavrao.netlify.app;https://github.com/aditi184;;;https://mbzuai.ac.ae/study/faculty/monojit-choudhury/", "dblp": ";;279/3304;217/0988;29/5841", "google_scholar": "U_wk4ssAAAAJ;https://scholar.google.com/citations?view_op=list_works;vNKua5AAAAAJ;DtDZKeoAAAAJ;WR1ImCMAAAAJ", "or_profile": "~Abhinav_Sukumar_Rao1;~Aditi_Khandelwal1;~Kumar_Tanmay1;~Utkarsh_Agarwal1;~Monojit_Choudhury1", "aff": "Microsoft;Microsoft;Microsoft;Microsoft;Microsoft", "aff_domain": "microsoft.com;microsoft.com;microsoft.com;microsoft.com;microsoft.com", "position": "Researcher;Researcher;Researcher;Researcher;Principal Researcher", "bibtex": "@inproceedings{\nrao2023ethical,\ntitle={Ethical Reasoning over Moral Alignment: A Case and Framework for In-Context Ethical Policies in {LLM}s},\nauthor={Abhinav Sukumar Rao and Aditi Khandelwal and Kumar Tanmay and Utkarsh Agarwal and Monojit Choudhury},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=9qydIw5ux1}\n}", "github": "", "project": "", "reviewers": "88UN;Namt;YU2p", "site": "https://openreview.net/forum?id=9qydIw5ux1", "pdf_size": 0, "rating": "4;4;4", "confidence": "2;4;3", "excitement": "3;4;4", "reproducibility": "4;5;4", "correctness": "2;2;4", "rating_avg": 4.0, "confidence_avg": 3.0, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.333333333333333, "correctness_avg": 2.6666666666666665, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0009-0006-3635-7206;;", "linkedin": "https://linkedin.com/in/abhinav-rao;aditi-khandelwal-991b1b19b/;kumar-tanmay-906203189/;utkarsh-agarwal-263a8116a/;monojit-choudhury-54225898/", "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Microsoft", "aff_unique_dep": "Microsoft Corporation", "aff_unique_url": "https://www.microsoft.com", "aff_unique_abbr": "Microsoft", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "9r8WwpJv7M", "title": "Ling-CL: Understanding NLP Models through Linguistic Curricula", "track": "main", "status": "Long Main", "tldr": "", "abstract": "We employ a characterization of linguistic complexity from psycholinguistic and language acquisition research to develop data-driven curricula to understand the underlying linguistic knowledge that models learn to address NLP tasks. The novelty of our approach is in the development of linguistic curricula derived from data, existing knowledge about linguistic complexity, and model behavior during training. Through the evaluation of several benchmark NLP datasets, our curriculum learning approaches identify sets of linguistic metrics (indices) that inform the challenges and reasoning required to address each task. 
Our work will inform future research in all NLP areas, allowing linguistic complexity to be considered early in the research and development process. In addition, our work prompts an examination of gold standards and fair evaluation in NLP.", "keywords": "linguistic index;curriculum learning", "primary_area": "", "supplementary_material": "", "author": "Mohamed Elgaar;Hadi Amiri", "authorids": "~Mohamed_Elgaar2;~Hadi_Amiri1", "gender": ";Not Specified", "homepage": "https://mohdelgaar.github.io/;https://cs.uml.edu/~hadi/", "dblp": "270/4824;41/7403", "google_scholar": "xF9fh9MAAAAJ;https://scholar.google.com/citations?hl=en", "or_profile": "~Mohamed_Elgaar2;~Hadi_Amiri1", "aff": "University of Massachusetts, Lowell;University of Massachusetts Lowell", "aff_domain": "uml.edu;uml.edu", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\nelgaar2023lingcl,\ntitle={Ling-{CL}: Understanding {NLP} Models through Linguistic Curricula},\nauthor={Mohamed Elgaar and Hadi Amiri},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=9r8WwpJv7M}\n}", "github": "", "project": "", "reviewers": "jVRL;5jzZ;7FZF", "site": "https://openreview.net/forum?id=9r8WwpJv7M", "pdf_size": 0, "rating": "4;4;4", "confidence": "1;4;3", "excitement": "4;4;4", "reproducibility": "2;4;4", "correctness": "4;3;3", "rating_avg": 4.0, "confidence_avg": 2.6666666666666665, "excitement_avg": 4.0, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";", "linkedin": ";", "aff_unique_index": "0;0", "aff_unique_norm": "University of Massachusetts Lowell", "aff_unique_dep": "", "aff_unique_url": "https://www.uml.edu", "aff_unique_abbr": "UMass Lowell", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Lowell", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "9rWqOgvGpc", "title": "PTP: Boosting Stability and Performance of Prompt Tuning with Perturbation-Based Regularizer", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Recent studies show that prompt tuning can better leverage the power of large language models than fine-tuning on downstream natural language understanding tasks. However, the existing prompt tuning methods have training instability issues, as the variance of scores under different random seeds is quite large. To address this critical problem, we first investigate and find that the loss landscape of vanilla prompt tuning is precipitous when it is visualized, where a slight change of input data can cause a big fluctuation in the loss landscape. This is an essential factor that leads to the instability of prompt tuning. Based on this observation, we introduce perturbation-based regularizers, which can smooth the loss landscape, into prompt tuning. We propose a new algorithm, called Prompt Tuning with Perturbation-based regularizer~(PTP), which can not only alleviate training instability dramatically but also boost the performance of prompt tuning. We design two kinds of perturbation-based regularizers, including random-noise-based and adversarial-based. In particular, our proposed perturbations are flexible on both text space and embedding space. Extensive experiments show the effectiveness of our proposed methods in stabilizing the training. 
Our new algorithms improve the state-of-the-art prompt tuning methods by 1.94\\% and 2.34\\% on SuperGLUE and FewGLUE benchmarks, respectively.", "keywords": "prompt tuning; training with regularizers", "primary_area": "", "supplementary_material": "", "author": "Lichang Chen;Jiuhai Chen;Heng Huang;Minhao Cheng", "authorids": "~Lichang_Chen2;~Jiuhai_Chen1;~Heng_Huang1;~Minhao_Cheng1", "gender": "M;M;M;M", "homepage": "https://www.linkedin.com/in/jiuhai-chen-6a486715a/;https://www.cs.umd.edu/~heng/;https://cmhcbb.github.io/;", "dblp": ";03/281;174/1717;151/6212", "google_scholar": ";4OqLaDwAAAAJ;_LkC1yoAAAAJ;https://scholar.google.com/citations?hl=zh-CN", "or_profile": "~Jiuhai_Chen1;~Heng_Huang1;~Minhao_Cheng1;~LICHANG_CHEN1", "aff": "University of Maryland, College Park;University of Pittsburgh;Hong Kong University of Science and Technology;Department of Computer Science, University of Maryland, College Park", "aff_domain": "umd.edu;pitt.edu;ust.hk;cs.umd.edu", "position": "PhD student;Full Professor;Assistant Professor;PhD student", "bibtex": "@inproceedings{\nchen2023ptp,\ntitle={{PTP}: Boosting Stability and Performance of Prompt Tuning with Perturbation-Based Regularizer},\nauthor={Lichang Chen and Jiuhai Chen and Heng Huang and Minhao Cheng},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=9rWqOgvGpc}\n}", "github": "", "project": "", "reviewers": "uMXZ;uB8g;RZJH", "site": "https://openreview.net/forum?id=9rWqOgvGpc", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;4;5", "excitement": "3;3;4", "reproducibility": "3;4;5", "correctness": "3;3;4", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.0, "correctness_avg": 3.3333333333333335, "replies_avg": 5, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0003-3965-4215;", "linkedin": ";;;lichang-chen-b7a506173/", "aff_unique_index": "0;1;2;3", "aff_unique_norm": "University of Maryland;University of Pittsburgh;Hong Kong University of Science and Technology;University of Maryland, College Park", "aff_unique_dep": ";;;Department of Computer Science", "aff_unique_url": "https://www/umd.edu;https://www.pitt.edu;https://www.ust.hk;https://www/umd.edu", "aff_unique_abbr": "UMD;Pitt;HKUST;UMD", "aff_campus_unique_index": "0;2;0", "aff_campus_unique": "College Park;;Hong Kong SAR", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "United States;China" }, { "id": "9s7QooDInQ", "title": "SGP-TOD: Building Task Bots Effortlessly via Schema-Guided LLM Prompting", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Building and maintaining end-to-end task bots using minimal human effort is a long-standing challenge in dialog research. In this work, we introduce SGP-TOD, Schema-Guided Prompting for building Task-Oriented Dialog systems effortlessly based on large language models (LLMs). Utilizing the predefined task schema, i.e., belief instruction and dialog policy, we instruct fixed LLMs to generate appropriate responses on novel tasks, without the need for training data. Specifically, SGP-TOD comprises three components: an LLM for interacting with users, a Dialog State Tracking (DST) Prompter to aid the LLM in tracking dialog states with the given belief instruction, and a Policy Prompter to direct the LLM to generate proper responses adhering to the provided dialog policy. 
Experimental results on Multiwoz, RADDLE, and STAR datasets show that our training-free strategy, SGP-TOD, yields state-of-the-art (SOTA) zero-shot performance, significantly surpassing the few-shot approaches. In a domain-extension setting, SGP-TOD aptly adapts to new functionalities by merely adding supplementary schema rules. We make our code and data publicly available.", "keywords": "schema-guided LLM prompting;task bot;zero-shot dialog generation", "primary_area": "", "supplementary_material": "", "author": "Xiaoying Zhang;Baolin Peng;Kun LI;Jingyan Zhou;Helen M. Meng", "authorids": "~Xiaoying_Zhang2;~Baolin_Peng2;~Kun_LI6;~Jingyan_Zhou1;~Helen_M._Meng1", "gender": "F;M;;;F", "homepage": ";;;https://para-zhou.github.io;http://www.se.cuhk.edu.hk/people/academic-staff/prof-meng-mei-ling-helen/", "dblp": ";144/2759;;254/1808;92/3270", "google_scholar": ";u1CNjgwAAAAJ;;5U2DBhUAAAAJ;", "or_profile": "~Xiaoying_Zhang2;~Baolin_Peng2;~Kun_LI6;~Jingyan_Zhou1;~Helen_M._Meng1", "aff": "The Chinese University of Hong Kong;Tencent AI Lab;;The Chinese University of Hong Kong;The Chinese University of Hong Kong", "aff_domain": "cuhk.edu.hk;tencent.com;;cuhk.edu.hk;cuhk.edu.hk", "position": "PhD student;Researcher;;PhD student;Full Professor", "bibtex": "@inproceedings{\nzhang2023sgptod,\ntitle={{SGP}-{TOD}: Building Task Bots Effortlessly via Schema-Guided {LLM} Prompting},\nauthor={Xiaoying Zhang and Baolin Peng and Kun LI and Jingyan Zhou and Helen M. Meng},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=9s7QooDInQ}\n}", "github": "", "project": "", "reviewers": "msez;t1ZP;5kY2", "site": "https://openreview.net/forum?id=9s7QooDInQ", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;3", "excitement": "3;4;4", "reproducibility": "3;4;4", "correctness": "2;4;4", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 10, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;", "linkedin": "xyzhang-cuhk/;;;;", "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Chinese University of Hong Kong;Tencent", "aff_unique_dep": ";Tencent AI Lab", "aff_unique_url": "https://www.cuhk.edu.hk;https://ai.tencent.com", "aff_unique_abbr": "CUHK;Tencent AI Lab", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "9z2yznFVw5", "title": "Decomposed Prompt Tuning via Low-Rank Reparameterization", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "While prompt tuning approaches have achieved competitive performance with high efficiency, we observe that they invariably employ the same initialization process, wherein the soft prompt is either randomly initialized or derived from an existing embedding vocabulary.\nIn contrast to these conventional methods, this study aims to investigate an alternative way to derive soft prompt. Our empirical studies show that the soft prompt typically exhibits a low ``intrinsic rank'' characteristic. With such observations, we propose decomposed prompt tuning, a novel approach that utilizes low-rank matrices to initialize the soft prompt. Through the low-rank reparameterization, our method significantly reduces the number of trainable parameters while maintaining effectiveness. 
Experimental results on the SuperGLUE benchmark in both high-resource and low-resource scenarios demonstrate the effectiveness of the proposed method.", "keywords": "prompt tuning;parameter-efficient tuning", "primary_area": "", "supplementary_material": "", "author": "Yao Xiao;Lu Xu;Jiaxi Li;Wei Lu;Xiaoli Li", "authorids": "~Yao_Xiao8;~Lu_Xu2;~Jiaxi_Li3;~Wei_Lu10;~Xiaoli_Li1", "gender": "M;;F;M;M", "homepage": "https://statnlp-research.github.io/people/;https://xuuuluuu.github.io/;;https://personal.ntu.edu.sg/xlli/;https://istd.sutd.edu.sg/people/faculty/lu-wei", "dblp": ";83/4243-7;;l/XiaoliLi.html;98/6613-11.html", "google_scholar": ";https://scholar.google.com/citations?hl=en;;E3yQKloAAAAJ;n41KN9AAAAAJ", "or_profile": "~Yao_Xiao8;~Lu_Xu2;~Jiaxi_Li3;~Xiaoli_Li1;~Wei_Lu9", "aff": "Singapore University of Technology and Design;;Singapore University of Technology and Design;A*STAR;Singapore University of Technology and Design", "aff_domain": "sutd.edu.sg;;sutd.edu.sg;a-star.edu.sg;sutd.edu.sg", "position": "PhD student;;PhD student;Principal Researcher;Associate Professor", "bibtex": "@inproceedings{\nxiao2023decomposed,\ntitle={Decomposed Prompt Tuning via Low-Rank Reparameterization},\nauthor={Yao Xiao and Lu Xu and Jiaxi Li and Wei Lu and Xiaoli Li},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=9z2yznFVw5}\n}", "github": "", "project": "", "reviewers": "4fcj;JCRp;1oRu", "site": "https://openreview.net/forum?id=9z2yznFVw5", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;3", "excitement": "3;3;4", "reproducibility": "4;4;4", "correctness": "3;4;4", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.0, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0000-0002-0762-6562;0000-0003-0827-0382", "linkedin": ";;https://linkedin.com/in/jiaxi-li-725759195;li-xiaoli-41027ba/;wei-lu-59aa9615/", "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Singapore University of Technology and Design;Agency for Science, Technology and Research", "aff_unique_dep": ";", "aff_unique_url": "https://www.sutd.edu.sg;https://www.a-star.edu.sg", "aff_unique_abbr": "SUTD;A*STAR", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Singapore" }, { "id": "9zZWPEo8et", "title": "LogicAttack: Adversarial Attacks for Evaluating Logical Consistency of Natural Language Inference", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "Recently Large Language Models (LLMs) such as GPT-3, ChatGPT, and FLAN have led to impressive progress in Natural Language Inference (NLI) tasks. However, these models may rely on simple heuristics or artifacts in the evaluation data to achieve their high performance, which suggests that they still suffer from logical inconsistency. To assess the logical consistency of these models, we propose a LogicAttack, a method to attack NLI models using diverse logical forms of premise and hypothesis, providing a more robust evaluation of their performance. Our approach leverages a range of inference rules from propositional logic, such as Modus Tollens and Bidirectional Dilemma, to generate effective adversarial attacks and identify common vulnerabilities across multiple NLI models. 
We achieve an average ~53% Attack Success Rate (ASR) across multiple logic-based attacks. Moreover, we demonstrate that incorporating generated attack samples into training enhances the logical reasoning ability of the target model and decreases its vulnerability to logic-based attacks. Data and source code are available at https://github.com/msantoshmadhav/LogicAttack.", "keywords": "Logical Reasoning;Large Language Models;Natural Language Inference;Adversarial Attacks", "primary_area": "", "supplementary_material": "", "author": "Mutsumi Nakamura;Santosh Mashetty;Mihir Parmar;Neeraj Varshney;Chitta Baral", "authorids": "~Mutsumi_Nakamura1;~Santosh_Mashetty1;~Mihir_Parmar1;~Neeraj_Varshney1;~Chitta_Baral1", "gender": ";M;M;M;M", "homepage": ";;;https://nrjvarshney.github.io/;http://chitta.orissalinks.com", "dblp": ";;253/6105;139/3970;b/ChittaBaral", "google_scholar": ";eQN-aNAAAAAJ;2UPwJC4AAAAJ;Ju9nR0IAAAAJ;9Yd716IAAAAJ", "or_profile": "~Mutsumi_Nakamura1;~Santosh_Mashetty1;~Mihir_Parmar1;~Neeraj_Varshney1;~Chitta_Baral1", "aff": ";Arizona State University;Arizona State University;Tencent AI Lab;Arizona State University", "aff_domain": ";asu.edu;asu.edu;tencent.com;asu.edu", "position": ";PhD student;PhD student;Intern;Full Professor", "bibtex": "@inproceedings{\nnakamura2023logicattack,\ntitle={LogicAttack: Adversarial Attacks for Evaluating Logical Consistency of Natural Language Inference},\nauthor={Mutsumi Nakamura and Santosh Mashetty and Mihir Parmar and Neeraj Varshney and Chitta Baral},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=9zZWPEo8et}\n}", "github": "", "project": "", "reviewers": "59xA;jb1M;n17E", "site": "https://openreview.net/forum?id=9zZWPEo8et", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;5", "excitement": "3;3;3", "reproducibility": "4;5;4", "correctness": "2;3;3", "rating_avg": 3.0, "confidence_avg": 4.333333333333333, "excitement_avg": 3.0, "reproducibility_avg": 4.333333333333333, "correctness_avg": 2.6666666666666665, "replies_avg": 5, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;0000-0002-7549-723X", "linkedin": ";santoshmashetty/;mihir-parmar-b44003157/;neerajvarshney97/;chitta-baral-8a8438b", "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Arizona State University;Tencent", "aff_unique_dep": ";Tencent AI Lab", "aff_unique_url": "https://www.asu.edu;https://ai.tencent.com", "aff_unique_abbr": "ASU;Tencent AI Lab", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "United States;China" }, { "id": "A0xVOahTiw", "title": "MaNtLE: Model-agnostic Natural Language Explainer", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Understanding the internal reasoning behind the predictions of machine learning systems is increasingly vital, given their rising adoption and acceptance. While previous approaches, such as LIME generate algorithmic explanations by attributing importance to input features for individual examples, recent research indicates that practitioners prefer examining language explanations that explain sub-groups of examples (Lakkaraju et al., 2022). In this paper, we introduce MaNtLE, a model-agnostic natural language explainer that analyzes a set of classifier predictions and generates faithful natural language explanations of classifier rationale for structured classification tasks. 
MaNtLE uses multi-task training on thousands of synthetic classification tasks to generate faithful explanations. Our experiments indicate that, on average, MaNtLE-generated explanations are at least 11% more faithful compared to LIME and Anchors explanations across three tasks. Human evaluations demonstrate that users can better predict model behavior using explanations from MaNtLE compared to other techniques.", "keywords": "explainable AI;interpretability", "primary_area": "", "supplementary_material": "", "author": "Rakesh R Menon;Kerem Zaman;Shashank Srivastava", "authorids": "~Rakesh_R_Menon3;~Kerem_Zaman1;~Shashank_Srivastava1", "gender": "M;M;M", "homepage": "http://keremzaman.github.io/;https://www.ssriva.com/;https://cs.unc.edu/~rrmenon", "dblp": "318/2866;;206/6504.html", "google_scholar": "https://scholar.google.co.in/citations?user=sQ_npIsAAAAJ;-vKI5s0AAAAJ;GyFb98kAAAAJ", "or_profile": "~Kerem_Zaman1;~Shashank_Srivastava1;~Rakesh_R_Menon2", "aff": "Department of Computer Science, University of North Carolina at Chapel Hill;University of North Carolina at Chapel Hill;Department of Computer Science, University of North Carolina, Chapel Hill", "aff_domain": "cs.unc.edu;unc.edu;cs.unc.edu", "position": "PhD student;Assistant Professor;PhD student", "bibtex": "@inproceedings{\nmenon2023mantle,\ntitle={MaNt{LE}: Model-agnostic Natural Language Explainer},\nauthor={Rakesh R Menon and Kerem Zaman and Shashank Srivastava},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=A0xVOahTiw}\n}", "github": "", "project": "", "reviewers": "CbXw;tLzw;vjwc;P3Ea", "site": "https://openreview.net/forum?id=A0xVOahTiw", "pdf_size": 0, "rating": "3;3;3;3", "confidence": "3;3;4;3", "excitement": "4;3;3;2", "reproducibility": "3;3;3;2", "correctness": "4;3;3;3", "rating_avg": 3.0, "confidence_avg": 3.25, "excitement_avg": 3.0, "reproducibility_avg": 2.75, "correctness_avg": 3.25, "replies_avg": 14, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": ";;", "aff_unique_index": "0;1;1", "aff_unique_norm": "University of North Carolina at Chapel Hill;University of North Carolina", "aff_unique_dep": "Department of Computer Science;", "aff_unique_url": "https://www.unc.edu;https://www.unc.edu", "aff_unique_abbr": "UNC Chapel Hill;UNC", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Chapel Hill", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "A2oBdekFgv", "title": "Dialogue Medical Information Extraction with Medical-Item Graph and Dialogue-Status Enriched Representation", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "The multi-turn doctor-patient dialogue includes rich medical knowledge, like the symptoms of the patient, the diagnosis and medication suggested by the doctor. If mined and represented properly, such medical knowledge can benefit a large range of clinical applications, including diagnosis assistance and medication recommendation. To derive structured knowledge from free text dialogues, we target a critical task: the Dialogue Medical Information Extraction (DMIE). DMIE aims to detect pre-defined clinical meaningful medical items (symptoms, surgery, etc.) as well as their statuses (positive, negative, etc.) from the dialogue. Existing approaches mainly formulate DMIE as a multi-label classification problem and ignore the relationships among medical items and statuses. 
Different from previous approaches, we propose a heterogeneous graph to model the relationship between items. We further propose two consecutive attention based modules to enrich the item representation with the dialogue and status. In this manner, we are able to model the relationships among medical items and statuses in the DMIE task. Experimental results on the public benchmark data set show that the proposed model outperforms previous works and achieves the state-of-the-art performance.", "keywords": "Dialogue medical information extraction;Multi-label text classification;Graph neural network;Natural language processing", "primary_area": "", "supplementary_material": "", "author": "Lei Gao;Xinnan Zhang;Xian Wu;Shen Ge;Yefeng Zheng", "authorids": "~Lei_Gao2;~Xinnan_Zhang1;~Xian_Wu1;~Shen_Ge1;~Yefeng_Zheng3", "gender": "M;M;Not Specified;M;M", "homepage": "https://github.com/zhangzuizui;;;https://github.com/gao-lex;https://en.westlake.edu.cn/faculty/yefeng-zheng.html", "dblp": ";03/5595;;;44/6510", "google_scholar": ";lslB5jkAAAAJ;;;vAIECxgAAAAJ", "or_profile": "~Xinnan_Zhang1;~Xian_Wu1;~Shen_Ge1;~Gao_Lei1;~Yefeng_Zheng2", "aff": "ByteDance Inc.;Tencent;Tencent;;Tencent Jarvis Lab", "aff_domain": "bytedance.com;tencent.com;tencent.com;;tencent.com", "position": "Researcher;Principal Researcher;Researcher;;Director", "bibtex": "@inproceedings{\ngao2023dialogue,\ntitle={Dialogue Medical Information Extraction with Medical-Item Graph and Dialogue-Status Enriched Representation},\nauthor={Lei Gao and Xinnan Zhang and Xian Wu and Shen Ge and Yefeng Zheng},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=A2oBdekFgv}\n}", "github": "", "project": "", "reviewers": "hgH8;DNoq;4JHj", "site": "https://openreview.net/forum?id=A2oBdekFgv", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;3;4", "excitement": "3;4;3", "reproducibility": "4;4;4", "correctness": "3;4;3", "rating_avg": 3.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.0, "correctness_avg": 3.3333333333333335, "replies_avg": 12, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0003-1118-9710;;;0000-0003-2195-2847", "linkedin": "https://www.linkedin.cn/injobs/in/xinnan-zhang-993aa1227;;;;yefeng-zheng-bb45641/?originalSubdomain=cn", "aff_unique_index": "0;1;1;1", "aff_unique_norm": "ByteDance;Tencent", "aff_unique_dep": ";Tencent Holdings Limited", "aff_unique_url": "https://www.bytedance.com;https://www.tencent.com", "aff_unique_abbr": "ByteDance;Tencent", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "A68W11vA8o", "title": "Skill-Based Few-Shot Selection for In-Context Learning", "track": "main", "status": "Long Main", "tldr": "", "abstract": "*In-context learning* is the paradigm that adapts large language models to downstream tasks by providing a few examples.\n*Few-shot selection*---selecting appropriate examples for each test instance separately---is important for in-context learning.\nIn this paper, we propose **Skill-KNN**, a skill-based few-shot selection method for in-context learning.\nThe key advantages of Skill-KNN include: (1) it addresses the problem that existing methods based on pre-trained embeddings can be easily biased by surface natural language features that are not important for the target task; (2) it does not require training or 
fine-tuning of any models, making it suitable for frequently expanding or changing example banks.\nThe key insight is to optimize the inputs fed into the embedding model, rather than tuning the model itself.\nTechnically, Skill-KNN generates the skill-based descriptions for each test case and candidate example by utilizing a pre-processing few-shot prompting, thus eliminating unimportant surface features.\nExperimental results across five cross-domain semantic parsing datasets and six backbone models show that Skill-KNN significantly outperforms existing methods.", "keywords": "In-Context Learning;Few-Shot Selection;Semantic Parsing", "primary_area": "", "supplementary_material": "", "author": "Shengnan An;Bo Zhou;Zeqi Lin;Qiang Fu;Bei Chen;Nanning Zheng;Weizhu Chen;Jian-Guang Lou", "authorids": "~Shengnan_An1;~Bo_Zhou11;~Zeqi_Lin1;~Qiang_Fu7;~Bei_Chen3;~Nanning_Zheng1;~Weizhu_Chen1;~Jian-Guang_Lou1", "gender": "M;M;M;M;F;M;M;M", "homepage": "https://shengnanan.github.io/;https://www.cnblogs.com/smartisn/;https://www.microsoft.com/en-us/research/people/zelin/;;http://ml.cs.tsinghua.edu.cn/~beichen/;;https://www.microsoft.com/en-us/research/people/wzchen/;https://www.microsoft.com/en-us/research/people/jlou/", "dblp": "267/9518;;https://dblp.uni-trier.de/pid/155/4370.html;;;07/256-1;79/2536;37/1917", "google_scholar": "oPiRHWMAAAAJ;;;bwTLZSIAAAAJ;Po65v_MAAAAJ;https://scholar.google.com/citations?hl=zh-CN;LG_E-4EAAAAJ;alDxINIAAAAJ", "or_profile": "~Shengnan_An1;~Bo_Zhou11;~Zeqi_Lin1;~Qiang_Fu7;~Bei_Chen3;~Nanning_Zheng1;~Weizhu_Chen1;~Jian-Guang_Lou1", "aff": "Microsoft;Northeastern University;Microsoft Research;Microsoft;Microsoft;Xi'an Jiaotong University;Microsoft GenAI;Microsoft Research Asia", "aff_domain": "microsoft.com;neu.edu.cn;microsoft.com;microsoft.com;microsoft.com;xjtu.edu.cn;microsoft.com;microsoft.com", "position": "Intern;MS student;Researcher;Researcher;Researcher;Full Professor;Vice President;Principal Researcher", "bibtex": "@inproceedings{\nan2023skillbased,\ntitle={Skill-Based Few-Shot Selection for In-Context Learning},\nauthor={Shengnan An and Bo Zhou and Zeqi Lin and Qiang Fu and Bei Chen and Nanning Zheng and Weizhu Chen and Jian-Guang Lou},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=A68W11vA8o}\n}", "github": "", "project": "", "reviewers": "khfp;zEH1;CEQW", "site": "https://openreview.net/forum?id=A68W11vA8o", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;3;4", "excitement": "4;3;3", "reproducibility": "4;4;4", "correctness": "4;4;3", "rating_avg": 5.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.0, "correctness_avg": 3.6666666666666665, "replies_avg": 9, "authors#_avg": 8, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0000-0002-5821-7267;;;;", "linkedin": ";;;qiang-fu-08301285/;;;;", "aff_unique_index": "0;1;0;0;0;2;0;0", "aff_unique_norm": "Microsoft;Northeastern University;Xi'an Jiao Tong University", "aff_unique_dep": "Microsoft Corporation;;", "aff_unique_url": "https://www.microsoft.com;https://www.northeastern.edu;https://www.xjtu.edu.cn", "aff_unique_abbr": "Microsoft;NEU;XJTU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Asia", "aff_country_unique_index": "0;0;0;0;0;1;0;1", "aff_country_unique": "United States;China" }, { "id": "A6FGmwsH7x", "title": "ByteSized32: A Corpus and Challenge Task for Generating Task-Specific World Models Expressed as Text Games", 
"track": "main", "status": "Long Main", "tldr": "", "abstract": "In this work we investigate the capacity of language models to generate explicit, interpretable, and interactive world models of scientific and common-sense reasoning tasks. We operationalize this as a task of generating text games, expressed as hundreds of lines of Python code. To facilitate this task, we introduce ByteSized32, a corpus of 32 reasoning-focused text games totalling 20k lines of Python code. We empirically demonstrate that GPT-4 can use these games as templates for single-shot in-context learning, successfully producing runnable games on unseen topics in 28% of cases. When allowed to self-reflect on program errors, game runnability substantially increases to 58%. While evaluating simulation fidelity is labor intensive, we introduce a suite of automated metrics to assess game fidelity, technical validity, adherence to task specifications, and winnability, showing a high-degree of agreement with expert human ratings. We pose this as a challenge task to spur further development at the juncture of world modeling and code generation.", "keywords": "text games;code generation;simulation", "primary_area": "", "supplementary_material": "", "author": "Ruoyao Wang;Graham Todd;Xingdi Yuan;Ziang Xiao;Marc-Alexandre C\u00f4t\u00e9;Peter Jansen", "authorids": "~Ruoyao_Wang1;~Graham_Todd1;~Xingdi_Yuan2;~Ziang_Xiao1;~Marc-Alexandre_C\u00f4t\u00e92;~Peter_Jansen1", "gender": "M;;;;M;M", "homepage": "https://wsxzwps.github.io/;;;http://www.cognitiveai.org;https://www.microsoft.com/en-us/research/people/macote;https://xingdi-eric-yuan.github.io/", "dblp": ";;196;72/5962;118/9636;40/10147", "google_scholar": ";NyBWyCIAAAAJ;MjkODLEAAAAJ;wc1Hbl8AAAAJ;https://scholar.google.ca/citations?user=L83CE5gAAAAJ;hYfE-B8AAAAJ", "or_profile": "~Ruoyao_Wang1;~Graham_Todd1;~Ziang_Xiao1;~Peter_Jansen1;~Marc-Alexandre_Cote1;~Eric_Yuan1", "aff": "University of Arizona;New York University;Department of Computer Science, Whiting School of Engineering;University of Arizona;Microsoft;Microsoft Research", "aff_domain": "arizona.edu;nyu.edu;cs.jhu.edu;arizona.edu;microsoft.com;microsoft.com", "position": "PhD student;PhD student;Assistant Professor;Assistant Professor;Principal Researcher;Senior Researcher", "bibtex": "@inproceedings{\nwang2023bytesized,\ntitle={ByteSized32: A Corpus and Challenge Task for Generating Task-Specific World Models Expressed as Text Games},\nauthor={Ruoyao Wang and Graham Todd and Xingdi Yuan and Ziang Xiao and Marc-Alexandre C{\\^o}t{\\'e} and Peter Jansen},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=A6FGmwsH7x}\n}", "github": "", "project": "", "reviewers": "sjFC;QfRB;nYh3", "site": "https://openreview.net/forum?id=A6FGmwsH7x", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;3", "excitement": "4;4;4", "reproducibility": "3;5;4", "correctness": "3;4;4", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 4.0, "reproducibility_avg": 4.0, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;", "linkedin": ";;;;;", "aff_unique_index": "0;1;2;0;3;3", "aff_unique_norm": "University of Arizona;New York University;Johns Hopkins University;Microsoft", "aff_unique_dep": ";;Department of Computer Science;Microsoft Corporation", "aff_unique_url": 
"https://www.arizona.edu;https://www.nyu.edu;https://www.jhu.edu;https://www.microsoft.com", "aff_unique_abbr": "UA;NYU;JHU;Microsoft", "aff_campus_unique_index": "1", "aff_campus_unique": ";Baltimore", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "AAYXFyvNbr", "title": "Tokenization Consistency Matters for Generative Models on Extractive NLP Tasks", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "Generative models have been widely applied to solve extractive tasks, where parts of the input is extracted to form the desired output, and achieved significant success. For example, in extractive question answering (QA), generative models have constantly yielded state-of-the-art results. In this work, we study the issue of tokenization inconsistency that is commonly neglected in training these models. This issue damages the extractive nature of these tasks after the input and output are tokenized inconsistently by the tokenizer, and thus leads to performance drop as well as hallucination. We propose a simple yet effective fix to this issue and conduct a case study on extractive QA. We show that, with consistent tokenization, the model performs better in both in-domain and out-of-domain datasets, with a notable average of +1.7 F1 gain when a BART model is trained on SQuAD and evaluated on 8 QA datasets. Further, the model converges faster, and becomes less likely to generate out-of-context answers. Our results demonstrate the need for increased scrutiny regarding how tokenization is done in extractive tasks and the benefits of consistent tokenization during training.", "keywords": "Tokenization;Question Answering", "primary_area": "", "supplementary_material": "", "author": "Kaiser Sun;Peng Qi;Yuhao Zhang;Lan Liu;William Yang Wang;zhiheng huang", "authorids": "~Kaiser_Sun1;~Peng_Qi1;~Yuhao_Zhang3;~Lan_Liu3;~William_Yang_Wang2;~zhiheng_huang4", "gender": "Non-Binary;;;F;M;M", "homepage": "http://kaiserwholearns.github.io/;https://qipeng.me;http://yuhao.im/;;;https://www.cs.ucsb.edu/~william/", "dblp": ";59/9474-3.html;139/5876-4;;;08/9282", "google_scholar": "cfWUuRAAAAAJ;quJME0oAAAAJ;2d-0ybAAAAAJ;;uW8JaBsAAAAJ;gf8Ms_8AAAAJ", "or_profile": "~Kaiser_Sun1;~Peng_Qi1;~Yuhao_Zhang3;~Lan_Liu3;~zhiheng_huang4;~William_Wang1", "aff": "FAIR;Amazon;Amazon AWS AI;Amazon;Amazon;UC Santa Barbara", "aff_domain": "meta.com;amazon.com;amazon.com;amazon.com;amazon.com;ucsb.edu", "position": "Researcher;Researcher;Scientist;scientist;Principal Researcher;Full Professor", "bibtex": "@inproceedings{\nsun2023tokenization,\ntitle={Tokenization Consistency Matters for Generative Models on Extractive {NLP} Tasks},\nauthor={Kaiser Sun and Peng Qi and Yuhao Zhang and Lan Liu and William Yang Wang and zhiheng huang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=AAYXFyvNbr}\n}", "github": "", "project": "", "reviewers": "rr2n;Yyrb;gJMX;ty6n", "site": "https://openreview.net/forum?id=AAYXFyvNbr", "pdf_size": 0, "rating": "4;4;4;4", "confidence": "4;3;4;4", "excitement": "4;3;3;4", "reproducibility": "3;4;5;4", "correctness": "3;4;4;4", "rating_avg": 4.0, "confidence_avg": 3.75, "excitement_avg": 3.5, "reproducibility_avg": 4.0, "correctness_avg": 3.75, "replies_avg": 13, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0000-0003-2995-5129;;", "linkedin": "kaiser-sun-70858316b/;;;lan-liu-2516a0114/;;", "aff_unique_index": 
"0;1;1;1;1;2", "aff_unique_norm": "Meta;Amazon;University of California, Santa Barbara", "aff_unique_dep": "Facebook AI Research;Amazon.com, Inc.;", "aff_unique_url": "https://research.facebook.com;https://www.amazon.com;https://www.ucsb.edu", "aff_unique_abbr": "FAIR;Amazon;UCSB", "aff_campus_unique_index": "1", "aff_campus_unique": ";Santa Barbara", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "AAnYBhWKRv", "title": "FOCUS: Effective Embedding Initialization for Monolingual Specialization of Multilingual Models", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Using model weights pretrained on a high-resource language as a warm start can reduce the need for data and compute to obtain high-quality language models for other, especially low-resource, languages.\nHowever, if we want to use a new tokenizer specialized for the target language, we cannot transfer the source model's embedding matrix.\nIn this paper, we propose FOCUS - **F**ast **O**verlapping Token **C**ombinations **U**sing **S**parsemax, a novel embedding initialization method that effectively initializes the embedding matrix for a new tokenizer based on information in the source model's embedding matrix.\nFOCUS represents newly added tokens as combinations of tokens in the overlap of the source and target vocabularies.\nThe overlapping tokens are selected based on semantic similarity in an auxiliary static token embedding space.\nWe focus our study on using the multilingual XLM-R as a source model and empirically show that FOCUS outperforms random initialization and previous work on language modeling and on a range of downstream tasks (NLI, QA, and NER).\nWe publish our model checkpoints and code on GitHub.", "keywords": "model transfer;multilingual;crosslingual", "primary_area": "", "supplementary_material": "", "author": "Konstantin Dobler;Gerard de Melo", "authorids": "~Konstantin_Dobler1;~Gerard_de_Melo3", "gender": "M;M", "homepage": "https://konstantindobler.me;http://gerard.demelo.org/", "dblp": "314/6525;86/1747", "google_scholar": "fJEat40AAAAJ;https://scholar.google.com.tw/citations?user=WCQXaGkAAAAJ", "or_profile": "~Konstantin_Dobler1;~Gerard_Melo1", "aff": "Hasso Plattner Institute;University of Potsdam", "aff_domain": "hpi.de;uni-potsdam.de", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\ndobler2023focus,\ntitle={{FOCUS}: Effective Embedding Initialization for Monolingual Specialization of Multilingual Models},\nauthor={Konstantin Dobler and Gerard de Melo},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=AAnYBhWKRv}\n}", "github": "", "project": "", "reviewers": "s1L6;edfE;EJyH", "site": "https://openreview.net/forum?id=AAnYBhWKRv", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;4;3", "excitement": "4;4;4", "reproducibility": "4;4;4", "correctness": "4;4;4", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 4.0, "reproducibility_avg": 4.0, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-2930-2059", "linkedin": ";gdemelo/", "aff_unique_index": "0;1", "aff_unique_norm": "Hasso Plattner Institute;University of Potsdam", "aff_unique_dep": ";", "aff_unique_url": "https://www.hpi.de;https://www.uni-potsdam.de", "aff_unique_abbr": "HPI;UP", "aff_campus_unique_index": "", "aff_campus_unique": "", 
"aff_country_unique_index": "0;0", "aff_country_unique": "Germany" }, { "id": "AAuVIl8Aeo", "title": "Characterizing and Verifying Scientific Claims: Qualitative Causal Structure is All You Need", "track": "main", "status": "Long Main", "tldr": "", "abstract": "A scientific claim typically begins with the formulation of a research question or hypothesis, which is a tentative statement or proposition about a phenomenon or relationship between variables. Within the realm of scientific claim verification, considerable research efforts have been dedicated to attention architectures and leveraging the text comprehension capabilities of Pre-trained Language Models (PLMs), yielding promising performances. However, these models overlook the causal structure information inherent in scientific claims, thereby failing to establish a comprehensive chain of causal inference. This paper delves into the exploration to highlight the crucial role of qualitative causal structure in characterizing and verifying scientific claims based on evidence. We organize the qualitative causal structure into a heterogeneous graph and propose a novel attention-based graph neural network model to facilitate causal reasoning across relevant causally-potent factors. Our experiments demonstrate that by solely utilizing the qualitative causal structure, the proposed model achieves comparable performance to PLM-based models. Furthermore, by incorporating semantic features, our model outperforms state-of-the-art approaches comprehensively.", "keywords": "Scientific Claim Verification;Heterogeneous Graph;reasoning", "primary_area": "", "supplementary_material": "", "author": "Jinxuan Wu;WenHan Chao;Xian Zhou;Zhunchen Luo", "authorids": "~Jinxuan_Wu2;~WenHan_Chao1;~Xian_Zhou1;~Zhunchen_Luo2", "gender": "M;M;F;M", "homepage": "https://vulndetector.github.io/;;https://dblp.org/pid/09/3140-3.html;https://dblp.org/pid/82/11518.html", "dblp": ";83/4112;09/3140-3;82/11518.html", "google_scholar": ";;;https://scholar.google.co.uk/citations?user=-4u9k60AAAAJ", "or_profile": "~Jinxuan_Wu2;~WenHan_Chao1;~Xian_Zhou1;~Zhunchen_Luo2", "aff": "Beihang University;Beihang University\uff0c Beijing, China;PLA Academy of Military Science;National University of Defense Technology", "aff_domain": "buaa.edu.cn;buaa.edu.cn;pla.edu.cn;nudt.edu.cn", "position": "MS student;Associate Professor;Associate Professor;Researcher", "bibtex": "@inproceedings{\nwu2023characterizing,\ntitle={Characterizing and Verifying Scientific Claims: Qualitative Causal Structure is All You Need},\nauthor={Jinxuan Wu and WenHan Chao and Xian Zhou and Zhunchen Luo},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=AAuVIl8Aeo}\n}", "github": "", "project": "", "reviewers": "54T1;8wEx;7skt", "site": "https://openreview.net/forum?id=AAuVIl8Aeo", "pdf_size": 0, "rating": "4;4;4", "confidence": "2;3;4", "excitement": "4;4;4", "reproducibility": "4;4;4", "correctness": "3;4;4", "rating_avg": 4.0, "confidence_avg": 3.0, "excitement_avg": 4.0, "reproducibility_avg": 4.0, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0001-7465-1365;", "linkedin": ";;;", "aff_unique_index": "0;0;1;2", "aff_unique_norm": "Beihang University;PLA Academy of Military Science;National University of Defense Technology", "aff_unique_dep": ";;", "aff_unique_url": "http://www.buaa.edu.cn/;;http://www.nudt.edu.cn/", 
"aff_unique_abbr": "BUAA;;NUDT", "aff_campus_unique_index": "1", "aff_campus_unique": ";Beijing", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "ACogU4OVFK", "title": "From Speculation Detection to Trustworthy Relational Tuples in Information Extraction", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Speculation detection is an important NLP task to identify text factuality. However, the extracted speculative information (e.g., speculative polarity, cue, and scope) lacks structure and poses challenges for direct utilization in downstream tasks. Open Information Extraction (OIE), on the other hand, extracts structured tuples as facts, without examining the certainty of these tuples. Bridging this gap between speculation detection and information extraction becomes imperative to generate structured speculative information and trustworthy relational tuples. Existing studies on speculation detection are defined at sentence level; but even if a sentence is determined to be speculative, not all factual tuples extracted from it are speculative. In this paper, we propose to study speculations in OIE tuples and determine whether a tuple is speculative. We formally define the research problem of tuple-level speculation detection. We then conduct detailed analysis on the LSOIE dataset which provides labels for speculative tuples. Lastly, we propose a baseline model SpecTup for this new research task.", "keywords": "speculation detection;text classification;information extraction", "primary_area": "", "supplementary_material": "", "author": "Kuicai Dong;Aixin Sun;Jung-jae Kim;Xiaoli Li", "authorids": "~Kuicai_Dong1;~Aixin_Sun1;~Jung-jae_Kim1;~Xiaoli_Li1", "gender": "M;M;;M", "homepage": ";https://personal.ntu.edu.sg/axsun/;;https://personal.ntu.edu.sg/xlli/", "dblp": "292/3629;78/5155;21/4791;l/XiaoliLi.html", "google_scholar": "https://scholar.google.com/citations?hl=en;https://scholar.google.com.sg/citations?user=wyKGVKUAAAAJ;iMKgkrQAAAAJ;E3yQKloAAAAJ", "or_profile": "~Kuicai_Dong1;~Aixin_Sun1;~Jung-jae_Kim1;~Xiaoli_Li1", "aff": "Nanyang Technological University;Nanyang Technological University;A*STAR;A*STAR", "aff_domain": "e.ntu.edu.sg;ntu.edu.sg;a-star.edu.sg;a-star.edu.sg", "position": "PhD student;Associate Professor;Researcher;Principal Researcher", "bibtex": "@inproceedings{\ndong2023from,\ntitle={From Speculation Detection to Trustworthy Relational Tuples in Information Extraction},\nauthor={Kuicai Dong and Aixin Sun and Jung-jae Kim and Xiaoli Li},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=ACogU4OVFK}\n}", "github": "", "project": "", "reviewers": "6jWm;Qo8K;hXdC;g5sr", "site": "https://openreview.net/forum?id=ACogU4OVFK", "pdf_size": 0, "rating": "2;2;2;2", "confidence": "3;5;3;3", "excitement": "3;4;3;3", "reproducibility": "5;4;3;3", "correctness": "4;3;2;3", "rating_avg": 2.0, "confidence_avg": 3.5, "excitement_avg": 3.25, "reproducibility_avg": 3.75, "correctness_avg": 3.0, "replies_avg": 12, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0003-0764-4258;;0000-0002-0762-6562", "linkedin": ";aixin-sun-%E5%AD%99%E7%88%B1%E6%AC%A3-43056622/;;li-xiaoli-41027ba/", "aff_unique_index": "0;0;1;1", "aff_unique_norm": "Nanyang Technological University;Agency for Science, Technology and Research", "aff_unique_dep": ";", "aff_unique_url": "https://www.ntu.edu.sg;https://www.a-star.edu.sg", 
"aff_unique_abbr": "NTU;A*STAR", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Singapore" }, { "id": "AD0o090nDJ", "title": "An Exploration of Left-Corner Transformations", "track": "main", "status": "Long Main", "tldr": "", "abstract": "The left-corner transformation (Rosenkrantz and Lewis, 1970) is used to remove left recursion from context-free grammars, which is an important step towards making the grammar parsable top-down with simple techniques. This paper generalizes prior left-corner transformations to support semiring-weighted production rules and to provide finer-grained control over which left corners may be moved. Our generalized left-corner transformation (GLCT) arose from unifying the left-corner transformation and speculation transformation (Eisner and Blatz, 2007), originally for logic programming. Our new transformation and speculation define equivalent weighted languages. Yet, their derivation trees are structurally different in an important way: GLCT replaces left recursion with right recursion, and speculation does not. We also provide several technical results regarding the formal relationships between the outputs of GLCT, speculation, and the original grammar. Lastly, we empirically investigate the efficiency of GLCT for left-recursion elimination from grammars of nine languages.\n\nCode: https://github.com/rycolab/left-corner", "keywords": "left-corner transformation;top-down parsing;left-recursion;grammar transformations;speculation;formal language theory;weighted CFG;left-corner transform", "primary_area": "", "supplementary_material": "", "author": "Andreas Opedal;Eleftheria Tsipidi;Tiago Pimentel;Ryan Cotterell;Tim Vieira", "authorids": "~Andreas_Opedal1;~Eleftheria_Tsipidi1;~Tiago_Pimentel1;~Ryan_Cotterell1;~Tim_Vieira1", "gender": "M;;M;;M", "homepage": "https://opedal.github.io/;;https://tpimentelms.github.io/;;http://timvieira.github.io", "dblp": "292/2838;;203/8292;;127/0214", "google_scholar": "https://scholar.google.de/citations?hl=en;;XjZ8NRsAAAAJ;;Avtv7FkAAAAJ", "or_profile": "~Andreas_Opedal1;~Eleftheria_Tsipidi1;~Tiago_Pimentel1;~Ryan_Cotterell1;~Tim_Vieira1", "aff": "Department of Computer Science, ETHZ - ETH Zurich;;University of Cambridge;;Johns Hopkins University", "aff_domain": "inf.ethz.ch;;cam.ac.uk;;johnshopkins.edu", "position": "PhD student;;PhD student;;PhD student", "bibtex": "@inproceedings{\nopedal2023an,\ntitle={An Exploration of Left-Corner Transformations},\nauthor={Andreas Opedal and Eleftheria Tsipidi and Tiago Pimentel and Ryan Cotterell and Tim Vieira},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=AD0o090nDJ}\n}", "github": "", "project": "", "reviewers": "WpEo;FNxN;tJ8c", "site": "https://openreview.net/forum?id=AD0o090nDJ", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;4;4", "excitement": "4;4;2", "reproducibility": "5;0;4", "correctness": "4;4;4", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.0, "correctness_avg": 4.0, "replies_avg": 9, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;0000-0002-2043-1073", "linkedin": "andreasopedal;;;;tim-vieira-608b0396/", "aff_unique_index": "0;1;2", "aff_unique_norm": "ETH Zurich;University of Cambridge;Johns Hopkins University", "aff_unique_dep": "Department of Computer Science;;", "aff_unique_url": 
"https://www.ethz.ch;https://www.cam.ac.uk;https://www.jhu.edu", "aff_unique_abbr": "ETHZ;Cambridge;JHU", "aff_campus_unique_index": "0;1", "aff_campus_unique": "Zurich;Cambridge;", "aff_country_unique_index": "0;1;2", "aff_country_unique": "Switzerland;United Kingdom;United States" }, { "id": "ADHMUuN7CE", "title": "EXPLAIN, EDIT, GENERATE: Rationale-Sensitive Counterfactual Data Augmentation for Multi-hop Fact Verification", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Automatic multi-hop fact verification task has gained significant attention in recent years. Despite impressive results, these well-designed models perform poorly on out-of-domain data. One possible solution is to augment the training data with counterfactuals, which are generated by minimally altering the causal features of the original data. However, current counterfactual data augmentation techniques fail to handle multi-hop fact verification due to their incapability to preserve the complex logical relationships within multiple correlated texts. In this paper, we overcome this limitation by developing a rationale-sensitive method to generate linguistically diverse and label-flipping counterfactuals while preserving logical relationships. In specific, the diverse and fluent counterfactuals are generated via an Explain-Edit-Generate architecture. Moreover, the checking and filtering modules are proposed to regularize the counterfactual data with logical relations and flipped labels. Experimental results show that the proposed approach outperforms the SOTA baselines and can generate linguistically diverse counterfactual data without disrupting their logical relationships.", "keywords": "multi-hop fact verification;counterfactual data augmentation;out-of-domain generalization", "primary_area": "", "supplementary_material": "", "author": "Yingjie Zhu;Jiasheng Si;Yibo Zhao;Haiyang Zhu;Deyu Zhou;Yulan He", "authorids": "~Yingjie_Zhu1;~Jiasheng_Si1;~Yibo_Zhao3;~Haiyang_Zhu1;~Deyu_Zhou1;~Yulan_He1", "gender": "M;M;M;M;M;F", "homepage": "https://aaandy-zhu.github.io/;https://jasenchn.github.io/;http://palm.seu.edu.cn/homepage/zhaoyibo/index.html;http://palm.seu.edu.cn/homepage/zhuhaiyang/index.html;http://palm.seu.edu.cn/zhoudeyu/Home.html;https://www.kcl.ac.uk/people/yulan-he", "dblp": "11/10627;238/9187;;;79/2854;75/5430", "google_scholar": "https://scholar.google.com.hk/citations?user=fQdWCnAAAAAJ;https://scholar.google.com.hk/citations?user=mla8MA4AAAAJ;;;DvVelLcAAAAJ;https://scholar.google.co.uk/citations?user=SP9r32UAAAAJ", "or_profile": "~Yingjie_Zhu1;~Jiasheng_Si1;~Yibo_Zhao3;~Haiyang_Zhu1;~Deyu_Zhou1;~Yulan_He1", "aff": "Southeast University;Southeast University;Southeast University;Southeast University;Southeast University;King's College London, University of London", "aff_domain": "seu.edu.cn;seu.edu.cn;seu.edu.cn;seu.edu.cn;seu.edu.cn;kcl.ac.uk", "position": "MS student;PhD student;MS student;MS student;Full Professor;Full Professor", "bibtex": "@inproceedings{\nzhu2023explain,\ntitle={{EXPLAIN}, {EDIT}, {GENERATE}: Rationale-Sensitive Counterfactual Data Augmentation for Multi-hop Fact Verification},\nauthor={Yingjie Zhu and Jiasheng Si and Yibo Zhao and Haiyang Zhu and Deyu Zhou and Yulan He},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=ADHMUuN7CE}\n}", "github": "", "project": "", "reviewers": "gZJT;HUk1;gQhw", "site": "https://openreview.net/forum?id=ADHMUuN7CE", "pdf_size": 0, "rating": "5;5;5", 
"confidence": "3;2;3", "excitement": "3;4;4", "reproducibility": "4;3;4", "correctness": "3;4;3", "rating_avg": 5.0, "confidence_avg": 2.6666666666666665, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0009-0009-3078-1964;0000-0002-6870-5678;;;;0000-0003-3948-5845", "linkedin": ";;;;;yulan-he-277234a/?originalSubdomain=uk", "aff_unique_index": "0;0;0;0;0;1", "aff_unique_norm": "Southeast University;King's College London", "aff_unique_dep": ";", "aff_unique_url": "https://www.seu.edu.cn/;https://www.kcl.ac.uk", "aff_unique_abbr": "SEU;KCL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;1", "aff_country_unique": "China;United Kingdom" }, { "id": "ADsEdyI32n", "title": "LLMLingua: Compressing Prompts for Accelerated Inference of Large Language Models", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Large language models (LLMs) have been applied in various applications due to their astonishing capabilities. With advancements in technologies such as chain-of-thought (CoT) prompting and in-context learning (ICL), the prompts fed to LLMs are becoming increasingly lengthy, even exceeding tens of thousands of tokens. To accelerate model inference and reduce cost, this paper presents LLMLingua, a coarse-to-fine prompt compression method that involves a budget controller to maintain semantic integrity under high compression ratios, a token-level iterative compression algorithm to better model the interdependence between compressed contents, and an instruction tuning based method for distribution alignment between language models. 
We conduct experiments and analysis over four datasets from different scenarios, i.e., GSM8K, BBH, ShareGPT, and Arxiv-March23; showing that the proposed approach yields state-of-the-art performance and allows for up to 20x compression with little performance loss.", "keywords": "Prompt Compression;LLMs;Inference Acceleration;Black-box LLMs;Efficient LLMs", "primary_area": "", "supplementary_material": "", "author": "Huiqiang Jiang;Qianhui Wu;Chin-Yew Lin;Yuqing Yang;Lili Qiu", "authorids": "~Huiqiang_Jiang2;~Qianhui_Wu1;~Chin-Yew_Lin1;~Yuqing_Yang1;~Lili_Qiu3", "gender": "M;F;M;;", "homepage": "https://hqjiang.com;https://qianhuiwu.github.io/;https://www.microsoft.com/en-us/research/people/cyl/;;https://www.microsoft.com/en-us/research/people/liliqiu/", "dblp": "204/2497;204/2307;64/6843;91/9064-1.html;", "google_scholar": "99KtvpYAAAAJ;BLZieokAAAAJ;cDF07aYAAAAJ;4BtNQAEAAAAJ;", "or_profile": "~Huiqiang_Jiang2;~Qianhui_Wu1;~Chin-Yew_Lin1;~Yuqing_Yang1;~Lili_Qiu3", "aff": "Microsoft;Microsoft;Microsoft;Microsoft Research;University of Texas at Austin", "aff_domain": "microsoft.com;microsoft.com;microsoft.com;research.microsoft.com;utexas.edu", "position": "RSDE;Researcher;Senior Principal Research Manager;Researcher;Full Professor", "bibtex": "@inproceedings{\njiang2023llmlingua,\ntitle={{LLML}ingua: Compressing Prompts for Accelerated Inference of Large Language Models},\nauthor={Huiqiang Jiang and Qianhui Wu and Chin-Yew Lin and Yuqing Yang and Lili Qiu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=ADsEdyI32n}\n}", "github": "", "project": "", "reviewers": "ZEXc;7WkN;89tu", "site": "https://openreview.net/forum?id=ADsEdyI32n", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;4;4", "excitement": "3;4;4", "reproducibility": "4;5;4", "correctness": "4;4;4", "rating_avg": 5.0, "confidence_avg": 4.0, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.333333333333333, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-1327-4882;;;0000-0003-3518-5212;", "linkedin": ";qianhui-wu-2b1608b7?originalSubdomain=cn;chin-yew-lin-32585a4;;", "aff_unique_index": "0;0;0;0;1", "aff_unique_norm": "Microsoft;University of Texas at Austin", "aff_unique_dep": "Microsoft Corporation;", "aff_unique_url": "https://www.microsoft.com;https://www.utexas.edu", "aff_unique_abbr": "Microsoft;UT Austin", "aff_campus_unique_index": "1", "aff_campus_unique": ";Austin", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "AEkFAAprvF", "title": "ViStruct: Visual Structural Knowledge Extraction via Curriculum Guided Code-Vision Representation", "track": "main", "status": "Long Main", "tldr": "", "abstract": "State-of-the-art vision-language models (VLMs) still have limited performance in structural knowledge extraction, such as relations between objects. In this work, we present ViStruct, a training framework to learn VLMs for effective visual structural knowledge extraction. Two novel designs are incorporated. First, we propose to leverage the inherent structure of programming language to depict visual structural information. This approach enables explicit and consistent representation of visual structural information of multiple granularities, such as concepts, relations, and events, in a well-organized structured format. 
Second, we introduce curriculum-based learning for VLMs to progressively comprehend visual structures, from fundamental visual concepts to intricate event structures. Our intuition is that lower-level knowledge may contribute to complex visual structure understanding. Furthermore, we compile and release a collection of datasets tailored for visual structural knowledge extraction. We adopt a weakly-supervised approach to directly generate visual event structures from captions for ViStruct training, capitalizing on abundant image-caption pairs from the web. In experiments, we evaluate ViStruct on visual structure prediction tasks, demonstrating its effectiveness in improving the understanding of visual structures. The code will be made public to facilitate future research.", "keywords": "Visual Structural Knowledge Extraction;Code-Vision Representation;Curriculum-based Learning", "primary_area": "", "supplementary_material": "", "author": "Yangyi Chen;Xingyao Wang;Manling Li;Derek Hoiem;Heng Ji", "authorids": "~Yangyi_Chen1;~Xingyao_Wang1;~Manling_Li1;~Derek_Hoiem1;~Heng_Ji3", "gender": "M;M;F;M;F", "homepage": "https://yangyi-chen.github.io/;https://xwang.dev;https://limanling.github.io/;http://dhoiem.cs.illinois.edu/;http://blender.cs.illinois.edu/hengji.html", "dblp": "05/10083;264/9892;178/3620;08/6948;", "google_scholar": "https://scholar.google.com/citations?hl=en;F7qq3YcAAAAJ;6U4SXnUAAAAJ;8Sfj7q8AAAAJ;z7GCqT4AAAAJ", "or_profile": "~Yangyi_Chen1;~Xingyao_Wang1;~Manling_Li1;~Derek_Hoiem1;~Heng_Ji3", "aff": "Department of Computer Science, University of Illinois at Urbana-Champaign;Research, Google;University of Illinois, Urbana Champaign;Reconstruct;University of Illinois, Urbana-Champaign", "aff_domain": "cs.illinois.edu;research.google.com;illinois.edu;reconstructinc.com;uiuc.edu", "position": "PhD student;Intern;PhD student;Chief Scientist;Full Professor", "bibtex": "@inproceedings{\nchen2023vistruct,\ntitle={ViStruct: Visual Structural Knowledge Extraction via Curriculum Guided Code-Vision Representation},\nauthor={Yangyi Chen and Xingyao Wang and Manling Li and Derek Hoiem and Heng Ji},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=AEkFAAprvF}\n}", "github": "", "project": "", "reviewers": "YaB4;4qDb;CE6x;cMsY", "site": "https://openreview.net/forum?id=AEkFAAprvF", "pdf_size": 0, "rating": "5;5;5;5", "confidence": "4;4;4;4", "excitement": "4;4;4;4", "reproducibility": "4;5;4;4", "correctness": "3;4;2;3", "rating_avg": 5.0, "confidence_avg": 4.0, "excitement_avg": 4.0, "reproducibility_avg": 4.25, "correctness_avg": 3.0, "replies_avg": 13, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-3483-8624;;;", "linkedin": "yangyi-chen-4006a11b2/;;;;", "aff_unique_index": "0;1;0;3", "aff_unique_norm": "University of Illinois Urbana-Champaign;Google;;University of Illinois", "aff_unique_dep": "Department of Computer Science;Google Research;;", "aff_unique_url": "https://illinois.edu;https://research.google;;https://illinois.edu", "aff_unique_abbr": "UIUC;Google;;UIUC", "aff_campus_unique_index": "0;1;0;0", "aff_campus_unique": "Urbana-Champaign;Mountain View;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States;" }, { "id": "AGVANImv7S", "title": "Systematic Assessment of Factual Knowledge in Large Language Models", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "Previous studies have relied on existing 
question-answering benchmarks to evaluate the knowledge stored in large language models (LLMs). However, this approach has limitations regarding factual knowledge coverage, as it mostly focuses on generic domains which may overlap with the pretraining data. This paper proposes a framework to systematically assess the factual knowledge of LLMs by leveraging knowledge graphs (KGs). Our framework automatically generates a set of questions and expected answers from the facts stored in a given KG, and then evaluates the accuracy of LLMs in answering these questions. We systematically evaluate the state-of-the-art LLMs with KGs in generic and specific domains. The experiment shows that ChatGPT is consistently the top performer across all domains. We also find that LLMs performance depends on the instruction finetuning, domain and question complexity and is prone to adversarial context.", "keywords": "large language models;hallucination;knowledge graph", "primary_area": "", "supplementary_material": "", "author": "LINHAO LUO;Trang Vu;Dinh Phung;Gholamreza Haffari", "authorids": "~LINHAO_LUO1;~Thuy-Trang_Vu1;~Dinh_Phung2;~Gholamreza_Haffari2", "gender": "M;;M;M", "homepage": "https://rmanluo.github.io/;;https://research.monash.edu/en/persons/dinh-phung;https://rezahaffari.github.io/HomePage/HomePage.html", "dblp": "251/5530;228/5538;71/5859;", "google_scholar": "https://scholar.google.com.hk/citations?user=RO46HpcAAAAJ;https://scholar.google.com.au/citations?user=cx2eAe0AAAAJ;https://scholar.google.com.au/citations?user=OtA9SwIAAAAJ;https://scholar.google.com.tw/citations?user=Perjx5EAAAAJ", "or_profile": "~LINHAO_LUO1;~Thuy-Trang_Vu1;~Dinh_Phung1;~Gholamreza_Haffari1", "aff": "Monash University;Monash University;Monash University;Monash University", "aff_domain": "monash.edu;monash.edu;monash.edu;monash.edu", "position": "PhD student;Postdoc;Full Professor;Full Professor", "bibtex": "@inproceedings{\nluo2023systematic,\ntitle={Systematic Assessment of Factual Knowledge in Large Language Models},\nauthor={LINHAO LUO and Trang Vu and Dinh Phung and Gholamreza Haffari},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=AGVANImv7S}\n}", "github": "", "project": "", "reviewers": "ezC3;yo9m;HmbN;njva", "site": "https://openreview.net/forum?id=AGVANImv7S", "pdf_size": 0, "rating": "3;3;3;3", "confidence": "5;4;4;4", "excitement": "3;2;2;3", "reproducibility": "4;5;3;3", "correctness": "3;2;3;3", "rating_avg": 3.0, "confidence_avg": 4.25, "excitement_avg": 2.5, "reproducibility_avg": 3.75, "correctness_avg": 2.75, "replies_avg": 13, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0003-0027-942X;;0000-0002-9977-8247;", "linkedin": "linhao-luo-36b489134/;;https://linkedin.com/in/dinh-phung-6b537a6;gholamrezahaffari/?originalSubdomain=au", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Monash University", "aff_unique_dep": "", "aff_unique_url": "https://www.monash.edu", "aff_unique_abbr": "Monash", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Australia" }, { "id": "AJDSZ2YVI6", "title": "PALS: Personalized Active Learning for Subjective Tasks in NLP", "track": "main", "status": "Long Main", "tldr": "", "abstract": "For subjective NLP problems, such as classification of hate speech, aggression, or emotions, personalized solutions can be exploited. 
Then, the learned models infer about the perception of the content independently for each reader. To acquire training data, texts are commonly randomly assigned to users for annotation, which is expensive and highly inefficient. Therefore, for the first time, we suggest applying an active learning paradigm in a personalized context to better learn individual preferences. It aims to alleviate the labeling effort by selecting more relevant training samples. In this paper, we present novel Personalized Active Learning techniques for Subjective NLP tasks (PALS) to either reduce the cost of the annotation process or to boost the learning effect. Our five new measures allow us to determine the relevance of a text in the context of learning users personal preferences. We validated them on three datasets: Wiki discussion texts individually labeled with aggression and toxicity, and on Unhealthy Conversations dataset. Our PALS techniques outperform random selection even by more than 30%. They can also be used to reduce the number of necessary annotations while maintaining a given quality level. Personalized annotation assignments based on our controversy measure decrease the amount of data needed to just 25%-40% of the initial size.", "keywords": "personalization;user modeling;active learning;natural language processing;subjective NLP tasks;subjective NLP", "primary_area": "", "supplementary_material": "", "author": "Kamil Kanclerz;Konrad Karanowski;Julita Bielaniewicz;Marcin Gruza;Piotr Mi\u0142kowski;Jan Kocon;Przemyslaw Kazienko", "authorids": "~Kamil_Kanclerz1;~Konrad_Karanowski1;~Julita_Bielaniewicz1;~Marcin_Gruza2;~Piotr_Mi\u0142kowski1;~Jan_Kocon1;~Przemyslaw_Kazienko1", "gender": "M;;F;M;;M;M", "homepage": "https://ai.pwr.edu.pl/author/kamil-kanclerz/;;;;;;https://kazienko.eu/en", "dblp": "277/8681;;279/1447;297/8679.html;;117/2896;k/PrzemyslawKazienko", "google_scholar": "eLnKhqkAAAAJ;;https://scholar.google.pl/citations?user=rAdCZncAAAAJ;;;pmQHb5IAAAAJ;https://scholar.google.pl/citations?user=cxLgNccAAAAJ", "or_profile": "~Kamil_Kanclerz1;~Konrad_Karanowski1;~Julita_Bielaniewicz1;~Marcin_Gruza2;~Piotr_Mi\u0142kowski1;~Jan_Kocon1;~Przemyslaw_Kazienko1", "aff": "Wroclaw University of Science and Technology;;Wroclaw University of Science and Technology;;;Wroclaw University of Science and Technology;Wroclaw University of Science and Technology", "aff_domain": "pwr.edu.pl;;pwr.edu.pl;;;pwr.edu.pl;pwr.edu.pl", "position": "PhD student;;PhD student;;;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nkanclerz2023pals,\ntitle={{PALS}: Personalized Active Learning for Subjective Tasks in {NLP}},\nauthor={Kamil Kanclerz and Konrad Karanowski and Julita Bielaniewicz and Marcin Gruza and Piotr Mi{\\l}kowski and Jan Kocon and Przemyslaw Kazienko},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=AJDSZ2YVI6}\n}", "github": "", "project": "", "reviewers": "FazH;66GU;X3Lx", "site": "https://openreview.net/forum?id=AJDSZ2YVI6", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;3", "excitement": "4;4;4", "reproducibility": "4;4;3", "correctness": "4;4;3", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 4.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": 
"0000-0002-7375-7544;;my-orcid?orcid=0000-0003-3400-7721;;;my-orcid?orcid=0000-0002-7665-6896;0000-0001-5868-356X", "linkedin": "kamil-kanclerz/;;julita-bielaniewicz/;;;jankocon/;", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Wroclaw University of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.pwr.edu.pl", "aff_unique_abbr": "WUST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Poland" }, { "id": "AQiuwWLvim", "title": "Conditioning on Dialog Acts improves Empathy Style Transfer", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "We explore the role of dialog acts in style transfer, specifically empathy style transfer -- rewriting a sentence to make it more empathetic without changing its meaning. Specifically, we use two novel few-shot prompting strategies: target prompting, which only uses examples of the target style (unlike traditional prompting with source/target pairs), and dialog-act-conditioned prompting, which first estimates the dialog act of the source sentence and then makes it more empathetic using few-shot examples of the same dialog act. Our study yields two key findings: (1) Target prompting typically improves empathy more effectively while maintaining the same level of semantic similarity; (2) Dialog acts matter. Dialog-act-conditioned prompting enhances empathy while preserving both semantics and the dialog-act type. Different dialog acts benefit differently from different prompting methods, highlighting the need for further investigation of the role of dialog acts in style transfer.", "keywords": "empathy style transfer;text style transfer;empathy;GPT-4;large language models;dialog acts;pragmatics;prompt engineering;in-context learning;few-shot prompting", "primary_area": "", "supplementary_material": "", "author": "Renyi Qu;Lyle Ungar;Jo\u00e3o Sedoc", "authorids": "~Renyi_Qu1;~Lyle_Ungar1;~Jo\u00e3o_Sedoc1", "gender": "M;M;M", "homepage": ";http://www.cis.upenn.edu/~ungar/;", "dblp": ";u/LyleHUngar;", "google_scholar": ";https://scholar.google.com.tw/citations?user=KCiDjbkAAAAJ;vv355NgAAAAJ", "or_profile": "~Renyi_Qu1;~Lyle_Ungar1;~Jo\u00e3o_Sedoc1", "aff": "University of Pennsylvania;University of Pennsylvania;New York University", "aff_domain": "upenn.edu;upenn.edu;nyu.edu", "position": "MS student;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nqu2023conditioning,\ntitle={Conditioning on Dialog Acts improves Empathy Style Transfer},\nauthor={Renyi Qu and Lyle Ungar and Jo{\\~a}o Sedoc},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=AQiuwWLvim}\n}", "github": "", "project": "", "reviewers": "rbqx;89A6;kZET", "site": "https://openreview.net/forum?id=AQiuwWLvim", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;4;3", "excitement": "3;4;2", "reproducibility": "4;3;4", "correctness": "4;3;2", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": "renyi-qu-6645a8150/;;joao-sedoc-9085714/", "aff_unique_index": "0;0;1", "aff_unique_norm": "University of Pennsylvania;New York University", "aff_unique_dep": ";", "aff_unique_url": "https://www.upenn.edu;https://www.nyu.edu", "aff_unique_abbr": "UPenn;NYU", 
"aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "ARtBIBAmNR", "title": "Visually Guided Generative Text-Layout Pre-training for Document Intelligence", "track": "main", "status": "Reject", "tldr": "", "abstract": "Prior study shows that pre-training techniques can boost the performance of visual document processing, which typically requires models to gain abilities to perceive and reason both document texts and layouts (e.g., text and table cell locations). To this end, we propose visually guided generative text-layout pre-training, named ViTLP. Given an input document image, the model optimizes hierarchical language and layout modeling objectives to generate a mixed target sequence of texts and layouts. ViTLP can function as a native OCR model to locate and recognize texts of document images. In addition, to address the limitation of processing long documents by Transformers, we introduce a straightforward yet effective multi-segment generative pre-training scheme, facilitating ViTLP to process word-intensive documents of any length. Experiments show that ViTLP achieves promising performance compared to existing pre-trained baselines on various visual document understanding (VDU) tasks, including information extraction, document classification, and visual document question answering.", "keywords": "Multimodal Pre-training;Visual Document Understanding", "primary_area": "", "supplementary_material": "", "author": "Zhiming Mao;Haoli Bai;Lu Hou;Jiansheng Wei;Xin Jiang;Kam-Fai Wong;Qun Liu", "authorids": "~Zhiming_Mao1;~Haoli_Bai2;~Lu_Hou2;~Jiansheng_Wei1;~Xin_Jiang1;~Kam-Fai_Wong2;~Qun_Liu1", "gender": "M;M;M;M;M;M;F", "homepage": "https://aclanthology.org/people/z/zhiming-mao/;https://haolibai.github.io;http://www.huawei.com;;http://www.se.cuhk.edu.hk/~kfwong;http://liuquncn.github.io/;https://houlu369.github.io/", "dblp": "258/8430;195/9712;;42/4142-2;w/KamFaiWong;75/4402-1;", "google_scholar": "HVn5AZYAAAAJ;;;DUfcez0AAAAJ;;2HhiGzcAAAAJ;https://scholar.google.com.hk/citations?user=rnjoL5cAAAAJ", "or_profile": "~Zhiming_Mao1;~Haoli_Bai2;~Jiansheng_Wei1;~Xin_Jiang1;~Kam-Fai_Wong2;~Qun_Liu1;~LU_HOU1", "aff": "The Chinese University of Hong Kong;Huawei Technologies Ltd.;Huawei Technologies Co. 
Ltd.;Noah\u2019s Ark Lab, Huawei Technologies;The Chinese University of Hong Kong;Huawei Noah's Ark Lab;Huawei Technologies Ltd.", "aff_domain": "cuhk.edu.hk;huawei.com;huawei.com;huawei.com;cuhk.edu.hk;huawei.com;huawei.com", "position": "PhD student;Researcher;Researcher;Principal Researcher;Full Professor;Chief Scientist of Speech and Language Computing;researcher", "bibtex": "@misc{\nmao2023visually,\ntitle={Visually Guided Generative Text-Layout Pre-training for Document Intelligence},\nauthor={Zhiming Mao and Haoli Bai and Lu Hou and Jiansheng Wei and Xin Jiang and Kam-Fai Wong and Qun Liu},\nyear={2023},\nurl={https://openreview.net/forum?id=ARtBIBAmNR}\n}", "github": "", "project": "", "reviewers": "5J31;BkUh;a82W;Ks4h", "site": "https://openreview.net/forum?id=ARtBIBAmNR", "pdf_size": 0, "rating": "1;1;1;1", "confidence": "4;4;3;4", "excitement": "3;3;3;3", "reproducibility": "3;4;3;3", "correctness": "2;2;2;2", "rating_avg": 1.0, "confidence_avg": 3.75, "excitement_avg": 3.0, "reproducibility_avg": 3.25, "correctness_avg": 2.0, "replies_avg": 14, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0000-0002-9117-8247;0000-0002-9427-5659;0000-0002-7000-1792;", "linkedin": ";;;xin-jiang-9577b76/;;qunliu/;", "aff_unique_index": "0;1;1;1;0;1;1", "aff_unique_norm": "Chinese University of Hong Kong;Huawei", "aff_unique_dep": ";Huawei Technologies", "aff_unique_url": "https://www.cuhk.edu.hk;https://www.huawei.com", "aff_unique_abbr": "CUHK;Huawei", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "AU2Oq0z4xA", "title": "IMU2CLIP: Language-grounded Motion Sensor Translation with Multimodal Contrastive Learning", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "We present IMU2CLIP, a novel pre-training approach to align Inertial Measurement Unit (IMU) motion sensor recordings with text and video, by projecting them into the joint representation space of Contrastive Language-Image Pre-training (CLIP). The proposed approach allows IMU2CLIP to translate human motions (as measured by IMU sensors) into their corresponding textual descriptions and videos -- while preserving the transitivity across these modalities. We introduce several new IMU-based Wearable AI applications such as motion-based media search, or an LM-based multimodal reasoning with motion sensor data -- all using text as the grounding platform. In addition, we show that IMU2CLIP significantly improves downstream performances when fine-tuned for each application, demonstrating its universal usage as a new pre-trained resource. Our code and models will be released publicly.", "keywords": "Contrastive Learning;NLP Applications in Sensor Signals", "primary_area": "", "supplementary_material": "", "author": "Seungwhan Moon;Andrea Madotto;Zhaojiang Lin;Aparajita Saraf;Amy L. 
Bearman;Babak Damavandi", "authorids": "~Seungwhan_Moon1;~Andrea_Madotto1;~Zhaojiang_Lin1;~Aparajita_Saraf1;~Amy_L._Bearman1;~Babak_Damavandi1", "gender": "M;M;M;F;F;M", "homepage": "https://shanemoon.com;http://andreamad8.github.io/;https://zlinao.github.io;;;", "dblp": "120/4131;174/2905;228/9217;266/9867;164/5978;", "google_scholar": "HJTLcDsAAAAJ;https://scholar.google.it/citations?user=JBnyLicAAAAJ;https://scholar.google.co.uk/citations?user=cPtgl3wAAAAJ;3JMfe8kAAAAJ;R_hUpqUAAAAJ;O6y2l1sAAAAJ", "or_profile": "~Seungwhan_Moon1;~Andrea_Madotto1;~Zhaojiang_Lin1;~Aparajita_Saraf1;~Amy_L._Bearman1;~Babak_Damavandi1", "aff": "Meta;FAIR;Meta;Meta Facebook;Research, Facebook;Meta Facebook", "aff_domain": "meta.com;meta.com;meta.com;meta.com;research.facebook.com;meta.com", "position": "Researcher;Researcher;Researcher;Researcher;Researcher;Researcher", "bibtex": "@inproceedings{\nmoon2023imuclip,\ntitle={{IMU}2{CLIP}: Language-grounded Motion Sensor Translation with Multimodal Contrastive Learning},\nauthor={Seungwhan Moon and Andrea Madotto and Zhaojiang Lin and Aparajita Saraf and Amy L. Bearman and Babak Damavandi},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=AU2Oq0z4xA}\n}", "github": "", "project": "", "reviewers": "b6Gx;JXWg;X66w", "site": "https://openreview.net/forum?id=AU2Oq0z4xA", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;3;3", "excitement": "3;4;3", "reproducibility": "4;4;4", "correctness": "3;4;3", "rating_avg": 4.0, "confidence_avg": 3.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0009-0003-2507-1884;;;0009-0009-9141-3217;;", "linkedin": "shmoon;;;aparajita-saraf/;amy-bearman/;", "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "Meta", "aff_unique_dep": "Meta Platforms, Inc.", "aff_unique_url": "https://meta.com", "aff_unique_abbr": "Meta", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "AXY8GJzm2K", "title": "Learn From One Specialized Sub-Teacher: One-to-One Mapping for Feature-Based Knowledge Distillation", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Knowledge distillation is known as an effective technique for compressing over-parameterized language models. In this work, we propose to break down the global feature distillation task into N local sub-tasks. In this new framework, we consider each neuron in the last hidden layer of the teacher network as a specialized sub-teacher. We also consider each neuron in the last hidden layer of the student network as a focused sub-student. We make each focused sub-student learn from one corresponding specialized sub-teacher and ignore the others. This will facilitate the task for the sub-student and keep it focused. Our proposed method is novel and can be combined with other distillation techniques. Empirical results show that our proposed approach outperforms the state-of-the-art methods by maintaining higher performance on most benchmark datasets. Furthermore, we propose a randomized variant of our approach, called Masked One-to-One Mapping.\nRather than learning all the N sub-tasks simultaneously, we focus on learning a subset of these sub-tasks at each optimization step. 
This variant enables the student to digest the received flow of knowledge more effectively and yields superior results.", "keywords": "Knowledge Distillation;Compression;NLP;Large Language Models;Feature Distillation", "primary_area": "", "supplementary_material": "", "author": "Khouloud Saadi;Jelena Mitrovi\u0107;Michael Granitzer", "authorids": "~Khouloud_Saadi1;~Jelena_Mitrovi\u01071;~Michael_Granitzer1", "gender": ";F;M", "homepage": ";https://ca-roll.github.io/;https://mgrani.github.io/", "dblp": ";133/1347.html;32/1270", "google_scholar": ";5j_sFToAAAAJ;https://scholar.google.de/citations?user=nZKtn6AAAAAJ", "or_profile": "~Khouloud_Saadi1;~Jelena_Mitrovi\u01071;~Michael_Granitzer1", "aff": "Universit\u00e4t Passau;University of Passau;Universit\u00e4t Passau", "aff_domain": "uni-passau.de;uni-passau.de;uni-passau.de", "position": "Researcher;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nsaadi2023learn,\ntitle={Learn From One Specialized Sub-Teacher: One-to-One Mapping for Feature-Based Knowledge Distillation},\nauthor={Khouloud Saadi and Jelena Mitrovi{\\'c} and Michael Granitzer},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=AXY8GJzm2K}\n}", "github": "", "project": "", "reviewers": "BBGK;tgjk;pptK", "site": "https://openreview.net/forum?id=AXY8GJzm2K", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;3", "excitement": "2;3;2", "reproducibility": "4;4;2", "correctness": "3;3;2", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 2.3333333333333335, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 2.6666666666666665, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0003-3220-8749;0000-0003-3566-5507", "linkedin": "khouloud-saadi-61b64b182;jelena-mitrovi%C4%87-78354711/;", "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Passau", "aff_unique_dep": "", "aff_unique_url": "https://www.uni-passau.de", "aff_unique_abbr": "UP", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Germany" }, { "id": "AYOfbWMRSd", "title": "To Build Our Future, We Must Know Our Past: Contextualizing Paradigm Shifts in Natural Language Processing", "track": "main", "status": "Long Main", "tldr": "", "abstract": "NLP is in a period of disruptive change that is impacting our methodologies, funding sources, and public perception.\nIn this work, we seek to understand how to shape our future by better understanding our past. 
\nWe study factors that shape NLP as a field, including culture, incentives, and infrastructure by conducting long-form interviews with 26 NLP researchers of varying seniority, research area, institution, and social identity.\nOur interviewees identify cyclical patterns in the field, as well as new shifts without historical parallel, including changes in benchmark culture and software infrastructure.\nWe complement this discussion with quantitative analysis of citation, authorship, and language use in the ACL Anthology over time.\nWe conclude by discussing shared visions, concerns, and hopes for the future of NLP.\nWe hope that this study of our field's past and present can prompt informed discussion of our community's implicit norms and more deliberate action to consciously shape the future.", "keywords": "paradigm shift;future of nlp research;incentives;benchmarking;software;science of science", "primary_area": "", "supplementary_material": "", "author": "Sireesh Gururaja;Amanda Bertsch;Clara Na;David Gray Widder;Emma Strubell", "authorids": "~Sireesh_Gururaja1;~Amanda_Bertsch1;~Clara_Na1;~David_Gray_Widder1;~Emma_Strubell1", "gender": "M;F;;M;Non-Binary", "homepage": "https://www.siree.sh;https://www.cs.cmu.edu/~abertsch/;;https://davidwidder.me;http://strubell.github.io", "dblp": "321/0493;305/7615;;;153/2253", "google_scholar": "tOTGjJMAAAAJ;G1Jw4CYAAAAJ;;OG_qAA4AAAAJ;UCDMtM0AAAAJ", "or_profile": "~Sireesh_Gururaja1;~Amanda_Bertsch1;~Clara_Na1;~David_Gray_Widder1;~Emma_Strubell1", "aff": "School of Computer Science, Carnegie Mellon University;Carnegie Mellon University;;Carnegie Mellon University;Allen Institute for Artificial Intelligence", "aff_domain": "cs.cmu.edu;cmu.edu;;cmu.edu;allenai.org", "position": "MS student;PhD student;;PhD student;Visiting Researcher", "bibtex": "@inproceedings{\ngururaja2023to,\ntitle={To Build Our Future, We Must Know Our Past: Contextualizing Paradigm Shifts in Natural Language Processing},\nauthor={Sireesh Gururaja and Amanda Bertsch and Clara Na and David Gray Widder and Emma Strubell},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=AYOfbWMRSd}\n}", "github": "", "project": "", "reviewers": "tGxC;nTbt;bhU3", "site": "https://openreview.net/forum?id=AYOfbWMRSd", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;4", "excitement": "4;2;4", "reproducibility": "0;0;4", "correctness": "4;1;4", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 1.3333333333333333, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-1368-1111;;0000-0002-6912-8067;", "linkedin": "sireesh-gururaja-aa629389/;amandabertsch;;;", "aff_unique_index": "0;0;0;1", "aff_unique_norm": "Carnegie Mellon University;Allen Institute for Artificial Intelligence", "aff_unique_dep": "School of Computer Science;", "aff_unique_url": "https://www.cmu.edu;https://allenai.org", "aff_unique_abbr": "CMU;AI2", "aff_campus_unique_index": "0", "aff_campus_unique": "Pittsburgh;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "AZ8sFZtLHD", "title": "Difference-Masking: Choosing What to Mask in Continued Pretraining", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "The self-supervised objective of masked prediction has led to promising performance gains on a variety of downstream tasks. 
However, while most approaches randomly mask tokens, there is strong intuition that deciding what to mask can substantially improve learning outcomes. We investigate this in continued pretraining setting in which pretrained models continue to pretrain on domain-specific data before performing some downstream task. We introduce Difference-Masking, a masking strategy that automatically chooses what to mask during continued pretraining by considering what makes a task domain different from the pretraining domain. Empirically, we find that Difference-Masking outperforms baselines on continued pretraining settings across four diverse language-only and multimodal video tasks.", "keywords": "Machine Learning;Self-Supervised Learning;Multimodal;NLP", "primary_area": "", "supplementary_material": "", "author": "Alex Wilf;Syeda Nahida Akter;Leena Mathur;Paul Pu Liang;Sheryl Mathew;Mengrou Shou;Eric Nyberg;Louis-Philippe Morency", "authorids": "~Alex_Wilf1;~Syeda_Nahida_Akter1;~Leena_Mathur1;~Paul_Pu_Liang1;~Sheryl_Mathew1;~Mengrou_Shou1;~Eric_Nyberg1;~Louis-Philippe_Morency1", "gender": "M;F;;M;F;;;M", "homepage": "https://abwilf.github.io/;https://snat1505027.github.io;https://l-mathur.github.io;https://pliang279.github.io/;;;https://www.cs.cmu.edu/~ehn;https://www.cs.cmu.edu/~morency/", "dblp": "277/0822;272/9922;263/4173;207/9749;;;05/595;31/739", "google_scholar": "r1Zw-VEAAAAJ;tZFFHYcAAAAJ;loh93ZkAAAAJ;https://scholar.google.com/citations?hl=en;;;https://scholar.google.com/citations?hl=en;https://scholar.google.com.tw/citations?user=APgaFK0AAAAJ", "or_profile": "~Alex_Wilf1;~Syeda_Nahida_Akter1;~Leena_Mathur1;~Paul_Pu_Liang1;~Sheryl_Mathew1;~Mengrou_Shou1;~Eric_Nyberg1;~Louis-Philippe_Morency1", "aff": "Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University;School of Computer Science, Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University", "aff_domain": "cmu.edu;cmu.edu;cmu.edu;cs.cmu.edu;cs.cmu.edu;cmu.edu;cmu.edu;cmu.edu", "position": "PhD student;MS student;PhD student;PhD student;Undergrad student;Undergrad student;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nwilf2023differencemasking,\ntitle={Difference-Masking: Choosing What to Mask in Continued Pretraining},\nauthor={Alex Wilf and Syeda Nahida Akter and Leena Mathur and Paul Pu Liang and Sheryl Mathew and Mengrou Shou and Eric Nyberg and Louis-Philippe Morency},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=AZ8sFZtLHD}\n}", "github": "", "project": "", "reviewers": "o2x7;Yfzq;LPF7", "site": "https://openreview.net/forum?id=AZ8sFZtLHD", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;1;4", "excitement": "1;3;3", "reproducibility": "2;4;4", "correctness": "2;3;3", "rating_avg": 4.0, "confidence_avg": 2.6666666666666665, "excitement_avg": 2.3333333333333335, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 2.6666666666666665, "replies_avg": 11, "authors#_avg": 8, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-5587-5125;;;;;;;0000-0001-6376-7696", "linkedin": 
"abwilf/;syeda-nahida-akter-989770114/;leena-mathur/;;sheryl-m-a26809188/;mshou/;eric-nyberg-08620/;morency?challengeId=AQELGK_OvMa0vwAAAY72L-VV4X9hW8juuY80VHVeeSGHZ1PJHeeEa5LTFoeTmDGU0t1OL07MXJTYC9EAi6qgPDd2z9ztnbdFYA&submissionId=09a0ff34-04ac-c717-bef7-8c9c8811b463&challengeSource=AgFhxWkU3q7v4wAAAY72L-1xRE0eG-BnZUNE9e3eAG95pgOCZ9u1nxEg-1dK2Dw&challegeType=AgHMzV0lqKgEFwAAAY72L-11X6DHMd3V_A3Iur8XZeyYF2-oBzoufs8&memberId=AgH4yz7pZ_riCgAAAY72L-146jmR2pdr3dmhy2icxBtEQzQ&recognizeDevice=AgFDCNyrhKiFSAAAAY72L-16m7z2EH2t0ueWmMKjyk1_ZJAkfFVe", "aff_unique_index": "0;0;0;0;0;0;0;0", "aff_unique_norm": "Carnegie Mellon University", "aff_unique_dep": "", "aff_unique_url": "https://www.cmu.edu", "aff_unique_abbr": "CMU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Pittsburgh", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "AZfRWT1dOa", "title": "Not All Demonstration Examples are Equally Beneficial: Reweighting Demonstration Examples for In-Context Learning", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Large Language Models (LLMs) have recently gained the In-Context Learning (ICL) ability with the models scaling up, allowing them to quickly adapt to downstream tasks with only a few demonstration examples prepended in the input sequence. \nNonetheless, the current practice of ICL treats all demonstration examples equally, which still warrants improvement, as the quality of examples is usually uneven. \nIn this paper, we investigate how to determine approximately optimal weights for demonstration examples and how to apply them during ICL. \nTo assess the quality of weights in the absence of additional validation data, we design a masked self-prediction (MSP) score that exhibits a strong correlation with the final ICL performance. \nTo expedite the weight-searching process, we discretize the continuous weight space and adopt beam search. 
\nWith approximately optimal weights obtained, we further propose two strategies to apply them to demonstrations at different model positions.\nExperimental results on 8 text classification tasks show that our approach outperforms conventional ICL by a large margin.\nOur code is publicly available at https://github.com/Zhe-Young/WICL.", "keywords": "In-Context Learning;Training Example Reweighting", "primary_area": "", "supplementary_material": "", "author": "Zhe Yang;Damai Dai;Peiyi Wang;Zhifang Sui", "authorids": "~Zhe_Yang4;~Damai_Dai1;~Peiyi_Wang1;~Zhifang_Sui1", "gender": "M;M;M;F", "homepage": "https://github.com/Zhe-Young;;;http://eecs.pku.edu.cn/EN/People/Faculty/Detail/?ID=6024", "dblp": "181/2876-13.html;199/2097;236/6569.html;", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;8b-ysf0NWVoC;K0uQ3ygAAAAJ;", "or_profile": "~Zhe_Yang4;~Damai_Dai1;~Peiyi_Wang1;~Zhifang_Sui1", "aff": "Peking University;Peking University;Peking University;Peking University", "aff_domain": "pku.edu.cn;pku.edu.cn;pku.edu.cn;pku.edu.cn", "position": "PhD student;PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nyang2023not,\ntitle={Not All Demonstration Examples are Equally Beneficial: Reweighting Demonstration Examples for In-Context Learning},\nauthor={Zhe Yang and Damai Dai and Peiyi Wang and Zhifang Sui},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=AZfRWT1dOa}\n}", "github": "", "project": "", "reviewers": "zns4;Mxfy;FRfS", "site": "https://openreview.net/forum?id=AZfRWT1dOa", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;3;4", "excitement": "4;2;4", "reproducibility": "4;3;3", "correctness": "4;2;3", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;", "linkedin": ";;;", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Peking University", "aff_unique_dep": "", "aff_unique_url": "http://www.pku.edu.cn", "aff_unique_abbr": "Peking U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "AajIIYMm0d", "title": "Subspace Chronicles: How Linguistic Information Emerges, Shifts and Interacts during Language Model Training", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Representational spaces learned via language modeling are fundamental to Natural Language Processing (NLP); however, there has been limited understanding regarding how and when during training various types of linguistic information emerge and interact. Leveraging a novel information theoretic probing suite, which enables direct comparisons of not just task performance, but their representational subspaces, we analyze nine tasks covering syntax, semantics and reasoning, across 2M pre-training steps and five seeds. We identify critical learning phases across tasks and time, during which subspaces emerge, share information, and later disentangle to specialize. Across these phases, syntactic knowledge is acquired rapidly after 0.5% of full training. Continued performance improvements primarily stem from the acquisition of open-domain knowledge, while semantics and reasoning tasks benefit from later boosts to long-range contextualization and higher specialization. 
Measuring cross-task similarity further reveals that linguistically related tasks share information throughout training, and do so more during the critical phase of learning than before or after. Our findings have implications for model interpretability, multi-task learning, and learning from limited data.", "keywords": "Language Modelling;Representation Learning;Learning Dynamics;Probing;Subspace Analysis", "primary_area": "", "supplementary_material": "", "author": "Max M\u00fcller-Eberstein;Rob van der Goot;Barbara Plank;Ivan Titov", "authorids": "~Max_M\u00fcller-Eberstein1;~Rob_van_der_Goot1;~Barbara_Plank2;~Ivan_Titov1", "gender": ";M;;", "homepage": "https://mxij.me;https://robvanderg.github.io/;https://bplank.github.io/;http://ivan-titov.org", "dblp": "301/9477;184/8526;46/521;08/5391", "google_scholar": "mI392-4AAAAJ;lU4zpOEAAAAJ;;https://scholar.google.nl/citations?user=FKUc3vsAAAAJ", "or_profile": "~Max_M\u00fcller-Eberstein1;~Rob_van_der_Goot1;~Barbara_Plank2;~Ivan_Titov1", "aff": "Apple;IT University of Copenhagen;IT University of Copenhagen;University of Amsterdam", "aff_domain": "apple.com;itu.dk;itu.dk;uva.nl", "position": "Intern;Assistant Professor;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nm{\\\"u}ller-eberstein2023subspace,\ntitle={Subspace Chronicles: How Linguistic Information Emerges, Shifts and Interacts during Language Model Training},\nauthor={Max M{\\\"u}ller-Eberstein and Rob van der Goot and Barbara Plank and Ivan Titov},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=AajIIYMm0d}\n}", "github": "", "project": "", "reviewers": "29hu;USmm;n5w2", "site": "https://openreview.net/forum?id=AajIIYMm0d", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;4", "excitement": "3;3;4", "reproducibility": "4;4;4", "correctness": "4;3;4", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.0, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-0006-0658;;;", "linkedin": ";;;", "aff_unique_index": "0;1;1;2", "aff_unique_norm": "Apple;IT University of Copenhagen;University of Amsterdam", "aff_unique_dep": "Apple Inc.;;", "aff_unique_url": "https://www.apple.com;https://itu.dk;https://www.uva.nl", "aff_unique_abbr": "Apple;ITU;UvA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;2", "aff_country_unique": "United States;Denmark;Netherlands" }, { "id": "AbXA40kggY", "title": "BLESS: Benchmarking Large Language Models on Sentence Simplification", "track": "main", "status": "Long Main", "tldr": "", "abstract": "We present BLESS, a comprehensive performance benchmark of the most recent state-of-the-art Large Language Models (LLMs) on the task of text simplification (TS). We examine how well off-the-shelf LLMs can solve this challenging task, assessing a total of 44 models, differing in size, architecture, pre-training methods, and accessibility, on three test sets from different domains (Wikipedia, news, and medical) under a few-shot setting. Our analysis considers a suite of automatic metrics, as well as a large-scale quantitative investigation into the types of common edit operations performed by the different models. Furthermore, we perform a manual qualitative analysis on a subset of model outputs to better gauge the quality of the generated simplifications. 
Our evaluation indicates that the best LLMs, despite not being trained on TS perform comparably with state-of-the-art TS baselines. Additionally, we find that certain LLMs demonstrate a greater range and diversity of edit operations. Our performance benchmark will be available as a resource for the development of future TS methods and evaluation metrics.", "keywords": "text simplification;sentence simplification;large language models;evaluation;in-context learning", "primary_area": "", "supplementary_material": "", "author": "Tannon Kew;Alison Chi;Laura V\u00e1squez-Rodr\u00edguez;Sweta Agrawal;Dennis Aumiller;Fernando Alva-Manchego;Matthew Shardlow", "authorids": "~Tannon_Kew1;~Alison_Chi1;~Laura_V\u00e1squez-Rodr\u00edguez1;~Sweta_Agrawal1;~Dennis_Aumiller1;~Fernando_Alva-Manchego1;~Matthew_Shardlow1", "gender": "M;;F;F;M;M;M", "homepage": "https://www.cl.uzh.ch/de/people/team/compling/kew.html;;https://lmvasque.github.io/;https://sweta20.github.io/;https://dennis-aumiller.de;https://feralvam.github.io/;", "dblp": "267/9847;;;210/7863.html;263/5587;187/3319;136/8688", "google_scholar": "https://scholar.google.com/scholar?hl=en;;uz9OsTwAAAAJ;Avsw9IkAAAAJ;;https://scholar.google.co.uk/citations?user=4SnHu7sAAAAJ;https://scholar.google.co.uk/citations?hl=en", "or_profile": "~Tannon_Kew1;~Alison_Chi1;~Laura_V\u00e1squez-Rodr\u00edguez1;~Sweta_Agrawal1;~Dennis_Aumiller1;~Fernando_Alva-Manchego1;~Matthew_Shardlow1", "aff": "University of Zurich;;University of Manchester;University of Maryland, College Park;Heidelberg University;Cardiff University;The Manchester Metropolitan University", "aff_domain": "uzh.ch;;cs.manchester.ac.uk;umd.edu;informatik.uni-heidelberg.de;cardiff.ac.uk;mmu.ac.uk", "position": "PhD student;;PhD student;PhD student;PhD student;Lecturer;Senior Lecturer", "bibtex": "@inproceedings{\nkew2023bless,\ntitle={{BLESS}: Benchmarking Large Language Models on Sentence Simplification},\nauthor={Tannon Kew and Alison Chi and Laura V{\\'a}squez-Rodr{\\'\\i}guez and Sweta Agrawal and Dennis Aumiller and Fernando Alva-Manchego and Matthew Shardlow},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=AbXA40kggY}\n}", "github": "", "project": "", "reviewers": "McU4;2xuh;WJrH", "site": "https://openreview.net/forum?id=AbXA40kggY", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;4", "excitement": "3;4;4", "reproducibility": "3;3;4", "correctness": "3;5;4", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0002-7313-905X;;;0000-0001-6218-8377;0000-0003-1129-2750", "linkedin": ";;lmvasque/;;dennis-aumiller;;", "aff_unique_index": "0;1;2;3;4;5", "aff_unique_norm": "University of Zurich;University of Manchester;University of Maryland;Heidelberg University;Cardiff University;Manchester Metropolitan University", "aff_unique_dep": ";;;;;", "aff_unique_url": "https://www.unizh.ch;https://www.manchester.ac.uk;https://www/umd.edu;https://www.uni-heidelberg.de;https://www.cardiff.ac.uk;https://www.mmu.ac.uk", "aff_unique_abbr": "UZH;UoM;UMD;Uni Heidelberg;Cardiff;MMU", "aff_campus_unique_index": "1", "aff_campus_unique": ";College Park", "aff_country_unique_index": "0;1;2;3;1;1", "aff_country_unique": "Switzerland;United Kingdom;United States;Germany" }, { "id": "AfEowGM3qG", "title": "NameGuess: 
Column Name Expansion for Tabular Data", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Recent advances in large language models have revolutionized many sectors, including the database industry. One common challenge when dealing with large volumes of tabular data is the pervasive use of abbreviated column names, which can negatively impact performance on various data search, access, and understanding tasks. To address this issue, we introduce a new task, called NameGuess, to expand column names (used in database schema) as a natural language generation problem. We create a training dataset of 384K abbreviated-expanded column pairs using a new data fabrication method and a human-annotated evaluation benchmark that includes 9.2K examples from real-world tables. To tackle the complexities associated with polysemy and ambiguity in NameGuess, we enhance auto-regressive language models by conditioning on table content and column header names -- yielding a fine-tuned model (with 2.7B parameters) that matches human performance. Furthermore, we conduct a comprehensive analysis (on multiple LLMs) to validate the effectiveness of table content in NameGuess and identify promising future opportunities. Code has been made available at https://github.com/amazon-science/nameguess.", "keywords": "Column Name Expansion;Natural Language Generation;Datasets", "primary_area": "", "supplementary_material": "", "author": "Jiani Zhang;Zhengyuan Shen;Balasubramaniam Srinivasan;Shen Wang;Huzefa Rangwala;George Karypis", "authorids": "~Jiani_Zhang2;~Zhengyuan_Shen1;~Balasubramaniam_Srinivasan1;~Shen_Wang2;~Huzefa_Rangwala2;~George_Karypis1", "gender": "F;M;;M;M;M", "homepage": "https://jennyzhang0215.github.io/;;;;http://www.cs.gmu.edu/~rangwala;", "dblp": "186/6870;;230/3792;;30/444;", "google_scholar": "CBmDAOEAAAAJ;mX2LPRwAAAAJ;uM4EhgEAAAAJ;G7twX6YAAAAJ;yWJ9BqEAAAAJ;ElqwScwAAAAJ", "or_profile": "~Jiani_Zhang2;~Zhengyuan_Shen1;~Balasubramaniam_Srinivasan1;~Shen_Wang2;~Huzefa_Rangwala2;~George_Karypis1", "aff": "AWS;Amazon;Amazon;Amazon;Computer Science, George Mason University;University of Minnesota, Minneapolis", "aff_domain": "amazon.com;amazon.com;amazon.com;amazon.com;cs.gmu.edu;umn.edu", "position": "Researcher;Researcher;Senior Applied Scientist;Researcher;Full Professor;Full Professor", "bibtex": "@inproceedings{\nzhang2023nameguess,\ntitle={NameGuess: Column Name Expansion for Tabular Data},\nauthor={Jiani Zhang and Zhengyuan Shen and Balasubramaniam Srinivasan and Shen Wang and Huzefa Rangwala and George Karypis},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=AfEowGM3qG}\n}", "github": "", "project": "", "reviewers": "hjkH;4P1D;MEbZ", "site": "https://openreview.net/forum?id=AfEowGM3qG", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;4;4", "excitement": "4;3;4", "reproducibility": "4;4;5", "correctness": "4;4;4", "rating_avg": 5.0, "confidence_avg": 4.0, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.333333333333333, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0003-0074-6761;;;;;", "linkedin": ";donshen16/;;shen-wang-97309138/;;", "aff_unique_index": "0;0;0;0;1;2", "aff_unique_norm": "Amazon;George Mason University;University of Minnesota", "aff_unique_dep": "Amazon Web Services;Computer Science;", "aff_unique_url": 
"https://aws.amazon.com;https://www.gmu.edu;https://www.minnesota.edu", "aff_unique_abbr": "AWS;GMU;UMN", "aff_campus_unique_index": "1", "aff_campus_unique": ";Minneapolis", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "AfnJBOXfAU", "title": "COFFEE: Counterfactual Fairness for Personalized Text Generation in Explainable Recommendation", "track": "main", "status": "Long Main", "tldr": "", "abstract": "As language models become increasingly integrated into our digital lives, Personalized Text Generation (PTG) has emerged as a pivotal component with a wide range of applications. However, the bias inherent in user written text, often used for PTG model training, can inadvertently associate different levels of linguistic quality with users' protected attributes. The model can inherit the bias and perpetuate inequality in generating text w.r.t. users' protected attributes, leading to unfair treatment when serving users. In this work, we investigate fairness of PTG in the context of personalized explanation generation for recommendations. We first discuss the biases in generated explanations and their fairness implications. To promote fairness, we introduce a general framework to achieve measure-specific counterfactual fairness in explanation generation. Extensive experiments and human evaluations demonstrate the effectiveness of our method.", "keywords": "personalized text generation;fairness;bias;explanation for recommendation;human evaluation;counterfactual fairness", "primary_area": "", "supplementary_material": "", "author": "Nan Wang;Qifan Wang;Yi-Chia Wang;Maziar Sanjabi;Jingzhou Liu;Hamed Firooz;Hongning Wang;Shaoliang Nie", "authorids": "~Nan_Wang6;~Qifan_Wang2;~Yi-Chia_Wang2;~Maziar_Sanjabi1;~Jingzhou_Liu1;~Hamed_Firooz1;~Hongning_Wang1;~Shaoliang_Nie1", "gender": "M;M;;M;M;M;M;M", "homepage": "http://www.cs.virginia.edu/~nw6a/;https://wqfcr.github.io/;;https://sites.google.com/view/maziar;;;http://www.cs.virginia.edu/~hw5x/;https://snie2012.github.io", "dblp": "84/864;33/8610;71/2302;21/8577;61/11308;;05/6545;213/7860", "google_scholar": "https://scholar.google.com/citations?hl=en;LrSyLosAAAAJ;9gMgFPQAAAAJ;bc_N2-oAAAAJ;;4pKOL5gAAAAJ;qkdvKNoAAAAJ;https://scholar.google.com/citations?hl=en", "or_profile": "~Nan_Wang6;~Qifan_Wang2;~Yi-Chia_Wang2;~Maziar_Sanjabi1;~Jingzhou_Liu1;~Hamed_Firooz1;~Hongning_Wang1;~Shaoliang_Nie1", "aff": "University of Virginia;Meta AI;Meta;Meta;Meta;Meta Facebook;University of Virginia;Meta Inc", "aff_domain": "virginia.edu;fb.com;meta.com;meta.com;meta.com;facebook.com;virginia.edu;meta.com", "position": "PhD student;Principal Researcher;Research Scientist;Researcher;Researcher;Researcher;Associate Professor;Researcher", "bibtex": "@inproceedings{\nwang2023coffee,\ntitle={{COFFEE}: Counterfactual Fairness for Personalized Text Generation in Explainable Recommendation},\nauthor={Nan Wang and Qifan Wang and Yi-Chia Wang and Maziar Sanjabi and Jingzhou Liu and Hamed Firooz and Hongning Wang and Shaoliang Nie},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=AfnJBOXfAU}\n}", "github": "", "project": "", "reviewers": "iBGj;yP6b;VQ9B", "site": "https://openreview.net/forum?id=AfnJBOXfAU", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;4;3", "excitement": "4;3;4", "reproducibility": "4;3;3", "correctness": "3;2;4", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.6666666666666665, 
"reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 8, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-7570-5756;;;;;0000-0002-6524-9195;", "linkedin": "https://www.linkedin.com/public-profile/in/nan-nolen-wang-493341163?challengeId=AQEquDuYuK0KdAAAAXd-p60BoYifuxHUM8sbuGC1zveND5ifUDR5jduLsQ3NFivCjMxOS21SsmFG6K4n20UdyeCKLgXz2EFH-w&submissionId=b5d1bff9-5998-6116-18d7-1a300fe1552b;;;;;;;shaoliang-nie/", "aff_unique_index": "0;1;1;1;1;1;0;1", "aff_unique_norm": "University of Virginia;Meta", "aff_unique_dep": ";Meta AI", "aff_unique_url": "https://www.virginia.edu;https://meta.com", "aff_unique_abbr": "UVA;Meta", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "AgsLcJ9KaX", "title": "How do languages influence each other? Studying cross-lingual data sharing during LM fine-tuning", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Multilingual language models (MLMs) are jointly trained on data from many different languages such that representation of individual languages can benefit from other languages' data. Impressive performance in zero-shot cross-lingual transfer shows that these models are able to exploit this property. Yet, it remains unclear to what extent, and under which conditions, languages rely on each other's data. To answer this question, we use TracIn (Pruthi et al., 2020), a training data attribution (TDA) method, to retrieve training samples from multilingual data that are most influential for test predictions in a given language. This allows us to analyse cross-lingual sharing mechanisms of MLMs from a new perspective. While previous work studied cross-lingual sharing at the model parameter level, we present the first approach to study it at the data level. We find that MLMs rely on data from multiple languages during fine-tuning and this reliance increases as fine-tuning progresses. We further find that training samples from other languages can both reinforce and complement the knowledge acquired from data of the test language itself.", "keywords": "cross-lingual influence;data sharing;training data attribution", "primary_area": "", "supplementary_material": "", "author": "Rochelle Choenni;Dan Garrette;Ekaterina Shutova", "authorids": "~Rochelle_Choenni1;~Dan_Garrette1;~Ekaterina_Shutova1", "gender": "F;M;F", "homepage": "https://rochellechoenni.github.io/;http://www.dhgarrette.com/;https://www.shutova.org/", "dblp": "238/0597;117/4050;33/8156", "google_scholar": "https://scholar.google.nl/citations?user=-_WbyoMAAAAJ;tT9mhNMAAAAJ;jqOFBGoAAAAJ", "or_profile": "~Rochelle_Choenni1;~Dan_Garrette1;~Ekaterina_Shutova1", "aff": "University of Amsterdam;Google DeepMind;University of Amsterdam", "aff_domain": "uva.nl;google.com;uva.nl", "position": "PhD student;Researcher;Associate Professor", "bibtex": "@inproceedings{\nchoenni2023how,\ntitle={How do languages influence each other? 
Studying cross-lingual data sharing during {LM} fine-tuning},\nauthor={Rochelle Choenni and Dan Garrette and Ekaterina Shutova},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=AgsLcJ9KaX}\n}", "github": "", "project": "", "reviewers": "qo5L;3fXq;JCHa;FsHG", "site": "https://openreview.net/forum?id=AgsLcJ9KaX", "pdf_size": 0, "rating": "5;5;5;5", "confidence": "3;3;3;3", "excitement": "3;4;4;3", "reproducibility": "4;3;3;5", "correctness": "4;4;4;4", "rating_avg": 5.0, "confidence_avg": 3.0, "excitement_avg": 3.5, "reproducibility_avg": 3.75, "correctness_avg": 4.0, "replies_avg": 14, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": ";;", "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Amsterdam;Google", "aff_unique_dep": ";Google DeepMind", "aff_unique_url": "https://www.uva.nl;https://deepmind.com", "aff_unique_abbr": "UvA;DeepMind", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "Netherlands;United Kingdom" }, { "id": "Ai0oBKlJP2", "title": "ChatGPT Beyond English: Towards a Comprehensive Evaluation of Large Language Models in Multilingual Learning", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Over the last few years, large language models (LLMs) have emerged as the most important breakthroughs in natural language processing (NLP) that fundamentally transform research and developments in the field. ChatGPT represents one of the most exciting LLM systems developed recently to showcase impressive skills for language generation and highly attract public attention. Among various exciting applications discovered for ChatGPT in English, the model can process and generate texts for multiple languages due to its multilingual training data. Given the broad adoption of ChatGPT for English in different problems and areas, a natural question is whether ChatGPT can also be applied effectively for other languages or it is necessary to develop more language-specific technologies. The answer to this question requires a thorough evaluation of ChatGPT over multiple tasks with diverse languages and large datasets (i.e., beyond reported anecdotes), which is still missing or limited in current research. Our work aims to fill this gap for the evaluation of ChatGPT and similar LLMs to provide more comprehensive information for multilingual NLP applications. In particular, we evaluate ChatGPT on 7 different tasks, covering 37 diverse languages with high, medium, low, and extremely low resources. 
Compared to the performance of previous models, our extensive experiments demonstrate the worse performance of ChatGPT for different NLP tasks and languages, calling for further research to develop better models and understanding for multilingual learning.", "keywords": "ChatGPT;Multilingual Evaluation;Large Language Models", "primary_area": "", "supplementary_material": "", "author": "Viet Dac Lai;Nghia Trung Ngo;Amir Pouran Ben Veyseh;Hieu Man;Franck Dernoncourt;Trung Bui;Thien Huu Nguyen", "authorids": "~Viet_Dac_Lai1;~Nghia_Trung_Ngo1;~Amir_Pouran_Ben_Veyseh2;~Hieu_Man1;~Franck_Dernoncourt1;~Trung_Bui1;~Thien_Huu_Nguyen1", "gender": "M;M;M;M;;M;M", "homepage": "http://laiviet.github.io;;;;http://francky.me;https://sites.google.com/site/trungbuistanford/;http://ix.cs.uoregon.edu/~thien", "dblp": "251/8546;264/6391;;324/1286;132/4043;180/0632;17/9407", "google_scholar": "TtxmNccAAAAJ;;CrK5xTwAAAAJ;no4ETNIAAAAJ;kz2aIc8AAAAJ;FpFTduYAAAAJ;Da2FhegAAAAJ", "or_profile": "~Viet_Dac_Lai1;~Nghia_Trung_Ngo1;~Amir_Pouran_Ben_Veyseh2;~Hieu_Man1;~Franck_Dernoncourt1;~Trung_Bui1;~Thien_Huu_Nguyen1", "aff": "Dept. Computer and Information Science, University of Oregon;University of Oregon;University of Oregon;VinAI Research;Adobe Systems;Adobe Research;University of Oregon", "aff_domain": "cs.uoregon.edu;uoregon.edu;uoregon.edu;vingroup.net;adobe.com;adobe.com;cs.uoregon.edu", "position": "PhD student;PhD student;PhD student;Researcher;Researcher;Researcher;Assistant Professor", "bibtex": "@inproceedings{\nlai2023chatgpt,\ntitle={Chat{GPT} Beyond English: Towards a Comprehensive Evaluation of Large Language Models in Multilingual Learning},\nauthor={Viet Dac Lai and Nghia Trung Ngo and Amir Pouran Ben Veyseh and Hieu Man and Franck Dernoncourt and Trung Bui and Thien Huu Nguyen},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=Ai0oBKlJP2}\n}", "github": "", "project": "", "reviewers": "6NDZ;3x9S;PmRm", "site": "https://openreview.net/forum?id=Ai0oBKlJP2", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;4;4", "excitement": "3;4;3", "reproducibility": "3;4;3", "correctness": "3;5;3", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;0000-0002-1119-1346;0000-0002-0871-349X;", "linkedin": "laidacviet/;;;;franckdernoncourt;trung-bui-4333322/;thien-huu-nguyen-7a193030/", "aff_unique_index": "0;0;0;1;2;2;0", "aff_unique_norm": "University of Oregon;VinAI Research;Adobe", "aff_unique_dep": "Dept. of Computer and Information Science;;Adobe Systems Incorporated", "aff_unique_url": "https://www.uoregon.edu;https://www.vinai.io/;https://www.adobe.com", "aff_unique_abbr": "UO;VinAI;Adobe", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;0;0;0", "aff_country_unique": "United States;Vietnam" }, { "id": "AjGXZIgvIb", "title": "Towards Concept-Aware Large Language Models", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Concepts play a pivotal role in various human cognitive functions, including learning, reasoning and communication. However, there is very little work on endowing machines with the ability to form and reason with concepts. 
In particular, state-of-the-art large language models (LLMs) work at the level of tokens, not concepts.\n\nIn this work, we analyze how well contemporary LLMs capture human concepts and their structure. We then discuss ways to develop concept-aware LLMs, taking place at different stages of the pipeline.\nWe sketch a method for pretraining LLMs using concepts, and also explore the simpler approach that uses the output of existing LLMs. Despite its simplicity, our proof-of-concept is shown to better match human intuition, as well as improve the robustness of predictions. These preliminary results underscore the promise of concept-aware LLMs.", "keywords": "Concepts;Pretrained Large Language Models", "primary_area": "", "supplementary_material": "", "author": "Chen Shani;Jilles Vreeken;Dafna Shahaf", "authorids": "~Chen_Shani1;~Jilles_Vreeken2;~Dafna_Shahaf1", "gender": "F;M;F", "homepage": ";https://vreeken.eu;http://hyadatalab.com/", "dblp": "264/5040;94/6462;02/2672.html", "google_scholar": "jQzR-IwAAAAJ;p5HEQfIAAAAJ;https://scholar.google.com.tw/citations?user=AgyW_90AAAAJ", "or_profile": "~Chen_Shani1;~Jilles_Vreeken2;~Dafna_Shahaf1", "aff": "Hebrew University of Jerusalem;CISPA Helmholtz Center for Information Security;Hebrew University of Jerusalem", "aff_domain": "huji.ac.il;cispa.de;huji.ac.il", "position": "PhD student;Tenured Faculty;Full Professor", "bibtex": "@inproceedings{\nshani2023towards,\ntitle={Towards Concept-Aware Large Language Models},\nauthor={Chen Shani and Jilles Vreeken and Dafna Shahaf},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=AjGXZIgvIb}\n}", "github": "", "project": "", "reviewers": "YmXf;YpEe;HKKb", "site": "https://openreview.net/forum?id=AjGXZIgvIb", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;3", "excitement": "4;3;3", "reproducibility": "4;3;4", "correctness": "4;2;3", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-2310-2806;0000-0003-3261-0818", "linkedin": "chen-shani-ph-d-638816184/;jilles-vreeken-b3b05b58/;", "aff_unique_index": "0;1;0", "aff_unique_norm": "Hebrew University of Jerusalem;CISPA Helmholtz Center for Information Security", "aff_unique_dep": ";", "aff_unique_url": "https://www.huji.ac.il;https://www.cispa.de/", "aff_unique_abbr": "HUJI;CISPA", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Jerusalem;", "aff_country_unique_index": "0;1;0", "aff_country_unique": "Israel;Germany" }, { "id": "Akk5ep2gQx", "title": "Semantic Space Grounded Weighted Decoding for Multi-Attribute Controllable Dialogue Generation", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Controlling chatbot utterance generation with multiple attributes such as \npersonalities, emotions and dialogue acts is a practically useful but \nunder-studied problem.\nWe propose a novel framework called DASC \nthat possesses strong controllability with a weighted decoding paradigm, \nwhile improving generation quality with the grounding in an \nattribute semantics space. Generation with multiple attributes is then \nintuitively implemented with an interpolation of multiple attribute embeddings,\nwhich results in substantial reduction in the model sizes. 
\nExperiments show that DASC can achieve high control accuracy \nin generation task with the simultaneous control of 3 aspects while also producing interesting and \nreasonably sensible responses, even in an out-of-distribution robustness \ntest.", "keywords": "dialogue response generation;chatbot;controllable generation;multi-attributes", "primary_area": "", "supplementary_material": "", "author": "Zhiling Zhang;Mengyue Wu;Kenny Q. Zhu", "authorids": "~Zhiling_Zhang1;~Mengyue_Wu1;~Kenny_Q._Zhu1", "gender": ";F;M", "homepage": ";https://speechlab.sjtu.edu.cn/members/mengyue-wu;http://www.cs.sjtu.edu.cn/~kzhu/", "dblp": ";82/2416;z/KennyQiliZhu", "google_scholar": ";https://scholar.google.com/citations?hl=en;https://scholar.google.com.tw/citations?user=ZIRJ6lIAAAAJ", "or_profile": "~Zhiling_Zhang1;~Mengyue_Wu1;~Kenny_Q._Zhu1", "aff": ";Shanghai Jiaotong University;Shanghai Jiaotong University", "aff_domain": ";sjtu.edu.cn;cs.sjtu.edu.cn", "position": ";Associate Professor;Full Professor", "bibtex": "@inproceedings{\nzhang2023semantic,\ntitle={Semantic Space Grounded Weighted Decoding for Multi-Attribute Controllable Dialogue Generation},\nauthor={Zhiling Zhang and Mengyue Wu and Kenny Q. Zhu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=Akk5ep2gQx}\n}", "github": "", "project": "", "reviewers": "yeTY;fjTP;kcyB", "site": "https://openreview.net/forum?id=Akk5ep2gQx", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;4;3", "excitement": "4;3;3", "reproducibility": "4;4;5", "correctness": "4;4;4", "rating_avg": 3.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.333333333333333, "correctness_avg": 4.0, "replies_avg": 10, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": ";;", "aff_unique_index": "0;0", "aff_unique_norm": "Shanghai Jiao Tong University", "aff_unique_dep": "", "aff_unique_url": "https://www.sjtu.edu.cn", "aff_unique_abbr": "SJTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "id": "AlEeMxkgsi", "title": "A Scalable Framework for Table of Contents Extraction from Complex ESG Annual Reports", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Table of contents (ToC) extraction centres on structuring documents in a hierarchical manner. In this paper, we propose a new dataset, ESGDoc, comprising 1,093 ESG annual reports from 563 companies spanning from 2001 to 2022. These reports pose significant challenges due to their diverse structures and extensive length. To address these challenges, we propose a new framework for Toc extraction, consisting of three steps: (1) Constructing an initial tree of text blocks based on reading order and font sizes; (2) Modelling each tree node (or text block) independently by considering its contextual information captured in node-centric subtree; (3) Modifying the original tree by taking appropriate action on each tree node (Keep, Delete, or Move). This construction-modelling-modification (CMM) process offers several benefits. It eliminates the need for pairwise modelling of section headings as in previous approaches, making document segmentation practically feasible. By incorporating structured information, each section heading can leverage both local and long-distance context relevant to itself. 
Experimental results show that our approach outperforms the previous state-of-the-art baseline with a fraction of running time. Our framework proves its scalability by effectively handling documents of any length.", "keywords": "Table of Contents Extraction;Tree", "primary_area": "", "supplementary_material": "", "author": "Xinyu Wang;Lin Gui;Yulan He", "authorids": "~Xinyu_Wang9;~Lin_Gui3;~Yulan_He1", "gender": "M;M;F", "homepage": ";;https://www.kcl.ac.uk/people/yulan-he", "dblp": ";34/8605-3;75/5430", "google_scholar": "zUAHqQgAAAAJ;https://scholar.google.com.ph/citations?user=1b3Eyx4AAAAJ;https://scholar.google.co.uk/citations?user=SP9r32UAAAAJ", "or_profile": "~Xinyu_Wang9;~Lin_Gui3;~Yulan_He1", "aff": "University of Warwick;King's College London, University of London;King's College London, University of London", "aff_domain": "warwick.ac.uk;kcl.ac.uk;kcl.ac.uk", "position": "PhD student;Lecturer;Full Professor", "bibtex": "@inproceedings{\nwang2023a,\ntitle={A Scalable Framework for Table of Contents Extraction from Complex {ESG} Annual Reports},\nauthor={Xinyu Wang and Lin Gui and Yulan He},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=AlEeMxkgsi}\n}", "github": "", "project": "", "reviewers": "Eyfj;eEPP;Z2mu", "site": "https://openreview.net/forum?id=AlEeMxkgsi", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;3;3", "excitement": "4;4;4", "reproducibility": "4;3;4", "correctness": "4;3;4", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 4.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0003-3948-5845", "linkedin": ";;yulan-he-277234a/?originalSubdomain=uk", "aff_unique_index": "0;1;1", "aff_unique_norm": "University of Warwick;King's College London", "aff_unique_dep": ";", "aff_unique_url": "https://www.warwick.ac.uk;https://www.kcl.ac.uk", "aff_unique_abbr": "Warwick;KCL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United Kingdom" }, { "id": "AoGdaivPEh", "title": "Natural Language Decompositions of Implicit Content Enable Better Text Representations", "track": "main", "status": "Long Main", "tldr": "", "abstract": "When people interpret text, they rely on inferences that go beyond the observed language itself. Inspired by this observation, we introduce a method for the analysis of text that takes implicitly communicated content explicitly into account. We use a large language model to produce sets of propositions that are inferentially related to the text that has been observed, then validate the plausibility of the generated content via human judgments. Incorporating these explicit representations of implicit content proves useful in multiple problem settings that involve the human interpretation of utterances: assessing the similarity of arguments, making sense of a body of opinion data, and modeling legislative behavior. 
Our results suggest that modeling the meanings behind observed language, rather than the literal text alone, is a valuable direction for NLP and particularly its applications to social science.", "keywords": "computational social science;implicit;decompositions;decompose;propositions;generation;political science;sentence embeddings;clustering", "primary_area": "", "supplementary_material": "", "author": "Alexander Hoyle;Rupak Sarkar;Pranav Goel;Philip Resnik", "authorids": "~Alexander_Hoyle1;~Rupak_Sarkar1;~Pranav_Goel1;~Philip_Resnik1", "gender": "M;M;M;M", "homepage": "https://alexanderhoyle.com;https://styx97.github.io;https://pranav-goel.github.io/;http://www.umiacs.umd.edu/~resnik/", "dblp": "297/8769;256/0987;188/5967;p/PhilipResnik", "google_scholar": "NpK0IXgAAAAJ;SOckmGoAAAAJ;https://scholar.google.co.in/citations?user=LOXt_eYAAAAJ;https://scholar.google.com.tw/citations?user=71BFWc0AAAAJ", "or_profile": "~Alexander_Hoyle1;~Rupak_Sarkar1;~Pranav_Goel1;~Philip_Resnik1", "aff": "University of Maryland, College Park;University of Maryland, College Park;University of Maryland, College Park;University of Maryland, College Park", "aff_domain": "umd.edu;umd.edu;umd.edu;umd.edu", "position": "PhD student;PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nhoyle2023natural,\ntitle={Natural Language Decompositions of Implicit Content Enable Better Text Representations},\nauthor={Alexander Hoyle and Rupak Sarkar and Pranav Goel and Philip Resnik},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=AoGdaivPEh}\n}", "github": "", "project": "", "reviewers": "V5xX;5xdd;xCQB", "site": "https://openreview.net/forum?id=AoGdaivPEh", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;3;3", "excitement": "3;3;4", "reproducibility": "2;3;3", "correctness": "3;3;3", "rating_avg": 5.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 2.6666666666666665, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0003-1037-2687;0000-0002-6130-8602", "linkedin": ";rupak-sarkar-265979152/;;", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Maryland", "aff_unique_dep": "", "aff_unique_url": "https://www/umd.edu", "aff_unique_abbr": "UMD", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "College Park", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "AptTXihnhH", "title": "Character-LLM: A Trainable Agent for Role-Playing", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Large language models (LLMs) can be used to serve as agents to simulate human behaviors, given the powerful ability to understand human instructions and provide high-quality generated texts.\nSuch ability stimulates us to wonder whether LLMs can simulate a person in a higher form than simple human behaviors.\nTherefore, we aim to train an agent with the profile, experience, and emotional states of a specific person instead of using limited prompts to instruct ChatGPT API. 
\nIn this work, we introduce Character-LLM, which teaches LLMs to act as specific people such as Beethoven, Queen Cleopatra, Julius Caesar, etc.\nOur method focuses on editing profiles as experiences of a certain character and training models to be personal simulacra with these experiences.\nTo assess the effectiveness of our approach, we build a test playground that interviews trained agents and evaluates whether the agents \textit{memorize} their characters and experiences.\nExperimental results show interesting observations that help build future simulacra of humankind.\footnote{Code and datasets are public at \url{https://github.com/choosewhatulike/trainable-agents}}", "keywords": "human simulacra;LLM", "primary_area": "", "supplementary_material": "", "author": "Yunfan Shao;Linyang Li;Junqi Dai;Xipeng Qiu", "authorids": "~Yunfan_Shao1;~Linyang_Li1;~Junqi_Dai1;~Xipeng_Qiu1", "gender": "M;M;;M", "homepage": ";https://github.com/LinyangLee;;https://xpqiu.github.io/", "dblp": "236/5806;228/8051;;69/1395", "google_scholar": "pw5QEtoAAAAJ;T6eEqcMAAAAJ;;Pq4Yp_kAAAAJ", "or_profile": "~Yunfan_Shao1;~Linyang_Li1;~Junqi_Dai1;~Xipeng_Qiu1", "aff": "Fudan University;Fudan University;;Fudan University", "aff_domain": "fudan.edu.cn;fudan.edu.cn;;fudan.edu.cn", "position": "PhD student;PhD student;;Full Professor", "bibtex": "@inproceedings{\nshao2023characterllm,\ntitle={Character-{LLM}: A Trainable Agent for Role-Playing},\nauthor={Yunfan Shao and Linyang Li and Junqi Dai and Xipeng Qiu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=AptTXihnhH}\n}", "github": "", "project": "", "reviewers": "fhYt;3D9c;r5Uk", "site": "https://openreview.net/forum?id=AptTXihnhH", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;3;4", "excitement": "4;2;4", "reproducibility": "2;2;4", "correctness": "3;3;4", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 2.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0000-0001-7163-5247", "linkedin": ";;;", "aff_unique_index": "0;0;0", "aff_unique_norm": "Fudan University", "aff_unique_dep": "", "aff_unique_url": "https://www.fudan.edu.cn", "aff_unique_abbr": "Fudan", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "ArSMQ3dCUx", "title": "Noise-Robust Semi-Supervised Learning for Distantly Supervised Relation Extraction", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Distantly supervised relation extraction (DSRE) aims to extract relational facts from texts but suffers from noisy instances.\n To mitigate the influence of noisy labels, current methods typically use the Multi-Instance-Learning framework to extract relations for each bag. However, these approaches are not capable of extracting relation labels for individual sentences. Several studies have focused on sentence-level DSRE to solve the above problem. These studies primarily aim to develop methods for identifying noisy samples and filtering them out to mitigate the impact of noise. However, discarding noisy samples directly leads to the loss of useful information. To this end, we propose SSLRE, a novel Semi-Supervised-Learning Relation Extraction framework for sentence-level DSRE. 
We discard only the labels of the noisy samples and utilize these instances without labels as unlabeled samples. Our SSLRE framework utilizes a weighted K-NN graph to select confident samples as labeled data and the rest as unlabeled. We then design a robust semi-supervised learning framework that can efficiently handle remaining label noise present in the labeled dataset, while also making effective use of unlabeled samples. Based on our experiments on two real-world datasets, the SSLRE framework we proposed has achieved significant enhancements in sentence-level relation extraction performance compared to the existing state-of-the-art methods. Moreover, it has also attained a state-of-the-art level of performance in bag-level relation extraction with ONE aggregation strategy.", "keywords": "Relation Extraction;Distant Supervision;Semi-Supervised-Learning", "primary_area": "", "supplementary_material": "", "author": "Xin Sun;Qiang Liu;Shu Wu;Zilei Wang;Liang Wang", "authorids": "~Xin_Sun9;~Qiang_Liu8;~Shu_Wu1;~Zilei_Wang1;~Liang_Wang3", "gender": "M;M;M;M;M", "homepage": "https://sunxin000.github.io/;https://john-qiangliu.tech/;http://www.shuwu.name;;", "dblp": ";61/3234-6;06/3577;49/1878;56/4499-1", "google_scholar": ";https://scholar.google.co.jp/citations?user=D-lKLcMAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com.hk/citations?hl=zh-CN;", "or_profile": "~Xin_Sun9;~Qiang_Liu8;~Shu_Wu1;~Zilei_Wang1;~Liang_Wang3", "aff": "University of Science and Technology of China;Institute of Automation, Chinese Academy of Sciences;Institute of Automation, Chinese Academy of Sciences;University of Science and Technology of China;Institute of Automation\uff0c CAS\uff0cChina", "aff_domain": "ustc.edu.cn;nlpr.ia.ac.cn;ia.ac.cn;ustc.edu.cn;ia.ac.cn", "position": "MS student;Associate Professor;Associate Professor;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nsun2023noiserobust,\ntitle={Noise-Robust Semi-Supervised Learning for Distantly Supervised Relation Extraction},\nauthor={Xin Sun and Qiang Liu and Shu Wu and Zilei Wang and Liang Wang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=ArSMQ3dCUx}\n}", "github": "", "project": "", "reviewers": "JnUq;HoSe;MQ5c", "site": "https://openreview.net/forum?id=ArSMQ3dCUx", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;3;4", "excitement": "3;4;3", "reproducibility": "4;4;4", "correctness": "4;4;3", "rating_avg": 3.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.0, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-9233-3827;0000-0003-2164-3577;;", "linkedin": ";;;;", "aff_unique_index": "0;1;1;0;1", "aff_unique_norm": "University of Science and Technology of China;Chinese Academy of Sciences", "aff_unique_dep": ";Institute of Automation", "aff_unique_url": "http://www.ustc.edu.cn;http://www.ia.cas.cn", "aff_unique_abbr": "USTC;CAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "Ariw9I14zZ", "title": "XLM-V: Overcoming the Vocabulary Bottleneck in Multilingual Masked Language Models", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Large multilingual language models typically rely on a single vocabulary shared across 100+ languages. 
As these models have increased in parameter count and depth, vocabulary size has remained largely unchanged. This \\textit{vocabulary bottleneck} limits the representational capabilities of multilingual models like XLM-R. In this paper, we introduce a new approach for scaling to very large multilingual vocabularies by de-emphasizing token sharing between languages with little lexical overlap and assigning vocabulary capacity to achieve sufficient coverage for each individual language. Tokenizations using our vocabulary are typically more semantically meaningful and shorter compared to XLM-R. Leveraging this improved vocabulary, we train XLM-V, a multilingual language model with a one million token vocabulary. XLM-V outperforms XLM-R on every task we tested on ranging from natural language inference (XNLI), question answering (MLQA, XQuAD, TyDiQA), to named entity recognition (WikiAnn). XLM-V is particularly effective on low-resource language tasks and outperforms XLM-R by 11.2\\% and 5.8\\% absolute on MasakhaNER and Americas NLI, respectively.", "keywords": "Multilingual;Masked Language Models", "primary_area": "", "supplementary_material": "", "author": "Davis Liang;Hila Gonen;Yuning Mao;Rui Hou;Naman Goyal;Marjan Ghazvininejad;Luke Zettlemoyer;Madian Khabsa", "authorids": "~Davis_Liang1;~Hila_Gonen1;~Yuning_Mao1;~Rui_Hou3;~Naman_Goyal1;~Marjan_Ghazvininejad1;~Luke_Zettlemoyer1;~Madian_Khabsa1", "gender": "M;;;M;M;;M;M", "homepage": "https://www.davisliang.com;https://gonenhila.github.io/;https://morningmoni.github.io/;;;;https://www.cs.washington.edu/people/faculty/lsz/;https://www.madiankhabsa.com", "dblp": "206/6843;167/5312;178/3692;;183/1418;;21/6793;87/11087", "google_scholar": "9lh2gH8AAAAJ;URThmtMAAAAJ;steJe6IAAAAJ;;CRbM_P4AAAAJ;;https://scholar.google.com.tw/citations?user=UjpbO6IAAAAJ;V9JYPP0AAAAJ", "or_profile": "~Davis_Liang1;~Hila_Gonen1;~Yuning_Mao1;~Rui_Hou3;~Naman_Goyal1;~Marjan_Ghazvininejad1;~Luke_Zettlemoyer1;~Madian_Khabsa1", "aff": "Meta ;Meta Facebook;Meta;Meta Inc. 
;;;Meta;Meta", "aff_domain": "meta.com;facebook.com;meta.com;meta.inc;;;meta.com;meta.com", "position": "Researcher;Postdoc;Researcher;Research Scientist;;;Researcher;Researcher", "bibtex": "@inproceedings{\nliang2023xlmv,\ntitle={{XLM}-V: Overcoming the Vocabulary Bottleneck in Multilingual Masked Language Models},\nauthor={Davis Liang and Hila Gonen and Yuning Mao and Rui Hou and Naman Goyal and Marjan Ghazvininejad and Luke Zettlemoyer and Madian Khabsa},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=Ariw9I14zZ}\n}", "github": "", "project": "", "reviewers": "hL9S;rngv;BY56", "site": "https://openreview.net/forum?id=Ariw9I14zZ", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;3;4", "excitement": "3;4;4", "reproducibility": "3;4;4", "correctness": "4;4;4", "rating_avg": 5.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 4.0, "replies_avg": 5, "authors#_avg": 8, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;;;", "linkedin": ";;morningmoni/;rayhou/;ngoyal2707/;;luke-zettlemoyer-a0109b226/;", "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "Meta", "aff_unique_dep": "Meta Platforms, Inc.", "aff_unique_url": "https://meta.com", "aff_unique_abbr": "Meta", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "AtjErbRsg2", "title": "Reconstruct Before Summarize: An Efficient Two-Step Framework for Condensing and Summarizing Meeting Transcripts", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Meetings typically involve multiple participants and lengthy conversations, resulting in redundant and trivial content. To overcome these challenges, we propose a two-step framework, Reconstruct before Summarize (RbS), for effective and efficient meeting summarization. RbS first leverages a self-supervised paradigm to annotate essential contents by reconstructing the meeting transcripts. Secondly, we propose a relative positional bucketing (RPB) algorithm to equip (conventional) summarization models to generate the summary. Despite the additional reconstruction process, our proposed RPB significantly compresses the input, leading to faster processing and reduced memory consumption compared to traditional summarization methods. We validate the effectiveness and efficiency of our method through extensive evaluations and analyses. 
On two meeting summarization datasets, AMI and ICSI, our approach outperforms previous state-of-the-art approaches without relying on large-scale pre-training or expert-grade annotating tools.", "keywords": "meeting summarization;essential content extraction;long-text compression", "primary_area": "", "supplementary_material": "", "author": "Haochen Tan;Han Wu;Wei Shao;Xinyun Zhang;Mingjie Zhan;Zhaohui Hou;Ding Liang;Linqi Song", "authorids": "~Haochen_Tan1;~Han_Wu5;~Wei_Shao5;~Xinyun_Zhang2;~Mingjie_Zhan1;~Zhaohui_Hou1;~Ding_Liang1;~Linqi_Song1", "gender": "M;M;M;M;M;M;;M", "homepage": "https://scholars.cityu.edu.hk/en/persons/haochen-tan(6f087d1a-f724-44a4-83b4-9f3064fc52b7)/publications.html;https://hahahawu.com/;;;;https://www.semanticscholar.org/author/Zhaohui-Hou/2068251214;;https://sites.google.com/site/aisquaredlab/", "dblp": "269/9939;13/1864-4;;150/9539;277/1226;;;137/7963.html", "google_scholar": ";https://scholar.google.com.hk/citations?user=1SHXVAIAAAAJ;4o57IEAAAAAJ;https://scholar.google.com/citations?hl=zh-CN;;;;UcGN3MoAAAAJ", "or_profile": "~Haochen_Tan1;~Han_Wu5;~Wei_Shao5;~Xinyun_Zhang2;~Mingjie_Zhan1;~Zhaohui_Hou1;~Ding_Liang1;~Linqi_Song1", "aff": "City University of Hong Kong;City University of Hong Kong;City University of Hong Kong;Department of Computer Science and Engineering, The Chinese University of Hong Kong;SenseTime Research;Sensetime;;City University of Hong Kong", "aff_domain": "cityu.edu.hk;cityu.edu.hk;cityu.edu.hk;cse.cuhk.edu.hk;sensetime.com;sensetime.com;;cityu.edu.hk", "position": "PhD student;PhD student;PhD student;PhD student;Researcher;Researcher;;Assistant Professor", "bibtex": "@inproceedings{\ntan2023reconstruct,\ntitle={Reconstruct Before Summarize: An Efficient Two-Step Framework for Condensing and Summarizing Meeting Transcripts},\nauthor={Haochen Tan and Han Wu and Wei Shao and Xinyun Zhang and Mingjie Zhan and Zhaohui Hou and Ding Liang and Linqi Song},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=AtjErbRsg2}\n}", "github": "", "project": "", "reviewers": "KpoB;9DKz;NpwE;hWuH", "site": "https://openreview.net/forum?id=AtjErbRsg2", "pdf_size": 0, "rating": "5;5;5;5", "confidence": "4;4;4;3", "excitement": "4;3;3;3", "reproducibility": "2;4;3;4", "correctness": "4;4;4;3", "rating_avg": 5.0, "confidence_avg": 3.75, "excitement_avg": 3.25, "reproducibility_avg": 3.25, "correctness_avg": 3.75, "replies_avg": 14, "authors#_avg": 8, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-8008-064X;;;;;;0000-0003-2756-4984", "linkedin": ";;;;;;;", "aff_unique_index": "0;0;0;1;2;2;0", "aff_unique_norm": "City University of Hong Kong;Chinese University of Hong Kong;SenseTime", "aff_unique_dep": ";Department of Computer Science and Engineering;SenseTime Research", "aff_unique_url": "https://www.cityu.edu.hk;https://www.cuhk.edu.hk;https://www.sensetime.com", "aff_unique_abbr": "CityU;CUHK;SenseTime", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "AxPGO36LfE", "title": "X-SNS: Cross-Lingual Transfer Prediction through Sub-Network Similarity", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Cross-lingual transfer (XLT) is an emergent ability of multilingual language models that preserves their performance on a task to a significant extent when evaluated in languages that were not included in the 
fine-tuning process.\nWhile English, due to its widespread usage, is typically regarded as the primary language for model adaptation in various tasks, recent studies have revealed that the efficacy of XLT can be amplified by selecting the most appropriate source languages based on specific conditions.\nIn this work, we propose the utilization of sub-network similarity between two languages as a proxy for predicting the compatibility of the languages in the context of XLT.\nOur approach is model-oriented, better reflecting the inner workings of foundation models. \nIn addition, it requires only a moderate amount of raw text from candidate languages, distinguishing it from the majority of previous methods that rely on external resources.\nIn experiments, we demonstrate that our method is more effective than baselines across diverse tasks.\nSpecifically, it shows proficiency in ranking candidates for zero-shot XLT, achieving an improvement of 4.6% on average in terms of NDCG@3.\nWe also provide extensive analyses that confirm the utility of sub-networks for XLT prediction.", "keywords": "Cross-lingual transfer;Multilingual Language Model", "primary_area": "", "supplementary_material": "", "author": "Taejun Yun;Jinhyeon Kim;Deokyeong Kang;Seonghoon Lim;Jihoon Kim;Taeuk Kim", "authorids": "~Taejun_Yun1;~Jinhyeon_Kim3;~Deokyeong_Kang1;~Seonghoon_Lim1;~Jihoon_Kim6;~Taeuk_Kim1", "gender": "M;M;M;;M;M", "homepage": "https://github.com/Tjay16;;https://github.com/kdy20401;https://github.com/dmammfl;https://www.linkedin.com/in/jihoon-kim-b05257125/;https://galsang.github.io", "dblp": ";;;;;205/3110", "google_scholar": ";;;;;eH5uq7wAAAAJ", "or_profile": "~Taejun_Yun1;~Jinhyeon_Kim3;~Deokyeong_Kang1;~Seonghoon_Lim1;~Jihoon_Kim6;~Taeuk_Kim1", "aff": "Hanyang University;Hanyang University;Hanyang University;Hanyang University;;Hanyang University", "aff_domain": "hanyang.ac.kr;hanyang.ac.kr;hanyang.ac.kr;hanyang.ac.kr;;hanyang.ac.kr", "position": "MS student;MS student;MS student;MS student;;Assistant Professor", "bibtex": "@inproceedings{\nyun2023xsns,\ntitle={X-{SNS}: Cross-Lingual Transfer Prediction through Sub-Network Similarity},\nauthor={Taejun Yun and Jinhyeon Kim and Deokyeong Kang and Seonghoon Lim and Jihoon Kim and Taeuk Kim},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=AxPGO36LfE}\n}", "github": "", "project": "", "reviewers": "uSgC;TZ8W;39c7", "site": "https://openreview.net/forum?id=AxPGO36LfE", "pdf_size": 0, "rating": "4;4;4", "confidence": "5;3;3", "excitement": "3;3;2", "reproducibility": "4;4;3", "correctness": "3;4;3", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 2.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;0000-0001-6919-7727", "linkedin": ";kimjinhye0n;;;jihoon-kim-b05257125/;\ud0dc\uc6b1-\uae40-07125a13a/", "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Hanyang University", "aff_unique_dep": "", "aff_unique_url": "https://www.hanyang.ac.kr", "aff_unique_abbr": "HYU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "South Korea" }, { "id": "B01gPP5YCh", "title": "Parameter-Efficient Prompt Tuning Makes Generalized and Calibrated Neural Text Retrievers", "track": "main", "status": "Long Findings", "tldr": "", 
"abstract": "Prompt tuning attempts to update few task-specific parameters in pre-trained models. It has achieved comparable performance to fine-tuning of the full parameter set on both language understanding and generation tasks. In this work, we study the problem of prompt tuning for neural text retrievers. We introduce parameter-efficient prompt tuning for text retrieval across in-domain, cross-domain, and cross-topic settings. Through an extensive analysis, we show that the strategy can mitigate the two issues---parameter-inefficiency and weak generalizability---faced by fine-tuning based retrieval methods. Notably, it can significantly improve the out-of-domain zero-shot generalization of the retrieval models. By updating only 0.1% of the model parameters, the prompt tuning strategy can help retrieval models achieve better generalization performance than traditional methods in which all parameters are updated. Finally, to facilitate research on retrievers' cross-topic generalizability, we curate and release an academic retrieval dataset with 18K query-results pairs in 87 topics, making it the largest topic-specific one to date.", "keywords": "information retrieval;prompt tuning;generalization", "primary_area": "", "supplementary_material": "", "author": "Weng Lam Tam;Xiao Liu;Kaixuan Ji;Lilong Xue;Jiahua Liu;tao li;Yuxiao Dong;Jie Tang", "authorids": "~Weng_Lam_Tam1;~Xiao_Liu15;~Kaixuan_Ji2;~Lilong_Xue1;~Jiahua_Liu1;~tao_li13;~Yuxiao_Dong1;~Jie_Tang1", "gender": "F;M;Not Specified;M;M;M;M;", "homepage": "https://github.com/rainatam;https://github.com/xiao9905;https://github.com/jkx19;https://github.com/bigdante?tab=repositories;;https://scholar.google.com.hk/citations?hl=zh-CN&user=klVif50AAAAJ;https://keg.cs.tsinghua.edu.cn/yuxiao/;", "dblp": ";82/1364-36;252/7475;;117/6636;75/4601;17/9267;", "google_scholar": ";VKI8EhUAAAAJ;FOoKDukAAAAJ;;MH95amsAAAAJ;https://scholar.google.com.hk/citations?hl=zh-CN;https://scholar.google.com.hk/citations?hl=en;", "or_profile": "~Weng_Lam_Tam1;~Xiao_Liu15;~Kaixuan_Ji2;~Lilong_Xue1;~Jiahua_Liu1;~tao_li13;~Yuxiao_Dong1;~Jie_Tang1", "aff": ";Tsinghua University;Tsinghua University;Tsinghua University;;Meituan;Tsinghua University;", "aff_domain": ";tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;;meituan.com;tsinghua.edu.cn;", "position": ";PhD student;Undergrad student;MS student;;Researcher;Associate Professor;", "bibtex": "@inproceedings{\ntam2023parameterefficient,\ntitle={Parameter-Efficient Prompt Tuning Makes Generalized and Calibrated Neural Text Retrievers},\nauthor={Weng Lam Tam and Xiao Liu and Kaixuan Ji and Lilong Xue and Jiahua Liu and tao li and Yuxiao Dong and Jie Tang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=B01gPP5YCh}\n}", "github": "", "project": "", "reviewers": "TuGb;deUw;DnA1", "site": "https://openreview.net/forum?id=B01gPP5YCh", "pdf_size": 0, "rating": "3;3;3", "confidence": "2;3;2", "excitement": "2;4;4", "reproducibility": "4;4;4", "correctness": "2;3;4", "rating_avg": 3.0, "confidence_avg": 2.3333333333333335, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.0, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 8, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-9226-4569;;;;;0000-0002-6092-2002;", "linkedin": ";;;;;;;", "aff_unique_index": "0;0;0;1;0", "aff_unique_norm": "Tsinghua University;Meituan", "aff_unique_dep": ";", "aff_unique_url": 
"https://www.tsinghua.edu.cn;https://www.meituan.com", "aff_unique_abbr": "THU;Meituan", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "B14ohp9mPU", "title": "On Evaluation of Bangla Word Analogies", "track": "main", "status": "Short Main", "tldr": "", "abstract": "This paper presents a benchmark dataset of Bangla word analogies for evaluating the quality of existing Bangla word embeddings. Despite being the 7th largest spoken language in the world, Bangla is still a low-resource language and popular NLP models often struggle to perform well on Bangla data sets. Therefore, developing a robust evaluation set is crucial for benchmarking and guiding future research on improving Bangla word embeddings, which is currently missing. To address this issue, we introduce a new evaluation set of 16,678 unique word analogies in Bangla as well as a translated and curated version of the original Mikolov dataset (10,594 samples) in Bangla. Our experiments with different state-of-the-art embedding models reveal that current Bangla word embeddings struggle to achieve high accuracy on both data sets, demonstrating a significant gap in multilingual NLP research.", "keywords": "Bangla;Word Analogy;Evaluation", "primary_area": "", "supplementary_material": "", "author": "Mousumi Akter;Souvika Sarkar;Shubhra Kanti Karmaker Santu", "authorids": "~Mousumi_Akter1;~Souvika_Sarkar1;~Shubhra_Kanti_Karmaker_Santu2", "gender": ";;", "homepage": "https://sites.google.com/view/mousumi-akter;;", "dblp": "200/7260-1.html;;", "google_scholar": "0k_iDPIAAAAJ;;", "or_profile": "~Mousumi_Akter1;~Souvika_Sarkar1;~Shubhra_Kanti_Karmaker_Santu2", "aff": "Auburn University;;", "aff_domain": "auburn.edu;;", "position": "PhD student;;", "bibtex": "@inproceedings{\nakter2023on,\ntitle={On Evaluation of Bangla Word Analogies},\nauthor={Mousumi Akter and Souvika Sarkar and Shubhra Kanti Karmaker Santu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=B14ohp9mPU}\n}", "github": "", "project": "", "reviewers": "Wxve;5yeX;ZKu3", "site": "https://openreview.net/forum?id=B14ohp9mPU", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;4", "excitement": "3;3;2", "reproducibility": "4;4;2", "correctness": "4;4;2", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 2.6666666666666665, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": "mousumi-akter-1a80a8b8/;;", "aff_unique_index": "0", "aff_unique_norm": "Auburn University", "aff_unique_dep": "", "aff_unique_url": "https://www.auburn.edu", "aff_unique_abbr": "Auburn", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "id": "B3Muf1R1UD", "title": "NLMs: Augmenting Negation in Language Models", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Negation is the fundamental component in a natural language that reverses the semantic meaning of a sentence.\nIt plays an extremely important role across a wide range of applications, yet they are underrepresented in pre-trained language models (LMs), resulting often in wrong inferences. In this work, we try to improve the underlying understanding of the negation in the pre-trained LMs. 
To augment negation understanding, we propose a language model objective with a weighted cross-entropy loss and elastic weight consolidation regularization. We reduce the mean top-1 error rate for BERT-base to 1.1\\%, BERT-large to 0.78\\%, RoBERTa-base to 3.74\\%, RoBERTa-large to 0.01\\% on the negated LAMA dataset. It minimizes the BERT error rate by a margin of 8\\% and also outperforms the existing negation models. We also provide empirical evidence that negation-augmented models outperform the classical models on original as well as negation benchmarks on natural language inference tasks.", "keywords": "Language Models;Negation", "primary_area": "", "supplementary_material": "", "author": "Rituraj Singh;Rahul Kumar;Vivek Sridhar", "authorids": "~Rituraj_Singh2;~Rahul_Kumar6;~Vivek_Sridhar1", "gender": "M;M;M", "homepage": "https://devrituraj.github.io/;;", "dblp": "170/2496;;", "google_scholar": "https://scholar.google.co.in/citations?user=Wj8zIVUAAAAJ;snCEaJ8AAAAJ;", "or_profile": "~Rituraj_Singh2;~Rahul_Kumar6;~Vivek_Sridhar1", "aff": "Samsung Research and Development Institute - India, Bengaluru ;;", "aff_domain": "samsung.com;;", "position": "Researcher;;", "bibtex": "@inproceedings{\nsingh2023nlms,\ntitle={{NLM}s: Augmenting Negation in Language Models},\nauthor={Rituraj Singh and Rahul Kumar and Vivek Sridhar},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=B3Muf1R1UD}\n}", "github": "", "project": "", "reviewers": "nWX2;f5qH;ZAkP", "site": "https://openreview.net/forum?id=B3Muf1R1UD", "pdf_size": 0, "rating": "4;4;4", "confidence": "2;3;3", "excitement": "3;3;4", "reproducibility": "3;4;3", "correctness": "3;3;3", "rating_avg": 4.0, "confidence_avg": 2.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": "rituraj-singh-786a5872/;;vivek-sridhar-a649232b", "aff_unique_index": "0", "aff_unique_norm": "Samsung", "aff_unique_dep": "Samsung Research and Development Institute", "aff_unique_url": "https://www.samsung.com/in/", "aff_unique_abbr": "SRDI", "aff_campus_unique_index": "0", "aff_campus_unique": "Bengaluru", "aff_country_unique_index": "0", "aff_country_unique": "India" }, { "id": "B3SjWgXHzM", "title": "Token Prediction as Implicit Classification to Identify LLM-Generated Text", "track": "main", "status": "Short Main", "tldr": "", "abstract": "This paper introduces a novel approach for identifying the possible large language models (LLMs) involved in text generation. Instead of adding an additional classification layer to a base LM, we reframe the classification task as a next-token prediction task and directly fine-tune the base LM to perform it. We utilize the Text-to-Text Transfer Transformer (T5) model as the backbone for our experiments. We compared our approach to the more direct approach of utilizing hidden states for classification. Evaluation shows the exceptional performance of our method in the text classification task, highlighting its simplicity and efficiency. Furthermore, interpretability studies on the features extracted by our model reveal its ability to differentiate distinctive writing styles among various LLMs even in the absence of an explicit classifier. 
We also collected a dataset named OpenLLMText, containing approximately 340k text samples from human and LLMs, including GPT3.5, PaLM, LLaMA, and GPT2.", "keywords": "Machine-generated text detection;Text-to-Text Transfer Transformer (T5);Large language model;Transfer learning;Large language model", "primary_area": "", "supplementary_material": "", "author": "Yutian Chen;Hao Kang;Vivian Zhai;Liangze Li;Rita Singh;Bhiksha Raj", "authorids": "~Yutian_Chen2;~Hao_Kang6;~Vivian_Zhai1;~Liangze_Li1;~Rita_Singh1;~Bhiksha_Raj1", "gender": "M;;;;F;M", "homepage": ";https://haokang.me/;;;http://mlsp.cs.cmu.edu/people/rsingh/index.html;https://www.cs.cmu.edu/directory/bhikshar/", "dblp": ";;;;;60/3996", "google_scholar": "9-Cac9MAAAAJ;;;;;", "or_profile": "~Yutian_Chen2;~Hao_Kang6;~Vivian_Zhai1;~Liangze_Li1;~Rita_Singh1;~Bhiksha_Raj1", "aff": "Carnegie Mellon University;School of Computer Science, Carnegie Mellon University;Carnegie Mellon University;School of Computer Science, Carnegie Mellon University;School of Computer Science, Carnegie Mellon University;Mohamed bin Zayed University of Artificial Intelligence", "aff_domain": "cmu.edu;cs.cmu.edu;cmu.edu;cs.cmu.edu;cs.cmu.edu;mbzuai.ac.ae", "position": "Undergrad student;Undergrad student;Undergrad student;MS student;Research Professor;Full Professor", "bibtex": "@inproceedings{\nchen2023token,\ntitle={Token Prediction as Implicit Classification to Identify {LLM}-Generated Text},\nauthor={Yutian Chen and Hao Kang and Vivian Zhai and Liangze Li and Rita Singh and Bhiksha Raj},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=B3SjWgXHzM}\n}", "github": "", "project": "", "reviewers": "GnKq;K9Y3;PtZE", "site": "https://openreview.net/forum?id=B3SjWgXHzM", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;4;4", "excitement": "3;3;4", "reproducibility": "3;2;3", "correctness": "2;2;3", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 2.6666666666666665, "correctness_avg": 2.3333333333333335, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0001-8008-9014;;0009-0000-3331-3167;;;", "linkedin": ";haok1402/;;liangzel/;;", "aff_unique_index": "0;0;0;0;0;1", "aff_unique_norm": "Carnegie Mellon University;Mohamed bin Zayed University of Artificial Intelligence", "aff_unique_dep": ";", "aff_unique_url": "https://www.cmu.edu;https://mbzuai.ac.ae", "aff_unique_abbr": "CMU;MBZUAI", "aff_campus_unique_index": "1;1;1", "aff_campus_unique": ";Pittsburgh", "aff_country_unique_index": "0;0;0;0;0;1", "aff_country_unique": "United States;United Arab Emirates" }, { "id": "B3rTZovgaA", "title": "Doolittle: Benchmarks and Corpora for Academic Writing Formalization", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Improving the quality of academic writing is a meaningful but challenging task. Conventional methods of language refinement focus on narrow, specific linguistic features within isolated sentences, such as grammatical errors and improper word use. We propose a more general task, Academic Writing Formalization (AWF), to improve the overall quality of formal academic writing at the paragraph level. We formulate this language refinement task as a formal text style transfer task which transfers informal-academic text to formal-academic and contribute a large-scale non-parallel dataset, Doolittle, for this purpose. 
Concurrently, we apply a method named metric-oriented reinforcement learning (MORL) to two large language models (LLM) where we incorporate different levels of automatic feedback into the training process. Our experiments reveal that existing text transfer models and grammatical error correction models address certain aspects of AWF but still have a significant performance gap compared to human performance. Meanwhile, language models fine-tuned with our MORL method exhibit considerably improved performance, rivaling the latest chatbot ChatGPT, but still have a non-negligible gap compared to the ground truth formal-academic texts in Doolittle.", "keywords": "grammar error correction;large language models;academic writing formalization", "primary_area": "", "supplementary_material": "", "author": "Shizhe Diao;Yongyu Lei;Liangming Pan;Tianqing Fang;Wangchunshu Zhou;Sedrick Scott Keh;Min-Yen Kan;Tong Zhang", "authorids": "~Shizhe_Diao2;~Yongyu_Lei1;~Liangming_Pan1;~Tianqing_Fang1;~Wangchunshu_Zhou1;~Sedrick_Scott_Keh1;~Min-Yen_Kan1;~Tong_Zhang2", "gender": "M;M;M;M;M;M;M;M", "homepage": ";https://liangmingpan.bio;http://fangtq.com/;https://michaelzhouwang.github.io;https://sedrickkeh.github.io;https://www.comp.nus.edu.sg/~kanmy/;http://tongzhang-ml.org;https://shizhediao.github.io/", "dblp": ";186/9707;283/4921;245/8640.html;244/9561;k/MinYenKan;07/4227-1;221/3896", "google_scholar": ";JcjjOTUAAAAJ;https://scholar.google.com.hk/citations?user=Tb3rc34AAAAJ;UebIjuQAAAAJ;IMYgXsYAAAAJ;https://scholar.google.com.tw/citations?user=aNVcd3EAAAAJ;LurWtuYAAAAJ;NDFQrLQAAAAJ", "or_profile": "~Yongyu_Lei1;~Liangming_Pan1;~Tianqing_Fang1;~Wangchunshu_Zhou1;~Sedrick_Scott_Keh1;~Min-Yen_Kan1;~Tong_Zhang2;~SHIZHE_DIAO1", "aff": "Hong Kong University of Science and Technology;University of California, Santa Barbara;Hong Kong University of Science and Technology;Department of Computer Science, ETHZ - ETH Zurich;Toyota Research Institute;National University of Singapore;Hong Kong University of Science and Technology;Hong Kong University of Science and Technology", "aff_domain": "ust.hk;ucsb.edu;ust.hk;inf.ethz.ch;tri.global;nus.edu.sg;ust.hk;ust.hk", "position": "Intern;Postdoc;PhD student;PhD student;AI Resident;Associate Professor;Full Professor;PhD student", "bibtex": "@inproceedings{\ndiao2023doolittle,\ntitle={Doolittle: Benchmarks and Corpora for Academic Writing Formalization},\nauthor={Shizhe Diao and Yongyu Lei and Liangming Pan and Tianqing Fang and Wangchunshu Zhou and Sedrick Scott Keh and Min-Yen Kan and Tong Zhang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=B3rTZovgaA}\n}", "github": "", "project": "", "reviewers": "rcwz;r3ht;GKgM;euze", "site": "https://openreview.net/forum?id=B3rTZovgaA", "pdf_size": 0, "rating": "5;5;5;5", "confidence": "3;2;3;4", "excitement": "3;4;4;3", "reproducibility": "4;4;3;4", "correctness": "3;4;4;4", "rating_avg": 5.0, "confidence_avg": 3.0, "excitement_avg": 3.5, "reproducibility_avg": 3.75, "correctness_avg": 3.75, "replies_avg": 14, "authors#_avg": 8, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;;0000-0002-5511-2558;", "linkedin": "leiyongyu;;;;;;;", "aff_unique_index": "0;1;0;2;3;4;0;0", "aff_unique_norm": "Hong Kong University of Science and Technology;University of California, Santa Barbara;ETH Zurich;Toyota Research Institute;National University of Singapore", "aff_unique_dep": ";;Department of Computer Science;;", "aff_unique_url": 
"https://www.ust.hk;https://www.ucsb.edu;https://www.ethz.ch;https://www.tri.global;https://www.nus.edu.sg", "aff_unique_abbr": "HKUST;UCSB;ETHZ;TRI;NUS", "aff_campus_unique_index": "0;1;0;2;0;0", "aff_campus_unique": "Hong Kong SAR;Santa Barbara;Zurich;", "aff_country_unique_index": "0;1;0;2;1;3;0;0", "aff_country_unique": "China;United States;Switzerland;Singapore" }, { "id": "B6BXB4g8eQ", "title": "Be Selfish, But Wisely: Investigating the Impact of Agent Personality in Mixed-Motive Human-Agent Interactions", "track": "main", "status": "Long Main", "tldr": "", "abstract": "A natural way to design a negotiation dialogue system is via self-play RL: train an agent that learns to maximize its performance by interacting with a simulated user that has been designed to imitate human-human dialogue data. Although this procedure has been adopted in prior work, we find that it results in a fundamentally flawed system that fails to learn the value of compromise in a negotiation, which can often lead to no agreements (i.e., the partner walking away without a deal), ultimately hurting the model\u2019s overall performance. We investigate this observation in the context of DealOrNoDeal task, a multi-issue negotiation over books, hats, and balls. Grounded in negotiation theory from Economics, we modify the training procedure in two novel ways to design agents with diverse personalities and analyze their performance with human partners. We find that although both techniques show promise, a selfish agent, which maximizes its own performance while also avoiding walkaways, performs superior to other variants by implicitly learning to generate value for both itself and the negotiation partner. We discuss the implications of our findings for what it means to be a successful negotiation dialogue system and how these systems should be designed in the future.", "keywords": "dialogue;negotiation;personality;reinforcement learning", "primary_area": "", "supplementary_material": "", "author": "Kushal Chawla;Ian Wu;Yu Rong;Gale Lucas;Jonathan Gratch", "authorids": "~Kushal_Chawla2;~Ian_Wu1;~Yu_Rong5;~Gale_Lucas1;~Jonathan_Gratch1", "gender": "M;M;F;F;M", "homepage": "https://kushalchawla.github.io/;https://ianwu13.github.io/;https://www.linkedin.com/in/yu-rong-6b09371ba/;https://ict.usc.edu/profile/gale-lucas/;https://people.ict.usc.edu/~gratch/", "dblp": "206/6592;;;146/1163;71/3911.html", "google_scholar": "https://scholar.google.co.in/citations?user=x4rFCskAAAAJ;;;Bkq-JK8AAAAJ;HF448PMAAAAJ", "or_profile": "~Kushal_Chawla2;~Ian_Wu1;~Yu_Rong5;~Gale_Lucas1;~Jonathan_Gratch1", "aff": "University of Southern California;University of Southern California;University of Southern California;University of Southern California;University of Southern California", "aff_domain": "usc.edu;usc.edu;usc.edu;usc.edu;usc.edu", "position": "PhD student;MS student;MS student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nchawla2023be,\ntitle={Be Selfish, But Wisely: Investigating the Impact of Agent Personality in Mixed-Motive Human-Agent Interactions},\nauthor={Kushal Chawla and Ian Wu and Yu Rong and Gale Lucas and Jonathan Gratch},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=B6BXB4g8eQ}\n}", "github": "", "project": "", "reviewers": "atVW;qS7U;BeTN", "site": "https://openreview.net/forum?id=B6BXB4g8eQ", "pdf_size": 0, "rating": "5;5;5", "confidence": "3;3;3", "excitement": "4;4;4", "reproducibility": "3;4;4", 
"correctness": "3;4;4", "rating_avg": 5.0, "confidence_avg": 3.0, "excitement_avg": 4.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0000-0003-3089-9283;0000-0002-5959-809X", "linkedin": "kushalchawla/;ianwu13/;yu-rong-6b09371ba/;gale-m-lucas-phd/;", "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "University of Southern California", "aff_unique_dep": "", "aff_unique_url": "https://www.usc.edu", "aff_unique_abbr": "USC", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Los Angeles", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "B6Gdg7u04y", "title": "LLMaAA: Making Large Language Models as Active Annotators", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Prevalent supervised learning methods in natural language processing (NLP) are notoriously data-hungry, which demand large amounts of high-quality annotated data. In practice, acquiring such data is a costly endeavor. Recently, the superior few-shot performance of large language models (LLMs) has propelled the development of dataset generation, where the training data are solely synthesized from LLMs. However, such an approach usually suffers from low-quality issues, and requires orders of magnitude more labeled data to achieve satisfactory performance.\nTo fully exploit the potential of LLMs and make use of massive unlabeled data, we propose LLMaAA, which takes LLMs as annotators and puts them into an active learning loop to determine what to annotate efficiently. To learn robustly with pseudo labels, we optimize both the annotation and training processes: (1) we draw $k$-NN examples from a small demonstration pool as in-context examples, and (2) we adopt the example reweighting technique to assign training samples with learnable weights.\nCompared with previous approaches, LLMaAA features both efficiency and reliability.\nWe conduct experiments and analysis on two classic NLP tasks, named entity recognition and relation extraction. 
With LLMaAA, task-specific models trained from LLM-generated labels can outperform the teacher within only hundreds of annotated examples, which is much more cost-effective than other baselines.", "keywords": "Large language model;pseudo labeling;active learning", "primary_area": "", "supplementary_material": "", "author": "Ruoyu Zhang;Yanzeng Li;Yongliang Ma;Ming Zhou;Lei Zou", "authorids": "~Ruoyu_Zhang1;~Yanzeng_Li1;~Yongliang_Ma1;~Ming_Zhou5;~Lei_Zou2", "gender": "M;;;M;M", "homepage": "https://github.com/ridiculouz;;;;https://www.wict.pku.edu.cn/zoulei/", "dblp": "81/8054;;;;81/3390-1.html", "google_scholar": "siuZCjUAAAAJ;;;a0w5c0gAAAAJ;", "or_profile": "~Ruoyu_Zhang1;~Yanzeng_Li1;~Yongliang_Ma1;~Ming_Zhou5;~Lei_Zou2", "aff": "Peking University;;;Sinovation Ventures;Peking University", "aff_domain": "pku.edu.cn;;;chuangxin.com;pku.edu.cn", "position": "MS student;;;Principal Researcher;Full Professor", "bibtex": "@inproceedings{\nzhang2023llmaaa,\ntitle={{LLM}a{AA}: Making Large Language Models as Active Annotators},\nauthor={Ruoyu Zhang and Yanzeng Li and Yongliang Ma and Ming Zhou and Lei Zou},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=B6Gdg7u04y}\n}", "github": "", "project": "", "reviewers": "z8rH;P3tf;Neog", "site": "https://openreview.net/forum?id=B6Gdg7u04y", "pdf_size": 0, "rating": "2;2;2", "confidence": "4;4;4", "excitement": "4;4;3", "reproducibility": "2;3;3", "correctness": "3;3;3", "rating_avg": 2.0, "confidence_avg": 4.0, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 2.6666666666666665, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0009-0000-3871-6424;;;;0000-0002-8586-4400", "linkedin": ";;;;", "aff_unique_index": "0;1;0", "aff_unique_norm": "Peking University;Sinovation Ventures", "aff_unique_dep": ";", "aff_unique_url": "http://www.pku.edu.cn;https://www.sinovationventures.com", "aff_unique_abbr": "Peking U;Sinovation Ventures", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "B8Hz9HqnFm", "title": "Towards Zero-shot Learning for End-to-end Cross-modal Translation Models", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "One of the main problems in speech translation is the mismatch between different modalities. The second problem, scarcity of parallel data covering multiple modalities, means that the end-to-end multi-modal models tend to perform worse than cascade models, although there are exceptions under favorable conditions. To address these problems, we propose an end-to-end zero-shot speech translation model, connecting two pre-trained uni-modality modules via word rotator's distance. The model retains the zero-shot ability of cascade models, while it can also be trained in an end-to-end style to avoid error propagation. 
Our comprehensive experiments on the MuST-C benchmarks show that our end-to-end zero-shot approach performs better than or as well as those of the CTC-based cascade models and that our end-to-end model with supervised training also matches the latest baselines.", "keywords": "Zero-Shot;End-to-End;Speech Translation", "primary_area": "", "supplementary_material": "", "author": "Jichen Yang;Kai Fan;Minpeng Liao;Boxing Chen;Zhongqiang Huang", "authorids": "~Jichen_Yang1;~Kai_Fan1;~Minpeng_Liao1;~Boxing_Chen1;~Zhongqiang_Huang1", "gender": "M;M;;M;M", "homepage": "https://github.com/NexusIRonY/Niry;https://scholar.google.com/citations?user=SQqkcdgAAAAJ&hl=zh;;https://sites.google.com/site/chenboxing/Home;", "dblp": ";20/3825-2.html;;12/1081;10/3565", "google_scholar": ";SQqkcdgAAAAJ;;LiINs3gAAAAJ;", "or_profile": "~Jichen_Yang1;~Kai_Fan1;~Minpeng_Liao1;~Boxing_Chen1;~Zhongqiang_Huang1", "aff": "Alibaba Group;Alibaba Group;;Huawei Technologies Ltd.;Alibaba Group", "aff_domain": "alibaba-inc.com;alibaba-inc.com;;huawei.com;alibaba-inc.com", "position": "Intern;Researcher;;Principal Researcher;Senior Staff Engineer", "bibtex": "@inproceedings{\nyang2023towards,\ntitle={Towards Zero-shot Learning for End-to-end Cross-modal Translation Models},\nauthor={Jichen Yang and Kai Fan and Minpeng Liao and Boxing Chen and Zhongqiang Huang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=B8Hz9HqnFm}\n}", "github": "", "project": "", "reviewers": "ZEUx;kgo5;aFGi;B6yQ", "site": "https://openreview.net/forum?id=B8Hz9HqnFm", "pdf_size": 0, "rating": "4;4;4;4", "confidence": "3;4;3;3", "excitement": "4;3;4;3", "reproducibility": "4;2;4;3", "correctness": "3;2;3;3", "rating_avg": 4.0, "confidence_avg": 3.25, "excitement_avg": 3.5, "reproducibility_avg": 3.25, "correctness_avg": 2.75, "replies_avg": 14, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-8256-0807;;0000-0002-3170-4858;", "linkedin": ";;;;", "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Alibaba Group;Huawei", "aff_unique_dep": ";Huawei Technologies", "aff_unique_url": "https://www.alibaba.com;https://www.huawei.com", "aff_unique_abbr": "Alibaba;Huawei", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "B8mdHlqNfw", "title": "Large Language Models and Multimodal Retrieval for Visual Word Sense Disambiguation", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Visual Word Sense Disambiguation (VWSD) is a novel challenging task with the goal of retrieving an image among a set of candidates, which better represents the meaning of an ambiguous word within a given context. In this paper, we make a substantial step towards unveiling this interesting task by applying a varying set of approaches. Since VWSD is primarily a text-image retrieval task, we explore the latest transformer-based methods for multimodal retrieval. Additionally, we utilize Large Language Models (LLMs) as knowledge bases to enhance the given phrases and resolve ambiguity related to the target word. We also study VWSD as a unimodal problem by converting to text-to-text and image-to-image retrieval, as well as question-answering (QA), to fully explore the capabilities of relevant models. To tap into the implicit knowledge of LLMs, we experiment with Chain-of-Thought (CoT) prompting to guide explainable answer generation. 
On top of all, we train a learn to rank (LTR) model in order to combine our different modules, achieving competitive ranking results. Extensive experiments on VWSD demonstrate valuable insights to effectively drive future directions.", "keywords": "visual word sense disambiguation;large language models;multimodal retrieval", "primary_area": "", "supplementary_material": "", "author": "Anastasia Kritharoula;Maria Lymperaiou;Giorgos Stamou", "authorids": "~Anastasia_Kritharoula1;~Maria_Lymperaiou1;~Giorgos_Stamou1", "gender": "F;F;M", "homepage": ";https://www.ails.ece.ntua.gr/people/marialymp;https://www.ece.ntua.gr/en/staff/174", "dblp": ";329/4552;s/GBStamou", "google_scholar": ";YNikyhIAAAAJ;https://scholar.google.gr/citations?user=R3y5dxMAAAAJ", "or_profile": "~Anastasia_Kritharoula1;~Maria_Lymperaiou1;~Giorgos_B._Stamou1", "aff": "National Technical University of Athens;National Technical University of Athens;National Technical University of Athens", "aff_domain": "ntua.gr;ntua.gr;ntua.gr", "position": "Undergrad student;PhD student;Full Professor", "bibtex": "@inproceedings{\nkritharoula2023large,\ntitle={Large Language Models and Multimodal Retrieval for Visual Word Sense Disambiguation},\nauthor={Anastasia Kritharoula and Maria Lymperaiou and Giorgos Stamou},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=B8mdHlqNfw}\n}", "github": "", "project": "", "reviewers": "xNwU;xQYd;tgVD", "site": "https://openreview.net/forum?id=B8mdHlqNfw", "pdf_size": 0, "rating": "5;5;5", "confidence": "5;3;3", "excitement": "4;4;3", "reproducibility": "5;4;3", "correctness": "4;4;4", "rating_avg": 5.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.0, "correctness_avg": 4.0, "replies_avg": 10, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0001-9442-4186;", "linkedin": "anastasia-kritharoula-b1229b230;maria-lymperaiou-55a5b964/;", "aff_unique_index": "0;0;0", "aff_unique_norm": "National Technical University of Athens", "aff_unique_dep": "", "aff_unique_url": "https://www.ntua.gr", "aff_unique_abbr": "NTUA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Greece" }, { "id": "BAA4209PGJ", "title": "Set Learning for Generative Information Extraction", "track": "main", "status": "Short Main", "tldr": "", "abstract": "Recent efforts have endeavored to employ the sequence-to-sequence (Seq2Seq) model in Information Extraction~(IE) due to its potential to tackle multiple IE tasks in a unified manner.\nUnder this formalization, multiple structured objects are concatenated as the target sequence in a predefined order.\nHowever, structured objects, by their nature, constitute an unordered set. Consequently, this formalization introduces a potential order bias, which can impair model learning.\nTargeting this issue, this paper proposes a set learning approach that considers multiple permutations of structured objects to optimize set probability approximately. Notably, our approach does not require any modifications to model structures, making it easily integrated into existing generative IE frameworks. 
Experiments show that our method consistently improves existing frameworks on vast tasks and datasets.", "keywords": "Information Extraction", "primary_area": "", "supplementary_material": "", "author": "Jiangnan Li;Yice Zhang;Bin Liang;Kam-Fai Wong;Ruifeng Xu", "authorids": "~Jiangnan_Li1;~Yice_Zhang1;~Bin_Liang6;~Kam-Fai_Wong2;~Ruifeng_Xu1", "gender": "M;M;M;M;M", "homepage": ";https://binliang-nlp.github.io/;http://www.se.cuhk.edu.hk/~kfwong;http://faculty.hitsz.edu.cn/xuruifeng;", "dblp": "225/4508;71/6053-4;w/KamFaiWong;93/5407-1;", "google_scholar": "a4akjpYAAAAJ;djpQeLEAAAAJ;;mObXnNIAAAAJ;ux6PjDQAAAAJ", "or_profile": "~Yice_Zhang1;~Bin_Liang6;~Kam-Fai_Wong2;~Ruifeng_Xu1;~Li_jiangnan2", "aff": "Harbin Institute of Technology;The Chinese University of Hong Kong;The Chinese University of Hong Kong;Harbin Institute of Technology;Harbin Institute of Technology(Shenzhen)", "aff_domain": "hit.edu.cn;cuhk.edu.hk;cuhk.edu.hk;hit.edu.cn;hit.edu.cn", "position": "PhD student;Postdoc;Full Professor;Full Professor;MS student", "bibtex": "@inproceedings{\nli2023set,\ntitle={Set Learning for Generative Information Extraction},\nauthor={Jiangnan Li and Yice Zhang and Bin Liang and Kam-Fai Wong and Ruifeng Xu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=BAA4209PGJ}\n}", "github": "", "project": "", "reviewers": "wMdM;rpsn;sCUS", "site": "https://openreview.net/forum?id=BAA4209PGJ", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;4", "excitement": "4;4;3", "reproducibility": "5;4;4", "correctness": "3;4;3", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.333333333333333, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0001-7234-1347;0000-0002-9427-5659;0000-0002-4009-5679;", "linkedin": ";;;;", "aff_unique_index": "0;1;1;0;0", "aff_unique_norm": "Harbin Institute of Technology;Chinese University of Hong Kong", "aff_unique_dep": ";", "aff_unique_url": "http://www.hit.edu.cn/;https://www.cuhk.edu.hk", "aff_unique_abbr": "HIT;CUHK", "aff_campus_unique_index": "0;1;1;0;2", "aff_campus_unique": "Harbin;Hong Kong SAR;Shenzhen", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "BB1qrcPgRu", "title": "Survival of the Most Influential Prompts: Efficient Black-Box Prompt Search via Clustering and Pruning", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Prompt-based learning has been an effective paradigm for large pretrained language models (LLM), enabling few-shot or even zero-shot learning. Black-box prompt search has received growing interest recently for its distinctive properties of gradient-free optimization, proven particularly useful and powerful for model-as-a-service usage. However, the discrete nature and the complexity of combinatorial optimization hinder the efficiency of modern black-box approaches. Despite extensive research on search algorithms, the crucial aspect of search space design and optimization has been largely overlooked. In this paper, we first conduct a sensitivity analysis by prompting LLM, revealing that only a small number of tokens exert a disproportionate amount of influence on LLM predictions. 
Leveraging this insight, we propose the Clustering and Pruning for Efficient Black-box Prompt Search (ClaPS), a simple black-box search method that first clusters and prunes the search space to focus exclusively on influential prompt tokens. By employing even simple search methods within the pruned search space, ClaPS achieves state-of-the-art performance across various tasks and LLMs, surpassing the performance of complex approaches while significantly reducing search costs. Our findings underscore the critical role of search space design and optimization in enhancing both the usefulness and the efficiency of black-box prompt-based learning.", "keywords": "large language models;black-box;discrete prompt learning", "primary_area": "", "supplementary_material": "", "author": "Han Zhou;Xingchen Wan;Ivan Vuli\u0107;Anna Korhonen", "authorids": "~Han_Zhou4;~Xingchen_Wan1;~Ivan_Vuli\u01071;~Anna_Korhonen1", "gender": "M;M;M;", "homepage": "https://hzhou.top;https://xingchen.one;https://sites.google.com/site/ivanvulic/;https://sites.google.com/site/annakorhonen/", "dblp": ";255/7214;77/9768;14/6532", "google_scholar": "7pXfJVgAAAAJ;6KkohssAAAAJ;ZX8js60AAAAJ;https://scholar.google.co.uk/citations?user=SCoVoOYAAAAJ", "or_profile": "~Han_Zhou4;~Xingchen_Wan1;~Ivan_Vuli\u01071;~Anna_Korhonen1", "aff": "Google;University of Oxford;PolyAI Limited;University of Cambridge", "aff_domain": "google.com;robots.ox.ac.uk;poly-ai.com;cam.ac.uk", "position": "Student Researcher;PhD student;Senior Scientist;Professor", "bibtex": "@inproceedings{\nzhou2023survival,\ntitle={Survival of the Most Influential Prompts: Efficient Black-Box Prompt Search via Clustering and Pruning},\nauthor={Han Zhou and Xingchen Wan and Ivan Vuli{\\'c} and Anna Korhonen},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=BB1qrcPgRu}\n}", "github": "", "project": "", "reviewers": "3YDq;4AgN;a6rh", "site": "https://openreview.net/forum?id=BB1qrcPgRu", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;3;3", "excitement": "2;3;3", "reproducibility": "3;4;3", "correctness": "4;4;3", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 2.6666666666666665, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0003-3778-4075;0000-0003-0074-0597;;", "linkedin": "hanzhou032;;ivan-vuli%C4%87-286b4a81/;anna-korhonen-534a9b5/", "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Google;University of Oxford;PolyAI Limited;University of Cambridge", "aff_unique_dep": "Google;;;", "aff_unique_url": "https://www.google.com;https://www.ox.ac.uk;https://www.poly.ai;https://www.cam.ac.uk", "aff_unique_abbr": "Google;Oxford;PolyAI;Cambridge", "aff_campus_unique_index": "0;2", "aff_campus_unique": "Mountain View;;Cambridge", "aff_country_unique_index": "0;1;1;1", "aff_country_unique": "United States;United Kingdom" }, { "id": "BEFiYM5Vtx", "title": "Multi-Task Knowledge Distillation with Embedding Constraints for Scholarly Keyphrase Boundary Classification", "track": "main", "status": "Long Main", "tldr": "", "abstract": "The task of scholarly keyphrase boundary classification aims at identifying keyphrases from scientific papers and classifying them with their types from a set of predefined classes (e.g., task, process, or material). 
Despite the importance of keyphrases and their types in many downstream applications including indexing, searching, and question answering over scientific documents, scholarly keyphrase boundary classification is still an under-explored task. In this work, we propose a novel embedding constraint on multi-task knowledge distillation which enforces the teachers (single-task models) and the student (multi-task model) similarity in the embedding space. Specifically, we enforce that the student model is trained not only to imitate the teachers\u2019 output distribution over classes, but also to produce language representations that are similar to those produced by the teachers. Our results show that the proposed approach outperforms previous works and strong baselines on three datasets of scientific documents.", "keywords": "keyphrase boundary classification;multi-task learning;knowledge distillation", "primary_area": "", "supplementary_material": "", "author": "Seo Yeon Park;Cornelia Caragea", "authorids": "~Seo_Yeon_Park1;~Cornelia_Caragea2", "gender": ";", "homepage": "https://seoyeon-p.github.io/;https://www.cs.uic.edu/~cornelia/", "dblp": "259/0813;69/6680.html", "google_scholar": "V6uvvo8AAAAJ;vkX6VV4AAAAJ", "or_profile": "~Seo_Yeon_Park1;~Cornelia_Caragea2", "aff": "University of Illinois at Chicago;University of Illinois at Chicago", "aff_domain": "uic.edu;uic.edu", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\npark2023multitask,\ntitle={Multi-Task Knowledge Distillation with Embedding Constraints for Scholarly Keyphrase Boundary Classification},\nauthor={Seo Yeon Park and Cornelia Caragea},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=BEFiYM5Vtx}\n}", "github": "", "project": "", "reviewers": "evsV;bBkg;7E6J", "site": "https://openreview.net/forum?id=BEFiYM5Vtx", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;5;3", "excitement": "4;4;4", "reproducibility": "3;3;4", "correctness": "3;4;4", "rating_avg": 5.0, "confidence_avg": 4.0, "excitement_avg": 4.0, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";", "linkedin": ";", "aff_unique_index": "0;0", "aff_unique_norm": "University of Illinois at Chicago", "aff_unique_dep": "", "aff_unique_url": "https://www.uic.edu", "aff_unique_abbr": "UIC", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Chicago", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "BFbdO9GwTZ", "title": "Generative Adversarial Training with Perturbed Token Detection for Model Robustness", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Adversarial training is the dominant strategy towards model robustness. Current adversarial training methods typically apply perturbations to embedding representations, whereas actual text-based attacks introduce perturbations as discrete tokens. Thus there exists a gap between the continuous embedding representations and discrete text tokens that hampers the effectiveness of adversarial training. Moreover, the continuous representations of perturbations cannot be further utilized, resulting in the suboptimal performance. 
To bridge this gap for adversarial robustness, in this paper, we devise a novel generative adversarial training framework that integrates gradient-based learning, adversarial example generation and perturbed token detection. Our proposed framework consists of generative adversarial attack and adversarial training process. Specifically, in generative adversarial attack, the embeddings are shared between the classifier and the generative model, which enables the generative model to leverage the gradients from the classifier for generating perturbed tokens. Then, adversarial training process combines adversarial regularization with perturbed token detection to provide token-level supervision and improve the efficiency of sample utilization. Extensive experiments on five datasets from the AdvGLUE benchmark demonstrate that our framework significantly enhances the model robustness, surpassing the state-of-the-art results of ChatGPT by 10% in average accuracy.", "keywords": "generative adversarial training;adversarial defense;adversarial detection;discriminative pre-trained model", "primary_area": "", "supplementary_material": "", "author": "Jiahao Zhao;Wenji Mao", "authorids": "~Jiahao_Zhao1;~Wenji_Mao1", "gender": "M;F", "homepage": "https://github.com/Opdoop;", "dblp": ";16/2159.html", "google_scholar": "WYbijGsAAAAJ;h6m4X_AAAAAJ", "or_profile": "~Jiahao_Zhao1;~Wenji_Mao1", "aff": "Institute of Automation, Chinese Academy of Sciences;Institute of Automation, Chinese Academy of Sciences", "aff_domain": "ia.ac.cn;ia.ac.cn", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\nzhao2023generative,\ntitle={Generative Adversarial Training with Perturbed Token Detection for Model Robustness},\nauthor={Jiahao Zhao and Wenji Mao},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=BFbdO9GwTZ}\n}", "github": "", "project": "", "reviewers": "1YBd;LmDX;WUQd", "site": "https://openreview.net/forum?id=BFbdO9GwTZ", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;4;4", "excitement": "4;4;3", "reproducibility": "4;4;4", "correctness": "4;4;4", "rating_avg": 5.0, "confidence_avg": 4.0, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.0, "correctness_avg": 4.0, "replies_avg": 5, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";", "linkedin": ";", "aff_unique_index": "0;0", "aff_unique_norm": "Chinese Academy of Sciences", "aff_unique_dep": "Institute of Automation", "aff_unique_url": "http://www.ia.cas.cn", "aff_unique_abbr": "CAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "id": "BGsssE3E4i", "title": "Efficient Data Learning for Open Information Extraction with Pre-trained Language Models", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "Open Information Extraction (OpenIE) is a fundamental yet challenging task in Natural Language Processing, which involves extracting all triples (subject, predicate, object) from a given sentence. While labelling-based methods have their merits, generation-based techniques offer unique advantages, such as the ability to generate tokens not present in the original sentence. However, these generation-based methods often require a significant amount of training data to learn the task form of OpenIE and substantial training time to overcome slow model convergence due to the order penalty. 
In this paper, we introduce a novel framework, OK-IE, that ingeniously transforms the task form of OpenIE into the pre-training task form of the T5 model, thereby reducing the need for extensive training data. Furthermore, we introduce an innovative concept of 'anchors' to control the sequence of model outputs, effectively eliminating the impact of order penalty on model convergence and significantly reducing training time. Experimental results indicate that, compared to previous SOTA methods, OK-IE requires only 1/100 of the training data (900 instances) and 1/120 of the training time (3 minutes) to achieve comparable results.", "keywords": "information extraction;efficient;low-resource", "primary_area": "", "supplementary_material": "", "author": "Zhiyuan Fan;Shizhu He", "authorids": "~Zhiyuan_Fan2;~Shizhu_He2", "gender": "M;M", "homepage": "https://zhiyuan.fan;https://heshizhu.github.io/", "dblp": "210/1532;136/8650", "google_scholar": ";zBPIt3QAAAAJ", "or_profile": "~Zhiyuan_Fan2;~Shizhu_He2", "aff": "Tianjin University;Institute of Automation, Chinese Academy of Sciences", "aff_domain": "tju.edu.cn;ia.ac.cn", "position": "Undergrad student;Associate Researcher", "bibtex": "@inproceedings{\nfan2023efficient,\ntitle={Efficient Data Learning for Open Information Extraction with Pre-trained Language Models},\nauthor={Zhiyuan Fan and Shizhu He},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=BGsssE3E4i}\n}", "github": "", "project": "", "reviewers": "DMKQ;8h8N;iPLc;WhNg", "site": "https://openreview.net/forum?id=BGsssE3E4i", "pdf_size": 0, "rating": "2;2;2;2", "confidence": "4;3;2;3", "excitement": "4;4;1;3", "reproducibility": "5;4;2;3", "correctness": "4;4;1;3", "rating_avg": 2.0, "confidence_avg": 3.0, "excitement_avg": 3.0, "reproducibility_avg": 3.5, "correctness_avg": 3.0, "replies_avg": 13, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";", "linkedin": ";", "aff_unique_index": "0;1", "aff_unique_norm": "Tianjin University;Chinese Academy of Sciences", "aff_unique_dep": ";Institute of Automation", "aff_unique_url": "http://www.tju.edu.cn;http://www.ia.cas.cn", "aff_unique_abbr": "TJU;CAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "id": "BKchhwhNh3", "title": "Roles of Scaling and Instruction Tuning in Language Perception: Model vs. Human Attention", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Recent large language models (LLMs) have revealed strong abilities to understand natural language. Since most of them share the same basic structure, i.e. the transformer block, possible contributors to their success in the training process are scaling and instruction tuning. However, how these factors affect the models' language perception is unclear. This work compares the self-attention of several existing LLMs (LLaMA, Alpaca and Vicuna) in different sizes (7B, 13B, 30B, 65B), together with eye saccade, an aspect of human reading attention, to assess the effect of scaling and instruction tuning on language perception. Results show that scaling enhances the human resemblance and improves the effective attention by reducing the trivial pattern reliance, while instruction tuning does not. However, instruction tuning significantly enhances the models' sensitivity to instructions. 
We also find that current LLMs are consistently closer to non-native than native speakers in attention, suggesting a sub-optimal language perception of all models. Our code and data used in the analysis is available on GitHub.", "keywords": "Large Language Models;Attention;Human Resemblance;Instruction Tuning", "primary_area": "", "supplementary_material": "", "author": "Changjiang Gao;Shujian Huang;Jixing Li;Jiajun Chen", "authorids": "~Changjiang_Gao1;~Shujian_Huang1;~Jixing_Li1;~Jiajun_Chen1", "gender": "M;M;F;M", "homepage": ";http://nlp.nju.edu.cn/huangsj/;https://compneurolinglab.github.io/;https://cs.nju.edu.cn/chenjiajun/index_en.htm", "dblp": "304/9448;57/8451;;", "google_scholar": "MrAKVxoAAAAJ;HF3-E9kAAAAJ;T5EokYYAAAAJ;https://scholar.google.com.tw/citations?user=WIF7VaoAAAAJ", "or_profile": "~Changjiang_Gao1;~Shujian_Huang1;~Jixing_Li1;~Jiajun_Chen1", "aff": "Nanjing University;Nanjing University;City University of Hong Kong;Nanjing University", "aff_domain": "nju.edu.cn;nju.edu.cn;cityu.edu.hk;nju.edu.cn", "position": "PhD student;Associate Professor;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\ngao2023roles,\ntitle={Roles of Scaling and Instruction Tuning in Language Perception: Model vs. Human Attention},\nauthor={Changjiang Gao and Shujian Huang and Jixing Li and Jiajun Chen},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=BKchhwhNh3}\n}", "github": "", "project": "", "reviewers": "GxRX;dzum;pjsV", "site": "https://openreview.net/forum?id=BKchhwhNh3", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;5;3", "excitement": "2;2;4", "reproducibility": "3;3;3", "correctness": "2;2;4", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 2.6666666666666665, "reproducibility_avg": 3.0, "correctness_avg": 2.6666666666666665, "replies_avg": 10, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0009-0001-0763-8854;;;", "linkedin": ";;;", "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Nanjing University;City University of Hong Kong", "aff_unique_dep": ";", "aff_unique_url": "https://www.nju.edu.cn;https://www.cityu.edu.hk", "aff_unique_abbr": "Nanjing U;CityU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "BMIjPXooNq", "title": "Harnessing Dataset Cartography for Improved Compositional Generalization in Transformers", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Neural networks have revolutionized language modeling and excelled in various downstream tasks. However, the extent to which these models achieve compositional generalization comparable to human cognitive abilities remains a topic of debate. While existing approaches in the field have mainly focused on novel architectures and alternative learning paradigms, we introduce a pioneering method harnessing the power of dataset cartography (Swayamdipta et al., 2020). By strategically identifying a subset of compositional generalization data using this approach, we achieve a remarkable improvement in model accuracy, yielding enhancements of up to 10% on CFQ and COGS datasets. Notably, our technique incorporates dataset cartography as a curriculum learning criterion, eliminating the need for hyperparameter tuning while consistently achieving superior performance. 
Our findings highlight the untapped potential of dataset cartography in unleashing the full capabilities of compositional generalization within Transformer models.", "keywords": "dataset cartography;compositional generalization;training dynamics", "primary_area": "", "supplementary_material": "", "author": "Osman Batur \u0130nce;Tanin Zeraati;Semih Yagcioglu;Yadollah Yaghoobzadeh;Erkut Erdem;Aykut Erdem", "authorids": "~Osman_Batur_\u0130nce1;~Tanin_Zeraati1;~Semih_Yagcioglu1;~Yadollah_Yaghoobzadeh2;~Erkut_Erdem1;~Aykut_Erdem1", "gender": "M;F;M;M;M;M", "homepage": "https://ospanbatyr.github.io;;;https://github.com/google/BIG-bench;https://web.cs.hacettepe.edu.tr/~erkut;https://aykuterdem.github.io", "dblp": "359/3145;;159/1196;127/0138;79/6569;04/1832", "google_scholar": "3fKgzG4AAAAJ;https://scholar.google.com/citations?hl=en;;TvGqaqAAAAAJ;https://scholar.google.com.tr/citations?user=eALwl74AAAAJ;-xA1_OAAAAAJ", "or_profile": "~Osman_Batur_\u0130nce1;~Tanin_Zeraati1;~Semih_Yagcioglu1;~Yadollah_Yaghoobzadeh2;~Erkut_Erdem1;~Aykut_Erdem1", "aff": "Ko\u00e7 University;University of Tehran, University of Tehran;Hacettepe University;University of Tehran;Hacettepe University;Ko\u00e7 University", "aff_domain": "ku.edu.tr;ut.ac.ir;hacettepe.edu.tr;ut.ac.ir;hacettepe.edu.tr;ku.edu.tr", "position": "MS student;Undergrad student;PhD student;Assistant Professor;Associate Professor;Associate Professor", "bibtex": "@inproceedings{\ni{\\ensuremath{\\dot{}}}nce2023harnessing,\ntitle={Harnessing Dataset Cartography for Improved Compositional Generalization in Transformers},\nauthor={Osman Batur {\\.I}nce and Tanin Zeraati and Semih Yagcioglu and Yadollah Yaghoobzadeh and Erkut Erdem and Aykut Erdem},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=BMIjPXooNq}\n}", "github": "", "project": "", "reviewers": "UCsN;xSzh;bjGd;1Pgd", "site": "https://openreview.net/forum?id=BMIjPXooNq", "pdf_size": 0, "rating": "3;3;3;3", "confidence": "2;3;4;4", "excitement": "4;3;3;3", "reproducibility": "3;3;4;4", "correctness": "4;2;4;4", "rating_avg": 3.0, "confidence_avg": 3.25, "excitement_avg": 3.25, "reproducibility_avg": 3.5, "correctness_avg": 3.5, "replies_avg": 13, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0009-0008-9538-2481;;;0000-0003-0646-0852;;0000-0002-6280-8422", "linkedin": "osmanbatur/;tanin-zeraati-9a2a80196/;;;;", "aff_unique_index": "0;1;2;1;2;0", "aff_unique_norm": "Ko\u00e7 University;University of Tehran;Hacettepe University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ku.edu.tr;https://ut.ac.ir;https://www.hacettepe.edu.tr", "aff_unique_abbr": "Ko\u00e7;UT;Hacettepe", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;1;0;0", "aff_country_unique": "T\u00fcrkiye;Iran" }, { "id": "BNcTB8RZfG", "title": "Explicit Alignment and Many-to-many Entailment Based Reasoning for Conversational Machine Reading", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Conversational Machine Reading (CMR) requires answering a user's initial question through multi-turn dialogue interactions based on a given document. Although there exist many effective methods, they largely neglected the alignment between the $\\textit{document}$ and the $\\textit{user-provided information}$, which significantly affects the intermediate decision-making and subsequent follow-up question generation. 
To address this issue, we propose a pipeline framework that (1) aligns the aforementioned two sides in an explicit way, (2) makes decisions using a lightweight many-to-many entailment reasoning module, and (3) directly generates follow-up questions based on the document and previously asked questions. Our proposed method achieves state-of-the-art in micro-accuracy and ranks the first place on the public leaderboard of the CMR benchmark dataset ShARC.", "keywords": "Conversational Machine Reading;Task-oriented dialogue;Textual Entailment", "primary_area": "", "supplementary_material": "", "author": "Yangyang Luo;Shiyu Tian;Caixia Yuan;Xiaojie Wang", "authorids": "~Yangyang_Luo1;~Shiyu_Tian1;~Caixia_Yuan1;~Xiaojie_Wang1", "gender": "M;M;F;M", "homepage": ";;https://teacher.bupt.edu.cn/yuancaixia/en/index.htm;", "dblp": ";302/1805;69/9013;99/7033-6", "google_scholar": ";;;", "or_profile": "~Yangyang_Luo1;~Shiyu_Tian1;~Caixia_Yuan1;~Xiaojie_Wang1", "aff": "Beijing University of Posts and Telecommunications;Beijing University of Posts and Telecommunications;Beijing University of Posts and Telecommunications;Beijing University of Post and Telecommunication", "aff_domain": "bupt.edu.cn;bupt.edu.cn;bupt.edu.cn;bupt.edu.cn", "position": "MS student;PhD student;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nluo2023explicit,\ntitle={Explicit Alignment and Many-to-many Entailment Based Reasoning for Conversational Machine Reading},\nauthor={Yangyang Luo and Shiyu Tian and Caixia Yuan and Xiaojie Wang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=BNcTB8RZfG}\n}", "github": "", "project": "", "reviewers": "1skF;HUMs;QzcE;AKtE;xtUY;DMCN", "site": "https://openreview.net/forum?id=BNcTB8RZfG", "pdf_size": 0, "rating": "3;3;3;3;3;3", "confidence": "5;3;4;2;4;3", "excitement": "2;3;4;3;3;2", "reproducibility": "4;3;4;4;3;3", "correctness": "3;3;4;4;3;3", "rating_avg": 3.0, "confidence_avg": 3.5, "excitement_avg": 2.8333333333333335, "reproducibility_avg": 3.5, "correctness_avg": 3.3333333333333335, "replies_avg": 19, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0003-0854-9631;;", "linkedin": "https://www.linkedin.cn/in/%E7%BD%97-%E6%B4%8B%E6%B4%8B-b356631a5;;;xiaojie-wang-4b732936/", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Beijing University of Posts and Telecommunications", "aff_unique_dep": "", "aff_unique_url": "http://www.bupt.edu.cn/", "aff_unique_abbr": "BUPT", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Beijing", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "BSApuhuM87", "title": "Beware of Model Collapse! Fast and Stable Test-time Adaptation for Robust Question Answering", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Although pre-trained language models (PLM) have achieved great success in question answering (QA), their robustness is still insufficient to support their practical applications, especially in the face of distribution shifts.\nRecently, test-time adaptation (TTA) has shown great potential for solving this problem, which adapts the model to fit the test samples at test time. 
\nHowever, TTA sometimes causes model collapse, making almost all the model outputs incorrect, which has raised concerns about its stability and reliability.\nIn this paper, we delve into why TTA causes model collapse and find that the imbalanced label distribution inherent in QA is the reason for it.\nTo address this problem, we propose Anti-Collapse Fast test-time adaptation (Anti-CF), which utilizes the source model\u2019s output to regularize the update of the adapted model during test time.\nWe further design an efficient side block to reduce its inference time.\nExtensive experiments on various distribution shift scenarios and pre-trained language models (e.g., XLM-RoBERTa, BLOOM) demonstrate that our method can achieve comparable or better results than previous TTA methods at a speed close to vanilla forward propagation, which is a 1.8\u00d7 to 4.4\u00d7 speedup compared to previous TTA methods.", "keywords": "Question Answering;Robustness;Test-time adaptation", "primary_area": "", "supplementary_material": "", "author": "Yi Su;Yixin Ji;Juntao Li;Hai Ye;Min Zhang", "authorids": "~Yi_Su3;~Yixin_Ji2;~Juntao_Li2;~Hai_Ye2;~Min_Zhang9", "gender": "M;M;M;M;M", "homepage": "http://yisunlp.github.io;https://github.com/Dereck0602;https://lijuntaopku.github.io/;;https://zhangmin-nlp-ai.github.io/", "dblp": ";;;190/;83/5342-5", "google_scholar": "YAvMAQwAAAAJ;I3UQhtIAAAAJ;sZSygsYAAAAJ;_dQWEzEAAAAJ;https://scholar.google.com/citations?hl=zh-CN", "or_profile": "~Yi_Su3;~Yixin_Ji2;~Juntao_Li2;~Hai_Ye2;~Min_Zhang9", "aff": "Suzhou University;Soochow University;Soochow University, China;National University of Singapore;Harbin Institute of Technology, Shenzhen", "aff_domain": "suda.edu.cn;suda.edu.cn;suda.edu.cn;nus.edu.sg;hit.edu.cn", "position": "Undergrad student;PhD student;Associate Professor;PhD student;Full Professor", "bibtex": "@inproceedings{\nsu2023beware,\ntitle={Beware of Model Collapse! 
Fast and Stable Test-time Adaptation for Robust Question Answering},\nauthor={Yi Su and Yixin Ji and Juntao Li and Hai Ye and Min Zhang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=BSApuhuM87}\n}", "github": "", "project": "", "reviewers": "76Fj;CXeg;kMrG", "site": "https://openreview.net/forum?id=BSApuhuM87", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;3;3", "excitement": "2;4;4", "reproducibility": "3;4;4", "correctness": "3;4;4", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.6666666666666665, "replies_avg": 10, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0009-0005-3485-405X;;0000-0002-6286-7529;;", "linkedin": ";;;;", "aff_unique_index": "0;1;1;2;3", "aff_unique_norm": "Suzhou University;Soochow University;National University of Singapore;Harbin Institute of Technology", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.suda.edu.cn;https://www.soochow.edu.cn;https://www.nus.edu.sg;http://en.hhit.edu.cn/", "aff_unique_abbr": "Suda;Soochow U;NUS;HIT", "aff_campus_unique_index": "1", "aff_campus_unique": ";Shenzhen", "aff_country_unique_index": "0;0;0;1;0", "aff_country_unique": "China;Singapore" }, { "id": "BViIHjzvoY", "title": "Data-efficient Active Learning for Structured Prediction with Partial Annotation and Self-Training", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "In this work we propose a pragmatic method that reduces the annotation cost for structured label spaces using active learning. Our approach leverages partial annotation, which reduces labeling costs for structured outputs by selecting only the most informative sub-structures for annotation. We also utilize self-training to incorporate the current model\u2019s automatic predictions as pseudo-labels for un-annotated sub-structures. A key challenge in effectively combining partial annotation with self-training to reduce annotation cost is determining which sub-structures to select to label. To address this challenge, we adopt an error estimator to adaptively decide the partial selection ratio according to the current model\u2019s capability. 
In evaluations spanning four structured prediction tasks, we show that our combination of partial annotation and self-training using an adaptive selection ratio reduces annotation cost over strong full annotation baselines under a fair comparison scheme that takes reading time into consideration.", "keywords": "Active Learning;Structured Prediction;Partial Annotation;Self-training", "primary_area": "", "supplementary_material": "", "author": "Zhisong Zhang;Emma Strubell;Eduard H Hovy", "authorids": "~Zhisong_Zhang1;~Emma_Strubell1;~Eduard_H_Hovy1", "gender": "M;Non-Binary;M", "homepage": "https://zzsfornlp.github.io/;http://strubell.github.io;http://www.cs.cmu.edu/~hovy", "dblp": "174/7415;153/2253;47/2454", "google_scholar": "373vlUEAAAAJ;UCDMtM0AAAAJ;https://scholar.google.com.tw/citations?user=PUFxrroAAAAJ", "or_profile": "~Zhisong_Zhang1;~Emma_Strubell1;~Eduard_H_Hovy1", "aff": "School of Computer Science, Carnegie Mellon University;Allen Institute for Artificial Intelligence;Carnegie Mellon University", "aff_domain": "cs.cmu.edu;allenai.org;cmu.edu", "position": "PhD student;Visiting Researcher;Adjunct Professor", "bibtex": "@inproceedings{\nzhang2023dataefficient,\ntitle={Data-efficient Active Learning for Structured Prediction with Partial Annotation and Self-Training},\nauthor={Zhisong Zhang and Emma Strubell and Eduard H Hovy},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=BViIHjzvoY}\n}", "github": "", "project": "", "reviewers": "GDw5;gpGM;8arV", "site": "https://openreview.net/forum?id=BViIHjzvoY", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;3;3", "excitement": "4;3;3", "reproducibility": "4;4;4", "correctness": "4;3;3", "rating_avg": 3.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": ";;", "aff_unique_index": "0;1;0", "aff_unique_norm": "Carnegie Mellon University;Allen Institute for Artificial Intelligence", "aff_unique_dep": "School of Computer Science;", "aff_unique_url": "https://www.cmu.edu;https://allenai.org", "aff_unique_abbr": "CMU;AI2", "aff_campus_unique_index": "0", "aff_campus_unique": "Pittsburgh;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "BYkD1gjbxm", "title": "Optimized Tokenization for Transcribed Error Correction", "track": "main", "status": "Long Main", "tldr": "", "abstract": "The challenges facing speech recognition systems, such as variations in pronunciations, adverse audio conditions, and the scarcity of labeled data, emphasize the necessity for a post-processing step that corrects recurring errors. Previous research has shown the advantages of employing dedicated error correction models, yet training such models requires large amounts of labeled data which is not easily obtained. To overcome this limitation, synthetic transcribed-like data is often utilized, however, bridging the distribution gap between transcribed errors and synthetic noise is not trivial.\nIn this paper, we demonstrate that the performance of correction models can be significantly increased by training solely using synthetic data. 
\nSpecifically, we empirically show that: (1) synthetic data generated using the error distribution derived from a set of transcribed data outperforms the common approach of applying random perturbations; (2) applying language-specific adjustments to the vocabulary of a BPE tokenizer strikes a balance between adapting to unseen distributions and retaining knowledge of transcribed errors.\nWe showcase the benefits of these key observations, and evaluate our approach using multiple languages, speech recognition systems and prominent speech recognition datasets.", "keywords": "NLP;Speech Recognition;Error Correction", "primary_area": "", "supplementary_material": "", "author": "Tomer Wullach;Shlomo Chazan", "authorids": "~Tomer_Wullach1;~Shlomo_Chazan1", "gender": "M;M", "homepage": "https://scholar.google.com/citations?user=DGNBLokAAAAJ&hl=iw&oi=ao;https://shlomichazan.wix.com/personal", "dblp": ";170/0036", "google_scholar": ";https://scholar.google.co.il/citations?user=I3oxpKUAAAAJ", "or_profile": "~Tomer_Wullach1;~Shlomo_Chazan1", "aff": ";", "aff_domain": ";", "position": ";", "bibtex": "@inproceedings{\nwullach2023optimized,\ntitle={Optimized Tokenization for Transcribed Error Correction},\nauthor={Tomer Wullach and Shlomo Chazan},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=BYkD1gjbxm}\n}", "github": "", "project": "", "reviewers": "z7J7;UNKv;AQUj", "site": "https://openreview.net/forum?id=BYkD1gjbxm", "pdf_size": 0, "rating": "5;5;5", "confidence": "5;3;4", "excitement": "4;3;4", "reproducibility": "4;2;4", "correctness": "4;4;4", "rating_avg": 5.0, "confidence_avg": 4.0, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 4.0, "replies_avg": 10, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-9158-9070;", "linkedin": ";shlomke-chazan" }, { "id": "BYxHeGsiay", "title": "From Words to Wires: Generating Functioning Electronic Devices from Natural Language Descriptions", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "In this work, we show that contemporary language models have a previously unknown skill -- the capacity for electronic circuit design from high-level textual descriptions, akin to code generation. We introduce two benchmarks: PINS100, assessing model knowledge of electrical components, and MICRO25, evaluating a model's capability to design common microcontroller circuits and code in the Arduino ecosystem that involve input, output, sensors, motors, protocols, and logic -- with models such as GPT-4 and Claude-V1 achieving between 60% to 96% Pass@1 on generating full devices. We include six case studies of using language models as a design assistant for moderately complex devices, such as a radiation-powered random number generator, an emoji keyboard, a visible spectrometer, and several assistive devices, while offering a qualitative analysis of performance, outlining evaluation challenges, and suggesting areas of development to improve complex circuit design and practical utility. 
With this work, we aim to spur research at the juncture of natural language processing and electronic design.", "keywords": "applications;code generation;electronics;language models", "primary_area": "", "supplementary_material": "", "author": "Peter Jansen", "authorids": "~Peter_Jansen1", "gender": "", "homepage": "http://www.cognitiveai.org", "dblp": "72/5962", "google_scholar": "wc1Hbl8AAAAJ", "or_profile": "~Peter_Jansen1", "aff": "University of Arizona", "aff_domain": "arizona.edu", "position": "Assistant Professor", "bibtex": "@inproceedings{\njansen2023from,\ntitle={From Words to Wires: Generating Functioning Electronic Devices from Natural Language Descriptions},\nauthor={Peter Jansen},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=BYxHeGsiay}\n}", "github": "", "project": "", "reviewers": "zUFP;8oFb;bhLT;P9mH", "site": "https://openreview.net/forum?id=BYxHeGsiay", "pdf_size": 0, "rating": "3;3;3;3", "confidence": "4;4;3;3", "excitement": "3;2;3;3", "reproducibility": "2;1;3;2", "correctness": "3;1;2;3", "rating_avg": 3.0, "confidence_avg": 3.5, "excitement_avg": 2.75, "reproducibility_avg": 2.0, "correctness_avg": 2.25, "replies_avg": 14, "authors#_avg": 1, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "", "linkedin": "", "aff_unique_index": "0", "aff_unique_norm": "University of Arizona", "aff_unique_dep": "", "aff_unique_url": "https://www.arizona.edu", "aff_unique_abbr": "UA", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "id": "BacLV3QUi8", "title": "AniEE: A Dataset of Animal Experimental Literature for Event Extraction", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Event extraction (EE), as a crucial information extraction (IE) task, aims to identify event triggers and their associated arguments from unstructured text, subsequently classifying them into pre-defined types and roles. In the biomedical domain, EE is widely used to extract complex structures representing biological events from literature. Due to the complicated semantics and specialized domain knowledge, it is challenging to construct biomedical event extraction datasets. Additionally, most existing biomedical EE datasets primarily focus on cell experiments or the overall experimental procedures. Therefore, we introduce AniEE, an event extraction dataset concentrated on the animal experiment stage. We establish a novel animal experiment customized entity and event scheme in collaboration with domain experts. 
We then create an expert-annotated high-quality dataset containing discontinuous entities and nested events and evaluate our dataset on the recent outstanding NER and EE models.", "keywords": "Information Extraction;Event Extraction;Named Entity Recognition;Biomedical Corpus;Scientific Literature;Animal Experiments", "primary_area": "", "supplementary_material": "", "author": "Dohee Kim;Ra Yoo;Soyoung Yang;Hee Yang;Jaegul Choo", "authorids": "~Dohee_Kim1;~Ra_Yoo1;~Soyoung_Yang1;~Hee_Yang1;~Jaegul_Choo1", "gender": "F;;F;F;M", "homepage": ";;;http://fn.kookmin.ac.kr/;https://sites.google.com/site/jaegulchoo/", "dblp": ";;239/8032;;07/2074", "google_scholar": "https://scholar.google.com/citations?hl=en;;5Mw3sVAAAAAJ;;GHJYsLEAAAAJ", "or_profile": "~Dohee_Kim1;~Ra_Yoo1;~Soyoung_Yang1;~Hee_Yang1;~Jaegul_Choo1", "aff": "Korea Advanced Institute of Science & Technology;Advanced Institute of Convergence Technology;SAIT;Kookmin University;Korea Advanced Institute of Science & Technology", "aff_domain": "kaist.ac.kr;snu.ac.kr;samsung.co.kr;kookmin.ac.kr;kaist.ac.kr", "position": "MS student;Postdoc;Intern;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nkim2023aniee,\ntitle={Ani{EE}: A Dataset of Animal Experimental Literature for Event Extraction},\nauthor={Dohee Kim and Ra Yoo and Soyoung Yang and Hee Yang and Jaegul Choo},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=BacLV3QUi8}\n}", "github": "", "project": "", "reviewers": "7ePo;tSdi;zNXM", "site": "https://openreview.net/forum?id=BacLV3QUi8", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;5;3", "excitement": "4;3;3", "reproducibility": "4;3;2", "correctness": "4;3;2", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.0, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0003-0457-5496;;;", "linkedin": ";;soyoung-yang-b96032166/;;", "aff_unique_index": "0;1;2;3;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology;Advanced Institute of Convergence Technology;Southern Alberta Institute of Technology;Kookmin University", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.kaist.ac.kr;http://www.ict.org.kr;https://www.sait.ca;https://www.kookmin.ac.kr", "aff_unique_abbr": "KAIST;;SAIT;KMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0", "aff_country_unique": "South Korea;Canada" }, { "id": "BcYvkVgkZy", "title": "On Event Individuation for Document-Level Information Extraction", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "As information extraction (IE) systems have grown more adept at processing whole documents, the classic task of *template filling* has seen renewed interest as a benchmark for document-level IE. In this position paper, we call into question the suitability of template filling for this purpose. We argue that the task demands definitive answers to thorny questions of *event individuation* \u2014 the problem of distinguishing distinct events \u2014 about which even human experts disagree. Through an annotation study and error analysis, we show that this raises concerns about the usefulness of template filling metrics, the quality of datasets for the task, and the ability of models to learn it. 
Finally, we consider possible solutions.", "keywords": "Information Extraction;Template Filling;Events;Reproducibility", "primary_area": "", "supplementary_material": "", "author": "William Gantt;Reno Kriz;Yunmo Chen;Siddharth Vashishtha;Aaron Steven White", "authorids": "~William_Gantt1;~Reno_Kriz1;~Yunmo_Chen1;~Siddharth_Vashishtha1;~Aaron_Steven_White1", "gender": "M;M;M;M;M", "homepage": "https://wgantt.github.io/;;https://omnuy.me;https://sidsvash26.github.io/;http://aaronstevenwhite.io", "dblp": "277/0935;220/2001;252/7831;236/4588;188/5734", "google_scholar": "SpOIH2MAAAAJ;XXjftl4AAAAJ;V-g2Tx8AAAAJ;4Q4zhC0AAAAJ;R-ZVWNEAAAAJ", "or_profile": "~William_Gantt1;~Reno_Kriz1;~Yunmo_Chen1;~Siddharth_Vashishtha1;~Aaron_Steven_White1", "aff": "University of Rochester;Johns Hopkins University;Johns Hopkins University;University of Rochester;University of Rochester", "aff_domain": "cs.rochester.edu;jhu.edu;jhu.edu;rochester.edu;rochester.edu", "position": "PhD student;Researcher;PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\ngantt2023on,\ntitle={On Event Individuation for Document-Level Information Extraction},\nauthor={William Gantt and Reno Kriz and Yunmo Chen and Siddharth Vashishtha and Aaron Steven White},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=BcYvkVgkZy}\n}", "github": "", "project": "", "reviewers": "vPzg;HbG3;yycY", "site": "https://openreview.net/forum?id=BcYvkVgkZy", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;2;3", "excitement": "4;4;4", "reproducibility": "5;4;0", "correctness": "4;3;4", "rating_avg": 3.0, "confidence_avg": 2.6666666666666665, "excitement_avg": 4.0, "reproducibility_avg": 3.0, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0001-9931-2861;0000-0002-0239-9989;;;", "linkedin": "will-gantt-9b8994220/;reno-kriz;yunmochen;;aaronstevenwhite/", "aff_unique_index": "0;1;1;0;0", "aff_unique_norm": "University of Rochester;Johns Hopkins University", "aff_unique_dep": ";", "aff_unique_url": "https://www.rochester.edu;https://www.jhu.edu", "aff_unique_abbr": "U of R;JHU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "BdpoEj33DZ", "title": "MailEx: Email Event and Argument Extraction", "track": "main", "status": "Long Main", "tldr": "", "abstract": "In this work, we present the first dataset, MailEx, for performing event extraction from conversational email threads. To this end, we first proposed a new taxonomy covering 10 event types and 76 arguments in the email domain. Our final dataset includes 1.5K email threads and ~4K emails, which are annotated with a total of ~8K event instances. To understand the task challenges, we conducted a series of experiments comparing three types of approaches, i.e., fine-tuned sequence labeling, fine-tuned generative extraction, and few-shot in-context learning. Our results showed that the task of email event extraction is far from being addressed, due to challenges lying in, e.g., extracting non-continuous, shared trigger spans, extracting non-named entity arguments, and modeling the email conversational history. 
Our work thus suggests more future investigations in this domain-specific event extraction task.", "keywords": "Event Extraction;Email;Information Extraction", "primary_area": "", "supplementary_material": "", "author": "Saurabh Srivastava;Gaurav Singh;Shou Matsumoto;Ali K Raz;Paulo Costa;Joshua Campbell Poore;Ziyu Yao", "authorids": "~Saurabh_Srivastava2;~Gaurav_Singh5;~Shou_Matsumoto1;~Ali_K_Raz1;~Paulo_Costa1;~Joshua_Campbell_Poore1;~Ziyu_Yao1", "gender": "M;M;M;M;M;M;F", "homepage": "https://salokr.github.io/;;https://mason.gmu.edu/~smatsum2/;;http://mason.gmu.edu/~pcosta;;http://ziyuyao.org", "dblp": ";;68/1148;;https://dblp.uni-trier.de/pers/hd/c/Costa:Paulo_Cesar_G=_da;;", "google_scholar": "LuCi6M4AAAAJ;;2ukan3AAAAAJ;dVMdGAQAAAAJ;2vE_NmMAAAAJ;;4lYrMNUAAAAJ", "or_profile": "~Saurabh_Srivastava2;~Gaurav_Singh5;~Shou_Matsumoto1;~Ali_K_Raz1;~Paulo_Costa1;~Joshua_Campbell_Poore1;~Ziyu_Yao1", "aff": "George Mason University;George Mason University;George Mason University;George Mason University;George Mason University;University of Maryland, College Park;George Mason University", "aff_domain": "gmu.edu;gmu.edu;gmu.edu;gmu.edu;gmu.edu;umd.edu;gmu.edu", "position": "PhD student;MS student;Assistant Professor;Assistant Professor;Associate Professor;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\nsrivastava2023mailex,\ntitle={MailEx: Email Event and Argument Extraction},\nauthor={Saurabh Srivastava and Gaurav Singh and Shou Matsumoto and Ali K Raz and Paulo Costa and Joshua Campbell Poore and Ziyu Yao},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=BdpoEj33DZ}\n}", "github": "", "project": "", "reviewers": "UBTU;LSSd;FtbS;uHNK", "site": "https://openreview.net/forum?id=BdpoEj33DZ", "pdf_size": 0, "rating": "4;4;4;4", "confidence": "4;4;4;4", "excitement": "4;3;4;3", "reproducibility": "4;4;4;4", "correctness": "4;4;4;3", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.5, "reproducibility_avg": 4.0, "correctness_avg": 3.75, "replies_avg": 14, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0003-2589-1738;0000-0003-2562-1631;0000-0002-8280-1551;;0009-0007-4571-3505", "linkedin": "saurabh-srivastava-34941560/;gaurav-s-52498963/;shou-m-0a06152b2/;akraz/;paulocesarcosta/;joshua-poore-9a67a3aa;", "aff_unique_index": "0;0;0;0;0;1;0", "aff_unique_norm": "George Mason University;University of Maryland", "aff_unique_dep": ";", "aff_unique_url": "https://www.gmu.edu;https://www/umd.edu", "aff_unique_abbr": "GMU;UMD", "aff_campus_unique_index": "1", "aff_campus_unique": ";College Park", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "Beho3ly3qx", "title": "COMET-M: Reasoning about Multiple Events in Complex Sentences", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Understanding the speaker\u2019s intended meaning often involves drawing commonsense inferences to reason about what is not stated explicitly. In multi-event sentences, it requires understanding the relationships between events based on contextual knowledge. We propose COMET-M (Multi-Event), an event-centric commonsense model capable of generating commonsense inferences for a target event within a complex sentence. 
COMET-M builds upon COMET (Bosselut et al., 2019), which excels at generating event-centric inferences for simple sentences, but struggles with the complexity of multi-event sentences prevalent in natural text. To overcome this limitation, we curate a Multi-Event Inference (MEI) dataset of 35K human-written inferences. We train COMET-M on the human-written inferences and also create baselines using automatically labeled examples. Experimental results demonstrate the significant performance improvement of COMET-M over COMET in generating multi-event inferences. Moreover, COMET-M successfully produces distinct inferences for each target event, taking the complete context into consideration. COMET-M holds promise for downstream tasks involving natural text such as coreference resolution, dialogue, and story understanding.", "keywords": "Reasoning about events;Complex contexts;Discourse", "primary_area": "", "supplementary_material": "", "author": "Sahithya Ravi;Raymond T. Ng;Vered Shwartz", "authorids": "~Sahithya_Ravi1;~Raymond_T._Ng1;~Vered_Shwartz1", "gender": "F;M;F", "homepage": "https://sahithyaravi.github.io/;http://www.cs.ubc.ca/~rng;https://www.cs.ubc.ca/~vshwartz/", "dblp": "236/5862;n/RTNg;166/2038", "google_scholar": "mlWmdUsAAAAJ;;bbe4ResAAAAJ", "or_profile": "~Sahithya_Ravi1;~Raymond_T._Ng1;~Vered_Shwartz1", "aff": "University of British Columbia;University of British Columbia;University of British Columbia", "aff_domain": "cs.ubc.ca;cs.ubc.ca;ubc.ca", "position": "PhD student;Professor;Assistant Professor", "bibtex": "@inproceedings{\nravi2023cometm,\ntitle={{COMET}-M: Reasoning about Multiple Events in Complex Sentences},\nauthor={Sahithya Ravi and Raymond T. Ng and Vered Shwartz},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=Beho3ly3qx}\n}", "github": "", "project": "", "reviewers": "QbAU;VGTR;2nKM", "site": "https://openreview.net/forum?id=Beho3ly3qx", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;4;3", "excitement": "4;4;4", "reproducibility": "4;2;2", "correctness": "3;3;2", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 4.0, "reproducibility_avg": 2.6666666666666665, "correctness_avg": 2.6666666666666665, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": ";;vered-shwartz-99548633/", "aff_unique_index": "0;0;0", "aff_unique_norm": "University of British Columbia", "aff_unique_dep": "", "aff_unique_url": "https://www.ubc.ca", "aff_unique_abbr": "UBC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Canada" }, { "id": "BoKg2pcF0H", "title": "DiffusionSL: Sequence Labeling via Tag Diffusion Process", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Sequence Labeling (SL) is long-standing in Natural Language Processing (NLP). Traditionally, discriminative models have been widely used to capture the conditional distribution of sequence tags, rather than generative models. In this paper, we present DiffusionSL, a framework that utilizes a conditional discrete diffusion model for generating discrete tag data, resulting in a Tag Diffusion Process. We treat the natural language sequence as the conditional signal and the sequence tags as the generation target, iteratively refining the noisy tags to obtain clean ones. 
To address the discreteness issue, we propose the Bit-Tag Converter (BTConverter) to model the target in continuous data space. Furthermore, we introduce the Bit Diffusion Transformer (BitDiT) to model the process of noise elimination. Leveraging the powerful iterative refinement capability of the diffusion model, DiffusionSL achieves superior performance against previous state-of-the-art (SOTA) baselines and outperforms gpt-3.5-turbo significantly across multiple benchmark datasets and various tasks.", "keywords": "Generative Models;Sequence Labeling;Tag Diffusion Process", "primary_area": "", "supplementary_material": "", "author": "Ziyang Huang;Pengfei Cao;Jun Zhao;Kang Liu", "authorids": "~Ziyang_Huang2;~Pengfei_Cao1;~Jun_Zhao4;~Kang_Liu1", "gender": ";;M;M", "homepage": ";https://cpf-nlpr.github.io/;http://nlpr-web.ia.ac.cn/cip/english/~junzhao/index.html;http://www.nlpr.ia.ac.cn/cip/~liukang/index.html", "dblp": ";182/7941;https://dblp.uni-trier.de/pid/47/2026-1.html;42/4903.html", "google_scholar": ";lP5_LJIAAAAJ;https://scholar.google.com.hk/citations?user=HljRttwAAAAJ;DtZCfl0AAAAJ", "or_profile": "~Ziyang_Huang2;~Pengfei_Cao1;~Jun_Zhao4;~Kang_Liu1", "aff": ";Institute of Automation, Chinese Academy of Sciences;Institute of automation, Chinese academy of science;Institute of Automation, Chinese Academy of Sciences", "aff_domain": ";ia.ac.cn;nlpr.ia.ac.cn;ia.ac.cn", "position": ";PhD student;Full Professor;Professor", "bibtex": "@inproceedings{\nhuang2023diffusionsl,\ntitle={Diffusion{SL}: Sequence Labeling via Tag Diffusion Process},\nauthor={Ziyang Huang and Pengfei Cao and Jun Zhao and Kang Liu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=BoKg2pcF0H}\n}", "github": "", "project": "", "reviewers": "ckkr;kB8h;zKAW", "site": "https://openreview.net/forum?id=BoKg2pcF0H", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;3;4", "excitement": "4;3;3", "reproducibility": "3;4;4", "correctness": "4;3;3", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;", "linkedin": ";;;", "aff_unique_index": "0;0;0", "aff_unique_norm": "Chinese Academy of Sciences", "aff_unique_dep": "Institute of Automation", "aff_unique_url": "http://www.ia.cas.cn", "aff_unique_abbr": "CAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "Bou2YHsRvG", "title": "Code-Switching with Word Senses for Pretraining in Neural Machine Translation", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Lexical ambiguity is a significant and pervasive challenge in Neural Machine Translation (NMT), with many state-of-the-art (SOTA) NMT systems struggling to handle polysemous words (Campolungo et al., 2022). The same holds for the NMT pretraining paradigm of denoising synthetic \"code-switched\" text (Pan et al., 2021; Iyer et al., 2023), where word senses are ignored in the noising stage -- leading to harmful sense biases in the pretraining data that are subsequently inherited by the resulting models. 
In this work, we introduce Word Sense Pretraining for Neural Machine Translation (WSP-NMT) - an end-to-end approach for pretraining multilingual NMT models leveraging word sense-specific information from Knowledge Bases. Our experiments show significant improvements in overall translation quality. Then, we show the robustness of our approach to scale to various challenging data and resource-scarce scenarios and, finally, report fine-grained accuracy improvements on the DiBiMT disambiguation benchmark. Our studies yield interesting and novel insights into the merits and challenges of integrating word sense information and structured knowledge in multilingual pretraining for NMT.", "keywords": "Word Sense Disambiguation;Pretraining approaches;Neural Machine Translation", "primary_area": "", "supplementary_material": "", "author": "Vivek Iyer;Edoardo Barba;Alexandra Birch;Jeff Z. Pan;Roberto Navigli", "authorids": "~Vivek_Iyer1;~Edoardo_Barba1;~Alexandra_Birch1;~Jeff_Z._Pan1;~Roberto_Navigli2", "gender": "M;M;F;M;M", "homepage": ";https://edobobo.github.io/;http://homepages.inf.ed.ac.uk/abmayne/;https://knowledge-representation.org/j.z.pan/;http://wwwusers.di.uniroma1.it/~navigli/", "dblp": ";269/4565;24/6740;59/6490;https://dblp.org/pers/n/Navigli:Roberto.html", "google_scholar": "https://scholar.google.co.uk/citations?user=LbDh_igAAAAJ;hVjbi_QAAAAJ;https://scholar.google.co.uk/citations?user=gZOV9kMAAAAJ;https://scholar.google.co.uk/citations?hl=en;https://scholar.google.it/citations?user=BsgVJ-EAAAAJ", "or_profile": "~Vivek_Iyer1;~Edoardo_Barba1;~Alexandra_Birch1;~Jeff_Z._Pan1;~Roberto_Navigli2", "aff": "University of Edinburgh, University of Edinburgh;University of Roma \"La Sapienza\";University of Edinburgh;University of Edinburgh, University of Edinburgh;Sapienza University of Rome", "aff_domain": "ed.ac.uk;uniroma1.it;ed.ac.uk;ed.ac.uk;uniroma1.it", "position": "PhD student;Assistant Professor;Associate Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\niyer2023codeswitching,\ntitle={Code-Switching with Word Senses for Pretraining in Neural Machine Translation},\nauthor={Vivek Iyer and Edoardo Barba and Alexandra Birch and Jeff Z. 
Pan and Roberto Navigli},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=Bou2YHsRvG}\n}", "github": "", "project": "", "reviewers": "MJt2;sGfj;4nWC", "site": "https://openreview.net/forum?id=Bou2YHsRvG", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;4;3", "excitement": "3;4;3", "reproducibility": "4;4;3", "correctness": "4;4;4", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0003-4451-8293;;;0000-0002-9779-2088;0000-0003-3831-9706", "linkedin": "vivekiyer98/;;;;", "aff_unique_index": "0;1;0;0;2", "aff_unique_norm": "University of Edinburgh;University of Rome La Sapienza;Sapienza University of Rome", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ed.ac.uk;https://www.uniroma1.it;https://www.uniroma1.it", "aff_unique_abbr": "Edinburgh;La Sapienza;Sapienza", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Rome", "aff_country_unique_index": "0;1;0;0;1", "aff_country_unique": "United Kingdom;Italy" }, { "id": "BpibUh0aB3", "title": "Probing the \u201cCreativity\u201d of Large Language Models: Can models produce divergent semantic association?", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "Large language models possess remarkable capacity for processing language, but it remains unclear whether these models can further generate creative content. The present study aims to investigate the creative thinking of large language models through a cognitive perspective. We utilize the divergent association task (DAT), an objective measurement of creativity that asks models to generate unrelated words and calculates the semantic distance between them. We compare the results across different models and decoding strategies. Our findings indicate that: (1) When using the greedy search strategy, GPT-4 outperforms 96% of humans, while GPT-3.5-turbo exceeds the average human level. (2) Stochastic sampling and temperature scaling are effective to obtain higher DAT scores for models except GPT-4, but face a trade-off between creativity and stability. 
These results imply that advanced large language models have divergent semantic associations, which is a fundamental process underlying creativity.", "keywords": "Creativity;Semantic association;Text generation", "primary_area": "", "supplementary_material": "", "author": "Honghua Chen;Nai Ding", "authorids": "~Honghua_Chen3;~Nai_Ding1", "gender": "M;M", "homepage": ";", "dblp": ";128/4756", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;Q_mMDVMAAAAJ", "or_profile": "~Honghua_Chen3;~Nai_Ding1", "aff": "Zhejiang University;Zhejiang University", "aff_domain": "zju.edu.cn;zju.edu.cn", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\nchen2023probing,\ntitle={Probing the {\\textquotedblleft}Creativity{\\textquotedblright} of Large Language Models: Can models produce divergent semantic association?},\nauthor={Honghua Chen and Nai Ding},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=BpibUh0aB3}\n}", "github": "", "project": "", "reviewers": "qcAx;hFka;9M5g", "site": "https://openreview.net/forum?id=BpibUh0aB3", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;4;4", "excitement": "3;4;3", "reproducibility": "4;5;4", "correctness": "3;4;3", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.333333333333333, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";", "linkedin": ";", "aff_unique_index": "0;0", "aff_unique_norm": "Zhejiang University", "aff_unique_dep": "", "aff_unique_url": "https://www.zju.edu.cn", "aff_unique_abbr": "ZJU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "id": "BpsWrnfIIn", "title": "Misery Loves Complexity: Exploring Linguistic Complexity in the Context of Emotion Detection", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Given the omnipresence of social media in our society, thoughts and opinions are being shared online in an unprecedented manner. This means that both positive and negative emotions can be equally and freely expressed. However, the negativity bias posits that human beings are inherently drawn to and more moved by negativity and, as a consequence, negative emotions get more traffic. Correspondingly, when writing about emotions this negativity bias could lead to expressions of negative emotions that are linguistically more complex. In this paper, we attempt to use readability and linguistic complexity metrics to better understand the manifestation of emotions on social media platforms like Reddit based on the widely-used GoEmotions dataset. We demonstrate that according to most metrics, negative emotions indeed tend to generate more complex text than positive emotions. In addition, we examine whether a higher complexity hampers the automatic identification of emotions. To answer this question, we fine-tuned three state-of-the-art transformers (BERT, RoBERTa, and SpanBERT) on the same emotion detection dataset. We demonstrate that these models often fail to predict emotions for the more complex texts. More advanced LLMs like RoBERTa and SpanBERT also fail to improve by significant margins on complex samples. This calls for a more nuanced interpretation of the emotion detection performance of transformer models. 
We make the automatically annotated data available for further research at: https://huggingface.co/datasets/pranaydeeps/CAMEO", "keywords": "emotion detection;complexity;readability", "primary_area": "", "supplementary_material": "", "author": "Pranaydeep Singh;Luna De Bruyne;Orphee De Clercq;Els Lefever", "authorids": "~Pranaydeep_Singh1;~Luna_De_Bruyne1;~Orphee_De_Clercq1;~Els_Lefever1", "gender": "M;F;F;F", "homepage": "https://pranaydeep.xyz;https://lt3.ugent.be/people/luna-de-bruyne/;https://lt3.ugent.be/people/orphee-de-clercq/;https://www.lt3.ugent.be/people/els-lefever/", "dblp": "225/5468;220/0901;96/8156;70/46", "google_scholar": "8KSmDe4AAAAJ;https://scholar.google.be/citations?user=Tuc-YqcAAAAJ;https://scholar.google.be/citations?user=UTexewwAAAAJ;8IqAUe0AAAAJ", "or_profile": "~Pranaydeep_Singh1;~Luna_De_Bruyne1;~Orphee_De_Clercq1;~Els_Lefever1", "aff": "Universiteit Gent;Universiteit Gent;Ghent University;Ghent University", "aff_domain": "ugent.be;ugent.be;ugent.be;ugent.be", "position": "PhD student;Postdoc;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nsingh2023misery,\ntitle={Misery Loves Complexity: Exploring Linguistic Complexity in the Context of Emotion Detection},\nauthor={Pranaydeep Singh and Luna De Bruyne and Orphee De Clercq and Els Lefever},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=BpsWrnfIIn}\n}", "github": "", "project": "", "reviewers": "hKnV;nrYf;z7G8", "site": "https://openreview.net/forum?id=BpsWrnfIIn", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;3;4", "excitement": "2;3;4", "reproducibility": "4;3;4", "correctness": "3;3;3", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-5288-1650;0000-0002-6090-5552;0000-0002-7755-0591", "linkedin": ";luna-de-bruyne-2b4152144/;;els-lefever-5256b2/?originalSubdomain=be", "aff_unique_index": "0;0;1;1", "aff_unique_norm": "University of Ghent;Ghent University", "aff_unique_dep": ";", "aff_unique_url": "https://www.ugent.be/en;https://www.ugent.be/en", "aff_unique_abbr": "UGent;UGent", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Belgium" }, { "id": "BrqDTTga8J", "title": "Multi-view Contrastive Learning for Entity Typing over Knowledge Graphs", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Knowledge graph entity typing (KGET) aims at inferring plausible types of entities in knowledge graphs. Existing approaches to KGET focus on how to better encode the knowledge provided by the neighbors and types of an entity into its representation. However, they ignore the semantic knowledge provided by the way in which types can be clustered together. In this paper, we propose a novel method called Multi-view Contrastive Learning for knowledge graph Entity Typing MCLET, which effectively encodes the coarse-grained knowledge provided by clusters into entity and type embeddings. 
MCLET is composed of three modules: i) Multi-view Generation and Encoder module, which encodes structured information from entity-type, entity-cluster and cluster-type views; ii) Cross-view Contrastive Learning module, which encourages different views to collaboratively improve view-specific representations of entities and types; iii) Entity Typing Prediction module, which integrates multi-head attention and a Mixture-of-Experts strategy to infer missing entity types. Extensive experiments show the strong performance of MCLET compared to the state-of-the-art", "keywords": "Knowledge Graphs;Entity Typing;Knowledge-based Typing", "primary_area": "", "supplementary_material": "", "author": "Zhiwei Hu;Victor Gutierrez Basulto;Zhiliang Xiang;Ru Li;Jeff Z. Pan", "authorids": "~Zhiwei_Hu4;~Victor_Gutierrez_Basulto1;~Zhiliang_Xiang1;~Ru_Li2;~Jeff_Z._Pan1", "gender": ";;;F;M", "homepage": ";;https://zl-xiang.github.io/;http://cs.sxu.edu.cn/faculty/professor/1448/index.htm;https://knowledge-representation.org/j.z.pan/", "dblp": ";;319/5665;90/3813-1;59/6490", "google_scholar": ";;jzhgmNIAAAAJ;;https://scholar.google.co.uk/citations?hl=en", "or_profile": "~Zhiwei_Hu4;~Victor_Gutierrez_Basulto1;~Zhiliang_Xiang1;~Ru_Li2;~Jeff_Z._Pan1", "aff": ";;Cardiff University;Shanxi University;University of Edinburgh, University of Edinburgh", "aff_domain": ";;cardiff.ac.uk;sxu.edu.cn;ed.ac.uk", "position": ";;PhD student;Full Professor;Full Professor", "bibtex": "@inproceedings{\nhu2023multiview,\ntitle={Multi-view Contrastive Learning for Entity Typing over Knowledge Graphs},\nauthor={Zhiwei Hu and Victor Gutierrez Basulto and Zhiliang Xiang and Ru Li and Jeff Z. Pan},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=BrqDTTga8J}\n}", "github": "", "project": "", "reviewers": "sir8;sXvZ;Hx7N", "site": "https://openreview.net/forum?id=BrqDTTga8J", "pdf_size": 0, "rating": "4;4;4", "confidence": "5;3;3", "excitement": "4;3;3", "reproducibility": "4;4;4", "correctness": "4;3;4", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.0, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0002-0263-7289;0000-0003-1545-5553;0000-0002-9779-2088", "linkedin": ";;;;", "aff_unique_index": "0;1;2", "aff_unique_norm": "Cardiff University;Shanxi University;University of Edinburgh", "aff_unique_dep": ";;", "aff_unique_url": "https://www.cardiff.ac.uk;http://www.sxu.edu.cn;https://www.ed.ac.uk", "aff_unique_abbr": "Cardiff;SXU;Edinburgh", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "United Kingdom;China" }, { "id": "BscCXmZopv", "title": "SODA: Million-scale Dialogue Distillation with Social Commonsense Contextualization", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Data scarcity has been a long standing issue in the field of open-domain social dialogue. To quench this thirst, we present SODA: the first publicly available, million-scale high-quality social dialogue dataset. By contextualizing social commonsense knowledge from a knowledge graph, we are able to distill an exceptionally broad spectrum of social interactions from a large language model. 
Human evaluation shows that conversations in SODA are more consistent, specific, and (surprisingly) natural than those in prior human-authored datasets. \n\nUsing SODA, we train COSMO: a generalizable conversation model that is significantly more natural and consistent on unseen datasets than best-performing conversation models (e.g., GODEL, BlenderBot-1, Koala, Vicuna). Experiments reveal COSMO is sometimes even preferred to the original human-written gold responses. Additionally, our results shed light on the distinction between knowledge-enriched conversations and natural social chitchats. We plan to make our data, model, and code public.", "keywords": "social commonsense;distillation;dialogue;dataset", "primary_area": "", "supplementary_material": "", "author": "Hyunwoo Kim;Jack Hessel;Liwei Jiang;Peter West;Ximing Lu;Youngjae Yu;Pei Zhou;Ronan Le Bras;Malihe Alikhani;Gunhee Kim;Maarten Sap;Yejin Choi", "authorids": "~Hyunwoo_Kim3;~Jack_Hessel1;~Liwei_Jiang2;~Peter_West1;~Ximing_Lu1;~Youngjae_Yu1;~Pei_Zhou1;~Ronan_Le_Bras1;~Malihe_Alikhani2;~Gunhee_Kim1;~Maarten_Sap1;~Yejin_Choi1", "gender": "M;M;F;M;F;M;M;M;F;M;M;F", "homepage": "http://hyunwookim.com;https://www.jmhessel.com;https://liweijiang.me;https://peterwestai.notion.site/;https://gloriaximinglu.github.io/;https://yj-yu.github.io/home/;https://shaoxia57.github.io/;https://rlebras.github.io/index.html;https://www.malihealikhani.com/;http://vision.snu.ac.kr/gunhee/;http://maartensap.com;https://yejinc.github.io/", "dblp": "02/8768-2;https://dblp.uni-trier.de/pid/132/5250.html;;179/4587;24/10879;188/6210;;;163/2171;45/115;153/9519;89/579-1", "google_scholar": "https://scholar.google.co.kr/citations?user=PAXFuxsAAAAJ;SxQQ1msAAAAJ;lcPsDgUAAAAJ;https://scholar.google.ca/citations?user=9ubCBYwAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.co.kr/citations?user=WDO24ZYAAAAJ;13PGDZsAAAAJ;8dXLDSsAAAAJ;w24_ETkAAAAJ;https://scholar.google.co.kr/citations?user=CiSdOV0AAAAJ;gFN4QUYAAAAJ;vhP-tlcAAAAJ", "or_profile": "~Hyunwoo_Kim3;~Jack_Hessel1;~Liwei_Jiang2;~Peter_West1;~Ximing_Lu1;~Youngjae_Yu1;~Pei_Zhou1;~Ronan_Le_Bras1;~Malihe_Alikhani2;~Gunhee_Kim1;~Maarten_Sap1;~Yejin_Choi1", "aff": "Seoul National University;Allen Institute for Artificial Intelligence;University of Washington;Allen Institute for Artificial Intelligence;University of Washington;Allen Institute for Artificial Intelligence;University of Southern California;Allen Institute for Artificial Intelligence;Northeastern University ;Seoul National University;Carnegie Mellon University;Department of Computer Science, University of Washington", "aff_domain": "snu.ac.kr;allenai.org;washington.edu;allenai.org;cs.washington.edu;allenai.org;usc.edu;allenai.org;northeastern.edu;snu.ac.kr;cmu.edu;cs.washington.edu", "position": "PhD student;Researcher;PhD student;Intern;Undergrad student;Postdoc;PhD student;Researcher;Assistant Professor;Full Professor;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nkim2023soda,\ntitle={{SODA}: Million-scale Dialogue Distillation with Social Commonsense Contextualization},\nauthor={Hyunwoo Kim and Jack Hessel and Liwei Jiang and Peter West and Ximing Lu and Youngjae Yu and Pei Zhou and Ronan Le Bras and Malihe Alikhani and Gunhee Kim and Maarten Sap and Yejin Choi},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=BscCXmZopv}\n}", "github": "", "project": "", "reviewers": "FFJY;1GY1;Czz8;H9QN;ZTzC", "site": 
"https://openreview.net/forum?id=BscCXmZopv", "pdf_size": 0, "rating": "5;5;5;5;5", "confidence": "4;4;4;4;3", "excitement": "3;4;4;4;4", "reproducibility": "3;4;3;4;4", "correctness": "4;4;4;4;4", "rating_avg": 5.0, "confidence_avg": 3.8, "excitement_avg": 3.8, "reproducibility_avg": 3.6, "correctness_avg": 4.0, "replies_avg": 17, "authors#_avg": 12, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0009-0002-2714-1287;0000-0002-4012-8979;;;;;;;;0000-0002-9543-7453;;", "linkedin": "hyunw-kim/;;;;;;pei-zhou-169051119/;;alikhanimalihe/;;;", "aff_unique_index": "0;1;2;1;2;1;3;1;4;0;5;2", "aff_unique_norm": "Seoul National University;Allen Institute for Artificial Intelligence;University of Washington;University of Southern California;Northeastern University;Carnegie Mellon University", "aff_unique_dep": ";;;;;", "aff_unique_url": "https://www.snu.ac.kr;https://allenai.org;https://www.washington.edu;https://www.usc.edu;https://www.northeastern.edu;https://www.cmu.edu", "aff_unique_abbr": "SNU;AI2;UW;USC;NEU;CMU", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Los Angeles;Seattle", "aff_country_unique_index": "0;1;1;1;1;1;1;1;1;0;1;1", "aff_country_unique": "South Korea;United States" }, { "id": "BxY99WBKSV", "title": "Ties Matter: Meta-Evaluating Modern Metrics with Pairwise Accuracy and Tie Calibration", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Kendall's tau is frequently used to meta-evaluate how well machine translation (MT) evaluation metrics score individual translations. Its focus on pairwise score comparisons is intuitive but raises the question of how ties should be handled, a gray area that has motivated different variants in the literature. We demonstrate that, in settings like modern MT meta-evaluation, existing variants have weaknesses arising from their handling of ties, and in some situations can even be gamed. We propose instead to meta-evaluate metrics with a version of pairwise accuracy that gives metrics credit for correctly predicting ties, in combination with a tie calibration procedure that automatically introduces ties into metric scores, enabling fair comparison between metrics that do and do not predict ties. 
We argue and provide experimental evidence that these modifications lead to fairer ranking-based assessments of metric performance.", "keywords": "meta-evaluation;metrics;kendall's tau", "primary_area": "", "supplementary_material": "", "author": "Daniel Deutsch;George Foster;Markus Freitag", "authorids": "~Daniel_Deutsch1;~George_Foster1;~Markus_Freitag2", "gender": ";M;M", "homepage": "https://danieldeutsch.github.io/;http://www.iro.umontreal.ca/~foster;", "dblp": "222/9395;02/1712;57/8503", "google_scholar": "TQYzWDEAAAAJ;Hr8KyG4AAAAJ;https://scholar.google.com/citations?hl=en", "or_profile": "~Daniel_Deutsch1;~George_Foster1;~Markus_Freitag2", "aff": "Google;Google;Google", "aff_domain": "google.com;google.com;google.com", "position": "Researcher;Research Scientist;Researcher", "bibtex": "@inproceedings{\ndeutsch2023ties,\ntitle={Ties Matter: Meta-Evaluating Modern Metrics with Pairwise Accuracy and Tie Calibration},\nauthor={Daniel Deutsch and George Foster and Markus Freitag},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=BxY99WBKSV}\n}", "github": "", "project": "", "reviewers": "FkvL;HC2C;7hqG", "site": "https://openreview.net/forum?id=BxY99WBKSV", "pdf_size": 0, "rating": "5;5;5", "confidence": "2;4;4", "excitement": "4;5;4", "reproducibility": "2;5;5", "correctness": "4;4;5", "rating_avg": 5.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 4.333333333333333, "reproducibility_avg": 4.0, "correctness_avg": 4.333333333333333, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": ";;markus-freitag-7b17b4101/", "aff_unique_index": "0;0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google", "aff_unique_url": "https://www.google.com", "aff_unique_abbr": "Google", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Mountain View", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "C68cYdgLUs", "title": "We are Who We Cite: Bridges of Influence Between Natural Language Processing and Other Academic Fields", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Natural Language Processing (NLP) is poised to substantially influence the world. However, significant progress comes hand-in-hand with substantial risks. Addressing them requires broad engagement with various fields of study. Yet, little empirical work examines the state of such engagement (past or current). In this paper, we quantify the degree of influence between 23 fields of study and NLP (on each other). We analyzed ~77k NLP papers, ~3.1m citations from NLP papers to other papers, and ~1.8m citations from other papers to NLP papers. We show that, unlike most fields, the cross-field engagement of NLP, measured by our proposed Citation Field Diversity Index (CFDI), has declined from 0.58 in 1980 to 0.31 in 2022 (an all-time low). In addition, we find that NLP has grown more insular---citing increasingly more NLP papers and having fewer papers that act as bridges between fields. NLP citations are dominated by computer science; Less than 8% of NLP citations are to linguistics, and less than 3% are to math and psychology. 
These findings underscore NLP's urgent need to reflect on its engagement with various fields.", "keywords": "responsible nlp;scientific influence;interdisciplinarity;bibliometrics", "primary_area": "", "supplementary_material": "", "author": "Jan Philip Wahle;Terry Ruas;Mohamed Abdalla;Bela Gipp;Saif M. Mohammad", "authorids": "~Jan_Philip_Wahle1;~Terry_Ruas1;~Mohamed_Abdalla3;~Bela_Gipp1;~Saif_M._Mohammad1", "gender": "M;;;M;M", "homepage": "https://jpwahle.com;;;https://gipplab.org/team/prof-dr-bela-gipp/;http://saifmohammad.com", "dblp": "288/1075.html;;;12/6082;58/380", "google_scholar": "MI0C9mAAAAAJ;;;No2ot2YAAAAJ;zJHymXh9EVwC", "or_profile": "~Jan_Philip_Wahle1;~Terry_Ruas1;~Mohamed_Abdalla3;~Bela_Gipp1;~Saif_M._Mohammad1", "aff": "University of G\u00f6ttingen, Germany;;;Georg-August Universit\u00e4t G\u00f6ttingen;National Research Council Canada", "aff_domain": "uni-goettingen.de;;;uni-goettingen.de;nrc-cnrc.gc.ca", "position": "PhD student;;;Full Professor;Researcher", "bibtex": "@inproceedings{\nwahle2023we,\ntitle={We are Who We Cite: Bridges of Influence Between Natural Language Processing and Other Academic Fields},\nauthor={Jan Philip Wahle and Terry Ruas and Mohamed Abdalla and Bela Gipp and Saif M. Mohammad},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=C68cYdgLUs}\n}", "github": "", "project": "", "reviewers": "H61G;GqaD;k77Z", "site": "https://openreview.net/forum?id=C68cYdgLUs", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;3;4", "excitement": "4;3;4", "reproducibility": "3;3;4", "correctness": "4;3;4", "rating_avg": 5.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-2116-9767;;;0000-0001-6522-3019;0000-0003-2716-7516", "linkedin": "https://linkedin.com/in/jan-philip-wahle/;;;;", "aff_unique_index": "0;1;2", "aff_unique_norm": "University of G\u00f6ttingen;Georg-August Universit\u00e4t G\u00f6ttingen;National Research Council Canada", "aff_unique_dep": ";;", "aff_unique_url": "https://www.uni-goettingen.de;https://www.uni-goettingen.de;https://www.nrc-cnrc.gc.ca", "aff_unique_abbr": "Georg-August-Universit\u00e4t;GAU;NRC-CNRC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1", "aff_country_unique": "Germany;Canada" }, { "id": "CEPkRTOlut", "title": "Speech Recognition and Meaning Interpretation: Towards Disambiguation of Structurally Ambiguous Spoken Utterances in Indonesian", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Despite being the world's fourth-most populous country, the development of spoken language technologies in Indonesia still needs improvement. Most automatic speech recognition (ASR) systems that have been developed are still limited to transcribing the exact word-by-word, which, in many cases, consists of ambiguous sentences. In fact, speakers use prosodic characteristics of speech to convey different interpretations, which, unfortunately, these systems often ignore. In this study, we attempt to resolve structurally ambiguous utterances into unambiguous texts in Indonesian using prosodic information. To the best of our knowledge, this might be the first study to address this problem in the ASR context. 
Our contributions include (1) collecting the Indonesian speech corpus on structurally ambiguous sentences\\footnote{Our corpus is available at \\url{https://github.com/ha3ci-lab/struct_amb_ind}}; (2) conducting a survey on how people disambiguate structurally ambiguous sentences presented in both text and speech forms; and (3) constructing an Indonesian ASR and meaning interpretation system by utilizing both cascade and direct approaches to map speech to text, along with two additional prosodic information signals (pause and pitch). The experimental results reveal that it is possible to disambiguate these utterances. In this study, the proposed cascade system, utilizing Mel-spectrograms concatenated with F0 and energy as input, achieved a disambiguation accuracy of 79.6\\%, while the proposed direct system with the same input yielded an even more impressive disambiguation accuracy of 82.2\\%.", "keywords": "structural ambiguity in sentences;prosodic information;speech recognition;speech-to-text mapping;meaning\u00a0interpretation", "primary_area": "", "supplementary_material": "", "author": "Ruhiyah Faradishi Widiaputri;Ayu Purwarianti;Dessi Puji Lestari;Kurniawati Azizah;Dipta Tanaya;Sakriani Sakti", "authorids": "~Ruhiyah_Faradishi_Widiaputri1;~Ayu_Purwarianti1;~Dessi_Puji_Lestari1;~Kurniawati_Azizah1;~Dipta_Tanaya1;~Sakriani_Sakti1", "gender": "F;F;F;F;F;F", "homepage": ";https://www.itb.ac.id/staf/profil/ayu-purwarianti;;;;https://hai-lab.naist.jp/profile/ssakti/index.html", "dblp": ";15/2527;;;;71/3717", "google_scholar": ";8jUro_cAAAAJ;https://scholar.google.co.id/citations?user=42ng95AAAAAJ;https://scholar.google.com/citations?hl=id;MDsetwkAAAAJ;https://scholar.google.de/citations?user=cjBMTyoAAAAJ", "or_profile": "~Ruhiyah_Faradishi_Widiaputri1;~Ayu_Purwarianti1;~Dessi_Puji_Lestari1;~Kurniawati_Azizah1;~Dipta_Tanaya1;~Sakriani_Sakti1", "aff": "Institut Teknologi Bandung;Institut Teknologi Bandung;Institut Teknologi Bandung;;Faculty of Computer Science Universitas Indonesia;RIKEN", "aff_domain": "itb.ac.id;itb.ac.id;itb.ac.id;;cs.ui.ac.id;riken.jp", "position": "Undergrad student;Associate Professor;Assistant Professor;;Lecturer;Scientific Researcher", "bibtex": "@inproceedings{\nwidiaputri2023speech,\ntitle={Speech Recognition and Meaning Interpretation: Towards Disambiguation of Structurally Ambiguous Spoken Utterances in Indonesian},\nauthor={Ruhiyah Faradishi Widiaputri and Ayu Purwarianti and Dessi Puji Lestari and Kurniawati Azizah and Dipta Tanaya and Sakriani Sakti},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=CEPkRTOlut}\n}", "github": "", "project": "", "reviewers": "iZHD;PvXP;S9Q7", "site": "https://openreview.net/forum?id=CEPkRTOlut", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;4;3", "excitement": "5;4;4", "reproducibility": "2;3;3", "correctness": "4;4;4", "rating_avg": 5.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 4.333333333333333, "reproducibility_avg": 2.6666666666666665, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-5016-3700;;0000-0002-3217-7025;;0000-0001-5509-8963", "linkedin": "ruhiyah-faradishi-widiaputri-610288194/;ayu-purwarianti/;;;;", "aff_unique_index": "0;0;0;1;2", "aff_unique_norm": "Institut Teknologi Bandung;Universitas Indonesia;RIKEN", "aff_unique_dep": ";Faculty of Computer Science;", "aff_unique_url": 
"https://www.itb.ac.id;https://www.ui.ac.id;https://www.riken.jp", "aff_unique_abbr": "ITB;UI;RIKEN", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;1", "aff_country_unique": "Indonesia;Japan" }, { "id": "CHffPbQXjX", "title": "Towards Being Parameter-Efficient: A Stratified Sparsely Activated Transformer with Dynamic Capacity", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Mixture-of-experts (MoE) models that employ sparse activation have demonstrated effectiveness in significantly increasing the number of parameters while maintaining low computational requirements per token. However, recent studies have established that MoE models are inherently parameter-inefficient as the improvement in performance diminishes with an increasing number of experts. We hypothesize this parameter inefficiency is a result of all experts having equal capacity, which may not adequately meet the varying complexity requirements of different tokens or tasks. In light of this, we propose Stratified Mixture of Experts (SMoE) models, which feature a stratified structure and can assign dynamic capacity to different tokens. We demonstrate the effectiveness of SMoE on three multilingual machine translation benchmarks, containing 4, 15, and 94 language pairs, respectively. We show that SMoE outperforms multiple state-of-the-art MoE models with the same or fewer parameters.", "keywords": "Stratified Mixture-of-Experts;Parameter-efficiency;Dynamic capacity", "primary_area": "", "supplementary_material": "", "author": "Haoran Xu;Maha Elbayad;Kenton Murray;Jean Maillard;Vedanuj Goswami", "authorids": "~Haoran_Xu3;~Maha_Elbayad3;~Kenton_Murray1;~Jean_Maillard1;~Vedanuj_Goswami1", "gender": "M;;;;M", "homepage": "https://www.fe1ixxu.com/;;http://www.kentonmurray.com;;https://vedanuj.github.io/", "dblp": ";;143/9465;;156/5885", "google_scholar": "rhcrGQ0AAAAJ;;;;bh08FeIAAAAJ", "or_profile": "~Haoran_Xu3;~Maha_Elbayad3;~Kenton_Murray1;~Jean_Maillard1;~Vedanuj_Goswami1", "aff": "Johns Hopkins University;;Johns Hopkins University;;", "aff_domain": "jhu.edu;;jhu.edu;;", "position": "PhD student;;Researcher;;", "bibtex": "@inproceedings{\nxu2023towards,\ntitle={Towards Being Parameter-Efficient: A Stratified Sparsely Activated Transformer with Dynamic Capacity},\nauthor={Haoran Xu and Maha Elbayad and Kenton Murray and Jean Maillard and Vedanuj Goswami},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=CHffPbQXjX}\n}", "github": "", "project": "", "reviewers": "zL3X;3H55;T5W3", "site": "https://openreview.net/forum?id=CHffPbQXjX", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;5;3", "excitement": "3;4;3", "reproducibility": "4;3;4", "correctness": "3;4;3", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0002-5628-1003;;", "linkedin": "haoran-xu-0842b3194/;;kentonmurray/;;", "aff_unique_index": "0;0", "aff_unique_norm": "Johns Hopkins University", "aff_unique_dep": "", "aff_unique_url": "https://www.jhu.edu", "aff_unique_abbr": "JHU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "CK9mApdZFW", "title": "DISCO: A Large Scale Human Annotated Corpus for 
Disfluency Correction in Indo-European Languages", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Disfluency correction (DC) is the process of removing disfluent elements like fillers, repetitions and corrections from spoken utterances to create readable and interpretable text. DC is a vital post-processing step applied to Automatic Speech Recognition (ASR) outputs, before subsequent processing by downstream language understanding tasks. Existing DC research has primarily focused on English due to the unavailability of large-scale open-source datasets. Towards the goal of multilingual disfluency correction, we present a high-quality human-annotated DC corpus covering four important Indo-European languages: English, Hindi, German and French. We provide extensive analysis of results of state-of-the-art DC models across all four languages obtaining F1 scores of 97.55 (English), 94.29 (Hindi), 95.89 (German) and 92.97 (French). To demonstrate the benefits of DC on downstream tasks, we show that DC leads to 5.65 points increase in BLEU scores on average when used in conjunction with a state-of-the-art Machine Translation (MT) system. We release code to run our experiments along with our annotated dataset here.", "keywords": "Disfluency Correction;Machine Translation;Dataset", "primary_area": "", "supplementary_material": "", "author": "Vineet Bhat;Preethi Jyothi;Pushpak Bhattacharyya", "authorids": "~Vineet_Bhat1;~Preethi_Jyothi2;~Pushpak_Bhattacharyya1", "gender": "M;F;M", "homepage": ";http://www.cse.iitb.ac.in/~pjyothi;https://www.cse.iitb.ac.in/~pb/", "dblp": "348/0410;01/9014;p/PushpakBhattacharyya", "google_scholar": ";https://scholar.google.co.in/citations?user=QN_uhu8AAAAJ;https://scholar.google.com.tw/citations?user=vvg-pAkAAAAJ", "or_profile": "~Vineet_Bhat1;~Preethi_Jyothi2;~Pushpak_Bhattacharyya1", "aff": "Indian Institute of Technology, Bombay;Indian Institute of Technology Bombay;Indian Institute of Technology, Bombay, Dhirubhai Ambani Institute Of Information and Communication Technology", "aff_domain": "iitb.ac.in;iitb.ac.in;iitb.ac.in", "position": "MS student;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nbhat2023disco,\ntitle={{DISCO}: A Large Scale Human Annotated Corpus for Disfluency Correction in Indo-European Languages},\nauthor={Vineet Bhat and Preethi Jyothi and Pushpak Bhattacharyya},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=CK9mApdZFW}\n}", "github": "", "project": "", "reviewers": "3wc9;HAs1;z3XW;J4JB", "site": "https://openreview.net/forum?id=CK9mApdZFW", "pdf_size": 0, "rating": "3;3;3;3", "confidence": "3;2;4;4", "excitement": "3;3;3;4", "reproducibility": "3;3;3;4", "correctness": "3;3;3;3", "rating_avg": 3.0, "confidence_avg": 3.25, "excitement_avg": 3.25, "reproducibility_avg": 3.25, "correctness_avg": 3.0, "replies_avg": 14, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": "vineet-bhat-457716171/;;pushpakbh/?originalSubdomain=in", "aff_unique_index": "0;0;1", "aff_unique_norm": "Indian Institute of Technology Bombay;Indian Institute of Technology, Bombay", "aff_unique_dep": ";", "aff_unique_url": "https://www.iitb.ac.in;https://www.iitb.ac.in", "aff_unique_abbr": "IIT Bombay;IIT Bombay", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Bombay", "aff_country_unique_index": "0;0;0", "aff_country_unique": "India" }, { "id": "CLVOAHdybT", "title": "HFMRE: 
Constructing Huffman Tree in Bags to Find Excellent Instances for Distantly Supervised Relation Extraction", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Since the introduction of distantly supervised relation extraction methods, numerous approaches have been developed, the most representative of which is multi-instance learning (MIL). To find reliable features that are most representative of multi-instance bags, aggregation strategies such as AVG (average), ONE (at least one), and ATT (sentence-level attention) are commonly used. These strategies tend to train third-party vectors to select sentence-level features, leaving it to the third party to decide/identify what is noise, ignoring the intrinsic associations that naturally exist from sentence to sentence. In this paper, we propose the concept of circular cosine similarity, which is used to explicitly show the intrinsic associations between sentences within a bag. We also consider the previous methods to be a crude denoising process as they are interrupted and do not have a continuous noise detection procedure. Following this consideration, we implement a relation extraction framework (HFMRE) that relies on the Huffman tree, where sentences are considered as leaf nodes and circular cosine similarity are considered as node weights. HFMRE can continuously and iteratively discriminate noise and aggregated features during the construction of the Huffman tree, eventually finding an excellent instance that is representative of a bag-level feature. The experiments demonstrate the remarkable effectiveness of our method, outperforming previously advanced baselines on the popular DSRE datasets.", "keywords": "multi-instance learning; relation extraction; Huffman Tree; aggregation strategies", "primary_area": "", "supplementary_material": "", "author": "Min Li;Cong Shao;Gang Li;Mingle Zhou", "authorids": "~Min_Li15;~Cong_Shao1;~Gang_Li19;~Mingle_Zhou1", "gender": "F;;M;M", "homepage": ";;http://jsxb.scsc.cn/list_29/223.html;https://baike.baidu.com/item/%E5%91%A8%E9%B8%A3%E4%B9%90/58981265?fr=aladdin", "dblp": ";;;", "google_scholar": ";;;", "or_profile": "~Min_Li15;~Cong_Shao1;~Gang_Li19;~Mingle_Zhou1", "aff": "Qilu University of Technology;Qilu University of Technology;Qilu University of Technology;Qilu University of Technology", "aff_domain": "qlu.edu.cn;qlu.edu.cn;qlu.edu.cn;qlu.edu.cn", "position": "Principal Researcher;MS student;Full Professor;Full Professor", "bibtex": "@inproceedings{\nli2023hfmre,\ntitle={{HFMRE}: Constructing Huffman Tree in Bags to Find Excellent Instances for Distantly Supervised Relation Extraction},\nauthor={Min Li and Cong Shao and Gang Li and Mingle Zhou},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=CLVOAHdybT}\n}", "github": "", "project": "", "reviewers": "kjoj;LrYw;sGH7", "site": "https://openreview.net/forum?id=CLVOAHdybT", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;3;4", "excitement": "3;2;3", "reproducibility": "4;2;4", "correctness": "3;2;3", "rating_avg": 3.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 2.6666666666666665, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 2.6666666666666665, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-0507-5576;0000-0001-7515-3544;0000-0002-7896-4833;0000-0003-4911-276X", "linkedin": ";;;", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Qilu 
University of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.qlu.edu.cn", "aff_unique_abbr": "QLU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "CLq5tqZ5SK", "title": "A Computational Interface to Translate Strategic Intent from Unstructured Language in a Low-Data Setting", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Many real-world tasks involve a mixed-initiative setup, wherein humans and AI systems collaboratively perform a task. While significant work has been conducted towards enabling humans to specify, through language, exactly how an agent should complete a task (i.e., low-level specification), prior work lacks on interpreting the high-level strategic intent of the human commanders. Parsing strategic intent from language will allow autonomous systems to independently operate according to the user's plan without frequent guidance or instruction. In this paper, we build a computational interface capable of translating unstructured language strategies into actionable intent in the form of goals and constraints. Leveraging a game environment, we collect a dataset of over 1000 examples, mapping language strategies to the corresponding goals and constraints, and show that our model, trained on this dataset, significantly outperforms human interpreters in inferring strategic intent (i.e., goals and constraints) from language (p < 0.05). Furthermore, we show that our model (125M parameters) significantly outperforms ChatGPT for this task (p < 0.05) in a low-data setting.", "keywords": "Text Classification;ChatGPT;Human-Evaluation;Low-Data", "primary_area": "", "supplementary_material": "", "author": "Pradyumna Tambwekar;Lakshita Dodeja;Nathan Vaska;Wei Xu;Matthew Gombolay", "authorids": "~Pradyumna_Tambwekar1;~Lakshita_Dodeja1;~Nathan_Vaska1;~Wei_Xu5;~Matthew_Gombolay1", "gender": "M;F;;F;M", "homepage": ";https://lakshitadodeja.github.io/website/;;https://cocoxu.github.io/;https://core-robotics.gatech.edu/", "dblp": "168/8398;;;32/1213-4.html;144/1022", "google_scholar": "efOKpCsAAAAJ;;;BfOdG-oAAAAJ;Ihyz20wAAAAJ", "or_profile": "~Pradyumna_Tambwekar1;~Lakshita_Dodeja1;~Nathan_Vaska1;~Wei_Xu5;~Matthew_Gombolay1", "aff": "Georgia Institute of Technology;Georgia Institute of Technology;;Georgia Institute of Technology;Georgia Institute of Technology", "aff_domain": "gatech.edu;gatech.edu;;gatech.edu;cc.gatech.edu", "position": "PhD student;MS student;;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\ntambwekar2023a,\ntitle={A Computational Interface to Translate Strategic Intent from Unstructured Language in a Low-Data Setting},\nauthor={Pradyumna Tambwekar and Lakshita Dodeja and Nathan Vaska and Wei Xu and Matthew Gombolay},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=CLq5tqZ5SK}\n}", "github": "", "project": "", "reviewers": "Y69H;HrLg;XhaB;ASwe", "site": "https://openreview.net/forum?id=CLq5tqZ5SK", "pdf_size": 0, "rating": "4;4;4;4", "confidence": "2;4;2;3", "excitement": "2;3;3;4", "reproducibility": "4;3;3;4", "correctness": "3;4;3;3", "rating_avg": 4.0, "confidence_avg": 2.75, "excitement_avg": 3.0, "reproducibility_avg": 3.5, "correctness_avg": 3.25, "replies_avg": 13, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;", "linkedin": 
"pradyumna-tambwekar-a5809a12b/;lakshita-dodeja-15399321b/;;;", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Georgia Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.gatech.edu", "aff_unique_abbr": "Georgia Tech", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "CMm4w1A4Yd", "title": "A Deeper (Autoregressive) Approach to Non-Convergent Discourse Parsing", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Online social platforms provide a bustling arena for information-sharing and for multi-party discussions. Various frameworks for dialogic discourse parsing were developed and used for the processing of discussions and for predicting the productivity of a dialogue. However, most of these frameworks are not suitable for the analysis of contentious discussions that are commonplace in many online platforms. A novel multi-label scheme for contentious dialog parsing was recently introduced by Zakharov et al. (2021). While the schema is well developed, the computational approach they provide is both naive and inefficient, as a different model (architecture) using a different representation of the input, is trained for each of the 31 tags in the annotation scheme. Moreover, all their models assume full knowledge of label collocations and context, which is unlikely in any realistic setting. \n\nIn this work, we present a unified model for Non-Convergent Discourse Parsing that does not require any additional input other than the previous dialog utterances. We fine-tuned a RoBERTa backbone, combining embeddings of the utterance, the context and the labels through GRN layers and an asymmetric loss function.\nOverall, our model achieves results comparable with SOTA, without using label collocation and without training a unique architecture/model for each label. 
Our proposed architecture makes the labeling feasible at large scale, promoting the development of tools that deepen our understanding of discourse dynamics.", "keywords": "Discourse;dialogue;style", "primary_area": "", "supplementary_material": "", "author": "Oren Tsur;Yoav Tulpan", "authorids": "~Oren_Tsur1;~Yoav_Tulpan1", "gender": ";M", "homepage": "https://www.naslab.ise.bgu.ac.il/orentsur;", "dblp": "89/1576;", "google_scholar": "https://scholar.google.com/citations?hl=en;", "or_profile": "~Oren_Tsur1;~Yoav_Tulpan1", "aff": "Ben-Gurion University of the Negev;Ben-Gurion University of the Negev", "aff_domain": "bgu.ac.il;bgu.ac.il", "position": "Assistant Professor;MS student", "bibtex": "@inproceedings{\ntsur2023a,\ntitle={A Deeper (Autoregressive) Approach to Non-Convergent Discourse Parsing},\nauthor={Oren Tsur and Yoav Tulpan},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=CMm4w1A4Yd}\n}", "github": "", "project": "", "reviewers": "brxV;MyNg;KWYb;Ks1Q", "site": "https://openreview.net/forum?id=CMm4w1A4Yd", "pdf_size": 0, "rating": "3;3;3;3", "confidence": "4;4;3;2", "excitement": "3;3;3;3", "reproducibility": "3;3;3;5", "correctness": "4;3;3;3", "rating_avg": 3.0, "confidence_avg": 3.25, "excitement_avg": 3.0, "reproducibility_avg": 3.5, "correctness_avg": 3.25, "replies_avg": 13, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-6809-2234;", "linkedin": ";yoav-tulpan-401027208", "aff_unique_index": "0;0", "aff_unique_norm": "Ben-Gurion University of the Negev", "aff_unique_dep": "", "aff_unique_url": "https://www.bgu.ac.il", "aff_unique_abbr": "BGU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Israel" }, { "id": "CO40wNIY5i", "title": "A Unified View of Evaluation Metrics for Structured Prediction", "track": "main", "status": "Long Main", "tldr": "", "abstract": "We present a conceptual framework that unifies a variety of evaluation metrics for different structured prediction tasks (e.g. event and relation extraction, syntactic and semantic parsing). Our framework requires representing the outputs of these tasks as objects of certain data types, and derives metrics through matching of common substructures, possibly followed by normalization. We demonstrate how commonly used metrics for a number of tasks can be succinctly expressed by this framework, and show that new metrics can be naturally derived in a bottom-up way based on an output structure. We release a library that enables this derivation to create new metrics. 
Finally, we consider how specific characteristics of tasks motivate metric design decisions, and suggest possible modifications to existing metrics in line with those motivations.", "keywords": "structured prediction;information extraction;evaluation metrics;template extraction;n-ary relation extraction", "primary_area": "", "supplementary_material": "", "author": "Yunmo Chen;William Gantt;Tongfei Chen;Aaron Steven White;Benjamin Van Durme", "authorids": "~Yunmo_Chen1;~William_Gantt1;~Tongfei_Chen1;~Aaron_Steven_White1;~Benjamin_Van_Durme2", "gender": "M;M;M;M;", "homepage": "https://omnuy.me;https://wgantt.github.io/;http://cs.jhu.edu/~tongfei;http://aaronstevenwhite.io;", "dblp": "252/7831;277/0935;137/9630;188/5734;", "google_scholar": "V-g2Tx8AAAAJ;SpOIH2MAAAAJ;_OS1gScAAAAJ;R-ZVWNEAAAAJ;", "or_profile": "~Yunmo_Chen1;~William_Gantt1;~Tongfei_Chen1;~Aaron_Steven_White1;~Benjamin_Van_Durme2", "aff": "Johns Hopkins University;University of Rochester;Microsoft;University of Rochester;", "aff_domain": "jhu.edu;cs.rochester.edu;microsoft.com;rochester.edu;", "position": "PhD student;PhD student;Researcher;Assistant Professor;", "bibtex": "@inproceedings{\nchen2023a,\ntitle={A Unified View of Evaluation Metrics for Structured Prediction},\nauthor={Yunmo Chen and William Gantt and Tongfei Chen and Aaron Steven White and Benjamin Van Durme},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=CO40wNIY5i}\n}", "github": "", "project": "", "reviewers": "z6Hc;Aomu;J1er", "site": "https://openreview.net/forum?id=CO40wNIY5i", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;3;5", "excitement": "3;4;4", "reproducibility": "4;0;0", "correctness": "4;4;4", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 1.3333333333333333, "correctness_avg": 4.0, "replies_avg": 10, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0001-9931-2861;;;", "linkedin": "yunmochen;will-gantt-9b8994220/;;aaronstevenwhite/;", "aff_unique_index": "0;1;2;1", "aff_unique_norm": "Johns Hopkins University;University of Rochester;Microsoft", "aff_unique_dep": ";;Microsoft Corporation", "aff_unique_url": "https://www.jhu.edu;https://www.rochester.edu;https://www.microsoft.com", "aff_unique_abbr": "JHU;U of R;Microsoft", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "CP1PLnFzbr", "title": "Context Compression for Auto-regressive Transformers with Sentinel Tokens", "track": "main", "status": "Short Main", "tldr": "", "abstract": "The quadratic complexity of the attention module makes it gradually become the bulk of compute in Transformer-based LLMs during generation. Moreover, the excessive key-value cache that arises when dealing with long inputs also brings severe issues on memory footprint and inference latency. In this work, we propose a plug-and-play approach that is able to incrementally compress the intermediate activation of a specified span of tokens into compact ones, thereby reducing both memory and computational cost when processing subsequent context. \nExperiments on both in-domain language modeling and zero-shot open-ended document generation demonstrate the advantage of our approach over sparse attention baselines in terms of fluency, n-gram matching, and semantic similarity. 
Finally, we comprehensively profile the benefit of context compression on improving the system throughput. Code is available at \\url{https://github.com/DRSY/KV_Compression}.", "keywords": "context compression;key-value cache compression", "primary_area": "", "supplementary_material": "", "author": "Siyu Ren;Qi Jia;Kenny Q. Zhu", "authorids": "~Siyu_Ren1;~Qi_Jia3;~Kenny_Q._Zhu1", "gender": "M;F;M", "homepage": "https://drsy.github.io/;;http://www.cs.sjtu.edu.cn/~kzhu/", "dblp": ";69/1921-3;z/KennyQiliZhu", "google_scholar": "jkJDyrkAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com.tw/citations?user=ZIRJ6lIAAAAJ", "or_profile": "~Siyu_Ren1;~Qi_Jia3;~Kenny_Q._Zhu1", "aff": "Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University", "aff_domain": "sjtu.edu.cn;sjtu.edu.cn;cs.sjtu.edu.cn", "position": "PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nren2023context,\ntitle={Context Compression for Auto-regressive Transformers with Sentinel Tokens},\nauthor={Siyu Ren and Qi Jia and Kenny Q. Zhu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=CP1PLnFzbr}\n}", "github": "", "project": "", "reviewers": "BNcU;UZc8;dqav;xYfS", "site": "https://openreview.net/forum?id=CP1PLnFzbr", "pdf_size": 0, "rating": "3;3;3;3", "confidence": "4;3;1;3", "excitement": "3;3;4;3", "reproducibility": "3;5;2;2", "correctness": "3;3;4;3", "rating_avg": 3.0, "confidence_avg": 2.75, "excitement_avg": 3.25, "reproducibility_avg": 3.0, "correctness_avg": 3.25, "replies_avg": 13, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0001-6104-7249;", "linkedin": ";;", "aff_unique_index": "0;0;0", "aff_unique_norm": "Shanghai Jiao Tong University", "aff_unique_dep": "", "aff_unique_url": "https://www.sjtu.edu.cn", "aff_unique_abbr": "SJTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "CPBEn5mGle", "title": "CQE: A Comprehensive Quantity Extractor", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Quantities are essential in documents to describe factual information. They are ubiquitous in application domains such as finance, business, medicine, and science in general. Compared to other information extraction approaches, interestingly only a few works exist that describe methods for a proper extraction and representation of quantities in text.\n In this paper, we present such a comprehensive quantity extraction framework from text data. It efficiently detects combinations of values and units, the behavior of a quantity (e.g., rising or falling), and the concept a quantity is associated with. Our framework makes use of dependency parsing and a dictionary of units, and it provides for a proper normalization and standardization of detected quantities. Using a novel dataset for evaluation, we show that our open source framework outperforms other systems and -- to the best of our knowledge -- is the first to detect concepts associated with identified quantities.
The code and data underlying our framework are available at https://github.com/vivkaz/CQE.", "keywords": "quantities;quantity extraction;information extraction", "primary_area": "", "supplementary_material": "", "author": "Satya Almasian;Vivian Kazakova;Philipp G\u00f6ldner;Michael Gertz", "authorids": "~Satya_Almasian1;~Vivian_Kazakova1;~Philipp_G\u00f6ldner1;~Michael_Gertz1", "gender": ";F;;", "homepage": "https://dbs.ifi.uni-heidelberg.de/team/almasian/;;;https://dbs.ifi.uni-heidelberg.de/team/gertz/", "dblp": "198/5463;347/2459;;g/MichaelGertz.html", "google_scholar": "https://scholar.google.com/citations?hl=en;;;https://scholar.google.de/citations?user=F4eflRgAAAAJ", "or_profile": "~Satya_Almasian1;~Vivian_Kazakova1;~Philipp_G\u00f6ldner1;~Michael_Gertz1", "aff": ";Ruprecht-Karls-Universit\u00e4t Heidelberg;;Ruprecht-Karls-Universit\u00e4t Heidelberg", "aff_domain": ";uni-heidelberg.de;;uni-heidelberg.de", "position": ";Undergrad student;;Full Professor", "bibtex": "@inproceedings{\nalmasian2023cqe,\ntitle={{CQE}: A Comprehensive Quantity Extractor},\nauthor={Satya Almasian and Vivian Kazakova and Philipp G{\\\"o}ldner and Michael Gertz},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=CPBEn5mGle}\n}", "github": "", "project": "", "reviewers": "vmbW;H5PX;5wNU", "site": "https://openreview.net/forum?id=CPBEn5mGle", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;3;4", "excitement": "3;4;5", "reproducibility": "4;4;4", "correctness": "3;4;5", "rating_avg": 5.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 4.0, "reproducibility_avg": 4.0, "correctness_avg": 4.0, "replies_avg": 7, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0000-0003-4530-6110", "linkedin": ";vivian-kazakova-04b393255?lipi=urn%3Ali%3Apage%3Ad_flagship3_profile_view_base_contact_details%3BuDlCX7qSQAiy1gtkhfXsYw%3D%3D;;michael-gertz/", "aff_unique_index": "0;0", "aff_unique_norm": "Ruprecht-Karls-Universit\u00e4t Heidelberg", "aff_unique_dep": "", "aff_unique_url": "https://www.uni-heidelberg.de/", "aff_unique_abbr": "Uni Heidelberg", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Germany" }, { "id": "CQgmBmRBMb", "title": "Don\u2019t Add, don\u2019t Miss: Effective Content Preserving Generation from Pre-Selected Text Spans", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "The recently introduced Controlled Text Reduction (CTR) task isolates the text generation step within typical summarization-style tasks. It does so by challenging models to generate coherent text conforming to pre-selected content within the input text (``highlights''). \n This framing enables increased modularity in summarization-like tasks, allowing to couple a single CTR model with various content-selection setups and modules. \n However, there are currently no reliable CTR models, while the performance of the existing baseline for the task is mediocre, falling short of practical utility.\n Here, we address this gap by introducing a high-quality, open-source CTR model that tackles two prior key limitations: inadequate enforcement of the content-preservation constraint, and suboptimal silver training data. \n Addressing these, we amplify the content-preservation constraint in both training, via RL, and inference, via a controlled decoding strategy. 
\n Further, we substantially improve the silver training data quality via GPT-4 distillation. \n Overall, pairing the distilled dataset with the highlight-adherence strategies yields marked gains over the current baseline, of up to 30 ROUGE-L points, providing a reliable CTR model for downstream use.", "keywords": "NLG;Controlled Generation;RL;Controlled Decoding;Distillation", "primary_area": "", "supplementary_material": "", "author": "Aviv Slobodkin;Avi Caciularu;Eran Hirsch;Ido Dagan", "authorids": "~Aviv_Slobodkin2;~Avi_Caciularu1;~Eran_Hirsch1;~Ido_Dagan1", "gender": "M;M;M;M", "homepage": "https://lovodkin93.github.io/;http://aviclu.github.io/;https://eranhirs.github.io/;http://u.cs.biu.ac.il/~dagan/", "dblp": "290/2100.html;https://dblp.uni-trier.de/pid/207/8509;302/4300.html;95/284", "google_scholar": "oAy77cgAAAAJ;https://scholar.google.co.il/citations?user=fPG_0aQAAAAJ;GPsTrDEAAAAJ;https://scholar.google.com.tw/citations?user=YzGAGtoAAAAJ", "or_profile": "~Aviv_Slobodkin2;~Avi_Caciularu1;~Eran_Hirsch1;~Ido_Dagan1", "aff": "Bar-Ilan University;Google;Bar-Ilan University;Bar-Ilan University", "aff_domain": "biu.ac.il;google.com;biu.ac.il;biu.ac.il", "position": "PhD student;Researcher;PhD student;Full Professor", "bibtex": "@inproceedings{\nslobodkin2023dont,\ntitle={Don{\\textquoteright}t Add, don{\\textquoteright}t Miss: Effective Content Preserving Generation from Pre-Selected Text Spans},\nauthor={Aviv Slobodkin and Avi Caciularu and Eran Hirsch and Ido Dagan},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=CQgmBmRBMb}\n}", "github": "", "project": "", "reviewers": "vhDS;j7hm;AKMy", "site": "https://openreview.net/forum?id=CQgmBmRBMb", "pdf_size": 0, "rating": "3;3;3", "confidence": "1;3;3", "excitement": "4;2;4", "reproducibility": "2;3;3", "correctness": "4;3;4", "rating_avg": 3.0, "confidence_avg": 2.3333333333333335, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 2.6666666666666665, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;", "linkedin": "aviv-slobodkin-73926515a/;avicaciularu/;;", "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Bar-Ilan University;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.biu.ac.il;https://www.google.com", "aff_unique_abbr": "BIU;Google", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "Israel;United States" }, { "id": "CblASBV3d4", "title": "\"Are Your Explanations Reliable?\" Investigating the Stability of LIME in Explaining Text Classifiers by Marrying XAI and Adversarial Attack", "track": "main", "status": "Long Main", "tldr": "", "abstract": "LIME has emerged as one of the most commonly referenced tools in explainable AI (XAI) frameworks that is integrated into critical machine learning applications (e.g., healthcare and finance). However, its stability remains little explored, especially in the context of text data, due to the unique text-space constraints. To address these challenges, in this paper, we first evaluate the inherent instability of LIME on text data to establish a baseline, and then propose a novel algorithm XAIFooler to perturb text inputs and manipulate explanations that casts investigation on the stability of LIME as a text perturbation optimization problem. 
XAIFooler conforms to the constraints to preserve text semantics and original prediction with small perturbations, and introduces Rank-biased Overlap (RBO) as a key part to guide the optimization of XAIFooler that satisfies all the requirements for explanation similarity measure. Extensive experiments on real-world text datasets demonstrate that XAIFooler significantly outperforms all baselines by large margins in its ability to manipulate LIME's explanations with high semantic preservability.", "keywords": "Interpretability;Stability;Robustness;Explainability", "primary_area": "", "supplementary_material": "", "author": "Christopher Burger;Lingwei Chen;Thai Le", "authorids": "~Christopher_Burger1;~Lingwei_Chen1;~Thai_Le1", "gender": ";;", "homepage": ";https://lgchen.org/;https://lethaiq.github.io/tql3/", "dblp": ";169/7444;03/9889", "google_scholar": ";CnKs0mEAAAAJ;Fd8K7kAAAAAJ", "or_profile": "~Christopher_Burger1;~Lingwei_Chen1;~Thai_Le1", "aff": "University of Mississippi;Wright State University;University of Mississippi", "aff_domain": "olemiss.edu;wright.edu;olemiss.edu", "position": "MS student;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nburger2023are,\ntitle={''Are Your Explanations Reliable?'' Investigating the Stability of {LIME} in Explaining Text Classifiers by Marrying {XAI} and Adversarial Attack},\nauthor={Christopher Burger and Lingwei Chen and Thai Le},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=CblASBV3d4}\n}", "github": "", "project": "", "reviewers": "pyA2;bxaW;BX5w", "site": "https://openreview.net/forum?id=CblASBV3d4", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;2;3", "excitement": "2;4;2", "reproducibility": "5;4;3", "correctness": "3;4;3", "rating_avg": 4.0, "confidence_avg": 3.0, "excitement_avg": 2.6666666666666665, "reproducibility_avg": 4.0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0009-0008-7942-3263;0000-0003-1550-6170;0000-0001-9632-6870", "linkedin": ";;", "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Mississippi;Wright State University", "aff_unique_dep": ";", "aff_unique_url": "https://www.olemiss.edu;https://www.wright.edu", "aff_unique_abbr": "UM;WSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "Cc5yhA1PrC", "title": "A Joint Matrix Factorization Analysis of Multilingual Representations", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "We present an analysis tool based on joint matrix factorization for comparing latent representations of multilingual and monolingual models. An alternative to probing, this tool allows us to analyze multiple sets of representations in a joint manner. Using this tool, we study to what extent and how morphosyntactic features are reflected in the representations learned by multilingual pre-trained models. We conduct a large-scale empirical study of over 33 languages and 17 morphosyntactic categories. Our findings demonstrate variations in the encoding of morphosyntactic information across upper and lower layers, with category-specific differences influenced by language properties. Hierarchical clustering of the factorization outputs yields a tree structure that is related to phylogenetic trees manually crafted by linguists. 
Moreover, we find the factorization outputs exhibit strong associations with performance observed across different cross-lingual tasks. \nWe release our code to facilitate future research.", "keywords": "Representation analysis;Multilingual pre-trained models;Matrix factorization;Morphosyntactic features", "primary_area": "", "supplementary_material": "", "author": "Zheng Zhao;Yftah Ziser;Bonnie L. Webber;Shay B Cohen", "authorids": "~Zheng_Zhao2;~Yftah_Ziser1;~Bonnie_L._Webber1;~Shay_B_Cohen1", "gender": "M;M;F;M", "homepage": "http://www.inf.ed.ac.uk/people/students/Zheng_Zhao.html;https://yftah89.github.io/;;http://homepages.inf.ed.ac.uk/scohen", "dblp": "75/6680-5;188/6096.html;95/4733;04/5629", "google_scholar": "UO0MJeQAAAAJ;https://scholar.google.co.il/citations?user=37SMCrsAAAAJ;https://scholar.google.com/citations?hl=en;", "or_profile": "~Zheng_Zhao2;~Yftah_Ziser1;~Bonnie_L._Webber1;~Shay_B_Cohen1", "aff": "University of Edinburgh, University of Edinburgh;University of Edinburgh;Edinburgh University, University of Edinburgh;University of Edinburgh", "aff_domain": "ed.ac.uk;edinburgh.org;inf.ed.ac.uk;ed.ac.uk", "position": "PhD student;Postdoc;Emeritus;Reader", "bibtex": "@inproceedings{\nzhao2023a,\ntitle={A Joint Matrix Factorization Analysis of Multilingual Representations},\nauthor={Zheng Zhao and Yftah Ziser and Bonnie L. Webber and Shay B Cohen},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=Cc5yhA1PrC}\n}", "github": "", "project": "", "reviewers": "ETeL;3nrq;85Fw", "site": "https://openreview.net/forum?id=Cc5yhA1PrC", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;1;3", "excitement": "3;3;4", "reproducibility": "5;3;3", "correctness": "4;3;3", "rating_avg": 4.0, "confidence_avg": 2.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 10, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0009-0002-6228-9471;;0000-0003-4753-8353", "linkedin": ";;bonnie-webber-0655834/;", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Edinburgh", "aff_unique_dep": "", "aff_unique_url": "https://www.ed.ac.uk", "aff_unique_abbr": "Edinburgh", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United Kingdom" }, { "id": "CdcdyN4cvL", "title": "Improving Multi-Criteria Chinese Word Segmentation through Learning Sentence Representation", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "Recent Chinese word segmentation (CWS) models have shown competitive performance with pre-trained language models' knowledge. However, these models tend to learn the segmentation knowledge through in-vocabulary words rather than understanding the meaning of the entire context. To address this issue, we introduce a context-aware approach that incorporates unsupervised sentence representation learning over different dropout masks into the multi-criteria training framework. We demonstrate that our approach reaches state-of-the-art (SoTA) performance on F1 scores for six of the nine CWS benchmark datasets and out-of-vocabulary (OOV) recalls for eight of nine. 
Further experiments discover that substantial improvements can be brought with various sentence representation objectives.", "keywords": "Chinese Word Segmentation", "primary_area": "", "supplementary_material": "", "author": "Chun Yi Lin;Ying-Jia Lin;CHIA-JEN YEH;Yi-Ting Li;Ching Wen Yang;Hung-Yu Kao", "authorids": "~Chun_Yi_Lin1;~Ying-Jia_Lin1;~CHIA-JEN_YEH1;~Yi-Ting_Li1;~Ching_Wen_Yang1;~Hung-Yu_Kao1", "gender": "M;M;M;M;F;M", "homepage": ";https://mcps5601.github.io/about/;https://aidenzich.github.io/#/index;;;http://140.116.245.107/advisor.html", "dblp": ";257/6587;;;71/4577;64/5833.html", "google_scholar": "https://scholar.google.com.tw/citations?user=8I_uKDAAAAAJ;TM4JxJkAAAAJ;;;https://scholar.google.com/citations?view_op=list_works;https://scholar.google.com.tw/citations?user=X5Is2lAAAAAJ", "or_profile": "~Chun_Yi_Lin1;~Ying-Jia_Lin1;~CHIA-JEN_YEH1;~Yi-Ting_Li1;~Ching_Wen_Yang1;~Hung-Yu_Kao1", "aff": "National Cheng Kung University;National Cheng Kung University;National Cheng Kung University;National Cheng Kung University;National Cheng Kung University;CSIE", "aff_domain": "ncku.edu.tw;ncku.edu.tw;ncku.edu.tw;ncku.edu.tw;ncku.edu.tw;csie.ncku.edu.tw", "position": "MS student;PhD student;MS student;MS student;MS student;Full Professor", "bibtex": "@inproceedings{\nlin2023improving,\ntitle={Improving Multi-Criteria Chinese Word Segmentation through Learning Sentence Representation},\nauthor={Chun Yi Lin and Ying-Jia Lin and CHIA-JEN YEH and Yi-Ting Li and Ching Wen Yang and Hung-Yu Kao},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=CdcdyN4cvL}\n}", "github": "", "project": "", "reviewers": "hrZu;9axb;kPXV", "site": "https://openreview.net/forum?id=CdcdyN4cvL", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;3;4", "excitement": "3;3;3", "reproducibility": "5;4;3", "correctness": "4;3;4", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.0, "reproducibility_avg": 4.0, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-2582-2356;0000-0003-4347-0232;;;0000-0002-3825-9638;0000-0002-8890-8544", "linkedin": "%E6%9E%97-%E5%B3%BB%E6%AF%85-7ba67b268/;ying-jia-lin-0a1b1413b/;;yi-ting-li-38a0b4232/;chingwenyang-06102020/;", "aff_unique_index": "0;0;0;0;0;1", "aff_unique_norm": "National Cheng Kung University;College of Computer Science and Information Engineering", "aff_unique_dep": ";", "aff_unique_url": "https://www.ncku.edu.tw;", "aff_unique_abbr": "NCKU;CSIE", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Taiwan;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China;" }, { "id": "CfJiBuysQQ", "title": "CLEVR-Implicit: A Diagnostic Dataset for Implicit Reasoning in Referring Expression Comprehension", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Recently, pre-trained vision-language (VL) models have achieved remarkable success in various cross-modal tasks, including referring expression comprehension (REC). These models are pre-trained on the large-scale image-text pairs to learn the alignment between words in textual descriptions and objects in the corresponding images and then fine-tuned on downstream tasks. However, the performance of VL models is hindered when dealing with implicit text, which describes objects through comparisons between two or more objects rather than explicitly mentioning them. 
This is because the models struggle to align the implicit text with the objects in the images. To address the challenge, we introduce CLEVR-Implicit, a dataset consisting of synthetic images and corresponding two types of implicit text for the REC task. Additionally, to enhance the performance of VL models on implicit text, we propose a method called Transforming Implicit text into Explicit text (TIE), which enables VL models to reason with the implicit text. TIE consists of two modules: (1) the prompt design module builds prompts for implicit text by adding masked tokens, and (2) the cloze procedure module fine-tunes the prompts by utilizing masked language modeling (MLM) to predict the explicit words with the implicit prompts. Experimental results on our dataset demonstrate a significant improvement of 37.94\\% in the performance of VL models on implicit text after employing our TIE method.", "keywords": "referring expression comprehension;implicit reasoning;prompt tuning", "primary_area": "", "supplementary_material": "", "author": "Jingwei Zhang;Xin Wu;Yi Cai", "authorids": "~Jingwei_Zhang12;~Xin_Wu4;~Yi_Cai1", "gender": "M;M;M", "homepage": "https://github.com/CloudZjw;;http://www2.scut.edu.cn/sse/2018/0615/c16788a270751/page.htm", "dblp": ";13/5235;58/3467-1.html", "google_scholar": ";https://scholar.google.com.hk/citations?user=hVyuW_8AAAAJ;https://scholar.google.com/citations?hl=zh-CN", "or_profile": "~Jingwei_Zhang12;~Xin_Wu4;~Yi_Cai1", "aff": "South China University of Technology;South China University of Technology;South China University of Technology", "aff_domain": "scut.edu.cn;scut.edu.cn;scut.edu.cn", "position": "MS student;PhD student;Full Professor", "bibtex": "@inproceedings{\nzhang2023clevrimplicit,\ntitle={{CLEVR}-Implicit: A Diagnostic Dataset for Implicit Reasoning in Referring Expression Comprehension},\nauthor={Jingwei Zhang and Xin Wu and Yi Cai},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=CfJiBuysQQ}\n}", "github": "", "project": "", "reviewers": "rgpy;WAk9;Gmcn;avEK", "site": "https://openreview.net/forum?id=CfJiBuysQQ", "pdf_size": 0, "rating": "4;4;4;4", "confidence": "3;3;3;4", "excitement": "4;4;3;3", "reproducibility": "4;3;4;3", "correctness": "4;4;4;2", "rating_avg": 4.0, "confidence_avg": 3.25, "excitement_avg": 3.5, "reproducibility_avg": 3.5, "correctness_avg": 3.5, "replies_avg": 14, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0002-1767-789X", "linkedin": ";;", "aff_unique_index": "0;0;0", "aff_unique_norm": "South China University of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.scut.edu.cn", "aff_unique_abbr": "SCUT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "CgAfbI4kGS", "title": "CompleQA: Benchmarking the Impacts of Knowledge Graph Completion Methods on Question Answering", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "How much success in Knowledge Graph Completion (KGC) would translate into the performance enhancement in downstream tasks is an important question that has not been studied in depth. In this paper, we introduce a novel benchmark, namely CompleQA, to comprehensively assess the influence of representative KGC methods on Knowledge Graph Question Answering (KGQA), one of the most important downstream applications. 
This benchmark includes a knowledge graph with 3 million triplets across 5 distinct domains, coupled with over 5000 question-answering pairs and a completion dataset that is well-aligned with these questions. \nOur evaluation of four well-known KGC methods in combination with two state-of-the-art KGQA systems shows that effective KGC can significantly mitigate the impact of knowledge graph incompleteness on question-answering performance. Surprisingly, we also find that the best-performing KGC method(s) does not necessarily lead to the best QA results, underscoring the need to consider downstream applications when doing KGC.", "keywords": "knowledge graph;link prediction;question answering", "primary_area": "", "supplementary_material": "", "author": "Donghan Yu;Yu Gu;Chenyan Xiong;Yiming Yang", "authorids": "~Donghan_Yu2;~Yu_Gu5;~Chenyan_Xiong1;~Yiming_Yang1", "gender": "M;M;M;F", "homepage": ";http://entslscheia.github.io;https://www.cs.cmu.edu/~cx/;http://www.cs.cmu.edu/~yiming/", "dblp": "204/0106;15/4208-16;18/10886;25/1666", "google_scholar": "KlwvYcEAAAAJ;c5RwjjcAAAAJ;E9BaEBYAAAAJ;MlZq4XwAAAAJ", "or_profile": "~Donghan_Yu2;~Yu_Gu5;~Chenyan_Xiong1;~Yiming_Yang1", "aff": "Carnegie Mellon University;Ohio State University;Microsoft Research;School of Computer Science, Carnegie Mellon University", "aff_domain": "cmu.edu;osu.edu;research.microsoft.com;cs.cmu.edu", "position": "PhD student;PhD student;Principal Researcher;Full Professor", "bibtex": "@inproceedings{\nyu2023compleqa,\ntitle={Comple{QA}: Benchmarking the Impacts of Knowledge Graph Completion Methods on Question Answering},\nauthor={Donghan Yu and Yu Gu and Chenyan Xiong and Yiming Yang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=CgAfbI4kGS}\n}", "github": "", "project": "", "reviewers": "81Bu;TkCf;kWfn;dwAq", "site": "https://openreview.net/forum?id=CgAfbI4kGS", "pdf_size": 0, "rating": "3;3;3;3", "confidence": "4;4;4;4", "excitement": "2;4;3;2", "reproducibility": "4;4;4;4", "correctness": "2;4;3;3", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 2.75, "reproducibility_avg": 4.0, "correctness_avg": 3.0, "replies_avg": 6, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0000-0001-8322-607X", "linkedin": ";;;yiming-yang-24100924/", "aff_unique_index": "0;1;2;0", "aff_unique_norm": "Carnegie Mellon University;Ohio State University;Microsoft", "aff_unique_dep": ";;Microsoft Research", "aff_unique_url": "https://www.cmu.edu;https://www.osu.edu;https://www.microsoft.com/en-us/research", "aff_unique_abbr": "CMU;OSU;MSR", "aff_campus_unique_index": "1", "aff_campus_unique": ";Pittsburgh", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "Cib0JSAVwW", "title": "Language-Agnostic Bias Detection in Language Models with Bias Probing", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Pretrained language models (PLMs) are key components in NLP, but they contain strong social biases. Quantifying these biases is challenging because current methods focusing on fill-the-mask objectives are sensitive to slight changes in input. To address this, we propose a bias probing technique called LABDet, for evaluating social bias in PLMs with a robust and language-agnostic method. 
For nationality as a case study, we show that LABDet\t``surfaces'' nationality bias by training a classifier on top of a frozen PLM on non-nationality sentiment detection. We find consistent patterns of nationality bias across monolingual PLMs in six languages that align with historical and political context. We also show for English BERT that bias surfaced by LABDet correlates well with bias in the pretraining data; thus, our work is one of the few studies that directly links pretraining data to PLM behavior. Finally, we verify LABDet's reliability and applicability to different templates and languages through an extensive set of robustness checks. We publicly share our code and dataset in https://github.com/akoksal/LABDet.", "keywords": "bias detection;nationality bias;multilinguality", "primary_area": "", "supplementary_material": "", "author": "Abdullatif K\u00f6ksal;Omer Faruk Yalcin;Ahmet Akbiyik;M. Tahir Kilavuz;Anna Korhonen;Hinrich Schuetze", "authorids": "~Abdullatif_K\u00f6ksal1;~Omer_Faruk_Yalcin1;~Ahmet_Akbiyik1;~M._Tahir_Kilavuz1;~Anna_Korhonen1;~Hinrich_Schuetze3", "gender": ";M;;;;M", "homepage": "https://akoksal.com/;https://www.omerfyalcin.com/;;https://avesis.marmara.edu.tr/mkilavuz;https://sites.google.com/site/annakorhonen/;https://www.cis.uni-muenchen.de/schuetze/", "dblp": "245/9493;267/0225;;;14/6532;s/HinrichSchutze", "google_scholar": "fxf_uAwAAAAJ;SxJ94kgAAAAJ;;;https://scholar.google.co.uk/citations?user=SCoVoOYAAAAJ;", "or_profile": "~Abdullatif_K\u00f6ksal1;~Omer_Faruk_Yalcin1;~Ahmet_Akbiyik1;~M._Tahir_Kilavuz1;~Anna_Korhonen1;~Hinrich_Schuetze3", "aff": "Ludwig-Maximilians-Universit\u00e4t M\u00fcnchen;University of Massachusetts at Amherst;;Marmara University;University of Cambridge;Center for Information and Language Processing", "aff_domain": "lmu.de;umass.edu;;marmara.edu.tr;cam.ac.uk;lmu.de", "position": "PhD student;Lecturer;;Assistant Professor;Professor;Full Professor", "bibtex": "@inproceedings{\nk{\\\"o}ksal2023languageagnostic,\ntitle={Language-Agnostic Bias Detection in Language Models with Bias Probing},\nauthor={Abdullatif K{\\\"o}ksal and Omer Faruk Yalcin and Ahmet Akbiyik and M. 
Tahir Kilavuz and Anna Korhonen and Hinrich Schuetze},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=Cib0JSAVwW}\n}", "github": "", "project": "", "reviewers": "W1HN;TUSA;qjcz", "site": "https://openreview.net/forum?id=Cib0JSAVwW", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;3;4", "excitement": "4;3;3", "reproducibility": "5;3;4", "correctness": "4;3;4", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.0, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;", "linkedin": "abdullatifkoksal;ofyalcin/;;;anna-korhonen-534a9b5/;", "aff_unique_index": "0;1;2;3;4", "aff_unique_norm": "Ludwig-Maximilians-Universit\u00e4t M\u00fcnchen;University of Massachusetts Amherst;Marmara University;University of Cambridge;Center for Information and Language Processing", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.lmu.de;https://www.umass.edu;https://www.marmara.edu.tr;https://www.cam.ac.uk;", "aff_unique_abbr": "LMU;UMass Amherst;MU;Cambridge;", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Amherst;Cambridge", "aff_country_unique_index": "0;1;2;3", "aff_country_unique": "Germany;United States;T\u00fcrkiye;United Kingdom;" }, { "id": "CihCvXPiEG", "title": "Re-weighting Tokens: A Simple and Effective Active Learning Strategy for Named Entity Recognition", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "Active learning, a widely adopted technique for enhancing machine learning models in text and image classification tasks with limited annotation resources, has received relatively little attention in the domain of Named Entity Recognition (NER). The challenge of data imbalance in NER has hindered the effectiveness of active learning, as sequence labellers lack sufficient learning signals. To address these challenges, this paper presents a novel re-weighting-based active learning strategy that assigns dynamic smoothing weights to individual tokens. This adaptable strategy is compatible with various token-level acquisition functions and contributes to the development of robust active learners. Experimental results on multiple corpora demonstrate the substantial performance improvement achieved by incorporating our re-weighting strategy into existing acquisition functions, validating its practical efficacy. 
We will release our implementation upon the publication of this paper.", "keywords": "Named entity recognition;active learning", "primary_area": "", "supplementary_material": "", "author": "Haocheng Luo;Wei Tan;Ngoc Dang Nguyen;Lan Du", "authorids": "~Haocheng_Luo1;~Wei_Tan2;~Ngoc_Dang_Nguyen1;~Lan_Du1", "gender": ";M;M;M", "homepage": "http://monash.edu;https://davidtw999.github.io/;;https://research.monash.edu/en/persons/lan-du", "dblp": ";73/6520;333/0601;98/1504-2", "google_scholar": ";5c6UyZwAAAAJ;https://scholar.google.com.au/citations?user=iv5B-RMAAAAJ;https://scholar.google.com.au/citations?user=HtiTsgwAAAAJ", "or_profile": "~Haocheng_Luo1;~Wei_Tan2;~Ngoc_Dang_Nguyen1;~Lan_Du1", "aff": "Monash University;Monash University;Monash University;Monash University", "aff_domain": "monash.edu;monash.edu;monash.edu;monash.edu", "position": "MS student;PhD student;PhD student;Senior Lecturer", "bibtex": "@inproceedings{\nluo2023reweighting,\ntitle={Re-weighting Tokens: A Simple and Effective Active Learning Strategy for Named Entity Recognition},\nauthor={Haocheng Luo and Wei Tan and Ngoc Dang Nguyen and Lan Du},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=CihCvXPiEG}\n}", "github": "", "project": "", "reviewers": "xutz;Mdb7;9oCT", "site": "https://openreview.net/forum?id=CihCvXPiEG", "pdf_size": 0, "rating": "3;3;3", "confidence": "2;5;5", "excitement": "3;3;4", "reproducibility": "5;3;5", "correctness": "3;3;3", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.333333333333333, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-9778-9970;;0000-0002-9925-0223", "linkedin": ";davidtw999;;", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Monash University", "aff_unique_dep": "", "aff_unique_url": "https://www.monash.edu", "aff_unique_abbr": "Monash", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Australia" }, { "id": "Ck3JPqoEeE", "title": "Linguistically Motivated Sign Language Segmentation", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Sign language segmentation is a crucial task in sign language processing systems. It enables downstream tasks such as sign recognition, transcription, and machine translation.\nIn this work, we consider two kinds of segmentation: segmentation into individual signs and segmentation into \\textit{phrases}, larger units comprising several signs. We propose a novel approach to jointly model these two tasks.\n\nOur method is motivated by linguistic cues observed in sign language corpora. We replace the predominant IO tagging scheme with BIO tagging to account for continuous signing. Given that prosody plays a significant role in phrase boundaries, we explore the use of optical flow features. We also provide an extensive analysis of hand shapes and 3D hand normalization.\n\nWe find that introducing BIO tagging is necessary to model sign boundaries. 
\nExplicitly encoding prosody by optical flow improves segmentation in shallow models, but its contribution is negligible in deeper models.\nCareful tuning of the decoding algorithm atop the models further improves the segmentation quality.\n\nWe demonstrate that our final models generalize to out-of-domain video content in a different signed language, even under a zero-shot setting.\nWe observe that including optical flow and 3D hand normalization enhances the robustness of the model in this context.", "keywords": "sign language;sign language segmentation", "primary_area": "", "supplementary_material": "", "author": "Amit Moryossef;Zifan Jiang;Mathias M\u00fcller;Sarah Ebling;Yoav Goldberg", "authorids": "~Amit_Moryossef1;~Zifan_Jiang1;~Mathias_M\u00fcller1;~Sarah_Ebling1;~Yoav_Goldberg1", "gender": "M;M;M;F;M", "homepage": ";https://www.cl.uzh.ch/en/research-groups/accessibility/team/current-members/jiang.html;https://www.cl.uzh.ch/de/people/team/compling/mmueller.html;https://www.cl.uzh.ch/de/people/team/compling/ebling.html;https://www.cs.biu.ac.il/~yogo", "dblp": "236/5834;;07/9808-2;147/3601;68/5296", "google_scholar": "https://scholar.google.co.il/citations?user=Aaj_RBEAAAAJ;o22_kAgAAAAJ;kcpNn2EAAAAJ;;https://scholar.google.co.il/citations?user=0rskDKgAAAAJ", "or_profile": "~Amit_Moryossef1;~Zifan_Jiang1;~Mathias_M\u00fcller1;~Sarah_Ebling1;~Yoav_Goldberg1", "aff": "Bar-Ilan University;University of Zurich;University of Zurich;University of Zurich;Allen Institute for Artificial Intelligence", "aff_domain": "biu.ac.il;uzh.ch;cl.uzh.ch;uzh.ch;allenai.org", "position": "PhD student;PhD student;Postdoc;Full Professor;Principal Researcher", "bibtex": "@inproceedings{\nmoryossef2023linguistically,\ntitle={Linguistically Motivated Sign Language Segmentation},\nauthor={Amit Moryossef and Zifan Jiang and Mathias M{\\\"u}ller and Sarah Ebling and Yoav Goldberg},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=Ck3JPqoEeE}\n}", "github": "", "project": "", "reviewers": "7cdW;ha7w;i9D9", "site": "https://openreview.net/forum?id=Ck3JPqoEeE", "pdf_size": 0, "rating": "3;3;3", "confidence": "2;4;3", "excitement": "2;3;2", "reproducibility": "4;5;5", "correctness": "2;4;2", "rating_avg": 3.0, "confidence_avg": 3.0, "excitement_avg": 2.3333333333333335, "reproducibility_avg": 4.666666666666667, "correctness_avg": 2.6666666666666665, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-4403-4953;0000-0002-8248-199X;0000-0001-6511-5085;", "linkedin": ";;;sarah-ebling-491b52a1/;", "aff_unique_index": "0;1;1;1;2", "aff_unique_norm": "Bar-Ilan University;University of Zurich;Allen Institute for Artificial Intelligence", "aff_unique_dep": ";;", "aff_unique_url": "https://www.biu.ac.il;https://www.unizh.ch;https://allenai.org", "aff_unique_abbr": "BIU;UZH;AI2", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1;2", "aff_country_unique": "Israel;Switzerland;United States" }, { "id": "CkvfJdb7mw", "title": "Cultural Compass: Predicting Transfer Learning Success in Offensive Language Detection with Cultural Features", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "The increasing ubiquity of language technology necessitates a shift towards considering cultural diversity in the machine learning realm, particularly for subjective tasks that rely heavily on cultural nuances, such as Offensive 
Language Detection (OLD). \nCurrent understanding underscores that these tasks are substantially influenced by cultural values, however, a notable gap exists in determining if cultural features can accurately predict the success of cross-cultural transfer learning for such subjective tasks. \nAddressing this, our study delves into the intersection of cultural features and transfer learning effectiveness. The findings reveal that cultural value surveys indeed possess a predictive power for cross-cultural transfer learning success in OLD tasks, and that it can be further improved using offensive word distance. \nBased on these results, we advocate for the integration of cultural information into datasets. \nAdditionally, we recommend leveraging data sources rich in cultural information, such as surveys, to enhance cultural adaptability. \nOur research signifies a step forward in the quest for more inclusive, culturally sensitive language technologies.", "keywords": "culture;transfer learning;offensive language detection", "primary_area": "", "supplementary_material": "", "author": "Li Zhou;Antonia Karamolegkou;Wenyu Chen;Daniel Hershcovich", "authorids": "~Li_Zhou4;~Antonia_Karamolegkou1;~Wenyu_Chen3;~Daniel_Hershcovich1", "gender": "F;F;M;M", "homepage": "https://lizhou21.github.io/;https://antoniakrm.github.io/;;http://danielhers.github.io/", "dblp": ";294/2937;55/6538;145/9324", "google_scholar": "https://scholar.google.com.hk/citations?user=BLWhoYcAAAAJ;hibFL4QAAAAJ;;479qIucAAAAJ", "or_profile": "~Li_Zhou4;~Antonia_Karamolegkou1;~Wenyu_Chen3;~Daniel_Hershcovich1", "aff": "University of Electronic Science and Technology of China;University of Copenhagen;University of Electronic Science and Technology of China;University of Copenhagen", "aff_domain": "uestc.edu.cn;diku.dk;uestc.edu.cn;ku.dk", "position": "PhD student;PhD student;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nzhou2023cultural,\ntitle={Cultural Compass: Predicting Transfer Learning Success in Offensive Language Detection with Cultural Features},\nauthor={Li Zhou and Antonia Karamolegkou and Wenyu Chen and Daniel Hershcovich},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=CkvfJdb7mw}\n}", "github": "", "project": "", "reviewers": "cZgm;miiQ;CaTP", "site": "https://openreview.net/forum?id=CkvfJdb7mw", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;3;4", "excitement": "2;3;4", "reproducibility": "3;3;3", "correctness": "3;4;3", "rating_avg": 3.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.0, "reproducibility_avg": 3.0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-6458-0986;0000-0002-9933-8014;0000-0002-3966-8708", "linkedin": ";;;danielhershcovich", "aff_unique_index": "0;1;0;1", "aff_unique_norm": "University of Electronic Science and Technology of China;University of Copenhagen", "aff_unique_dep": ";", "aff_unique_url": "https://www.uestc.edu.cn;https://www.ku.dk", "aff_unique_abbr": "UESTC;UCPH", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;1", "aff_country_unique": "China;Denmark" }, { "id": "CluDBdRhUp", "title": "Probing Representations for Document-level Event Extraction", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "The probing classifiers framework has been employed for interpreting deep neural network
models for a variety of natural language processing (NLP) applications. Studies, however, have largely focused on sentence-level NLP tasks. This work is the first to apply the probing paradigm to representations learned for document-level information extraction (IE). We designed eight embedding probes to analyze surface, semantic, and event-understanding capabilities relevant to document-level event extraction. We apply them to the representations acquired by learning models from three different LLM-based document-level IE approaches on a standard dataset. We found that trained encoders from these models yield embeddings that can modestly improve argument detections and labeling but only slightly enhance event-level tasks, albeit with trade-offs in information helpful for coherence and event-type prediction. We further found that encoder models struggle with document length and cross-sentence discourse.", "keywords": "Information Extraction;Interpretability;Embedding;Probing;Document-Level Information Extraction", "primary_area": "", "supplementary_material": "", "author": "Barry Wang;Xinya Du;Claire Cardie", "authorids": "~Barry_Wang1;~Xinya_Du1;~Claire_Cardie1", "gender": "M;M;F", "homepage": "https://BarryW.XYZ;https://xinyadu.github.io;https://www.cs.cornell.edu/home/cardie/", "dblp": "320/5158;200/8114;c/ClaireCardie", "google_scholar": "2S7N5bQAAAAJ;R-lKQqkAAAAJ;ex9BQiIAAAAJ", "or_profile": "~Barry_Wang1;~Xinya_Du1;~Claire_Cardie1", "aff": "Cornell University;University of Texas at Dallas;Cornell University", "aff_domain": "cornell.edu;utdallas.edu;cornell.edu", "position": "MS student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nwang2023probing,\ntitle={Probing Representations for Document-level Event Extraction},\nauthor={Barry Wang and Xinya Du and Claire Cardie},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=CluDBdRhUp}\n}", "github": "", "project": "", "reviewers": "FqJd;QFh8;mJYE", "site": "https://openreview.net/forum?id=CluDBdRhUp", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;5;4", "excitement": "2;3;3", "reproducibility": "3;4;5", "correctness": "4;3;3", "rating_avg": 3.0, "confidence_avg": 4.333333333333333, "excitement_avg": 2.6666666666666665, "reproducibility_avg": 4.0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0009-0008-3810-8494;;", "linkedin": "barry-w/;;", "aff_unique_index": "0;1;0", "aff_unique_norm": "Cornell University;University of Texas at Dallas", "aff_unique_dep": ";", "aff_unique_url": "https://www.cornell.edu;https://www.utdallas.edu", "aff_unique_abbr": "Cornell;UT Dallas", "aff_campus_unique_index": "1", "aff_campus_unique": ";Dallas", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "Cn3HNSzh14", "title": "Controlling Pre-trained Language Models for Grade-Specific Text Simplification", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Text simplification systems rewrite text to make it more readable while preserving its content. However, what makes a text easy to read depends on the intended readers. Recent work has shown that pre-trained language models can simplify text using a wealth of techniques to control output simplicity, ranging from specifying only the desired reading grade level, to directly specifying low-level edit operations.
Yet it remains unclear how to set these control parameters in practice. Existing approaches set them at the corpus level, disregarding the complexity of individual inputs and considering only one level of output complexity. In this work, we conduct an empirical study to understand how different control mechanisms impact the adequacy and simplicity of text simplification systems. Based on these insights, we introduce a simple method that predicts the edit operations required for simplifying a text for a specific grade level on an instance-per-instance basis. This approach improves the quality of the simplified outputs over corpus-level search-based heuristics.", "keywords": "Controllable Text generation;Text Simplification", "primary_area": "", "supplementary_material": "", "author": "Sweta Agrawal;Marine Carpuat", "authorids": "~Sweta_Agrawal1;~Marine_Carpuat1", "gender": "F;F", "homepage": "https://sweta20.github.io/;http://www.cs.umd.edu/~marine/", "dblp": "210/7863.html;71/1827", "google_scholar": "Avsw9IkAAAAJ;iPAX6jcAAAAJ", "or_profile": "~Sweta_Agrawal1;~Marine_Carpuat1", "aff": "University of Maryland, College Park;University of Maryland, College Park", "aff_domain": "umd.edu;umd.edu", "position": "PhD student;Associate Professor", "bibtex": "@inproceedings{\nagrawal2023controlling,\ntitle={Controlling Pre-trained Language Models for Grade-Specific Text Simplification},\nauthor={Sweta Agrawal and Marine Carpuat},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=Cn3HNSzh14}\n}", "github": "", "project": "", "reviewers": "Sk1b;jeYZ;ctbg;ydY4", "site": "https://openreview.net/forum?id=Cn3HNSzh14", "pdf_size": 0, "rating": "4;4;4;4", "confidence": "4;3;4;4", "excitement": "3;3;4;4", "reproducibility": "3;4;3;3", "correctness": "2;3;4;4", "rating_avg": 4.0, "confidence_avg": 3.75, "excitement_avg": 3.5, "reproducibility_avg": 3.25, "correctness_avg": 3.25, "replies_avg": 14, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";", "linkedin": ";", "aff_unique_index": "0;0", "aff_unique_norm": "University of Maryland", "aff_unique_dep": "", "aff_unique_url": "https://www/umd.edu", "aff_unique_abbr": "UMD", "aff_campus_unique_index": "0;0", "aff_campus_unique": "College Park", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "CnLpDkgnCn", "title": "Mandarin classifier systems optimize to accommodate communicative pressures", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Previous work on noun classification implies that gender systems are inherently optimized to accommodate communicative pressures on human language learning and processing (Dye. et al 2017, 2018). They state that languages make use of either grammatical (e.g., gender) or probabilistic (pre-nominal modifiers) to smoothe the entropy of nouns in context. We show that even languages that are considered genderless, like Mandarin Chinese, possess a noun classification device that plays the same functional role as gender markers. Based on close to 1M Mandarin noun phrases extracted from the Leipzig Corpora Collection (Goldhahn et al. 2012) and their corresponding fastText embeddings (Bojanowski et al. 2016), we show that noun-classifier combinations are sensitive to same frequency, similarity, and co-occurrence interactions that structure gender systems. 
We also present the first study of the effects of the interaction between grammatical and probabilisitic noun classification.", "keywords": "Mandarin Chinese;classifiers;noun classes;noun class processing;word embeddings;mutual information;GAM", "primary_area": "", "supplementary_material": "", "author": "Yamei Wang;G\u00e9raldine Walther", "authorids": "~Yamei_Wang1;~G\u00e9raldine_Walther1", "gender": "F;F", "homepage": "https://english.gmu.edu/people/ywang78;https://linguistics.gmu.edu/people/gwalthe", "dblp": ";58/8156.html", "google_scholar": ";fMsDs2oAAAAJ", "or_profile": "~Yamei_Wang1;~G\u00e9raldine_Walther1", "aff": "George Mason University;George Mason University", "aff_domain": "gmu.edu;gmu.edu", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\nwang2023mandarin,\ntitle={Mandarin classifier systems optimize to accommodate communicative pressures},\nauthor={Yamei Wang and G{\\'e}raldine Walther},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=CnLpDkgnCn}\n}", "github": "", "project": "", "reviewers": "pvuj;igeR;DR8p", "site": "https://openreview.net/forum?id=CnLpDkgnCn", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;3;4", "excitement": "2;3;4", "reproducibility": "3;3;4", "correctness": "3;3;4", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.0, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0001-8368-7300", "linkedin": ";", "aff_unique_index": "0;0", "aff_unique_norm": "George Mason University", "aff_unique_dep": "", "aff_unique_url": "https://www.gmu.edu", "aff_unique_abbr": "GMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "CoEuk8SNI1", "title": "Enhancing Emotion Recognition in Conversation via Multi-view Feature Alignment and Memorization", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Emotion recognition in conversation (ERC) has attracted increasing attention in natural language processing community. Previous work commonly first extract semantic-view features via fine-tuning PLMs, then models context-view features based on the obtained semantic-view features by various graph neural networks. However, it is difficult to fully model interaction between utterances simply through a graph neural network and the features at semantic-view and context-view are not well aligned. Moreover, the previous parametric learning paradigm struggle to learn the patterns of tail class given fewer instances. To this end, we treat the pre-trained conversation model as a prior knowledge base and from which we elicit correlations between utterances by a probing procedure. And we adopt supervised contrastive learning to align semantic-view and context-view features, these two views of features work together in a complementary manner, contributing to ERC from distinct perspectives. Meanwhile, we propose a new semi-parametric paradigm of inferencing through memorization to solve the recognition problem of tail class samples. We consistently achieve state-of-the-art results on four widely used benchmarks. 
Extensive experiments demonstrate the effectiveness of our proposed multi-view feature alignment and memorization.", "keywords": "Emotion Recognition in Conversation;Multi-view Feature Alignment;Memorization", "primary_area": "", "supplementary_material": "", "author": "Guiyang Hou;Yongliang Shen;Wenqi Zhang;Wei Xue;Weiming Lu", "authorids": "~Guiyang_Hou1;~Yongliang_Shen1;~Wenqi_Zhang2;~Wei_Xue6;~Weiming_Lu1", "gender": ";M;;;", "homepage": ";;;;", "dblp": "362/8551;221/5612-1.html;;;", "google_scholar": ";UT3NzFAAAAAJ;;;", "or_profile": "~Guiyang_Hou1;~Yongliang_Shen1;~Wenqi_Zhang2;~Wei_Xue6;~Weiming_Lu1", "aff": ", Tsinghua University;;;;", "aff_domain": "cs.tsinghua.edu.cn;;;;", "position": "Intern;;;;", "bibtex": "@inproceedings{\nhou2023enhancing,\ntitle={Enhancing Emotion Recognition in Conversation via Multi-view Feature Alignment and Memorization},\nauthor={Guiyang Hou and Yongliang Shen and Wenqi Zhang and Wei Xue and Weiming Lu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=CoEuk8SNI1}\n}", "github": "", "project": "", "reviewers": "oCgX;Gm2P;NUwX", "site": "https://openreview.net/forum?id=CoEuk8SNI1", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;4;3", "excitement": "3;3;4", "reproducibility": "4;3;4", "correctness": "3;3;4", "rating_avg": 3.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 9, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;", "linkedin": ";;;;", "aff_unique_index": "0", "aff_unique_norm": "Tsinghua University", "aff_unique_dep": "", "aff_unique_url": "https://www.tsinghua.edu.cn", "aff_unique_abbr": "THU", "aff_country_unique_index": "0", "aff_country_unique": "China" }, { "id": "Coh1A4iSsl", "title": "Revisiting Sparse Retrieval for Few-shot Entity Linking", "track": "main", "status": "Short Main", "tldr": "", "abstract": "Entity linking aims to link ambiguous mentions to their corresponding entities in a knowledge base.\nOne of the key challenges comes from insufficient labeled data for specific domains. 
\nAlthough dense retrievers have achieved excellent performance on several benchmarks, their performance decreases significantly when only a limited amount of in-domain labeled data is available.\nIn such few-shot setting, we revisit the sparse retrieval method, and propose an ELECTRA-based keyword extractor to denoise the mention context and construct a better query expression.\nFor training the extractor, we propose a distant supervision method to automatically generate training data based on overlapping tokens between mention contexts and entity descriptions.\nExperimental results on the ZESHEL dataset demonstrate that the proposed method outperforms state-of-the-art models by a significant margin across all test domains, showing the effectiveness of keyword-enhanced sparse retrieval.", "keywords": "entity linking;sparse retrieval", "primary_area": "", "supplementary_material": "", "author": "Yulin Chen;Zhenran Xu;Baotian Hu;Min Zhang", "authorids": "~Yulin_Chen3;~Zhenran_Xu1;~Baotian_Hu1;~Min_Zhang9", "gender": "M;;M;M", "homepage": "https://github.com/LukeChen-go;;;https://zhangmin-nlp-ai.github.io/", "dblp": ";322/2310;155/1902;83/5342-5", "google_scholar": "AtlFU3kAAAAJ;1m5X_28AAAAJ;5NiJ1VoAAAAJ;https://scholar.google.com/citations?hl=zh-CN", "or_profile": "~Yulin_Chen3;~Zhenran_Xu1;~Baotian_Hu1;~Min_Zhang9", "aff": "Harbin Institute of Technology(Shenzhen);Harbin Institute of Technology, Shenzhen;Harbin Institute of Technology, Shenzhen;Harbin Institute of Technology, Shenzhen", "aff_domain": "stu.hit.edu.cn;hit.edu.cn;hhit.edu.cn;hit.edu.cn", "position": "Undergrad student;PhD student;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nchen2023revisiting,\ntitle={Revisiting Sparse Retrieval for Few-shot Entity Linking},\nauthor={Yulin Chen and Zhenran Xu and Baotian Hu and Min Zhang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=Coh1A4iSsl}\n}", "github": "", "project": "", "reviewers": "paUf;BHTM;2Pme", "site": "https://openreview.net/forum?id=Coh1A4iSsl", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;4;3", "excitement": "4;4;4", "reproducibility": "4;3;4", "correctness": "4;4;4", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 4.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-5536-806X;0000-0001-7490-684X;", "linkedin": ";zhenran-xu/;;", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Harbin Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "http://en.hhit.edu.cn/", "aff_unique_abbr": "HIT", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Shenzhen", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "CsCRTvEZg1", "title": "MISCA: A Joint Model for Multiple Intent Detection and Slot Filling with Intent-Slot Co-Attention", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "The research study of detecting multiple intents and filling slots is becoming more popular because of its relevance to complicated real-world situations. 
Recent advanced approaches, which are joint models based on graphs, might still face two potential issues: (i) the uncertainty introduced by constructing graphs based on preliminary intents and slots, which may transfer intent-slot correlation information to incorrect label node destinations, and (ii) direct incorporation of multiple intent labels for each token w.r.t. token-level intent voting might potentially lead to incorrect slot predictions, thereby hurting the overall performance. To address these two issues, we propose a joint model named MISCA. Our MISCA introduces an intent-slot co-attention mechanism and an underlying layer of label attention mechanism. These mechanisms enable MISCA to effectively capture correlations between intents and slot labels, eliminating the need for graph construction. They also facilitate the transfer of correlation information in both directions: from intents to slots and from slots to intents, through multiple levels of label-specific representations, without relying on token-level intent information. Experimental results show that MISCA outperforms previous models, achieving new state-of-the-art overall accuracy performances on two benchmark datasets MixATIS and MixSNIPS. This highlights the effectiveness of our attention mechanisms.", "keywords": "Multiple Intent Detection;Slot Filling;Intent-Slot Co-Attention;Label Attention", "primary_area": "", "supplementary_material": "", "author": "Thinh Pham;Tran Chi;Dat Quoc Nguyen", "authorids": "~Thinh_Pham2;~Tran_Chi1;~Dat_Quoc_Nguyen1", "gender": ";M;", "homepage": ";;http://datquocnguyen.github.io", "dblp": ";;23/9125", "google_scholar": ";;HVl7vyEAAAAJ", "or_profile": "~Thinh_Pham2;~Tran_Chi1;~Dat_Quoc_Nguyen1", "aff": ";Hanoi University of Science and Technology;VinAI Research, Vietnam", "aff_domain": ";hust.edu.vn;vinai.io", "position": ";Undergrad student;Senior Research Scientist", "bibtex": "@inproceedings{\npham2023misca,\ntitle={{MISCA}: A Joint Model for Multiple Intent Detection and Slot Filling with Intent-Slot Co-Attention},\nauthor={Thinh Pham and Tran Chi and Dat Quoc Nguyen},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=CsCRTvEZg1}\n}", "github": "", "project": "", "reviewers": "wfy7;beYN;eHBx", "site": "https://openreview.net/forum?id=CsCRTvEZg1", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;4;4", "excitement": "2;3;3", "reproducibility": "3;5;3", "correctness": "3;3;3", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 2.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.0, "replies_avg": 12, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": ";chi-tran-68127a222/;", "aff_unique_index": "0;1", "aff_unique_norm": "Hanoi University of Science and Technology;VinAI Research", "aff_unique_dep": ";", "aff_unique_url": "https://www.hust.edu.vn;https://www.vin.ai", "aff_unique_abbr": "HUST;VinAI", "aff_campus_unique_index": "0", "aff_campus_unique": "Hanoi;", "aff_country_unique_index": "0;0", "aff_country_unique": "Vietnam" }, { "id": "Cu4Jn4Xt22", "title": "Joint Geometrical and Statistical Domain Adaptation for Cross-domain Code Vulnerability Detection", "track": "main", "status": "Long Main", "tldr": "", "abstract": "In code vulnerability detection tasks, a detector trained on a label-rich source domain fails to provide accurate prediction on new or unseen target domains due to 
the lack of labeled training data on target domains. Previous studies mainly utilize domain adaptation to perform cross-domain vulnerability detection. But they ignore the negative effect of private semantic characteristics of the target domain for domain alignment, which easily causes the problem of negative transfer. In addition, these methods forcibly reduce the distribution discrepancy between domains and do not take into account the interference of irrelevant target instances for distributional domain alignment, which leads to the problem of excessive alignment. To address the above issues, we propose a novel cross-domain code vulnerability detection framework named MNCRI. Specifically, we introduce mutual nearest neighbor contrastive learning to align the source domain and target domain geometrically, which could align the common semantic characteristics of two domains and separate out the private semantic characteristics of each domain. Furthermore, we introduce an instance re-weighting scheme to alleviate the problem of excessive alignment. This scheme dynamically assign different weights to instances, reducing the contribution of irrelevant instances so as to achieve better domain alignment. Finally, extensive experiments demonstrate that MNCRI significantly outperforms state-of-the-art cross-domain code vulnerability detection methods by a large margin.", "keywords": "Contrastive Learning;Code Vulnerability;Cross-domain Detection", "primary_area": "", "supplementary_material": "", "author": "Qianjin Du;Shiji Zhou;Xiaohui Kuang;Gang Zhao;Jidong Zhai", "authorids": "~Qianjin_Du2;~Shiji_Zhou1;~Xiaohui_Kuang1;~Gang_Zhao6;~Jidong_Zhai1", "gender": ";M;M;M;M", "homepage": "http://www.duqlk.com;https://arnoldshijizhou.github.io;https://ieeexplore.ieee.org/author/37543548800;https://dblp.org/pid/73/860.html;http://pacman.cs.tsinghua.edu.cn/~zjd/", "dblp": ";294/8684;;;", "google_scholar": ";Do5jf8oAAAAJ;;;", "or_profile": "~Qianjin_Du2;~Shiji_Zhou1;~Xiaohui_Kuang1;~Gang_Zhao6;~Jidong_Zhai1", "aff": "Tsinghua University;Tsinghua University;;Tsinghua University;Tsinghua University", "aff_domain": "tsinghua.edu.cn;mails.tsinghua.edu.cn;;mail.tsinghua.edu.cn;tsinghua.edu.cn", "position": "PhD student;PhD student;;PhD student;Associate Professor", "bibtex": "@inproceedings{\ndu2023joint,\ntitle={Joint Geometrical and Statistical Domain Adaptation for Cross-domain Code Vulnerability Detection},\nauthor={Qianjin Du and Shiji Zhou and Xiaohui Kuang and Gang Zhao and Jidong Zhai},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=Cu4Jn4Xt22}\n}", "github": "", "project": "", "reviewers": "HWdy;f7o6;hFha", "site": "https://openreview.net/forum?id=Cu4Jn4Xt22", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;4;4", "excitement": "3;3;3", "reproducibility": "4;4;4", "correctness": "4;4;4", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.0, "reproducibility_avg": 4.0, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0009-0000-0677-7396;;;", "linkedin": ";shiji-zhou-05b766ba/;;;", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Tsinghua University", "aff_unique_dep": "", "aff_unique_url": "https://www.tsinghua.edu.cn", "aff_unique_abbr": "THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "CuI1xfhxaJ", 
"title": "Question Answering as Programming for Solving Time-Sensitive Questions", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Question answering plays a pivotal role in human daily life because it involves our acquisition of knowledge about the world. However, due to the dynamic and ever-changing nature of real-world facts, the answer can be completely different when the time constraint in the question changes. Recently, Large Language Models (LLMs) have shown remarkable intelligence in question answering, while our experiments reveal that the aforementioned problems still pose a significant challenge to existing LLMs. This can be attributed to the LLMs' inability to perform rigorous reasoning based on surface-level text semantics. To overcome this limitation, rather than requiring LLMs to directly answer the question, we propose a novel approach where we reframe the $\\textbf{Q}$uestion $\\textbf{A}$nswering task $\\textbf{a}$s $\\textbf{P}$rogramming ($\\textbf{QAaP}$). Concretely, by leveraging modern LLMs' superior capability in understanding both natural language and programming language, we endeavor to harness LLMs to represent diversely expressed text as well-structured code and select the best matching answer from multiple candidates through programming. We evaluate our QAaP framework on several time-sensitive question answering datasets and achieve decent improvement, up to $14.5$% over strong baselines.", "keywords": "large language models;reasoning;question answering", "primary_area": "", "supplementary_material": "", "author": "Xinyu Zhu;Cheng Yang;Bei Chen;Siheng Li;Jian-Guang Lou;Yujiu Yang", "authorids": "~Xinyu_Zhu2;~Cheng_Yang7;~Bei_Chen3;~Siheng_Li1;~Jian-Guang_Lou1;~Yujiu_Yang2", "gender": ";;F;M;M;M", "homepage": ";;http://ml.cs.tsinghua.edu.cn/~beichen/;;https://www.microsoft.com/en-us/research/people/jlou/;https://sites.google.com/view/iigroup-thu", "dblp": ";;;312/9450;37/1917;30/3847", "google_scholar": ";;Po65v_MAAAAJ;;alDxINIAAAAJ;4gH3sxsAAAAJ", "or_profile": "~Xinyu_Zhu2;~Cheng_Yang7;~Bei_Chen3;~Siheng_Li1;~Jian-Guang_Lou1;~Yujiu_Yang2", "aff": ";;Microsoft;Tsinghua University;Microsoft Research Asia;Tsinghua University", "aff_domain": ";;microsoft.com;tsinghua.edu.cn;microsoft.com;tsinghua.edu.cn", "position": ";;Researcher;MS student;Principal Researcher;Associate Professor", "bibtex": "@inproceedings{\nzhu2023question,\ntitle={Question Answering as Programming for Solving Time-Sensitive Questions},\nauthor={Xinyu Zhu and Cheng Yang and Bei Chen and Siheng Li and Jian-Guang Lou and Yujiu Yang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=CuI1xfhxaJ}\n}", "github": "", "project": "", "reviewers": "LQzX;NXcU;PAqi", "site": "https://openreview.net/forum?id=CuI1xfhxaJ", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;4;4", "excitement": "3;4;4", "reproducibility": "3;3;3", "correctness": "3;3;3", "rating_avg": 5.0, "confidence_avg": 4.0, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.0, "correctness_avg": 3.0, "replies_avg": 10, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;0000-0002-6427-1024", "linkedin": ";;;;;", "aff_unique_index": "0;1;0;1", "aff_unique_norm": "Microsoft;Tsinghua University", "aff_unique_dep": "Microsoft Corporation;", "aff_unique_url": "https://www.microsoft.com;https://www.tsinghua.edu.cn", "aff_unique_abbr": "Microsoft;THU", "aff_campus_unique_index": "1", 
"aff_campus_unique": ";Asia", "aff_country_unique_index": "0;1;1;1", "aff_country_unique": "United States;China" }, { "id": "Cx5vVkpsOY", "title": "Pragmatics in Language Grounding: Phenomena, Tasks, and Modeling Approaches", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "People rely heavily on context to enrich meaning beyond what is literally said, enabling concise but effective communication. To interact successfully and naturally with people, user-facing artificial intelligence systems will require similar skills in pragmatics: relying on various types of context --- from shared linguistic goals and conventions, to the visual and embodied world --- to use language effectively.\n\nWe survey existing grounded settings and pragmatic modeling approaches and analyze how the task goals, environmental contexts, and communicative affordances in each work enrich linguistic meaning. We present recommendations for future grounded task design to naturally elicit pragmatic phenomena, and suggest directions that focus on a broader range of communicative contexts and affordances.", "keywords": "grounding;pragmatics;multimodality;survey", "primary_area": "", "supplementary_material": "", "author": "Daniel Fried;Nicholas Tomlin;Jennifer Hu;Roma Patel;Aida Nematzadeh", "authorids": "~Daniel_Fried1;~Nicholas_Tomlin1;~Jennifer_Hu1;~Roma_Patel1;~Aida_Nematzadeh1", "gender": "M;M;;F;", "homepage": "https://dpfried.github.io/;https://people.eecs.berkeley.edu/~nicholas_tomlin/;https://jennhu.github.io/;http://cs.brown.edu/people/rpatel59/;http://www.aidanematzadeh.me/", "dblp": "117/4804;;217/1862;168/1595;153/9556", "google_scholar": "sJDqACEAAAAJ;zV5vhUcAAAAJ;;16OCMAQAAAAJ;FWJZYMYAAAAJ", "or_profile": "~Daniel_Fried1;~Nicholas_Tomlin1;~Jennifer_Hu1;~Roma_Patel1;~Aida_Nematzadeh1", "aff": "Carnegie Mellon University;University of California, Berkeley;Massachusetts Institute of Technology;Brown University;Google Deepmind", "aff_domain": "cmu.edu;berkeley.edu;mit.edu;brown.edu;deepmind.com", "position": "Assistant Professor;PhD student;PhD student;PhD student;Researcher", "bibtex": "@inproceedings{\nfried2023pragmatics,\ntitle={Pragmatics in Language Grounding: Phenomena, Tasks, and Modeling Approaches},\nauthor={Daniel Fried and Nicholas Tomlin and Jennifer Hu and Roma Patel and Aida Nematzadeh},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=Cx5vVkpsOY}\n}", "github": "", "project": "", "reviewers": "3CNV;tVhy;ED4y", "site": "https://openreview.net/forum?id=Cx5vVkpsOY", "pdf_size": 0, "rating": "4;4;4", "confidence": "5;5;5", "excitement": "5;4;2", "reproducibility": "", "correctness": "4;3;1", "rating_avg": 4.0, "confidence_avg": 5.0, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 0, "correctness_avg": 2.6666666666666665, "replies_avg": 10, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0003-4075-6876;;", "linkedin": ";;;;", "aff_unique_index": "0;1;2;3;4", "aff_unique_norm": "Carnegie Mellon University;University of California, Berkeley;Massachusetts Institute of Technology;Brown University;DeepMind", "aff_unique_dep": ";;;;DeepMind", "aff_unique_url": "https://www.cmu.edu;https://www.berkeley.edu;https://web.mit.edu;https://www.brown.edu;https://deepmind.com", "aff_unique_abbr": "CMU;UC Berkeley;MIT;Brown;DeepMind", "aff_campus_unique_index": "1", "aff_campus_unique": ";Berkeley", "aff_country_unique_index": "0;0;0;0;1", 
"aff_country_unique": "United States;United Kingdom" }, { "id": "CyDf6Q619o", "title": "Enabling Unsupervised Neural Machine Translation with Word-level Visual Representations", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Unsupervised neural machine translation has recently made remarkable strides, achieving impressive results with the exclusive use of monolingual corpora. Nonetheless, these methods still exhibit fundamental flaws, such as confusing similar words. A straightforward remedy to rectify this drawback is to employ bilingual dictionaries, however, high-quality bilingual dictionaries can be costly to obtain. To overcome this limitation, we propose a method that incorporates images at the word level to augment the lexical mappings. Specifically, our method inserts visual representations into the model, modifying the corresponding embedding layer information. Besides, a visible matrix is adopted to isolate the impact of images on other unrelated words. Experiments on the Multi30k dataset with over 300,000 self-collected images validate the effectiveness in generating more accurate word translation, achieving an improvement of up to $+$2.81 BLEU score, which is comparable or even superior to using bilingual dictionaries.", "keywords": "Unsupervised Machine Translation;Cross-modal Machine Translation;Word-level Image", "primary_area": "", "supplementary_material": "", "author": "Chengpeng Fu;Xiaocheng Feng;Yichong Huang;Wenshuai Huo;Hui Wang;Bing Qin;Ting Liu", "authorids": "~Chengpeng_Fu2;~Xiaocheng_Feng1;~Yichong_Huang1;~Wenshuai_Huo1;~Hui_Wang13;~Bing_Qin2;~Ting_Liu2", "gender": "M;M;M;M;;M;M", "homepage": "http://ir.hit.edu.cn/~xcfeng/;https://ychuang.netlify.app/;;https://openi.pcl.ac.cn;http://ir.hit.edu.cn/~qinb;;", "dblp": ";291/4211;362/8703;39/721-73;86/5934.html;52/5150-1;255/6333", "google_scholar": "Xu8NbhYAAAAJ;e0H1eqEAAAAJ;D8Xg7Q8AAAAJ;;LKnCub0AAAAJ;zyMJ1V0AAAAJ;nJpQm5EAAAAJ", "or_profile": "~Xiaocheng_Feng1;~Yichong_Huang1;~Wenshuai_Huo1;~Hui_Wang13;~Bing_Qin2;~Ting_Liu2;~chengpeng_fu1", "aff": "Harbin Institute of Technology;Harbin Institute of Technology;Harbin Institute of Technology;Cloud Computing;Harbin Institute of Technology;Harbin Institute of Technology;Harbin Institute of Technology", "aff_domain": "hit.edu.cn;hit.edu.cn;hit.edu.cn;pcl.ac.cn;hit.edu.cn;hit.edu.cn;hit.edu.cn", "position": "Associate Professor;PhD student;PhD student;Full Professor;Full Professor;Full Professor;PhD student", "bibtex": "@inproceedings{\nfu2023enabling,\ntitle={Enabling Unsupervised Neural Machine Translation with Word-level Visual Representations},\nauthor={Chengpeng Fu and Xiaocheng Feng and Yichong Huang and Wenshuai Huo and Hui Wang and Bing Qin and Ting Liu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=CyDf6Q619o}\n}", "github": "", "project": "", "reviewers": "Latj;jZHw;eGcr", "site": "https://openreview.net/forum?id=CyDf6Q619o", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;3", "excitement": "4;4;3", "reproducibility": "4;5;3", "correctness": "3;3;3", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.0, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0009-0005-4004-8564;;;0000-0002-2543-5604;;", "linkedin": ";;;;;;", "aff_unique_index": "0;0;0;1;0;0;0", "aff_unique_norm": "Harbin Institute of 
Technology;Cloud Computing", "aff_unique_dep": ";", "aff_unique_url": "http://www.hit.edu.cn/;", "aff_unique_abbr": "HIT;", "aff_campus_unique_index": "0;0;0;0;0;0", "aff_campus_unique": "Harbin;", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China;" }, { "id": "D0Mp7ILZME", "title": "SuperTweetEval: A Challenging, Unified and Heterogeneous Benchmark for Social Media NLP Research", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Despite its relevance, the maturity of NLP for social media pales in comparison with general-purpose models, metrics and benchmarks. This fragmented landscape makes it hard for the community to know, for instance, given a task, which is the best performing model and how it compares with others. To alleviate this issue, we introduce a unified benchmark for NLP evaluation in social media, SuperTweetEval, which includes a heterogeneous set of tasks and datasets combined, adapted and constructed from scratch. We benchmarked the performance of a wide range of models on SuperTweetEval and our results suggest that, despite the recent advances in language modelling, social media remains challenging.", "keywords": "Social Media;Benchmark;Twitter", "primary_area": "", "supplementary_material": "", "author": "Dimosthenis Antypas;Asahi Ushio;Francesco Barbieri;Leonardo Neves;Kiamehr Rezaee;Luis Espinosa-Anke;Jiaxin Pei;Jose Camacho-Collados", "authorids": "~Dimosthenis_Antypas1;~Asahi_Ushio1;~Francesco_Barbieri1;~Leonardo_Neves1;~Kiamehr_Rezaee1;~Luis_Espinosa-Anke1;~Jiaxin_Pei1;~Jose_Camacho-Collados1", "gender": "M;M;M;M;;M;;M", "homepage": ";https://asahi417.github.io/;;https://research.snap.com/team/leonardo-neves;;http://www.luisespinosa.net;;http://www.josecamachocollados.com", "dblp": ";;146/4193;180/2571;;140/3490.html;228/5526;165/0790", "google_scholar": ";RstIo9oAAAAJ;https://scholar.google.es/citations?user=1wPUUvcAAAAJ;https://scholar.google.com/citations?hl=en;p2qTqJgAAAAJ;;bfPz_-8AAAAJ;NP4KdQQAAAAJ", "or_profile": "~Dimosthenis_Antypas1;~Asahi_Ushio1;~Francesco_Barbieri1;~Leonardo_Neves1;~Kiamehr_Rezaee1;~Luis_Espinosa-Anke1;~Jiaxin_Pei1;~Jose_Camacho-Collados1", "aff": "Cardiff University;Cardiff University;Snap Inc.;Snap Inc.;Cardiff University;AMPLYFI;University of Michigan;Cardiff University", "aff_domain": "cardiff.ac.uk;cardiff.ac.uk;snap.com;snapchat.com;cardiff.ac.uk;amplyfi.com;umich.edu;cardiff.ac.uk", "position": "PhD student;PhD student;Research Scientist;Principal Researcher;PhD student;Principal Researcher;PhD student;Full Professor", "bibtex": "@inproceedings{\nantypas2023supertweeteval,\ntitle={SuperTweetEval: A Challenging, Unified and Heterogeneous Benchmark for Social Media {NLP} Research},\nauthor={Dimosthenis Antypas and Asahi Ushio and Francesco Barbieri and Leonardo Neves and Kiamehr Rezaee and Luis Espinosa-Anke and Jiaxin Pei and Jose Camacho-Collados},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=D0Mp7ILZME}\n}", "github": "", "project": "", "reviewers": "B3Tx;Anux;5755", "site": "https://openreview.net/forum?id=D0Mp7ILZME", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;4", "excitement": "4;2;4", "reproducibility": "4;4;5", "correctness": "3;2;4", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.333333333333333, "correctness_avg": 3.0, "replies_avg": 9, "authors#_avg": 8, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": 
";;;0009-0008-9539-5100;;;;", "linkedin": ";;;lrmneves/;;;;", "aff_unique_index": "0;0;1;1;0;2;3;0", "aff_unique_norm": "Cardiff University;Snap Inc.;AMPLYFI;University of Michigan", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.cardiff.ac.uk;https://www.snapinc.com;;https://www.umich.edu", "aff_unique_abbr": "Cardiff;Snap;;UM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;1;0;1;0", "aff_country_unique": "United Kingdom;United States;" }, { "id": "D0gAwtclWk", "title": "Rethinking Negative Pairs in Code Search", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Recently, contrastive learning has become a key component in fine-tuning code search models for software development efficiency and effectiveness. It pulls together positive code snippets while pushing negative samples away given search queries. Among contrastive learning, InfoNCE is the most widely used loss function due to its better performance. However, the following problems in negative samples of InfoNCE may deteriorate its representation learning: 1) The existence of false negative samples in large code corpora due to duplications. 2). The failure to explicitly differentiate between the potential relevance of negative samples. As an example, a bubble sorting algorithm example is less ``negative'' than a file saving function for the quick sorting algorithm query. In this paper, we tackle the above problems by proposing a simple yet effective Soft-InfoNCE loss that inserts weight terms into InfoNCE. In our proposed loss function, we apply three methods to estimate the weights of negative pairs and show that the vanilla InfoNCE loss is a special case of Soft-InfoNCE. Theoretically, we analyze the effects of Soft-InfoNCE on controlling the distribution of learnt code representations and on deducing a more precise mutual information estimation. We furthermore discuss the superiority of proposed loss functions with other design alternatives. 
Extensive experiments demonstrate the effectiveness of Soft-InfoNCE and weights estimation methods under state-of-the-art code search models on a large-scale public dataset consisting of six programming languages.", "keywords": "Code Search;Contrastive Learning", "primary_area": "", "supplementary_material": "", "author": "Haochen Li;Xin Zhou;Anh Tuan Luu;Chunyan Miao", "authorids": "~Haochen_Li6;~Xin_Zhou5;~Anh_Tuan_Luu2;~Chunyan_Miao1", "gender": "M;M;M;F", "homepage": "https://alex-haochenli.github.io/;https://xinzhou.me/;https://tuanluu.github.io/;http://www.ntulily.org/ascymiao/", "dblp": "49/11531-9;05/3403-8;81/8329.html;m/ChunyanMiao", "google_scholar": "z5t49dAAAAAJ;YpEaYXkAAAAJ;https://scholar.google.com.sg/citations?hl=en;https://scholar.google.com.tw/citations?user=fmXGRJgAAAAJ", "or_profile": "~Haochen_Li6;~Xin_Zhou5;~Anh_Tuan_Luu2;~Chunyan_Miao1", "aff": "Nanyang Technological University;Nanyang Technological University;Nanyang Technological University;School of Computer Science and Engineering, Nanyang Technological University", "aff_domain": "scse.ntu.edu.sg;ntu.edu.sg;ntu.edu.sg;scse.ntu.edu.sg", "position": "PhD student;Researcher;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nli2023rethinking,\ntitle={Rethinking Negative Pairs in Code Search},\nauthor={Haochen Li and Xin Zhou and Anh Tuan Luu and Chunyan Miao},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=D0gAwtclWk}\n}", "github": "", "project": "", "reviewers": "ftqB;swyC;Ls9b", "site": "https://openreview.net/forum?id=D0gAwtclWk", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;2;4", "excitement": "4;4;4", "reproducibility": "5;5;4", "correctness": "3;4;4", "rating_avg": 5.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 4.0, "reproducibility_avg": 4.666666666666667, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0003-0948-8033;;0000-0002-0300-3448", "linkedin": ";;;", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Nanyang Technological University", "aff_unique_dep": "", "aff_unique_url": "https://www.ntu.edu.sg", "aff_unique_abbr": "NTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Singapore" }, { "id": "D1kF1Eq7Mv", "title": "System Combination via Quality Estimation for Grammatical Error Correction", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Quality estimation models have been developed to assess the corrections made by grammatical error correction (GEC) models when the reference or gold-standard corrections are not available. An ideal quality estimator can be utilized to combine the outputs of multiple GEC systems by choosing the best subset of edits from the union of all edits proposed by the GEC base systems. However, we found that existing GEC quality estimation models are not good enough in differentiating good corrections from bad ones, resulting in a low F0.5 score when used for system combination. In this paper, we propose GRECO, a new state-of-the-art quality estimation model that gives a better estimate of the quality of a corrected sentence, as indicated by having a higher correlation to the F0.5 score of a corrected sentence. It results in a combined GEC system with a higher F0.5 score. 
We also propose three methods for utilizing GEC quality estimation models for system combination with varying generality: model-agnostic, model-agnostic with voting bias, and model-dependent method. The combined GEC system outperforms the state of the art on the CoNLL-2014 test set and the BEA-2019 test set, achieving the highest F0.5 scores published to date.", "keywords": "Grammatical Error Correction;Quality Estimation;System Combination", "primary_area": "", "supplementary_material": "", "author": "Muhammad Reza Qorib;Hwee Tou Ng", "authorids": "~Muhammad_Reza_Qorib1;~Hwee_Tou_Ng3", "gender": "M;M", "homepage": "https://mrqorib.github.io/;https://www.comp.nus.edu.sg/~nght/", "dblp": "235/2865;97/3037.html", "google_scholar": "bSW0QS0AAAAJ;https://scholar.google.com.tw/citations?user=FABZCeAAAAAJ", "or_profile": "~Muhammad_Reza_Qorib1;~Hwee_Tou_Ng3", "aff": "National University of Singapore;National University of Singapore", "aff_domain": "nus.edu.sg;nus.edu.sg", "position": "PhD student;Professor", "bibtex": "@inproceedings{\nqorib2023system,\ntitle={System Combination via Quality Estimation for Grammatical Error Correction},\nauthor={Muhammad Reza Qorib and Hwee Tou Ng},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=D1kF1Eq7Mv}\n}", "github": "", "project": "", "reviewers": "8ah8;nJd5;9qEn", "site": "https://openreview.net/forum?id=D1kF1Eq7Mv", "pdf_size": 0, "rating": "5;5;5", "confidence": "3;3;5", "excitement": "4;3;4", "reproducibility": "4;4;4", "correctness": "4;3;5", "rating_avg": 5.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.0, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";", "linkedin": ";", "aff_unique_index": "0;0", "aff_unique_norm": "National University of Singapore", "aff_unique_dep": "", "aff_unique_url": "https://www.nus.edu.sg", "aff_unique_abbr": "NUS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Singapore" }, { "id": "D4Cb4gAWro", "title": "Predictive Chemistry Augmented with Text Retrieval", "track": "main", "status": "Long Main", "tldr": "", "abstract": "This paper focuses on using natural language descriptions to enhance predictive models in the chemistry field. Conventionally, chemoinformatics models are trained with extensive structured data manually extracted from the literature. In this paper, we introduce TextReact, a novel method that directly augments predictive chemistry with texts retrieved from the literature. TextReact retrieves text descriptions relevant for a given chemical reaction, and then aligns them with the molecular representation of the reaction. This alignment is enhanced via an auxiliary masked LM objective incorporated in the predictor training. We empirically validate the framework on two chemistry tasks: reaction condition recommendation and one-step retrosynthesis. By leveraging text retrieval, TextReact significantly outperforms state-of-the-art chemoinformatics models trained solely on molecular data.", "keywords": "information retrieval;chemistry", "primary_area": "", "supplementary_material": "", "author": "Yujie Qian;Zhening Li;Zhengkai Tu;Connor W. 
Coley;Regina Barzilay", "authorids": "~Yujie_Qian1;~Zhening_Li1;~Zhengkai_Tu1;~Connor_W._Coley1;~Regina_Barzilay1", "gender": "M;;M;female;M", "homepage": "https://people.csail.mit.edu/yujieq/;https://people.csail.mit.edu/zli11010/;;https://www.regina.csail.mit.edu/;https://coley.mit.edu", "dblp": "187/3108;;;b/ReginaBarzilay;206/6284", "google_scholar": "https://scholar.google.com/citations?hl=en;3pEDdyoAAAAJ;https://scholar.google.ca/citations?user=6hJ_HcAAAAAJ;;l015S80AAAAJ", "or_profile": "~Yujie_Qian1;~Zhening_Li1;~Zhengkai_Tu1;~Regina_Barzilay1;~Connor_Coley1", "aff": "Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology", "aff_domain": "mit.edu;mit.edu;mit.edu;mit.edu;mit.edu", "position": "PhD student;Undergrad student;PhD student;Professor;Assistant Professor", "bibtex": "@inproceedings{\nqian2023predictive,\ntitle={Predictive Chemistry Augmented with Text Retrieval},\nauthor={Yujie Qian and Zhening Li and Zhengkai Tu and Connor W. Coley and Regina Barzilay},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=D4Cb4gAWro}\n}", "github": "", "project": "", "reviewers": "3bHy;zvNB;EhYp", "site": "https://openreview.net/forum?id=D4Cb4gAWro", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;4;3", "excitement": "3;3;4", "reproducibility": "3;4;4", "correctness": "3;4;4", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-6059-4985;0000-0003-1715-5773;;0000-0002-8271-8723", "linkedin": ";;;;", "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Massachusetts Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://web.mit.edu", "aff_unique_abbr": "MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "D4CoZQY1nt", "title": "The Less the Merrier? Investigating Language Representation in Multilingual Models", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Multilingual Language Models offer a way to incorporate multiple languages in one model and utilize cross-language transfer learning to improve performance for different Natural Language Processing (NLP) tasks. Despite progress in multilingual models, not all languages are supported as well, particularly in low-resource settings. In this work, we investigate the linguistic representation of different languages in multilingual models. We start by asking the question which languages are supported in popular multilingual models and which languages are left behind. Then, for included languages, we look at models' learned representations based on language family and dialect and try to understand how models' learned representations for (1) seen and (2) unseen languages vary across different language groups. In addition, we test and analyze performance on downstream tasks such as text generation and Named Entity Recognition. 
We observe from our experiments that community-centered models---models that focus on languages of a given family or geographical location and are built by communities who speak them---perform better at distinguishing between languages in the same family for low-resource languages. Our paper contributes to the literature in understanding multilingual models and their shortcomings and offers insights on potential ways to improve them.", "keywords": "Multilingual;low-resource;pre-trained models;linguistic diversity;embedding space;dialect;writing script", "primary_area": "", "supplementary_material": "", "author": "Hellina Hailu Nigatu;Atnafu Lambebo Tonja;Jugal Kalita", "authorids": "~Hellina_Hailu_Nigatu1;~Atnafu_Lambebo_Tonja1;~Jugal_Kalita1", "gender": "F;M;M", "homepage": "https://hhnigatu.github.io/;http://atnafuatx.github.io/;http://www.cs.uccs.edu/~kalita/", "dblp": "320/8882;312/3167;78/5662", "google_scholar": "Nn56f9cAAAAJ;https://scholar.google.com.mx/citations?user=rubyApkAAAAJ;", "or_profile": "~Hellina_Hailu_Nigatu1;~Atnafu_Lambebo_Tonja1;~Jugal_Kalita1", "aff": "Electrical Engineering & Computer Science Department, University of California, Berkeley;Instituto Polit\u00e9cnico Nacional;University of Colorado at Colorado Springs", "aff_domain": "eecs.berkeley.edu;ipn.mx;uccs.edu", "position": "PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nnigatu2023the,\ntitle={The Less the Merrier? Investigating Language Representation in Multilingual Models},\nauthor={Hellina Hailu Nigatu and Atnafu Lambebo Tonja and Jugal Kalita},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=D4CoZQY1nt}\n}", "github": "", "project": "", "reviewers": "U2Y9;NTN2;6SvF", "site": "https://openreview.net/forum?id=D4CoZQY1nt", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;3;5", "excitement": "2;3;3", "reproducibility": "3;4;4", "correctness": "3;3;2", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 2.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 2.6666666666666665, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-3501-5136;", "linkedin": ";atnafu-lambebo-6b21a5184;", "aff_unique_index": "0;1;2", "aff_unique_norm": "University of California, Berkeley;Instituto Polit\u00e9cnico Nacional;University of Colorado", "aff_unique_dep": "Electrical Engineering & Computer Science Department;;", "aff_unique_url": "https://www.berkeley.edu;https://www.ipn.mx;https://www.uccs.edu", "aff_unique_abbr": "UC Berkeley;IPN;UC-CS", "aff_campus_unique_index": "0;2", "aff_campus_unique": "Berkeley;;Colorado Springs", "aff_country_unique_index": "0;1;0", "aff_country_unique": "United States;Mexico" }, { "id": "D70lPh24o6", "title": "Explaining Interactions Between Text Spans", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Reasoning over spans of tokens from different parts of the input is essential for natural language understanding (NLU) tasks such as fact-checking (FC), machine reading comprehension (MRC) or natural language inference (NLI). However, existing highlight-based explanations primarily focus on identifying individual important features or interactions only between adjacent tokens or tuples of tokens. 
Most notably, there is a lack of annotations capturing the human decision-making process with respect to the necessary interactions for informed decision-making in such tasks. To bridge this gap, we introduce SpanEx, a multi-annotator dataset of human span interaction explanations for two NLU tasks: NLI and FC. We then investigate the decision-making processes of multiple fine-tuned large language models in terms of the employed connections between spans in separate parts of the input and compare them to the human reasoning processes. Finally, we present a novel community detection based unsupervised method to extract such interaction explanations. We make the code and the dataset available on [Github](https://github.com/copenlu/spanex). The dataset is also available on [Huggingface datasets](https://huggingface.co/datasets/copenlu/spanex).", "keywords": "explainability;interactions;spans", "primary_area": "", "supplementary_material": "", "author": "Sagnik Ray Choudhury;Pepa Atanasova;Isabelle Augenstein", "authorids": "~Sagnik_Ray_Choudhury2;~Pepa_Atanasova1;~Isabelle_Augenstein1", "gender": ";F;F", "homepage": ";https://apepa.github.io/;http://isabelleaugenstein.github.io/", "dblp": ";224/2054;93/11424.html", "google_scholar": ";CLOC3rEAAAAJ;https://scholar.google.co.uk/citations?user=DjJp0dcAAAAJ", "or_profile": "~Sagnik_Ray_Choudhury2;~Pepa_Atanasova1;~Isabelle_Augenstein1", "aff": ";University of Copenhagen;University of Copenhagen", "aff_domain": ";ku.dk;ku.dk", "position": ";Postdoc;Full Professor", "bibtex": "@inproceedings{\nchoudhury2023explaining,\ntitle={Explaining Interactions Between Text Spans},\nauthor={Sagnik Ray Choudhury and Pepa Atanasova and Isabelle Augenstein},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=D70lPh24o6}\n}", "github": "", "project": "", "reviewers": "yM6C;vXE4;hKS7", "site": "https://openreview.net/forum?id=D70lPh24o6", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;3;3", "excitement": "4;3;4", "reproducibility": "4;3;3", "correctness": "4;3;4", "rating_avg": 4.0, "confidence_avg": 3.0, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-0023-2616;0000-0003-1562-7909", "linkedin": ";pepa-atanasova-65a2b417b;isabelle-augenstein-82436b7a/", "aff_unique_index": "0;0", "aff_unique_norm": "University of Copenhagen", "aff_unique_dep": "", "aff_unique_url": "https://www.ku.dk", "aff_unique_abbr": "UCPH", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Denmark" }, { "id": "D7omx8QyFP", "title": "The CoT Collection: Improving Zero-shot and Few-shot Learning of Language Models via Chain-of-Thought Fine-Tuning", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Language models (LMs) with less than 100B parameters are known to perform poorly on chain-of-thought (CoT) reasoning in contrast to large LMs when solving unseen tasks. In this work, we aim to equip smaller LMs with the step-by-step reasoning capability by instruction tuning with CoT rationales. In order to achieve this goal, we first introduce a new instruction-tuning dataset called the CoT Collection, which augments the existing Flan Collection (including only 9 CoT tasks) with additional 1.84 million rationales across 1,060 tasks. 
We show that CoT fine-tuning Flan-T5 (3B \\& 11B) with CoT Collection enables smaller LMs to have better CoT capabilities on unseen tasks. On the BIG-Bench-Hard (BBH) benchmark, we report an average improvement of +4.34% (Flan-T5 3B) and +2.60% (Flan-T5 11B), in terms of zero-shot task accuracy. Furthermore, we show that instruction tuning with CoT Collection allows LMs to possess stronger few-shot learning capabilities on 4 domain-specific tasks, resulting in an improvement of +2.24% (Flan-T5 3B) and +2.37% (Flan-T5 11B), even outperforming ChatGPT utilizing demonstrations until the max length by a +13.98% margin. Our code, the CoT Collection data, and model checkpoints are publicly available.", "keywords": "Chain-of-Thought Fine-tuning;Zero-shot Generalization;Few-shot Learning;Large Language Models", "primary_area": "", "supplementary_material": "", "author": "Seungone Kim;Se June Joo;Doyoung Kim;Joel Jang;Seonghyeon Ye;Jamin Shin;Minjoon Seo", "authorids": "~Seungone_Kim1;~Se_June_Joo1;~Doyoung_Kim3;~Joel_Jang1;~Seonghyeon_Ye1;~Jamin_Shin1;~Minjoon_Seo1", "gender": "M;;M;M;M;M;M", "homepage": "https://github.com/SeungoneKim;;https://doyoungkim-ml.github.io/;https://joeljang.github.io/;https://vano1205.github.io/;https://jayshin.xyz;https://seominjoon.github.io", "dblp": "324/2064.html;;;;301/8927;225/5387;149/1367", "google_scholar": "https://scholar.google.co.kr/citations?user=qEf3e3EAAAAJ;;https://scholar.google.co.kr/citations?user=PJR9ogMAAAAJ;xL-7eFEAAAAJ;https://scholar.google.co.kr/citations?user=JfGGjBoAAAAJ;GuBHIwsAAAAJ;zYze5fIAAAAJ", "or_profile": "~Seungone_Kim1;~Se_June_Joo1;~Doyoung_Kim3;~Joel_Jang1;~Seonghyeon_Ye1;~Jamin_Shin1;~Minjoon_Seo1", "aff": "Yonsei University;;KAIST;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;NAVER;Twelve Labs", "aff_domain": "yonsei.ac.kr;;kaist.ac.kr;kaist.ac.kr;kaist.ac.kr;navercorp.com;twelvelabs.io", "position": "Undergrad student;;MS student;MS student;PhD student;Research Scientist;Chief Scientist", "bibtex": "@inproceedings{\nkim2023the,\ntitle={The CoT Collection: Improving Zero-shot and Few-shot Learning of Language Models via Chain-of-Thought Fine-Tuning},\nauthor={Seungone Kim and Se June Joo and Doyoung Kim and Joel Jang and Seonghyeon Ye and Jamin Shin and Minjoon Seo},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=D7omx8QyFP}\n}", "github": "", "project": "", "reviewers": "qbB3;cTkT;d7Vi", "site": "https://openreview.net/forum?id=D7omx8QyFP", "pdf_size": 0, "rating": "5;5;5", "confidence": "3;3;4", "excitement": "3;4;4", "reproducibility": "2;4;2", "correctness": "4;4;4", "rating_avg": 5.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 2.6666666666666665, "correctness_avg": 4.0, "replies_avg": 10, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;;", "linkedin": "seungone-kim-09b551264/;;doyoung-kim-870a141a2/;joel-jang-1289331a5/;;jayshin94/;minjoon-seo/", "aff_unique_index": "0;1;1;1;2;3", "aff_unique_norm": "Yonsei University;Korea Advanced Institute of Science and Technology;NAVER Corporation;Twelve Labs", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.yonsei.ac.kr;https://www.kaist.ac.kr;https://www.naver.com;https://twelvelabs.com", "aff_unique_abbr": "Yonsei;KAIST;NAVER;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;1", 
"aff_country_unique": "South Korea;United States" }, { "id": "D97Zfgv4em", "title": "MeaeQ: Mount Model Extraction Attacks with Efficient Queries", "track": "main", "status": "Long Main", "tldr": "", "abstract": "We study model extraction attacks in natural language processing (NLP) where attackers aim to steal victim models by repeatedly querying the open Application Programming Interfaces (APIs). Recent works focus on limited-query budget settings and adopt random sampling or active learning-based sampling strategies on publicly available, unannotated data sources. However, these methods often result in selected queries that lack task relevance and data diversity, leading to limited success in achieving satisfactory results with low query costs. In this paper, we propose MeaeQ (Model extraction attack with efficient Queries), a straightforward yet effective method to address these issues. Specifically, we initially utilize a zero-shot sequence inference classifier, combined with API service information, to filter task-relevant data from a public text corpus instead of a problem domain-specific dataset. Furthermore, we employ a clustering-based data reduction technique to obtain representative data as queries for the attack. Extensive experiments conducted on four benchmark datasets demonstrate that MeaeQ achieves higher functional similarity to the victim model than baselines while requiring fewer queries.", "keywords": "Model Extraction Attack; Efficient Query Sampling Strategy; Limited Query Budgets", "primary_area": "", "supplementary_material": "", "author": "Chengwei Dai;Minxuan Lv;Kun Li;Wei Zhou", "authorids": "~Chengwei_Dai1;~Minxuan_Lv1;~Kun_Li8;~Wei_Zhou5", "gender": "M;;M;F", "homepage": "https://github.com/C-W-D;https://github.com/xiaoxuanNLP;;http://people.ucas.ac.cn/~iiezhouwei", "dblp": ";359/3555;;69/5011-19", "google_scholar": ";https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=zh-CN;", "or_profile": "~Chengwei_Dai1;~Minxuan_Lv1;~Kun_Li8;~Wei_Zhou5", "aff": "University of Chinese Academy of Sciences;Institute of Information Engineering,Chinese Academy of Sciences;University of Chinese Academy of Sciences;", "aff_domain": "ucas.ac.cn;iie.ac.cn;ucas.edu.cn;", "position": "MS student;MS student;PhD student;", "bibtex": "@inproceedings{\ndai2023meaeq,\ntitle={MeaeQ: Mount Model Extraction Attacks with Efficient Queries},\nauthor={Chengwei Dai and Minxuan Lv and Kun Li and Wei Zhou},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=D97Zfgv4em}\n}", "github": "", "project": "", "reviewers": "GBYG;ymJe;rQZ7", "site": "https://openreview.net/forum?id=D97Zfgv4em", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;3;3", "excitement": "3;3;4", "reproducibility": "4;3;3", "correctness": "4;3;4", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;", "linkedin": ";;;", "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Chinese Academy of Sciences;Chinese Academy of Sciences", "aff_unique_dep": ";Institute of Information Engineering", "aff_unique_url": "http://www.ucas.ac.cn;http://www.cas.cn", "aff_unique_abbr": "UCAS;CAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", 
"aff_country_unique": "China" }, { "id": "D9oq45WsKq", "title": "Ensemble-Instruct: Instruction Tuning Data Generation with a Heterogeneous Mixture of LMs", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Using in-context learning (ICL) for data generation, techniques such as Self-Instruct (Wang et al., 2023) or the follow-up Alpaca (Taori et al., 2023) can train strong conversational agents with only a small amount of human supervision. One limitation of these approaches is that they resort to very large language models (around 175B parameters) that are also proprietary and non-public. Here we explore the application of such techniques to language models that are much smaller (around 10B--40B parameters) and have permissive licenses. \nWe find the Self-Instruct approach to be less effective at these sizes and propose new ICL methods that draw on two main ideas: (a) categorization and simplification of the ICL templates to make prompt learning easier for the LM, and (b) ensembling over multiple LM outputs to help select high-quality synthetic examples. Our algorithm leverages the 175 Self-Instruct seed tasks and employs separate pipelines for instructions that require an input and instructions that do not. Empirical investigations with different LMs show that: (1) Our proposed method yields higher-quality instruction tuning data than Self-Instruct, (2) It improves performances of both vanilla and instruction-tuned LMs by significant margins, and (3) Smaller instruction-tuned LMs generate more useful examples than their larger un-tuned counterparts.", "keywords": "instruction tuning data generation;prompt learning;ensemble learning", "primary_area": "", "supplementary_material": "", "author": "Young-Suk Lee;Md Arafat Sultan;Yousef El-Kurdi;Tahira Naseem;Asim Munawar;Radu Florian;Salim Roukos;Ram\u00f3n Fernandez Astudillo", "authorids": "~Young-Suk_Lee1;~Md_Arafat_Sultan1;~Yousef_El-Kurdi1;~Tahira_Naseem1;~Asim_Munawar2;~Radu_Florian1;~Salim_Roukos1;~Ram\u00f3n_Fernandez_Astudillo1", "gender": "F;M;M;F;M;M;M;M", "homepage": "https://researcher.watson.ibm.com/researcher/view.php?person=us-ysuklee;https://ma-sultan.github.io/;;;;;;https://ramon-astudillo.github.io/", "dblp": "92/584;77/11514;;44/642;;91/663;01/1417;", "google_scholar": "https://scholar.google.com/citations?hl=en;lDB1ul4AAAAJ;https://scholar.google.ca/citations?user=MgJspykAAAAJ;IoVlb40AAAAJ;;NvIcXEYAAAAJ;1S7VwIcAAAAJ;zJ4uM00AAAAJ", "or_profile": "~Young-Suk_Lee1;~Md_Arafat_Sultan1;~Yousef_El-Kurdi1;~Tahira_Naseem1;~Asim_Munawar2;~Radu_Florian1;~Salim_Roukos1;~Ramon_Fernandez_Astudillo1", "aff": "IBM, International Business Machines;International Business Machines;IBM, International Business Machines;IBM, International Business Machines;International Business Machines;International Business Machines;International Business Machines;International Business Machines", "aff_domain": "us.ibm.com;ibm.com;us.ibm.com;us.ibm.com;ibm.com;ibm.com;ibm.com;ibm.com", "position": "Research Staff Member;Researcher;Researcher;Researcher;Researcher;Researcher;Principal Researcher;Researcher", "bibtex": "@inproceedings{\nlee2023ensembleinstruct,\ntitle={Ensemble-Instruct: Instruction Tuning Data Generation with a Heterogeneous Mixture of {LM}s},\nauthor={Young-Suk Lee and Md Arafat Sultan and Yousef El-Kurdi and Tahira Naseem and Asim Munawar and Radu Florian and Salim Roukos and Ram{\\'o}n Fernandez Astudillo},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language 
Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=D9oq45WsKq}\n}", "github": "", "project": "", "reviewers": "gP8F;CwUv;NX6N", "site": "https://openreview.net/forum?id=D9oq45WsKq", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;3;3", "excitement": "3;4;3", "reproducibility": "4;4;5", "correctness": "3;4;4", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.333333333333333, "correctness_avg": 3.6666666666666665, "replies_avg": 10, "authors#_avg": 8, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;0000-0002-3670-1576;;", "linkedin": ";;;tahira-naseem-12066b46/;asimmunawar/;;salim-roukos-55a3871/;", "aff_unique_index": "0;1;0;0;1;1;1;1", "aff_unique_norm": "International Business Machines;International Business Machines Corporation", "aff_unique_dep": ";", "aff_unique_url": "https://www.ibm.com;https://www.ibm.com", "aff_unique_abbr": "IBM;IBM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "DDvcWpZNgl", "title": "PIEClass: Weakly-Supervised Text Classification with Prompting and Noise-Robust Iterative Ensemble Training", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Weakly-supervised text classification trains a classifier using the label name of each target class as the only supervision, which largely reduces human annotation efforts. Most existing methods first use the label names as static keyword-based features to generate pseudo labels, which are then used for final classifier training. While reasonable, such a commonly adopted framework suffers from two limitations: (1) keywords can have different meanings in different contexts and some text may not have any keyword, so keyword matching can induce noisy and inadequate pseudo labels; (2) the errors made in the pseudo label generation stage will directly propagate to the classifier training stage without a chance of being corrected. In this paper, we propose a new method, PIEClass, consisting of two modules: (1) a pseudo label acquisition module that uses zero-shot prompting of pre-trained language models (PLM) to get pseudo labels based on contextualized text understanding beyond static keyword matching, and (2) a noise-robust iterative ensemble training module that iteratively trains classifiers and updates pseudo labels by utilizing two PLM fine-tuning methods that regularize each other. 
Extensive experiments show that PIEClass achieves overall better performance than existing strong baselines on seven benchmark datasets and even achieves similar performance to fully-supervised classifiers on sentiment classification tasks.", "keywords": "Text Classification;Weak Supervision;Pre-Trained Language Model;Prompt-Based Learning", "primary_area": "", "supplementary_material": "", "author": "Yunyi Zhang;Minhao Jiang;Yu Meng;Yu Zhang;Jiawei Han", "authorids": "~Yunyi_Zhang1;~Minhao_Jiang1;~Yu_Meng1;~Yu_Zhang26;~Jiawei_Han1", "gender": "M;M;M;M;M", "homepage": "https://yzhan238.github.io/;https://minhaoj2.github.io/;https://yumeng5.github.io/;https://yuzhimanhua.github.io/;http://hanj.cs.illinois.edu/", "dblp": "384/0161;;30/4233-1;50/671-44;h/JiaweiHan.html", "google_scholar": "5VMnkl0AAAAJ;Qzm-cLIAAAAJ;S2-yZKcAAAAJ;N0PrmgIAAAAJ;https://scholar.google.com.tw/citations?user=Kv9AbjMAAAAJ", "or_profile": "~Yunyi_Zhang1;~Minhao_Jiang1;~Yu_Meng1;~Yu_Zhang26;~Jiawei_Han1", "aff": "University of Illinois Urbana-Champaign;University of Illinois, Urbana Champaign;University of Illinois, Urbana Champaign;University of Illinois, Urbana Champaign;University of Illinois at Urbana-Champaign (UIUC)", "aff_domain": "illinois.edu;uiuc.edu;illinois.edu;illinois.edu;illinois.edu", "position": "PhD student;MS student;PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nzhang2023pieclass,\ntitle={{PIEC}lass: Weakly-Supervised Text Classification with Prompting and Noise-Robust Iterative Ensemble Training},\nauthor={Yunyi Zhang and Minhao Jiang and Yu Meng and Yu Zhang and Jiawei Han},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=DDvcWpZNgl}\n}", "github": "", "project": "", "reviewers": "cc9r;dUCf;oKTC", "site": "https://openreview.net/forum?id=DDvcWpZNgl", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;4;5", "excitement": "3;3;3", "reproducibility": "4;4;3", "correctness": "4;4;3", "rating_avg": 5.0, "confidence_avg": 4.333333333333333, "excitement_avg": 3.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0001-9790-4855;;0000-0003-2554-2888;0000-0003-0540-6758;0000-0002-3629-2696", "linkedin": "yunyi-zhang-198982149;minhao-jiang-6bbb03157/;;;", "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "University of Illinois Urbana-Champaign", "aff_unique_dep": "", "aff_unique_url": "https://illinois.edu", "aff_unique_abbr": "UIUC", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Urbana-Champaign", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "DOlbbJhJ1A", "title": "Large Language Models: The Need for Nuance in Current Debates and a Pragmatic Perspective on Understanding", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Current Large Language Models (LLMs) are unparalleled in their ability to generate grammatically correct, fluent text. LLMs are appearing rapidly, and debates on LLM capacities have taken off, but reflection is lagging behind. 
Thus, in this position paper, we first zoom in on the debate and critically assess three points recurring in critiques of LLM capacities: i) that LLMs only parrot statistical patterns in the training data; ii) that LLMs master formal but not functional language competence; and iii) that language learning in LLMs cannot inform human language learning. Drawing on empirical and theoretical arguments, we show that these points need more nuance. Second, we outline a pragmatic perspective on the issue of `real' understanding and intentionality in LLMs. Understanding and intentionality pertain to unobservable mental states we attribute to other humans because they have pragmatic value: they allow us to abstract away from complex underlying mechanics and predict behaviour effectively. We reflect on the circumstances under which it would make sense for humans to similarly attribute mental states to LLMs, thereby outlining a pragmatic philosophical context for LLMs as an increasingly prominent technology in society.", "keywords": "Large Language Models;Critical Analysis;Interpretation;Explanation;Philosophy;Understanding;Pragmatism;NLP", "primary_area": "", "supplementary_material": "", "author": "Bram Van Dijk;Tom Kouwenhoven;Marco Spruit;Max Johannes van Duijn", "authorids": "~Bram_Van_Dijk1;~Tom_Kouwenhoven1;~Marco_Spruit1;~Max_Johannes_van_Duijn1", "gender": "M;M;Not Specified;M", "homepage": "https://www.universiteitleiden.nl/en/staffmembers/bram-van-dijk;;https://www.universiteitleiden.nl/en/staffmembers/marco-spruit;https://www.universiteitleiden.nl/en/staffmembers/max-van-duijn", "dblp": ";320/1588;;", "google_scholar": ";vbIiimUAAAAJ;GFvyyeAAAAAJ;", "or_profile": "~Bram_Van_Dijk1;~Tom_Kouwenhoven1;~Marco_Spruit1;~Max_Johannes_van_Duijn1", "aff": "Leiden University;Leiden University, Leiden University;Leiden University;Leiden University, Leiden University", "aff_domain": "leidenuniv.nl;liacs.leidenuniv.nl;leidenuniv.nl;liacs.leidenuniv.nl", "position": "PhD student;PhD student;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\ndijk2023large,\ntitle={Large Language Models: The Need for Nuance in Current Debates and a Pragmatic Perspective on Understanding},\nauthor={Bram Van Dijk and Tom Kouwenhoven and Marco Spruit and Max Johannes van Duijn},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=DOlbbJhJ1A}\n}", "github": "", "project": "", "reviewers": "rDe7;6wK9;QBMH", "site": "https://openreview.net/forum?id=DOlbbJhJ1A", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;4;4", "excitement": "4;4;4", "reproducibility": "", "correctness": "3;3;4", "rating_avg": 5.0, "confidence_avg": 4.0, "excitement_avg": 4.0, "reproducibility_avg": 0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0003-2480-4073;0000-0002-9237-221X;", "linkedin": "bram-van-dijk-869768266/;;spruit/;", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Leiden University", "aff_unique_dep": "", "aff_unique_url": "https://www.leidenuniv.nl", "aff_unique_abbr": "LU", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Leiden", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Netherlands" }, { "id": "DPhTTeoyjC", "title": "LM vs LM: Detecting Factual Errors via Cross Examination", "track": "main", "status": "Long Main", "tldr": "", "abstract": "A prominent weakness of modern language models (LMs) is their 
tendency to generate factually incorrect text, which hinders their usability. A natural question is whether such factual errors can be detected automatically. Inspired by truth-seeking mechanisms in law, we propose a factuality evaluation framework for LMs that is based on cross-examination. Our key idea is that an incorrect claim is likely to result in inconsistency with other claims that the model generates. To discover such inconsistencies, we facilitate a multi-turn interaction between the LM that generated the claim and another LM (acting as an examiner) which introduces questions to discover inconsistencies. We empirically evaluate our method on factual claims made by multiple recent LMs on four benchmarks, finding that it outperforms existing methods and baselines, often by a large gap. Our results demonstrate the potential of using interacting LMs for capturing factual errors.", "keywords": "knowledge;factuality;LLM;question answering;interpretability;interactivity;multi-agent;consistency;fact checking;dialogue", "primary_area": "", "supplementary_material": "", "author": "Roi Cohen;May Hamri;Mor Geva;Amir Globerson", "authorids": "~Roi_Cohen1;~May_Hamri1;~Mor_Geva1;~Amir_Globerson1", "gender": "M;F;F;M", "homepage": ";;https://mega002.github.io/;http://www.cs.tau.ac.il/~gamir/", "dblp": ";;203/9159;08/4162.html", "google_scholar": ";;https://scholar.google.co.il/citations?user=GxpQbSkAAAAJ;https://scholar.google.com.tw/citations?user=5JserkUAAAAJ", "or_profile": "~Roi_Cohen1;~May_Hamri1;~Mor_Geva1;~Amir_Globerson1", "aff": "School of Computer Science, Tel Aviv University;Tel Aviv University;Google DeepMind;Tel Aviv University", "aff_domain": "cs.tau.ac.il;tau.ac.il;google.com;tau.ac.il", "position": "MS student;Undergrad student;Postdoc;Associate Professor", "bibtex": "@inproceedings{\ncohen2023lm,\ntitle={{LM} vs {LM}: Detecting Factual Errors via Cross Examination},\nauthor={Roi Cohen and May Hamri and Mor Geva and Amir Globerson},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=DPhTTeoyjC}\n}", "github": "", "project": "", "reviewers": "ZiT3;eV78;hr8G", "site": "https://openreview.net/forum?id=DPhTTeoyjC", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;2;5", "excitement": "4;3;4", "reproducibility": "5;3;4", "correctness": "4;3;3", "rating_avg": 5.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;", "linkedin": "roicohen9/;may-hamri-2b96b61b7;morgeva/;", "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Tel Aviv University;Google", "aff_unique_dep": "School of Computer Science;Google DeepMind", "aff_unique_url": "https://www.tau.ac.il;https://deepmind.com", "aff_unique_abbr": "TAU;DeepMind", "aff_campus_unique_index": "0", "aff_campus_unique": "Tel Aviv;", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "Israel;United Kingdom" }, { "id": "DQ9WeXpgJt", "title": "Unsupervised Sounding Pixel Learning", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Sounding source localization is a challenging cross-modal task due to the difficulty of cross-modal alignment. \nAlthough supervised cross-modal methods achieve encouraging performance, heavy manual annotations are expensive and inefficient. 
Thus it is valuable and meaningful to develop unsupervised solutions. \nIn this paper, we propose an **U**nsupervised **S**ounding **P**ixel **L**earning (USPL) approach which enables a pixel-level sounding source localization in unsupervised paradigm. \nWe first design a mask augmentation based multi-instance contrastive learning to realize unsupervised cross-modal coarse localization, which aligns audio-visual features to obtain coarse sounding maps. \nSecondly, we present an *Unsupervised Sounding Map Refinement (SMR)* module which employs the visual semantic affinity learning to explore inter-pixel relations of adjacent coordinate features. It contributes to recovering the boundary of coarse sounding maps and obtaining fine sounding maps. \nFinally, a *Sounding Pixel Segmentation (SPS)* module is presented to realize audio-supervised semantic segmentation. \nExtensive experiments are performed on the AVSBench-S4 and VGGSound datasets, exhibiting encouraging results compared with previous SOTA methods.", "keywords": "unsupervised;cross-modal learning;sound source localization;semantic affinity refinement", "primary_area": "", "supplementary_material": "", "author": "Yining Zhang;Yanli Ji;Yang Yang", "authorids": "~Yining_Zhang4;~Yanli_Ji1;~Yang_Yang37", "gender": "F;M;M", "homepage": "https://scholar.google.com/citations?user=aGbEdhEAAAAJ&hl=en;http://cfm.uestc.edu.cn/~yangyang/;https://tonyzhang128.github.io/", "dblp": "79/8728;;", "google_scholar": ";;", "or_profile": "~Yanli_Ji1;~Yang_Yang37;~zhang_yining1", "aff": ";University of Electronic Science and Technology of China;University of Electronic Science and Technology of China", "aff_domain": ";uestc.edu.cn;uestc.edu.cn", "position": ";Full Professor;MS student", "bibtex": "@inproceedings{\nzhang2023unsupervised,\ntitle={Unsupervised Sounding Pixel Learning},\nauthor={Yining Zhang and Yanli Ji and Yang Yang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=DQ9WeXpgJt}\n}", "github": "", "project": "", "reviewers": "fozg;c1YV;A2BU;3pBn", "site": "https://openreview.net/forum?id=DQ9WeXpgJt", "pdf_size": 0, "rating": "4;4;4;4", "confidence": "4;3;4;3", "excitement": "4;4;4;4", "reproducibility": "3;4;4;4", "correctness": "4;4;4;4", "rating_avg": 4.0, "confidence_avg": 3.5, "excitement_avg": 4.0, "reproducibility_avg": 3.75, "correctness_avg": 4.0, "replies_avg": 13, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": ";;", "aff_unique_index": "0;0", "aff_unique_norm": "University of Electronic Science and Technology of China", "aff_unique_dep": "", "aff_unique_url": "https://www.uestc.edu.cn", "aff_unique_abbr": "UESTC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "id": "DQVGhBdAPG", "title": "Identification of Multimodal Stance Towards Frames of Communication", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Frames of communication are often evoked in multimedia documents. When an author decides to add an image to a text, one or both of the modalities may evoke a communication frame. Moreover, when evoking the frame, the author also conveys her/his stance towards the frame. Until now, determining if the author is in favor of, against or has no stance towards the frame was performed automatically only when processing texts. 
This is due to the absence of stance annotations on multimedia documents. In this paper we introduce MMVax-Stance, a dataset of 11,300 multimedia documents retrieved from social media, which have stance annotations towards 113 different frames of communication. This dataset allowed us to experiment with several models of multimedia stance detection, which revealed important interactions between texts and images in the inference of stance towards communication frames. When inferring the text/image relations, a set of 46,606 synthetic examples of multimodal documents with known stance was generated. This greatly impacted the quality of identifying multimedia stance, yielding an improvement of 20% in F1-score.", "keywords": "stance detection;covid-19;social media;twitter;multimodal;images;multimedia", "primary_area": "", "supplementary_material": "", "author": "Maxwell Weinzierl;Sanda Harabagiu", "authorids": "~Maxwell_Weinzierl1;~Sanda_Harabagiu1", "gender": "M;F", "homepage": "https://maxnlp.ai;http://www.utdallas.edu/~sanda/", "dblp": "277/7481;51/3845", "google_scholar": "G5BQKugAAAAJ;https://scholar.google.com.tw/citations?user=fkyBx9EAAAAJ", "or_profile": "~Maxwell_Weinzierl1;~Sanda_Harabagiu1", "aff": "University of Texas at Dallas;University of Texas at Dallas", "aff_domain": "utdallas.edu;utdallas.edu", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\nweinzierl2023identification,\ntitle={Identification of Multimodal Stance Towards Frames of Communication},\nauthor={Maxwell Weinzierl and Sanda Harabagiu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=DQVGhBdAPG}\n}", "github": "", "project": "", "reviewers": "Sfdh;GuKD;anjp", "site": "https://openreview.net/forum?id=DQVGhBdAPG", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;4", "excitement": "1;3;3", "reproducibility": "1;4;4", "correctness": "1;3;3", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 2.3333333333333335, "reproducibility_avg": 3.0, "correctness_avg": 2.3333333333333335, "replies_avg": 10, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-8049-7453;", "linkedin": "max-weinzierl/;", "aff_unique_index": "0;0", "aff_unique_norm": "University of Texas at Dallas", "aff_unique_dep": "", "aff_unique_url": "https://www.utdallas.edu", "aff_unique_abbr": "UT Dallas", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Dallas", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "DRpZjTJKZh", "title": "Probabilistic Tree-of-thought Reasoning for Answering Knowledge-intensive Complex Questions", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Large language models (LLMs) are capable of answering knowledge-intensive complex questions with chain-of-thought (CoT) reasoning. However, they tend to generate factually incorrect reasoning steps when the required knowledge is not available or up-to-date in models' parameters. Recent works turn to retrieving external knowledge to augment CoT reasoning. Despite being promising, these chain-based methods suffer from: 1) Negative retrieval. Unnecessary or incorrect retrieval may mislead the reasoning; 2) Limited sight. Lacking the ability to look backward or forward, a local error in one step will propagate along the chain.\n\nIn this paper, we propose a novel approach: Probabilistic Tree-of-thought Reasoning (ProbTree). 
First, LLMs translate a complex question into a query tree, in which each non-root node denotes a sub-question of its parent node. Then, probabilistic reasoning is conducted over the tree, by solving questions from leaf to root considering the confidence of both question decomposing and answering. During reasoning, for leaf nodes, LLMs choose a more confident answer from Closed-book QA that employs parametric knowledge and Open-book QA that employs retrieved external knowledge, thus eliminating the negative retrieval problem. For non-leaf nodes, with the hierarchical structure, LLMs have broader sights and are able to globally reason with the information from child nodes, thus recovering from local errors. The experiments on three Complex QA datasets under the open-domain setting show that our approach outperforms SOTA methods significantly, demonstrating the effect of probabilistic tree-of-thought reasoning.", "keywords": "LLMs; Probabilistic Tree-of-thought Reasoning; Hallucination; Knowledge-intensive Complex QA", "primary_area": "", "supplementary_material": "", "author": "Shulin Cao;Jiajie Zhang;Jiaxin Shi;Xin Lv;Zijun Yao;Qi Tian;Lei Hou;Juanzi Li", "authorids": "~Shulin_Cao1;~Jiajie_Zhang2;~Jiaxin_Shi3;~Xin_Lv1;~Zijun_Yao2;~Qi_Tian3;~Lei_Hou2;~Juanzi_Li1", "gender": "F;M;M;M;M;M;M;", "homepage": "https://github.com/ShulinCao;https://blog.csdn.net/cdsszjj;;https://davidlvxin.github.io;https://transirius.github.io/;https://www.qitian1987.com/index.html;https://www.cs.tsinghua.edu.cn/csen/info/1305/4466.htm;", "dblp": "229/2976;73/5253.html;;;134/4025-2;78/1467-1.html;32/5685-1;", "google_scholar": "lUfGROcAAAAJ;g-bcOmUAAAAJ;8XcQHUEAAAAJ;rJzgbYQAAAAJ;B4LmHSUAAAAJ;https://scholar.google.com/citations?hl=en;YnIq4hsAAAAJ;", "or_profile": "~Shulin_Cao1;~Jiajie_Zhang2;~Jiaxin_Shi3;~Xin_Lv1;~Zijun_Yao2;~Qi_Tian3;~Lei_Hou2;~Juanzi_Li1", "aff": "Tsinghua University;Tsinghua University;Huawei Technologies Ltd.;Tsinghua University;Tsinghua University;Huawei Technologies Ltd.;Tsinghua University;", "aff_domain": "tsinghua.edu.cn;tsinghua.edu.cn;huawei.com;tsinghua.edu.cn;tsinghua.edu.cn;huawei.com;tsinghua.edu.cn;", "position": "PhD student;PhD student;Researcher;PhD student;MS student;Principal Researcher;Assistant Professor;", "bibtex": "@inproceedings{\ncao2023probabilistic,\ntitle={Probabilistic Tree-of-thought Reasoning for Answering Knowledge-intensive Complex Questions},\nauthor={Shulin Cao and Jiajie Zhang and Jiaxin Shi and Xin Lv and Zijun Yao and Qi Tian and Lei Hou and Juanzi Li},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=DRpZjTJKZh}\n}", "github": "", "project": "", "reviewers": "ahcZ;M8kU;3643", "site": "https://openreview.net/forum?id=DRpZjTJKZh", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;4;4", "excitement": "4;2;3", "reproducibility": "4;4;4", "correctness": "3;3;4", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.0, "reproducibility_avg": 4.0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 8, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;0000-0002-0288-9283;0000-0002-7252-5047;0000-0002-8907-3526;", "linkedin": ";;;;%E5%AD%90%E4%BF%8A-%E5%A7%9A-313188209/;;;", "aff_unique_index": "0;0;1;0;0;1;0", "aff_unique_norm": "Tsinghua University;Huawei", "aff_unique_dep": ";Huawei Technologies", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.huawei.com", "aff_unique_abbr": "THU;Huawei", 
"aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "DSmHC8bi3j", "title": "Noise-Robust Fine-Tuning of Pretrained Language Models via External Guidance", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Adopting a two-stage paradigm of pretraining followed by fine-tuning, Pretrained Language Models (PLMs) have achieved substantial advancements in the field of natural language processing. However, in real-world scenarios, data labels are often noisy due to the complex annotation process, making it essential to develop strategies for fine-tuning PLMs with such noisy labels. To this end, we introduce an innovative approach for fine-tuning PLMs using noisy labels, which incorporates the guidance of Large Language Models (LLMs) like ChatGPT. This guidance assists in accurately distinguishing between clean and noisy samples and provides supplementary information beyond the noisy labels, thereby boosting the learning process during fine-tuning PLMs. Extensive experiments on synthetic and real-world noisy datasets further demonstrate the superior advantages of our framework over the state-of-the-art baselines.", "keywords": "Pretrained Language Models;Large-scale Language Models;Learning from Noisy Labels", "primary_area": "", "supplementary_material": "", "author": "Song Wang;Zhen Tan;Ruocheng Guo;Jundong Li", "authorids": "~Song_Wang6;~Zhen_Tan2;~Ruocheng_Guo1;~Jundong_Li2", "gender": "M;M;M;M", "homepage": "https://songw-sw.github.io/;https://zhen-tan-dmml.github.io/;https://rguo12.github.io;https://jundongli.github.io/", "dblp": ";13/10345-1.html;167/4378;144/7997.html", "google_scholar": ";yMV7JtIAAAAJ;8Nuj8NwAAAAJ;uY6ek7sAAAAJ", "or_profile": "~Song_Wang6;~Zhen_Tan2;~Ruocheng_Guo1;~Jundong_Li2", "aff": "University of Virginia;Arizona State University;Bytedance Research;University of Virginia", "aff_domain": "virginia.edu;asu.edu;bytedance.com;virginia.edu", "position": "PhD student;PhD student;Researcher;Assistant Professor", "bibtex": "@inproceedings{\nwang2023noiserobust,\ntitle={Noise-Robust Fine-Tuning of Pretrained Language Models via External Guidance},\nauthor={Song Wang and Zhen Tan and Ruocheng Guo and Jundong Li},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=DSmHC8bi3j}\n}", "github": "", "project": "", "reviewers": "Qwqa;No3t;br2H", "site": "https://openreview.net/forum?id=DSmHC8bi3j", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;4", "excitement": "3;3;4", "reproducibility": "4;3;5", "correctness": "4;2;3", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.0, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0003-1273-7694;0009-0006-9548-2330;;", "linkedin": ";;;", "aff_unique_index": "0;1;2;0", "aff_unique_norm": "University of Virginia;Arizona State University;ByteDance", "aff_unique_dep": ";;Bytedance Research", "aff_unique_url": "https://www.virginia.edu;https://www.asu.edu;https://www.bytedance.com", "aff_unique_abbr": "UVA;ASU;Bytedance", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "United States;China" }, { "id": "DTELCDufzE", "title": "Large Language Models Are Better Adversaries: Exploring Generative Clean-Label Backdoor Attacks Against Text 
Classifiers", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Backdoor attacks manipulate model predictions by inserting innocuous triggers into training and test data. We focus on more realistic and more challenging clean-label attacks where the adversarial training examples are correctly labeled. Our attack, LLMBkd, leverages language models to automatically insert diverse style-based triggers into texts. We also propose a poison selection technique to improve the effectiveness of both LLMBkd as well as existing textual backdoor attacks. Lastly, we describe REACT, a baseline defense to mitigate backdoor attacks via antidote training examples. Our evaluations demonstrate LLMBkd's effectiveness and efficiency, where we consistently achieve high attack success rates across a wide range of styles with little effort and no model training.", "keywords": "adversarial machine learning;backdoor attacks;large language models;natural language processing", "primary_area": "", "supplementary_material": "", "author": "Wencong You;Zayd Hammoudeh;Daniel Lowd", "authorids": "~Wencong_You1;~Zayd_Hammoudeh1;~Daniel_Lowd1", "gender": ";M;M", "homepage": ";https://zaydh.github.io/;http://ix.cs.uoregon.edu/~lowd", "dblp": ";204/2516;80/3901", "google_scholar": "2ICn8GAAAAAJ;bgFcdQkAAAAJ;IrcFO1AAAAAJ", "or_profile": "~Wencong_You1;~Zayd_Hammoudeh1;~Daniel_Lowd1", "aff": "University of Oregon;University of Oregon;University of Oregon", "aff_domain": "uoregon.edu;uoregon.edu;uoregon.edu", "position": "PhD student;PhD student;Associate Professor", "bibtex": "@inproceedings{\nyou2023large,\ntitle={Large Language Models Are Better Adversaries: Exploring Generative Clean-Label Backdoor Attacks Against Text Classifiers},\nauthor={Wencong You and Zayd Hammoudeh and Daniel Lowd},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=DTELCDufzE}\n}", "github": "", "project": "", "reviewers": "md88;SQ3s;qdhw", "site": "https://openreview.net/forum?id=DTELCDufzE", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;4;1", "excitement": "3;3;3", "reproducibility": "5;4;0", "correctness": "4;4;3", "rating_avg": 4.0, "confidence_avg": 2.6666666666666665, "excitement_avg": 3.0, "reproducibility_avg": 3.0, "correctness_avg": 3.6666666666666665, "replies_avg": 10, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": ";;", "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Oregon", "aff_unique_dep": "", "aff_unique_url": "https://www.uoregon.edu", "aff_unique_abbr": "UO", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "DTyMi3ReQU", "title": "You Are What You Annotate: Towards Better Models through Annotator Representations", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Annotator disagreement is ubiquitous in natural language processing (NLP) tasks. There are multiple reasons for such disagreements, including the subjectivity of the task, difficult cases, unclear guidelines, and so on. Rather than simply aggregating labels to obtain data annotations, we instead try to directly model the diverse perspectives of the annotators, and explicitly account for annotators' idiosyncrasies in the modeling process by creating representations for each annotator (*annotator embeddings*) and also their annotations (*annotation embeddings*). 
\nIn addition, we propose **TID-8**, **T**he **I**nherent **D**isagreement - **8** dataset, a benchmark that consists of eight existing language understanding datasets that have inherent annotator disagreement.\nWe test our approach on TID-8 and show that our approach helps models learn significantly better from disagreements on six different datasets in TID-8 while increasing model size by fewer than 1\\% parameters. \nBy capturing the unique tendencies and subjectivity of individual annotators through embeddings, our representations prime AI models to be inclusive of diverse viewpoints.", "keywords": "annotator disagreement;annotator representation", "primary_area": "", "supplementary_material": "", "author": "Naihao Deng;Xinliang Frederick Zhang;Siyang Liu;Winston Wu;Lu Wang;Rada Mihalcea", "authorids": "~Naihao_Deng1;~Xinliang_Frederick_Zhang1;~Siyang_Liu1;~Winston_Wu1;~Lu_Wang9;~Rada_Mihalcea1", "gender": "M;M;F;;F;F", "homepage": "https://dnaihao.github.io;https://web.eecs.umich.edu/~xlfzhang/;;;https://web.eecs.umich.edu/~wangluxy/;https://web.eecs.umich.edu/~mihalcea/", "dblp": "303/0640;277/5381;81/4071-3;;49/3800-8;m/RadaMihalcea", "google_scholar": "3_qUtH4AAAAJ;-uGCT5QAAAAJ;https://scholar.google.com/citations?hl=zh-CN;;uczqEdUAAAAJ;https://scholar.google.com.tw/citations?user=UetM7FgAAAAJ", "or_profile": "~Naihao_Deng1;~Xinliang_Frederick_Zhang1;~Siyang_Liu1;~Winston_Wu1;~Lu_Wang9;~Rada_Mihalcea1", "aff": "University of Michigan - Ann Arbor;Bloomberg;University of Michigan - Ann Arbor;;University of Michigan;University of Michigan", "aff_domain": "umich.edu;bloomberg.net;umich.edu;;umich.edu;umich.edu", "position": "PhD student;Intern;PhD student;;Associate Professor;Full Professor", "bibtex": "@inproceedings{\ndeng2023you,\ntitle={You Are What You Annotate: Towards Better Models through Annotator Representations},\nauthor={Naihao Deng and Xinliang Frederick Zhang and Siyang Liu and Winston Wu and Lu Wang and Rada Mihalcea},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=DTyMi3ReQU}\n}", "github": "", "project": "", "reviewers": "6CyZ;Yi2j;nSUZ;Ejmf", "site": "https://openreview.net/forum?id=DTyMi3ReQU", "pdf_size": 0, "rating": "4;4;4;4", "confidence": "3;4;5;3", "excitement": "1;3;4;4", "reproducibility": "3;3;3;4", "correctness": "2;3;3;3", "rating_avg": 4.0, "confidence_avg": 3.75, "excitement_avg": 3.0, "reproducibility_avg": 3.25, "correctness_avg": 2.75, "replies_avg": 13, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0003-0294-2897;;0009-0009-0377-8508;;;0000-0002-0767-6703", "linkedin": "naihao-deng/;frederick-x-zhang/?locale=en_US;;;;", "aff_unique_index": "0;1;0;0;0", "aff_unique_norm": "University of Michigan;Bloomberg", "aff_unique_dep": ";", "aff_unique_url": "https://www.umich.edu;https://www.bloomberg.com", "aff_unique_abbr": "UM;Bloomberg", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Ann Arbor;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "DVDGNFn1Jm", "title": "Reinforced Target-driven Conversational Promotion", "track": "main", "status": "Long Main", "tldr": "", "abstract": "The ability to proactively engage with users towards pitching products is highly desired for conversational assistants. 
However, existing conversational recommendation methods overemphasize acquiring user preferences while ignoring the strategic planning for nudging users towards accepting a designated item. Hence, these methods fail to promote specified items with engaging responses. In this work, we propose a Reinforced Target-driven Conversational Promotion (RTCP) framework for conversational promotion. RTCP integrates short-term and long-term planning via a balanced gating mechanism. Within this mechanism, the dialogue actions are predicted via a knowledge-integrated multi-head attention and guided via reinforcement learning rewards. RTCP then employs action-guided prefix tuning to generate relevant responses. Experimental results demonstrate that our model outperforms state-of-the-art models on both automatic metrics and human evaluation. Moreover, RTCP has a strong capability in quickly adapting to unseen scenarios just by updating prefix parameters without re-training the whole model.", "keywords": "Conversational Promotion;Conversational Recommendation;Target-driven Recommenders", "primary_area": "", "supplementary_material": "", "author": "Huy Quang Dao;Lizi Liao;Dung D. Le;Yuxiang Nie", "authorids": "~Huy_Quang_Dao1;~Lizi_Liao1;~Dung_D._Le2;~Yuxiang_Nie1", "gender": "M;F;M;", "homepage": ";https://liziliao.github.io/;https://andrew-dungle.github.io/;https://jerrrynie.github.io/", "dblp": "213/1835.html;149/1249;186/1477;247/9594", "google_scholar": "sZhoEnYAAAAJ;https://scholar.google.com.sg/citations?user=W2b08EUAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=zh-CN", "or_profile": "~Huy_Quang_Dao1;~Lizi_Liao1;~Dung_D._Le2;~Yuxiang_Nie1", "aff": "FPT Software;Singapore Management University;VinUniversity;Beijing Institute of Technology", "aff_domain": "fsoft.com.vn;smu.edu.sg;vinuni.edu.vn;bit.edu.cn", "position": "Researcher;Assistant Professor;Assistant Professor;MS student", "bibtex": "@inproceedings{\ndao2023reinforced,\ntitle={Reinforced Target-driven Conversational Promotion},\nauthor={Huy Quang Dao and Lizi Liao and Dung D. 
Le and Yuxiang Nie},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=DVDGNFn1Jm}\n}", "github": "", "project": "", "reviewers": "nCqR;N7Uc;pkX6", "site": "https://openreview.net/forum?id=DVDGNFn1Jm", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;3;4", "excitement": "4;3;3", "reproducibility": "4;4;4", "correctness": "4;3;3", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0009-0001-4197-1079", "linkedin": "dao-quang-huy-8ab6381a6/;;;%E5%AE%87%E7%BF%94-%E8%81%82-8a15b216a/", "aff_unique_index": "0;1;2;3", "aff_unique_norm": "FPT Corporation;Singapore Management University;VinUniversity;Beijing Institute of Technology", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.fpt-software.com;https://www.smu.edu.sg;https://vinuni.edu.vn;http://www.bit.edu.cn/", "aff_unique_abbr": "FPT;SMU;VinUni;BIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;2", "aff_country_unique": "Vietnam;Singapore;China" }, { "id": "DgB01RzOqo", "title": "Multilingual Large Language Models Are Not (Yet) Code-Switchers", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Multilingual Large Language Models (LLMs) have recently shown great capabilities in a wide range of tasks, exhibiting state-of-the-art performance through zero-shot or few-shot prompting methods. While there have been extensive studies on their abilities in monolingual tasks, the investigation of their potential in the context of code-switching (CSW), the practice of alternating languages within an utterance, remains relatively uncharted. In this paper, we provide a comprehensive empirical analysis of various multilingual LLMs, benchmarking their performance across four tasks: sentiment analysis, machine translation, summarization and word-level language identification. Our results indicate that despite multilingual LLMs exhibiting promising outcomes in certain tasks using zero or few-shot prompting, they still underperform in comparison to fine-tuned models of much smaller scales. 
We argue that current \"multilingualism\" in LLMs does not inherently imply proficiency with code-switching texts, calling for future research to bridge this discrepancy.", "keywords": "code-switching;benchmark;multilingual large language models", "primary_area": "", "supplementary_material": "", "author": "Ruochen Zhang;Samuel Cahyawijaya;Jan Christian Blaise Cruz;Genta Indra Winata;Alham Fikri Aji", "authorids": "~Ruochen_Zhang1;~Samuel_Cahyawijaya1;~Jan_Christian_Blaise_Cruz1;~Genta_Indra_Winata1;~Alham_Fikri_Aji1", "gender": ";M;M;M;M", "homepage": ";https://samuelcahyawijaya.github.io/;https://blaisecruz.com;https://gentawinata.com/;", "dblp": ";235/2988.html;244/2362;https://dblp.uni-trier.de/pers/hd/w/Winata:Genta_Indra;188/8762", "google_scholar": ";w5w_WZEAAAAJ;iBuxBEUAAAAJ;7QxkToIAAAAJ;0Cyfqv4AAAAJ", "or_profile": "~Ruochen_Zhang1;~Samuel_Cahyawijaya1;~Jan_Christian_Blaise_Cruz1;~Genta_Indra_Winata1;~Alham_Fikri_Aji1", "aff": ";Hong Kong University of Science and Technology;Samsung;Bloomberg;Amazon", "aff_domain": ";ust.hk;samsung.com;bloomberg.net;amazon.com", "position": ";PhD student;Researcher;Researcher;Researcher", "bibtex": "@inproceedings{\nzhang2023multilingual,\ntitle={Multilingual Large Language Models Are Not (Yet) Code-Switchers},\nauthor={Ruochen Zhang and Samuel Cahyawijaya and Jan Christian Blaise Cruz and Genta Indra Winata and Alham Fikri Aji},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=DgB01RzOqo}\n}", "github": "", "project": "", "reviewers": "ajnS;dySR;RRwZ", "site": "https://openreview.net/forum?id=DgB01RzOqo", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;4;4", "excitement": "4;4;3", "reproducibility": "4;3;4", "correctness": "4;4;4", "rating_avg": 5.0, "confidence_avg": 4.0, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-9891-1608;;;", "linkedin": ";samuelcahyawijaya/;;gentaiscool/;", "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Hong Kong University of Science and Technology;Samsung;Bloomberg;Amazon", "aff_unique_dep": ";Samsung;;Amazon.com, Inc.", "aff_unique_url": "https://www.ust.hk;https://www.samsung.com;https://www.bloomberg.com;https://www.amazon.com", "aff_unique_abbr": "HKUST;Samsung;Bloomberg;Amazon", "aff_campus_unique_index": "0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;1;2;2", "aff_country_unique": "China;South Korea;United States" }, { "id": "DgNnVebNPy", "title": "LLMs -- the Good, the Bad or the Indispensable?: A Use Case on Legal Statute Prediction and Legal Judgment Prediction on Indian Court Cases", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "The Large Language Models (LLMs) have impacted many real-life tasks. To examine the efficacy of LLMs in a high-stake domain like law, we have applied state-of-the-art LLMs for two popular tasks: Statute Prediction and Judgment Prediction, on Indian Supreme Court cases. We see that while LLMs exhibit excellent predictive performance in Statute Prediction, their performance dips in Judgment Prediction when compared with many standard models. The explanations generated by LLMs (along with prediction) are of moderate to decent quality. We also see evidence of gender and religious bias in the LLM-predicted results. 
In addition, we present a note from a senior legal expert on the ethical concerns of deploying LLMs in these critical legal tasks.", "keywords": "Legal judgement prediction;Legal Statute prediction;LLMs;explainability;bias;fairness;ethics", "primary_area": "", "supplementary_material": "", "author": "Shaurya Vats;Atharva Zope;Somsubhra De;Anurag Sharma;Upal Bhattacharya;Shubham Kumar Nigam;Shouvik Kumar Guha;Koustav Rudra;Kripabandhu Ghosh", "authorids": "~Shaurya_Vats1;~Atharva_Zope1;~Somsubhra_De1;~Anurag_Sharma2;~Upal_Bhattacharya1;~Shubham_Kumar_Nigam1;~Shouvik_Kumar_Guha1;~Koustav_Rudra1;~Kripabandhu_Ghosh1", "gender": "M;;M;M;M;M;M;M;M", "homepage": ";;;;https://github.com/upalbhattacharya;https://www.cse.iitk.ac.in/users/sknigam/;https://www.nujs.edu/faculty/shouvik-kumar-guha.html;https://sites.google.com/view/krudra5/;https://sites.google.com/view/kripabandhughosh-homepage/home", "dblp": ";;362/8583.html;;312/2717;256/7932;293/9241.html;139/5123.html;74/10289", "google_scholar": ";;;https://scholar.google.com/citations?hl=en;;73t3-rQAAAAJ;QhSkbD0AAAAJ;https://scholar.google.co.in/citations?user=t9LUAUQAAAAJ;https://scholar.google.co.in/citations?user=6CszmskAAAAJ", "or_profile": "~Shaurya_Vats1;~Atharva_Zope1;~Somsubhra_De1;~Anurag_Sharma2;~Upal_Bhattacharya1;~Shubham_Kumar_Nigam1;~Shouvik_Kumar_Guha1;~Koustav_Rudra1;~Kripabandhu_Ghosh1", "aff": "Indian Institute of Technology Kharagpur ;Indian Institute of Science Education & Research (IISER), Kolkata;Indian Institute of Science Education and Research (IISER), Kolkata;IISER Kolkata;Indian Institute of Science Education and Research Kolkata;IIT Kanpur;The West Bengal National University of Juridical Sciences;Indian Institute of Technology (ISM) Dhanbad;Indian Institute of Science Education and Research Kolkata", "aff_domain": "iitkgp.ac.in;iiserkol.ac.in;iiserkol.ac.in;iiserkol.ac.in;iiserkol.ac.in;iitk.ac.in;nujs.edu;iitism.ac.in;iiserkol.ac.in", "position": "Undergrad student;Undergrad student;Intern;MS student;Researcher;PhD student;Associate Professor;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nvats2023llms,\ntitle={{LLM}s -- the Good, the Bad or the Indispensable?: A Use Case on Legal Statute Prediction and Legal Judgment Prediction on Indian Court Cases},\nauthor={Shaurya Vats and Atharva Zope and Somsubhra De and Anurag Sharma and Upal Bhattacharya and Shubham Kumar Nigam and Shouvik Kumar Guha and Koustav Rudra and Kripabandhu Ghosh},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=DgNnVebNPy}\n}", "github": "", "project": "", "reviewers": "8zfw;ZNGP;Je4i", "site": "https://openreview.net/forum?id=DgNnVebNPy", "pdf_size": 0, "rating": "3;3;3", "confidence": "5;4;4", "excitement": "4;3;2", "reproducibility": "5;3;2", "correctness": "3;3;2", "rating_avg": 3.0, "confidence_avg": 4.333333333333333, "excitement_avg": 3.0, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 2.6666666666666665, "replies_avg": 11, "authors#_avg": 9, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;0000-0003-0858-3725;0000-0003-2564-7866;0000-0003-1623-0444;0000-0002-2486-7608;", "linkedin": "shaurya-vats-6a521a231;atharva-zope-21862827a;somsubhrad/;anuragsharma321/;;shubham-kumar-nigam-34670541/;shouvik-kumar-guha-b9396010;;", "aff_unique_index": "0;1;2;2;2;3;4;5;2", "aff_unique_norm": "Indian Institute of Technology Kharagpur;Indian Institute of Science Education & Research;Indian Institute of 
Science Education and Research;Indian Institute of Technology Kanpur;West Bengal National University of Juridical Sciences;Indian Institute of Technology Dhanbad", "aff_unique_dep": ";;;;;", "aff_unique_url": "https://www.iitkgp.ac.in;https://www.iiserkol.ac.in;https://www.iiserkol.ac.in;https://www.iitk.ac.in;https://www.wbnuls.ac.in;https://www.iitdh.ac.in", "aff_unique_abbr": "IIT Kharagpur;IISER Kolkata;IISER Kolkata;IITK;WBNUSJ;IIT (ISM) Dhanbad", "aff_campus_unique_index": "0;1;1;1;1;2;4;1", "aff_campus_unique": "Kharagpur;Kolkata;Kanpur;;Dhanbad", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0", "aff_country_unique": "India" }, { "id": "Dil6z5sZkD", "title": "Adversarial Robustness for Large Language NER models using Disentanglement and Word Attributions", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Large language models (LLM's) have been widely used for several applications such as question answering, text classification and clustering. While the preliminary results across the aforementioned tasks looks promising, recent work has dived deep into LLM's performing poorly for complex Named Entity Recognition (NER) tasks in comparison to fine-tuned pre-trained language models (PLM's). To enhance wider adoption of LLM's, our paper investigates the robustness of such LLM NER models and its instruction fine-tuned variants to adversarial attacks. In particular, we propose a novel attack which relies on disentanglement and word attribution techniques where the former aids in learning an embedding capturing both entity and non-entity influences separately, and the latter aids in identifying important words across both components. This is in stark contrast to most techniques which primarily leverage non-entity words for perturbations limiting the space being explored to synthesize effective adversarial examples. 
Adversarial training results based on our method improve the F1 score over the original LLM NER model by 8% and 18% on the CoNLL-2003 and OntoNotes 5.0 datasets, respectively.", "keywords": "Adversarial Robustness;Adversarial Attacks;Named Entity Recognition;Large Language Models", "primary_area": "", "supplementary_material": "", "author": "Xiaomeng Jin;Bhanukiran Vinzamuri;Sriram Venkatapathy;Heng Ji;Pradeep Natarajan", "authorids": "~Xiaomeng_Jin3;~Bhanukiran_Vinzamuri1;~Sriram_Venkatapathy1;~Heng_Ji3;~Pradeep_Natarajan1", "gender": "F;;M;F;M", "homepage": ";https://researcher.watson.ibm.com/researcher/view.php?person=ibm-Bhanu.Vinzamuri;;http://blender.cs.illinois.edu/hengji.html;", "dblp": ";97/10151;91/1751;;95/5978.html", "google_scholar": "Jd_tsuEAAAAJ;JbI-zokAAAAJ;;z7GCqT4AAAAJ;E1IdmqwAAAAJ", "or_profile": "~Xiaomeng_Jin3;~Bhanukiran_Vinzamuri1;~Sriram_Venkatapathy1;~Heng_Ji3;~Pradeep_Natarajan1", "aff": "University of Illinois, Urbana Champaign;Amazon;;University of Illinois, Urbana-Champaign;Amazon", "aff_domain": "illinois.edu;amazon.com;;uiuc.edu;amazon.com", "position": "PhD student;Applied Scientist;;Full Professor;Principal Researcher", "bibtex": "@inproceedings{\njin2023adversarial,\ntitle={Adversarial Robustness for Large Language {NER} models using Disentanglement and Word Attributions},\nauthor={Xiaomeng Jin and Bhanukiran Vinzamuri and Sriram Venkatapathy and Heng Ji and Pradeep Natarajan},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=Dil6z5sZkD}\n}", "github": "", "project": "", "reviewers": "MbYs;RiQ4;Yix7;fP4e", "site": "https://openreview.net/forum?id=Dil6z5sZkD", "pdf_size": 0, "rating": "3;3;3;3", "confidence": "2;4;4;3", "excitement": "4;3;2;2", "reproducibility": "4;4;4;4", "correctness": "4;4;4;3", "rating_avg": 3.0, "confidence_avg": 3.25, "excitement_avg": 2.75, "reproducibility_avg": 4.0, "correctness_avg": 3.75, "replies_avg": 14, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;", "linkedin": ";;;;", "aff_unique_index": "0;1;2;1", "aff_unique_norm": "University of Illinois Urbana-Champaign;Amazon;University of Illinois", "aff_unique_dep": ";Amazon.com, Inc.;", "aff_unique_url": "https://illinois.edu;https://www.amazon.com;https://illinois.edu", "aff_unique_abbr": "UIUC;Amazon;UIUC", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Urbana-Champaign;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "DjwSceRw7B", "title": "Macedon: Minimizing Representation Coding Rate Reduction for Cross-Lingual Natural Language Understanding", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Cross-lingual natural language understanding (NLU) is one of the fundamental tasks of NLP. The goal is to learn a model which can generalize well on both high-resource and low-resource language data. Recent pre-trained multilingual language models, e.g., multilingual BERT, XLM, have shown impressive performance on cross-lingual NLU tasks. However, such promising results require the use of sufficient training data, which is a difficult condition to satisfy for low-resource languages.\nWhen the data is limited in those low-resource languages, the accuracy of existing models will drop. In light of this challenge, we investigate the important task of how to train the cross-lingual model with abundant high-resource language data and limited low-resource language data. 
Existing methods typically learn language-agnostic representations via adversarial training and mutual information estimation. These approaches may suffer when data is very limited (e.g., low-resource language) because it is challenging to estimate the data distribution accurately. To tackle this issue, we propose a conceptually innovative approach to remove language-associated information via \\textbf{m}inimizing represent\\textbf{a}tion \\textbf{c}oding rate r\\textbf{ed}ucti\\textbf{on} (Macedon). Specifically, Macedon avoids using extra codes to encode language-related information, which is measured by the rate-distortion function. To validate the effectiveness of Macedon, we conduct extensive experiments on three tasks, including paraphrase identification, natural language inference, and query advertisement matching. The\nexperimental results show that the proposed Macedon outperforms state-of-the-art cross-lingual NLU approaches.", "keywords": "cross-lingual;rate reduction", "primary_area": "", "supplementary_material": "", "author": "Haoyu Wang;Yaqing Wang;Huaxiu Yao;Jing Gao", "authorids": "~Haoyu_Wang6;~Yaqing_Wang1;~Huaxiu_Yao1;~Jing_Gao2", "gender": "M;M;M;F", "homepage": "https://sites.google.com/view/haoyuwang/home;https://yaqingwang.github.io/;http://huaxiuyao.mystrikingly.com;https://engineering.purdue.edu/~jinggao/", "dblp": "50/8499-4;147/1393;197/1635;67/4834-4", "google_scholar": "https://scholar.google.com.hk/citations?user=5Lw9_jcAAAAJ;_Rfg2CAAAAAJ;A20BZnQAAAAJ;Ftj1h4cAAAAJ", "or_profile": "~Haoyu_Wang6;~Yaqing_Wang1;~Huaxiu_Yao1;~Jing_Gao2", "aff": "Purdue University;Research, Google;Computer Science Department, Stanford University;Purdue University", "aff_domain": "purdue.edu;research.google.com;cs.stanford.edu;purdue.edu", "position": "PhD student;Research Scientist;Postdoc;Associate Professor", "bibtex": "@inproceedings{\nwang2023macedon,\ntitle={Macedon: Minimizing Representation Coding Rate Reduction for Cross-Lingual Natural Language Understanding},\nauthor={Haoyu Wang and Yaqing Wang and Huaxiu Yao and Jing Gao},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=DjwSceRw7B}\n}", "github": "", "project": "", "reviewers": "C8eb;hT3i;uCfL;AJrg", "site": "https://openreview.net/forum?id=DjwSceRw7B", "pdf_size": 0, "rating": "3;3;3;3", "confidence": "4;4;3;3", "excitement": "3;3;4;3", "reproducibility": "3;3;4;1", "correctness": "3;3;4;3", "rating_avg": 3.0, "confidence_avg": 3.5, "excitement_avg": 3.25, "reproducibility_avg": 2.75, "correctness_avg": 3.25, "replies_avg": 14, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0001-7485-6213;;;", "linkedin": ";;huaxiuyao/;", "aff_unique_index": "0;1;2;0", "aff_unique_norm": "Purdue University;Google;Stanford University", "aff_unique_dep": ";Google Research;Computer Science Department", "aff_unique_url": "https://www.purdue.edu;https://research.google;https://www.stanford.edu", "aff_unique_abbr": "Purdue;Google;Stanford", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Mountain View;Stanford", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "Dl3YgoOh2c", "title": "Identifying Statements Crucial for Awareness of Interpretive Nonsense to Prevent Communication Breakdowns", "track": "main", "status": "Long Main", "tldr": "", "abstract": "During remote conversations, communication breakdowns often occur when a listener misses certain statements. 
Our objective is to prevent such breakdowns by identifying Statements Crucial for Awareness of Interpretive Nonsense (SCAINs). If a listener misses a SCAIN, s/he may interpret subsequent statements differently from the speaker's intended meaning. To identify SCAINs, we adopt a unique approach where we create a dialogue by omitting two consecutive statements from the original dialogue and then generate text to make the following statement more specific. The novelty of the proposed method lies in simulating missing information by processing text with omissions. We validate the effectiveness of SCAINs through evaluation using a dialogue dataset. Furthermore, we demonstrate that SCAINs cannot be identified as merely important statements, highlighting the uniqueness of our proposed method.", "keywords": "Communication breakdowns;dialogue;context;rephrasing;language model", "primary_area": "", "supplementary_material": "", "author": "Tomoyuki Maekawa;Michita Imai", "authorids": "~Tomoyuki_Maekawa1;~Michita_Imai1", "gender": ";M", "homepage": ";https://www.ailab.ics.keio.ac.jp/", "dblp": "204/2318;69/6753", "google_scholar": ";lYqJ_FsAAAAJ", "or_profile": "~Tomoyuki_Maekawa1;~Michita_Imai1", "aff": "Keio University;Keio University, Tokyo Institute of Technology", "aff_domain": "keio.ac.jp;keio.ac.jp", "position": "Postdoc;Full Professor", "bibtex": "@inproceedings{\nmaekawa2023identifying,\ntitle={Identifying Statements Crucial for Awareness of Interpretive Nonsense to Prevent Communication Breakdowns},\nauthor={Tomoyuki Maekawa and Michita Imai},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=Dl3YgoOh2c}\n}", "github": "", "project": "", "reviewers": "e24W;qqvW;g4QE", "site": "https://openreview.net/forum?id=Dl3YgoOh2c", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;3;3", "excitement": "4;2;3", "reproducibility": "3;2;4", "correctness": "4;2;3", "rating_avg": 3.0, "confidence_avg": 3.0, "excitement_avg": 3.0, "reproducibility_avg": 3.0, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0009-0003-0350-1011;", "linkedin": ";", "aff_unique_index": "0;0", "aff_unique_norm": "Keio University", "aff_unique_dep": "", "aff_unique_url": "https://www.keio.ac.jp", "aff_unique_abbr": "Keio", "aff_campus_unique_index": "1", "aff_campus_unique": ";Tokyo", "aff_country_unique_index": "0;0", "aff_country_unique": "Japan" }, { "id": "DlQeSfGYfS", "title": "Focus Your Attention (with Adaptive IIR Filters)", "track": "main", "status": "Long Main", "tldr": "", "abstract": "We present a new layer in which dynamic (i.e., input-dependent) Infinite Impulse Response (IIR) filters of order two are used to process the input sequence prior to applying conventional attention. The input is split into chunks, and the coefficients of these filters are determined based on previous chunks to maintain causality. Despite their relatively low order, the causal adaptive filters are shown to focus attention on the relevant sequence elements. The new layer is grounded in control theory, and is shown to generalize diagonal state-space layers. The layer performs on-par with state-of-the-art networks, with a fraction of their parameters and with time complexity that is sub-quadratic with input size. 
The obtained layer compares favorably\nto layers such as Hyena, GPT2, and Mega, both with respect to the number of parameters and the obtained level of performance on multiple long-range sequence problems.", "keywords": "IIR Filters;dynamic Filtering;Transformers", "primary_area": "", "supplementary_material": "", "author": "Shahar Lutati;Itamar Zimerman;Lior Wolf", "authorids": "~Shahar_Lutati1;~Itamar_Zimerman1;~Lior_Wolf1", "gender": "M;M;M", "homepage": ";;http://www.cs.tau.ac.il/~wolf", "dblp": "292/4297;294/8621;83/4103", "google_scholar": "https://scholar.google.co.il/citations?user=S8gPxXUAAAAJ;01s_DpwAAAAJ;UbFrXTsAAAAJ", "or_profile": "~Shahar_Lutati1;~Itamar_Zimerman1;~Lior_Wolf1", "aff": "Tel Aviv University;International Business Machines;Tel Aviv University", "aff_domain": "tau.ac.il;ibm.com;tau.ac.il", "position": "PhD student;Researcher;Full Professor", "bibtex": "@inproceedings{\nlutati2023focus,\ntitle={Focus Your Attention (with Adaptive {IIR} Filters)},\nauthor={Shahar Lutati and Itamar Zimerman and Lior Wolf},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=DlQeSfGYfS}\n}", "github": "", "project": "", "reviewers": "EHXv;Wq8Q;K69V", "site": "https://openreview.net/forum?id=DlQeSfGYfS", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;3;3", "excitement": "4;3;4", "reproducibility": "3;4;4", "correctness": "3;3;3", "rating_avg": 4.0, "confidence_avg": 3.0, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.0, "replies_avg": 10, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0001-8321-0609;0000-0001-5578-8892", "linkedin": "shahar-lutati-4b4863118/;;", "aff_unique_index": "0;1;0", "aff_unique_norm": "Tel Aviv University;International Business Machines Corporation", "aff_unique_dep": ";", "aff_unique_url": "https://www.tau.ac.il;https://www.ibm.com", "aff_unique_abbr": "TAU;IBM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "Israel;United States" }, { "id": "DmrIEHJxN5", "title": "From Complex to Simple: Unraveling the Cognitive Tree for Reasoning with Small Language Models", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Reasoning is a distinctive human capacity, enabling us to address complex problems by breaking them down into a series of manageable cognitive steps. Yet, complex logical reasoning is still cumbersome for language models. Based on the dual process theory in cognitive science, we are the first to unravel the cognitive reasoning abilities of language models. Our framework employs an iterative methodology to construct a Cognitive Tree (CogTree). The root node of this tree represents the initial query, while the leaf nodes consist of straightforward questions that can be answered directly. This construction involves two main components: the implicit extraction module (referred to as the intuitive system) and the explicit reasoning module (referred to as the reflective system). The intuitive system rapidly generates multiple responses by utilizing in-context examples, while the reflective system scores these responses using comparative learning. 
The scores guide the intuitive system in its subsequent generation step. Our experimental results on two popular and challenging reasoning tasks indicate that it is possible to achieve a performance level comparable to that of GPT-3.5 (with 175B parameters), using a significantly smaller language model (<=7B) that contains fewer than 5% of the parameters of GPT-3.5.", "keywords": "Reasoning;Cognition;Dual Process Theory", "primary_area": "", "supplementary_material": "", "author": "Yan Junbing;Chengyu Wang;Taolin Zhang;Xiaofeng He;Jun Huang;Wei Zhang", "authorids": "~Yan_Junbing1;~Chengyu_Wang1;~Taolin_Zhang2;~Xiaofeng_He2;~Jun_Huang4;~Wei_Zhang27", "gender": ";M;;M;M;M", "homepage": ";https://chywang.github.io/;;;;https://weizhangltt.github.io/", "dblp": ";135/5147-1;;;51/5022-7;10/4661-56", "google_scholar": ";_AVfRnQAAAAJ;;;;DKcduF0AAAAJ", "or_profile": "~Yan_Junbing1;~Chengyu_Wang1;~Taolin_Zhang2;~Xiaofeng_He2;~Jun_Huang4;~Wei_Zhang27", "aff": ";Alibaba Group;;East China Normal University;Alibaba Group;East China Normal University", "aff_domain": ";alibaba-inc.com;;ecnu.edu.cn;alibaba.com;ecnu.edu.cn", "position": ";Researcher;;Full Professor;Researcher;Full Professor", "bibtex": "@inproceedings{\njunbing2023from,\ntitle={From Complex to Simple: Unraveling the Cognitive Tree for Reasoning with Small Language Models},\nauthor={Yan Junbing and Chengyu Wang and Taolin Zhang and Xiaofeng He and Jun Huang and Wei Zhang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=DmrIEHJxN5}\n}", "github": "", "project": "", "reviewers": "UpNQ;BP1d;gKx3", "site": "https://openreview.net/forum?id=DmrIEHJxN5", "pdf_size": 0, "rating": "3;3;3", "confidence": "2;4;4", "excitement": "4;4;2", "reproducibility": "3;4;4", "correctness": "3;4;2", "rating_avg": 3.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0000-0002-6911-348X;;0000-0001-6763-8146", "linkedin": ";;;;https://www.linkedin.cn/injobs/in/%E4%BF%8A-%E9%BB%84-b28b6612b;", "aff_unique_index": "0;1;0;1", "aff_unique_norm": "Alibaba Group;East China Normal University", "aff_unique_dep": ";", "aff_unique_url": "https://www.alibaba.com;http://www.ecnu.edu.cn", "aff_unique_abbr": "Alibaba;ECNU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "DpNUrB6SeZ", "title": "Multi-Source Multi-Type Knowledge Exploration and Exploitation for Dialogue Generation", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Open-domain multi-turn dialogue generation encounters the significant challenge of lacking various types of knowledge from diverse sources. Existing models typically focus on identifying specific types of dialogue knowledge and utilize corresponding datasets for training. However, this approach often leads to limited generalization capabilities and increased computational resource requirements. Recently, large language models (LLMs) have shown impressive performance on natural language processing tasks. To harness the knowledge storage of LLMs, we propose a framework named KnowEE that explores multi-source multi-type knowledge from LLMs by leveraging diverse datasets and then exploits the obtained knowledge for response generation. 
Our framework comprises two phases: First, we leverage five external datasets encompassing various types of knowledge to extract the samples most relevant to the dialogue context, which serve as prompts to generate the corresponding type of knowledge; Second, we inject the acquired knowledge into the ongoing dialogue context in fine-grained and coarse-grained manners, which is then fed into LLMs to generate the final dialogue response. Both automatic and manual evaluation results validate the effectiveness of our framework in exploring and exploiting multi-source multi-type knowledge to generate coherent, informative, and fluent responses.", "keywords": "Dialogue Generation;Natural Language Processing;Dialogue Knowledge", "primary_area": "", "supplementary_material": "", "author": "Xuanfan Ni;Hongliang Dai;Zhaochun Ren;Piji Li", "authorids": "~Xuanfan_Ni2;~Hongliang_Dai1;~Zhaochun_Ren1;~Piji_Li1", "gender": "M;M;M;M", "homepage": "https://patrick-ni.github.io/;;https://renzhaochun.github.io/;http://lipiji.com/", "dblp": ";;58/10440;77/8278.html", "google_scholar": "https://scholar.google.com.hk/citations?user=LgeFCiQAAAAJ;;fPcIPt0AAAAJ;88ZlyicAAAAJ", "or_profile": "~Xuanfan_Ni2;~Hongliang_Dai1;~Zhaochun_Ren1;~Piji_Li1", "aff": "Nanjing University of Aeronautics and Astronautics;Nanjing University of Aeronautics and Astronautics;Shandong University;Nanjing University of Aeronautics and Astronautics", "aff_domain": "nuaa.edu.cn;nuaa.edu.cn;sdu.edu.cn;nuaa.edu.cn", "position": "MS student;Assistant Professor;Full Professor;Professor", "bibtex": "@inproceedings{\nni2023multisource,\ntitle={Multi-Source Multi-Type Knowledge Exploration and Exploitation for Dialogue Generation},\nauthor={Xuanfan Ni and Hongliang Dai and Zhaochun Ren and Piji Li},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=DpNUrB6SeZ}\n}", "github": "", "project": "", "reviewers": "keZg;eFw1;Q3qA", "site": "https://openreview.net/forum?id=DpNUrB6SeZ", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;4;4", "excitement": "3;4;3", "reproducibility": "4;4;3", "correctness": "4;4;3", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0002-9076-6565;", "linkedin": ";;zhaochun-ren-460491296/?locale=nl_NL;", "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Nanjing University of Aeronautics and Astronautics;Shandong University", "aff_unique_dep": ";", "aff_unique_url": "http://www.nuaa.edu.cn;http://www.sdu.edu.cn", "aff_unique_abbr": "NUAA;SDU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "Dq023aV4Ih", "title": "Hallucination Mitigation in Natural Language Generation from Large-Scale Open-Domain Knowledge Graphs", "track": "main", "status": "Long Main", "tldr": "", "abstract": "In generating natural language descriptions for knowledge graph triples, prior works used either small-scale, human-annotated datasets or datasets with limited variety of graph shapes, e.g., those having mostly star graphs. Graph-to-text models trained and evaluated on such datasets are largely not assessed for more realistic large-scale, open-domain settings. We introduce a new dataset, GraphNarrative, to fill this gap. 
Fine-tuning transformer-based pre-trained language models has achieved state-of-the-art performance among graph-to-text models. However, this method suffers from information hallucination---the generated text may contain fabricated facts not present in input graphs. We propose a novel approach that, given a graph-sentence pair in GraphNarrative, trims the sentence to eliminate portions that are not present in the corresponding graph, by utilizing the sentence's dependency parse tree. Our experiment results verify this approach using models trained on GraphNarrative and existing datasets. The dataset, source code, and trained models are released at https://github.com/idirlab/graphnarrator.", "keywords": "graph-to-text generation;knowledge graphs", "primary_area": "", "supplementary_material": "", "author": "Xiao Shi;Zhengyuan Zhu;Zeyu Zhang;Chengkai Li", "authorids": "~Xiao_Shi2;~Zhengyuan_Zhu2;~Zeyu_Zhang10;~Chengkai_Li1", "gender": "F;M;M;M", "homepage": ";https://824zzy.github.io/;;http://ranger.uta.edu/~cli/", "dblp": ";;;14/3692", "google_scholar": "4r7AJPMAAAAJ;;ncDgBMEAAAAJ;https://scholar.google.com.tw/citations?user=v8ZQDf8AAAAJ", "or_profile": "~Xiao_Shi2;~Zhengyuan_Zhu2;~Zeyu_Zhang10;~Chengkai_Li1", "aff": "University of Texas at Arlington;University of Texas at Arlington;University of Texas at Arlington, University of Texas at Arlington;University of Texas at Arlington", "aff_domain": "mavs.uta.edu;cse.uta.edu;mavs.uta.edu;uta.edu", "position": "PhD student;PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nshi2023hallucination,\ntitle={Hallucination Mitigation in Natural Language Generation from Large-Scale Open-Domain Knowledge Graphs},\nauthor={Xiao Shi and Zhengyuan Zhu and Zeyu Zhang and Chengkai Li},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=Dq023aV4Ih}\n}", "github": "", "project": "", "reviewers": "hPGF;RttK;TcQj", "site": "https://openreview.net/forum?id=Dq023aV4Ih", "pdf_size": 0, "rating": "4;4;4", "confidence": "5;4;4", "excitement": "3;4;3", "reproducibility": "4;4;4", "correctness": "3;4;3", "rating_avg": 4.0, "confidence_avg": 4.333333333333333, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0009-0005-8884-376X;0009-0002-8492-0551;0009-0007-8010-0544;", "linkedin": "xiao-shi-66299116a/;;;", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Texas at Arlington", "aff_unique_dep": "", "aff_unique_url": "https://www.uta.edu", "aff_unique_abbr": "UTA", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Arlington;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "Dqg9SLOXZu", "title": "Multi-Source Probing for Open-Domain Conversational Understanding", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Dialogue comprehension and generation are vital to the success of open-domain dialogue systems. Although pre-trained generative conversation models have made significant progress in generating fluent responses, people have difficulty judging whether they understand and efficiently model the contextual information of the conversation. In this study, we propose a Multi-Source Probing (MSP) method to probe the dialogue comprehension abilities of open-domain dialogue models. 
MSP aggregates features from multiple sources to accomplish diverse task goals and conducts downstream tasks in a generative manner that is consistent with dialogue model pre-training to leverage model capabilities. We conduct probing experiments on seven tasks that require various dialogue comprehension skills, based on the internal representations encoded by dialogue models. Experimental results show that open-domain dialogue models can encode semantic information in the intermediate hidden states, which facilitates dialogue comprehension tasks. Models of different scales and structures possess different conversational understanding capabilities. Our findings encourage a comprehensive evaluation and design of open-domain dialogue models.", "keywords": "Open-domain dialogue systems;Dialogue comprehension;Dialogue probing;Prompt Learning", "primary_area": "", "supplementary_material": "", "author": "Yuanxi Li;Hao Zhou;Jie Zhou;Minlie Huang", "authorids": "~Yuanxi_Li1;~Hao_Zhou8;~Jie_Zhou8;~Minlie_Huang1", "gender": "M;M;M;M", "homepage": "https://yuanxili1.github.io/;;;http://coai.cs.tsinghua.edu.cn/hml", "dblp": ";;00/5012-16;", "google_scholar": ";q3WaozcAAAAJ;https://scholar.google.com.hk/citations?user=OijxQCMAAAAJ;https://scholar.google.com/citations?hl=zh-CN", "or_profile": "~Yuanxi_Li1;~Hao_Zhou8;~Jie_Zhou8;~Minlie_Huang1", "aff": "University of Illinois, Urbana Champaign;Tencent;WeChat AI, Tencent Inc.;Tsinghua University", "aff_domain": "illinois.edu;tencent.com;tencent.com;tsinghua.edu.cn", "position": "Undergrad student;Researcher;Principal Researcher;Full Professor", "bibtex": "@inproceedings{\nli2023multisource,\ntitle={Multi-Source Probing for Open-Domain Conversational Understanding},\nauthor={Yuanxi Li and Hao Zhou and Jie Zhou and Minlie Huang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=Dqg9SLOXZu}\n}", "github": "", "project": "", "reviewers": "i7vA;vySG;PXn6;qTdP", "site": "https://openreview.net/forum?id=Dqg9SLOXZu", "pdf_size": 0, "rating": "4;4;4;4", "confidence": "2;4;4;3", "excitement": "2;4;3;2", "reproducibility": "5;4;4;3", "correctness": "4;3;3;3", "rating_avg": 4.0, "confidence_avg": 3.25, "excitement_avg": 2.75, "reproducibility_avg": 4.0, "correctness_avg": 3.25, "replies_avg": 14, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0002-5899-5165;", "linkedin": "yuanxi-ewan-li/;;;", "aff_unique_index": "0;1;1;2", "aff_unique_norm": "University of Illinois Urbana-Champaign;Tencent;Tsinghua University", "aff_unique_dep": ";Tencent Holdings Limited;", "aff_unique_url": "https://illinois.edu;https://www.tencent.com;https://www.tsinghua.edu.cn", "aff_unique_abbr": "UIUC;Tencent;THU", "aff_campus_unique_index": "0", "aff_campus_unique": "Urbana-Champaign;", "aff_country_unique_index": "0;1;1;1", "aff_country_unique": "United States;China" }, { "id": "DxYDP3B31K", "title": "Enhancing Generative Retrieval with Reinforcement Learning from Relevance Feedback", "track": "main", "status": "Long Main", "tldr": "", "abstract": "The recent advent of end-to-end generative retrieval marks a significant shift in document retrieval methods, leveraging differentiable search indexes to directly produce relevant document identifiers (docids) in response to a specific query. 
Nevertheless, this approach faces two fundamental challenges: (i) a discrepancy between the token-level probabilistic optimization and the broader document-level relevance estimation; (ii) an overemphasis on top-1 results at the expense of overall ranking quality. To tackle these challenges, we propose a generative retrieval model with reinforcement learning from relevance feedback, which aims to align token-level docid generation with document-level relevance estimation. The training process incorporates three stages: supervised fine-tuning, relevance reward model training, and reinforced learning-to-rank from relevance feedback. To train a high-quality reward model, we define \"relevance\" under three progressive scenarios, which collectively offer a comprehensive evaluation of the document relevance. Experiments conducted on two benchmark datasets demonstrate the effectiveness of our proposed approach.", "keywords": "Generative retrieval; Reinforcement learning; Document retrieval;", "primary_area": "", "supplementary_material": "", "author": "Yujia Zhou;Zhicheng Dou;Ji-Rong Wen", "authorids": "~Yujia_Zhou1;~Zhicheng_Dou1;~Ji-Rong_Wen1", "gender": "M;;M", "homepage": "https://www.zhouyujia.cn/;https://playbigdata.ruc.edu.cn/dou;https://gsai.ruc.edu.cn/english/jrwen", "dblp": "https://dblp.uni-trier.de/pid/166/2544-2.html;18/5740;w/JRWen", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;ChCjAAwAAAAJ;tbxCHJgAAAAJ", "or_profile": "~Yujia_Zhou1;~Zhicheng_Dou1;~Ji-Rong_Wen1", "aff": ";Renmin University of China;Renmin University of China", "aff_domain": ";ruc.edu.cn;ruc.edu.cn", "position": ";Full Professor;Full Professor", "bibtex": "@inproceedings{\nzhou2023enhancing,\ntitle={Enhancing Generative Retrieval with Reinforcement Learning from Relevance Feedback},\nauthor={Yujia Zhou and Zhicheng Dou and Ji-Rong Wen},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=DxYDP3B31K}\n}", "github": "", "project": "", "reviewers": "q2Hf;pWtF;UusY", "site": "https://openreview.net/forum?id=DxYDP3B31K", "pdf_size": 0, "rating": "5;5;5", "confidence": "3;2;2", "excitement": "4;4;4", "reproducibility": "3;4;3", "correctness": "3;4;4", "rating_avg": 5.0, "confidence_avg": 2.3333333333333335, "excitement_avg": 4.0, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-3530-3787;0000-0002-9781-948X;0000-0002-9777-9676", "linkedin": ";;", "aff_unique_index": "0;0", "aff_unique_norm": "Renmin University of China", "aff_unique_dep": "", "aff_unique_url": "http://www.ruc.edu.cn", "aff_unique_abbr": "RUC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "id": "Dy2mbQIdMz", "title": "DetectLLM: Leveraging Log Rank Information for Zero-Shot Detection of Machine-Generated Text", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "With the rapid progress of Large language models (LLMs) and the huge amount of text they generate, it becomes impractical to manually distinguish whether a text is machine-generated. The growing use of LLMs in social media and education, prompts us to develop methods to detect machine-generated text, preventing malicious use such as plagiarism, misinformation, and propaganda. 
In this paper, we introduce two novel zero-shot methods for detecting machine-generated text by leveraging the Log-Rank information. One is called DetectLLM-LRR, which is fast and efficient, and the other is called DetectLLM-NPR, which is more accurate, but slower due to the need for perturbations. Our experiments on three datasets and seven language models show that our proposed methods improve over the state of the art by 3.9 and 1.75 AUROC points absolute. Moreover, DetectLLM-NPR needs fewer perturbations than previous work to achieve the same level of performance, which makes it more practical for real-world use. We also investigate the efficiency-performance trade-off based on users' preference for these two measures and provide intuition for using them in practice effectively. We release the data and the code of both methods in https://github.com/mbzuai-nlp/DetectLLM.", "keywords": "Machine-generated Text; Large Language models; LLMs; zero-shot", "primary_area": "", "supplementary_material": "", "author": "Jinyan Su;Terry Yue Zhuo;Di Wang;Preslav Nakov", "authorids": "~Jinyan_Su1;~Terry_Yue_Zhuo1;~Di_Wang1;~Preslav_Nakov2", "gender": "F;;;M", "homepage": "https://jinyansu1.github.io/;;;https://mbzuai.ac.ae/study/faculty/preslav-nakov/", "dblp": ";;;https://dblp.uni-trier.de/pid/19/1947", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;;;DfXsKZ4AAAAJ", "or_profile": "~Jinyan_Su1;~Terry_Yue_Zhuo1;~Di_Wang1;~Preslav_Nakov2", "aff": "Cornell University;;;Mohamed bin Zayed University of Artificial Intelligence", "aff_domain": "cornell.edu;;;mbzuai.ac.ae", "position": "PhD student;;;Full Professor", "bibtex": "@inproceedings{\nsu2023detectllm,\ntitle={Detect{LLM}: Leveraging Log Rank Information for Zero-Shot Detection of Machine-Generated Text},\nauthor={Jinyan Su and Terry Yue Zhuo and Di Wang and Preslav Nakov},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=Dy2mbQIdMz}\n}", "github": "", "project": "", "reviewers": "712P;CKfz;1rWH", "site": "https://openreview.net/forum?id=Dy2mbQIdMz", "pdf_size": 0, "rating": "3;3;3", "confidence": "2;5;1", "excitement": "3;3;4", "reproducibility": "4;4;4", "correctness": "3;3;4", "rating_avg": 3.0, "confidence_avg": 2.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0000-0002-3600-1510", "linkedin": ";;;preslavnakov/", "aff_unique_index": "0;1", "aff_unique_norm": "Cornell University;Mohamed bin Zayed University of Artificial Intelligence", "aff_unique_dep": ";", "aff_unique_url": "https://www.cornell.edu;https://mbzuai.ac.ae", "aff_unique_abbr": "Cornell;MBZUAI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "United States;United Arab Emirates" }, { "id": "DzCc4mpH1m", "title": "Faster Minimum Bayes Risk Decoding with Confidence-based Pruning", "track": "main", "status": "Short Main", "tldr": "", "abstract": "Minimum Bayes risk (MBR) decoding outputs the hypothesis with the highest expected utility over the model distribution for some utility function. It has been shown to improve accuracy over beam search in conditional language generation problems and especially neural machine translation, in both human and automatic evaluations. 
However, the standard sampling-based algorithm for MBR is substantially more computationally expensive than beam search, requiring a large number of samples as well as a quadratic number of calls to the utility function, limiting its applicability. We describe an algorithm for MBR which gradually grows the number of samples used to estimate the utility while pruning hypotheses that are unlikely to have the highest utility according to confidence estimates obtained with bootstrap sampling. Our method requires fewer samples and drastically reduces the number of calls to the utility function compared to standard MBR while being statistically indistinguishable in terms of accuracy. We demonstrate the effectiveness of our approach in experiments on three language pairs, using chrF++ and COMET as utility/evaluation metrics.", "keywords": "Machine translation;minimum Bayes risk decoding", "primary_area": "", "supplementary_material": "", "author": "Julius Cheng;Andreas Vlachos", "authorids": "~Julius_Cheng1;~Andreas_Vlachos1", "gender": "M;M", "homepage": ";http://andreasvlachos.github.io/", "dblp": ";18/1071-1", "google_scholar": ";https://scholar.google.es/citations?user=XjWnyM4AAAAJ", "or_profile": "~Julius_Cheng1;~Andreas_Vlachos1", "aff": "University of Cambridge;University of Cambridge", "aff_domain": "cam.ac.uk;cam.ac.uk", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\ncheng2023faster,\ntitle={Faster Minimum Bayes Risk Decoding with Confidence-based Pruning},\nauthor={Julius Cheng and Andreas Vlachos},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=DzCc4mpH1m}\n}", "github": "", "project": "", "reviewers": "urSQ;JEUK;GsRs", "site": "https://openreview.net/forum?id=DzCc4mpH1m", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;4;5", "excitement": "4;4;4", "reproducibility": "3;4;3", "correctness": "5;5;4", "rating_avg": 5.0, "confidence_avg": 4.333333333333333, "excitement_avg": 4.0, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 4.666666666666667, "replies_avg": 11, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0003-2123-5071", "linkedin": "julius-cheng-0040799/;andreas-vlachos-70ab391", "aff_unique_index": "0;0", "aff_unique_norm": "University of Cambridge", "aff_unique_dep": "", "aff_unique_url": "https://www.cam.ac.uk", "aff_unique_abbr": "Cambridge", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Cambridge", "aff_country_unique_index": "0;0", "aff_country_unique": "United Kingdom" }, { "id": "E4ebDehO3O", "title": "Not All Languages Are Created Equal in LLMs: Improving Multilingual Capability by Cross-Lingual-Thought Prompting", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Large language models (LLMs) demonstrate impressive multilingual capability, but their performance varies substantially across different languages. In this work, we introduce a simple yet effective method, called cross-lingual-thought prompting (XLT), to systematically improve the multilingual capability of LLMs. Specifically, XLT is a generic template prompt that stimulates cross-lingual and logical reasoning skills to enhance task performance across languages. We conduct comprehensive evaluations on 7 typical benchmarks related to reasoning, understanding, and generation tasks, covering both high-resource and low-resource languages. 
Experimental results show that XLT not only remarkably enhances the performance of various multilingual tasks but also significantly reduces the gap between the average performance and the best performance of each task in different languages. Notably, XLT brings over 10 points of average improvement in arithmetic reasoning and open-domain question-answering tasks.", "keywords": "Large language models; multilingual capability; cross-lingual-thought prompting;", "primary_area": "", "supplementary_material": "", "author": "Haoyang Huang;Tianyi Tang;Dongdong Zhang;Xin Zhao;Ting Song;Yan Xia;Furu Wei", "authorids": "~Haoyang_Huang1;~Tianyi_Tang1;~Dongdong_Zhang4;~Xin_Zhao10;~Ting_Song2;~Yan_Xia7;~Furu_Wei1", "gender": "M;M;M;M;M;M;M", "homepage": ";https://steventang1998.github.io/;https://www.microsoft.com/en-us/research/people/dozhang/;https://gsai.ruc.edu.cn/addons/teacher/index/info.html?user_id=5&ruccode=20140041&ln=cn;https://github.com/tsong-ms;https://www.microsoft.com/en-us/research/people/yanxia/;https://www.microsoft.com/en-us/research/people/fuwei/", "dblp": ";276/9353;02/621-1.html;https://dblp.uni-trier.de/pid/52/8700.html;;;72/5870", "google_scholar": ";t1mRUvQAAAAJ;w2qu71oAAAAJ;JNhNacoAAAAJ;;;G-V1VpwAAAAJ", "or_profile": "~Haoyang_Huang1;~Tianyi_Tang1;~Dongdong_Zhang4;~Xin_Zhao10;~Ting_Song2;~Yan_Xia7;~Furu_Wei1", "aff": "Microsoft Research Asia;Renmin University of China;Microsoft Research Asia;Renmin University of China;;Microsoft Research;Microsoft Research", "aff_domain": "microsoft.com;ruc.edu.cn;microsoft.com;ruc.edu.cn;;research.microsoft.com;microsoft.com", "position": "FTE;MS student;Researcher;Full Professor;;Researcher;Distinguished Scientist", "bibtex": "@inproceedings{\nhuang2023not,\ntitle={Not All Languages Are Created Equal in {LLM}s: Improving Multilingual Capability by Cross-Lingual-Thought Prompting},\nauthor={Haoyang Huang and Tianyi Tang and Dongdong Zhang and Xin Zhao and Ting Song and Yan Xia and Furu Wei},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=E4ebDehO3O}\n}", "github": "", "project": "", "reviewers": "1a4U;eG1s;N1oa", "site": "https://openreview.net/forum?id=E4ebDehO3O", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;4;4", "excitement": "3;2;3", "reproducibility": "4;4;4", "correctness": "3;3;3", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 2.6666666666666665, "reproducibility_avg": 4.0, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0000-0002-8333-6196;;;", "linkedin": "%E6%B5%A9%E6%B4%8B-%E9%BB%84-77a59016a/;;;;;;", "aff_unique_index": "0;1;0;1;0;0", "aff_unique_norm": "Microsoft;Renmin University of China", "aff_unique_dep": "Research;", "aff_unique_url": "https://www.microsoft.com/en-us/research/group/asia;http://www.ruc.edu.cn", "aff_unique_abbr": "MSR Asia;RUC", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Asia;", "aff_country_unique_index": "0;0;0;0;1;1", "aff_country_unique": "China;United States" }, { "id": "E5r96sfKO0", "title": "AutoTrial: Prompting Language Models for Clinical Trial Design", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Clinical trials are critical for drug development. Constructing the appropriate eligibility criteria (i.e., the inclusion/exclusion criteria for patient recruitment) is essential for the trial's success. 
Proper design of clinical trial protocols should consider similar precedent trials and their eligibility criteria to ensure sufficient patient coverage. In this paper, we present a method named AutoTrial to aid the design of clinical eligibility criteria using language models. It allows (1) controllable generation under instructions via a hybrid of discrete and neural prompting, (2) scalable knowledge incorporation via in-context learning, and (3) explicit reasoning chains to provide rationales for understanding the outputs. Experiments on over 70K clinical trials verify that AutoTrial generates high-quality criteria texts that are fluent and coherent, with high accuracy in capturing the clinical concepts relevant to the target trial. It is noteworthy that our method, with a much smaller parameter size, achieves a winning rate of around 60% against the GPT-3.5 baselines in human evaluations.", "keywords": "Drug Development;Clinical Trial;Large Language Model", "primary_area": "", "supplementary_material": "", "author": "Zifeng Wang;Cao Xiao;Jimeng Sun", "authorids": "~Zifeng_Wang3;~Cao_Xiao2;~Jimeng_Sun3", "gender": "M;F;", "homepage": "https://zifengwang.xyz;https://sites.google.com/view/danicaxiao/home;http://sunlab.org", "dblp": ";170/1833;", "google_scholar": "kMlWwTAAAAAJ;ahaV25EAAAAJ;9jmmp5sAAAAJ", "or_profile": "~Zifeng_Wang3;~Cao_Xiao2;~Jimeng_Sun3", "aff": "University of Illinois, Urbana Champaign;Relativity;Georgia Institute of Technology", "aff_domain": "illinois.edu;relativity.com;gatech.edu", "position": "PhD student;VP of ML and NLP;Associate Professor", "bibtex": "@inproceedings{\nwang2023autotrial,\ntitle={AutoTrial: Prompting Language Models for Clinical Trial Design},\nauthor={Zifeng Wang and Cao Xiao and Jimeng Sun},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=E5r96sfKO0}\n}", "github": "", "project": "", "reviewers": "JAD5;ZRCV;AGTQ", "site": "https://openreview.net/forum?id=E5r96sfKO0", "pdf_size": 0, "rating": "4;4;4", "confidence": "1;4;4", "excitement": "3;4;3", "reproducibility": "1;4;2", "correctness": "3;4;3", "rating_avg": 4.0, "confidence_avg": 3.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 2.3333333333333335, "correctness_avg": 3.3333333333333335, "replies_avg": 10, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0003-1512-6426", "linkedin": ";caoxiao/;jimengsun/", "aff_unique_index": "0;1;2", "aff_unique_norm": "University of Illinois Urbana-Champaign;Relativity;Georgia Institute of Technology", "aff_unique_dep": ";;", "aff_unique_url": "https://illinois.edu;https://www.relativity.com;https://www.gatech.edu", "aff_unique_abbr": "UIUC;;Georgia Tech", "aff_campus_unique_index": "0", "aff_campus_unique": "Urbana-Champaign;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "E9dH0BP5VW", "title": "Scaling Laws vs Model Architectures: How does Inductive Bias Influence Scaling?", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "There has been a lot of interest in the scaling properties of Transformer models. However, not much has been done to investigate the scaling properties of different inductive biases and model architectures. Do model architectures scale differently? If so, how does inductive bias affect scaling behaviour? How does this influence upstream (pretraining) and downstream (transfer)? 
This paper conducts a systematic study of the scaling behaviour of ten diverse model architectures such as Transformers, Switch Transformers, Universal Transformers, Dynamic convolutions, Performers, and recently proposed MLP-Mixers. Via extensive experiments, we show that (1) architecture is indeed an important consideration when performing scaling and (2) the best performing model can fluctuate at different scales. We believe that the findings outlined in this work have significant implications for how model architectures are currently evaluated in the community.", "keywords": "language models;architectures;scaling laws", "primary_area": "", "supplementary_material": "", "author": "Yi Tay;Mostafa Dehghani;Samira Abnar;Hyung Won Chung;William Fedus;Jinfeng Rao;Sharan Narang;Vinh Q. Tran;Dani Yogatama;Donald Metzler", "authorids": "~Yi_Tay1;~Mostafa_Dehghani1;~Samira_Abnar1;~Hyung_Won_Chung1;~William_Fedus2;~Jinfeng_Rao2;~Sharan_Narang1;~Vinh_Q._Tran1;~Dani_Yogatama2;~Donald_Metzler1", "gender": "M;M;Unspecified;M;;;M;M;;M", "homepage": "http://yitay.net;http://mostafadehghani.com/;https://samiraabnar.github.io/;;;;;https://vqtran.github.io;;https://research.google/people/DonaldMetzler/", "dblp": ";125/4062;150/5405;;;;;77/2885-2.html;;95/2272", "google_scholar": "VBclY_cAAAAJ;https://scholar.google.nl/citations?user=MiHOX3QAAAAJ;https://scholar.google.nl/citations?user=jbxwjgMAAAAJ;1CAlXvYAAAAJ;;;CWOixywAAAAJ;ot3WsOwAAAAJ;;bmXpOd8AAAAJ", "or_profile": "~Yi_Tay1;~Mostafa_Dehghani1;~Samira_Abnar1;~Hyung_Won_Chung1;~William_Fedus2;~Jinfeng_Rao2;~Sharan_Narang1;~Vinh_Q._Tran1;~Dani_Yogatama2;~Donald_Metzler1", "aff": "Google;Google DeepMind;Apple;Google Brain;;;Meta;Google;;Google", "aff_domain": "google.com;google.com;apple.com;google.com;;;meta.com;google.com;;google.com", "position": "Research Scientist;Research Scientist;Researcher;Researcher;;;Researcher;Researcher;;Research Scientist", "bibtex": "@inproceedings{\ntay2023scaling,\ntitle={Scaling Laws vs Model Architectures: How does Inductive Bias Influence Scaling?},\nauthor={Yi Tay and Mostafa Dehghani and Samira Abnar and Hyung Won Chung and William Fedus and Jinfeng Rao and Sharan Narang and Vinh Q. 
Tran and Dani Yogatama and Donald Metzler},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=E9dH0BP5VW}\n}", "github": "", "project": "", "reviewers": "1kqg;MfUv;rySs;n3js", "site": "https://openreview.net/forum?id=E9dH0BP5VW", "pdf_size": 0, "rating": "3;3;3;3", "confidence": "3;3;4;3", "excitement": "3;4;3;4", "reproducibility": "3;2;2;2", "correctness": "3;4;3;4", "rating_avg": 3.0, "confidence_avg": 3.25, "excitement_avg": 3.5, "reproducibility_avg": 2.25, "correctness_avg": 3.5, "replies_avg": 14, "authors#_avg": 10, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;;;;;0000-0003-4276-6269", "linkedin": ";;;;;;;vinh-tran-32597468/;;donmetzler/", "aff_unique_index": "0;0;1;0;2;0;0", "aff_unique_norm": "Google;Apple;Meta", "aff_unique_dep": "Google;Apple Inc.;Meta Platforms, Inc.", "aff_unique_url": "https://www.google.com;https://www.apple.com;https://meta.com", "aff_unique_abbr": "Google;Apple;Meta", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;1;0;0;0;0;0", "aff_country_unique": "United States;United Kingdom" }, { "id": "EDuKP7DqCk", "title": "Text Embeddings Reveal (Almost) As Much As Text", "track": "main", "status": "Long Main", "tldr": "", "abstract": "How much private information do text embeddings reveal about the original text? We investigate the problem of embedding \\textit{inversion}, reconstructing the full text represented in dense text embeddings. We frame the problem as controlled generation: generating text that, when reembedded, is close to a fixed point in latent space. We find that although a naive model conditioned on the embedding performs poorly, a multi-step method that iteratively corrects and re-embeds text is able to recover 92% of 32-token text inputs exactly. 
We train our model to decode text embeddings from two state-of-the-art embedding models, and also show that our model can recover important personal information (full names) from a dataset of clinical notes.", "keywords": "text embeddings;text retrieval;privacy;inversion;leakage attack", "primary_area": "", "supplementary_material": "", "author": "John Xavier Morris;Volodymyr Kuleshov;Vitaly Shmatikov;Alexander M Rush", "authorids": "~John_Xavier_Morris1;~Volodymyr_Kuleshov1;~Vitaly_Shmatikov1;~Alexander_M_Rush1", "gender": "M;;;M", "homepage": "http://jxmo.io;https://www.cs.cornell.edu/~kuleshov/;;http://rush.seas.harvard.edu/", "dblp": "263/9958.html;81/8612;;http://dblp.uni-trier.de/pers/hd/r/Rush:Alexander_M=", "google_scholar": "Utsbve4AAAAJ;RY_t8XAAAAAJ;;LIjnUGgAAAAJ", "or_profile": "~John_Xavier_Morris1;~Volodymyr_Kuleshov1;~Vitaly_Shmatikov1;~Alexander_M_Rush1", "aff": "Cornell University;Cornell University;;School of Engineering and Applied Sciences, Harvard University", "aff_domain": "cornell.edu;cornell.edu;;seas.harvard.edu", "position": "PhD student;Assistant Professor;;Assistant Professor", "bibtex": "@inproceedings{\nmorris2023text,\ntitle={Text Embeddings Reveal (Almost) As Much As Text},\nauthor={John Xavier Morris and Volodymyr Kuleshov and Vitaly Shmatikov and Alexander M Rush},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=EDuKP7DqCk}\n}", "github": "", "project": "", "reviewers": "WRMs;Qu6b;3tQ6;a8TL", "site": "https://openreview.net/forum?id=EDuKP7DqCk", "pdf_size": 0, "rating": "5;5;5;5", "confidence": "4;3;3;3", "excitement": "5;3;4;5", "reproducibility": "3;3;3;4", "correctness": "4;3;3;5", "rating_avg": 5.0, "confidence_avg": 3.25, "excitement_avg": 4.25, "reproducibility_avg": 3.25, "correctness_avg": 3.75, "replies_avg": 14, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0000-0002-9900-1606", "linkedin": ";;;sasha-rush-a69b6917/", "aff_unique_index": "0;0;1", "aff_unique_norm": "Cornell University;Harvard University", "aff_unique_dep": ";School of Engineering and Applied Sciences", "aff_unique_url": "https://www.cornell.edu;https://www.harvard.edu", "aff_unique_abbr": "Cornell;Harvard", "aff_campus_unique_index": "1", "aff_campus_unique": ";Cambridge", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "EFML9BJcIH", "title": "Can Foundation Models Watch, Talk and Guide You Step by Step to Make a Cake?", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Despite tremendous advances in AI, it remains a significant challenge to develop interactive task guidance systems that can offer situated, personalized guidance and assist humans in various tasks. \nThese systems need to have a sophisticated understanding of the user as well as the environment, and make timely accurate decisions on when and what to say. To address this issue, we created a new multimodal benchmark dataset, Watch, Talk and Guide (WTaG) based on natural interaction between a human user and a human instructor. We further proposed two tasks: User and Environment Understanding, and Instructor Decision Making. We leveraged several foundation models to study to what extent these models can be quickly adapted to perceptually enabled task guidance. 
Our quantitative, qualitative, and human evaluation results show that these models can demonstrate fair performances in some cases with no task-specific training, but a fast and reliable adaptation remains a significant challenge. Our benchmark and baselines will provide a stepping stone for future work on situated task guidance.", "keywords": "Visual Task Guidance;Multimodal;Large Language Model;Foundation Model", "primary_area": "", "supplementary_material": "", "author": "Yuwei Bao;Keunwoo Peter Yu;Yichi Zhang;Shane Storks;Itamar Bar-Yossef;Alex de la Iglesia;Megan Su;Xiao Lin Zheng;Joyce Chai", "authorids": "~Yuwei_Bao1;~Keunwoo_Peter_Yu1;~Yichi_Zhang1;~Shane_Storks1;~Itamar_Bar-Yossef1;~Alex_de_la_Iglesia1;~Megan_Su1;~Xiao_Lin_Zheng1;~Joyce_Chai2", "gender": "F;M;M;M;M;M;F;F;F", "homepage": "http://baoyuwei.com;;https://594zyc.github.io/;https://www.shanestorks.com;;https://alexanderdelaiglesia.com;;;https://web.eecs.umich.edu/~chaijy/", "dblp": "255/8772;262/5993;86/7054-1;239/4098;359/9476;;;;c/JoyceYChai", "google_scholar": "z7XZlxEAAAAJ;wPIzAvEAAAAJ;xkBBhY8AAAAJ;;;;;;", "or_profile": "~Yuwei_Bao1;~Keunwoo_Peter_Yu1;~Yichi_Zhang1;~Shane_Storks1;~Itamar_Bar-Yossef1;~Alex_de_la_Iglesia1;~Megan_Su1;~Xiao_Lin_Zheng1;~Joyce_Y_Chai1", "aff": "University of Michigan;University of Michigan - Ann Arbor;University of Michigan;University of Michigan - Ann Arbor;University of Michigan - Ann Arbor;University of Michigan - Ann Arbor;University of Michigan - Ann Arbor;Syracuse University;University of Michigan", "aff_domain": "umich.edu;umich.edu;umich.edu;umich.edu;umich.edu;umich.edu;umich.edu;syr.edu;umich.edu", "position": "PhD student;PhD student;PhD student;PhD student;Undergrad student;Undergrad student;Undergrad student;Undergrad student;Full Professor", "bibtex": "@inproceedings{\nbao2023can,\ntitle={Can Foundation Models Watch, Talk and Guide You Step by Step to Make a Cake?},\nauthor={Yuwei Bao and Keunwoo Peter Yu and Yichi Zhang and Shane Storks and Itamar Bar-Yossef and Alex de la Iglesia and Megan Su and Xiao Lin Zheng and Joyce Chai},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=EFML9BJcIH}\n}", "github": "", "project": "", "reviewers": "nrzY;gqEJ;i9MT", "site": "https://openreview.net/forum?id=EFML9BJcIH", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;5;2", "excitement": "3;2;4", "reproducibility": "3;3;3", "correctness": "3;3;3", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.0, "reproducibility_avg": 3.0, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 9, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-7500-5944;0000-0002-4387-2410;0000-0003-3214-1070;;;;;;0000-0002-9658-2230", "linkedin": "emily-yuwei-bao-314736117/;peter-yu-5b262a56/;yichi-zhang-354a83128/;;itamar-bar-yossef-496042222/;alex-de-la-iglesia;megan-su13;xiaolinzheng1;", "aff_unique_index": "0;0;0;0;0;0;0;1;0", "aff_unique_norm": "University of Michigan;Syracuse University", "aff_unique_dep": ";", "aff_unique_url": "https://www.umich.edu;https://www.syracuse.edu", "aff_unique_abbr": "UM;Syracuse", "aff_campus_unique_index": "1;1;1;1;1", "aff_campus_unique": ";Ann Arbor", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "EG7gjHZ8cm", "title": "Geographical Erasure in Language Generation", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Large language models (LLMs) encode vast 
amounts of world knowledge. However, since these models are trained on large swaths of internet data, they are at risk of inordinately capturing information about dominant groups. This imbalance can propagate into generated language. In this work, we study and operationalise a form of geographical erasure wherein language models underpredict certain countries. We demonstrate consistent instances of erasure across a range of LLMs. We discover that erasure strongly correlates with low frequencies of country mentions in the training corpus. Lastly, we mitigate erasure by finetuning using a custom objective.", "keywords": "large language models;fairness;language generation;bias;world knowledge", "primary_area": "", "supplementary_material": "", "author": "Pola Schw\u00f6bel;Jacek Golebiowski;Michele Donini;Cedric Archambeau;Danish Pruthi", "authorids": "~Pola_Schw\u00f6bel1;~Jacek_Golebiowski1;~Michele_Donini1;~Cedric_Archambeau1;~Danish_Pruthi1", "gender": "F;M;M;M;M", "homepage": "https://polaschwoebel.github.io;;https://sites.google.com/view/mdonini/;http://www0.cs.ucl.ac.uk/staff/c.archambeau/;https://danishpruthi.com/", "dblp": "262/6180;314/8576;149/0239;59/1878;192/7349", "google_scholar": ";K5ivIucAAAAJ;u3ogi00AAAAJ;pPx5WWIAAAAJ;JpSx3EMAAAAJ", "or_profile": "~Pola_Schw\u00f6bel1;~Jacek_Golebiowski1;~Michele_Donini1;~Cedric_Archambeau1;~Danish_Pruthi1", "aff": "Amazon;Amazon;Amazon;Amazon Web Services;Indian Institute of Science, Bangalore ", "aff_domain": "amazon.com;amazon.com;amazon.com;amazon.com;iisc.ac.in", "position": "Researcher;Researcher;Scientist;Principal Researcher;Assistant Professor", "bibtex": "@inproceedings{\nschw{\\\"o}bel2023geographical,\ntitle={Geographical Erasure in Language Generation},\nauthor={Pola Schw{\\\"o}bel and Jacek Golebiowski and Michele Donini and Cedric Archambeau and Danish Pruthi},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=EG7gjHZ8cm}\n}", "github": "", "project": "", "reviewers": "u9cg;YS6F;U4e2;MJ7R", "site": "https://openreview.net/forum?id=EG7gjHZ8cm", "pdf_size": 0, "rating": "3;3;3;3", "confidence": "3;3;3;4", "excitement": "4;4;2;4", "reproducibility": "5;4;5;4", "correctness": "4;4;3;4", "rating_avg": 3.0, "confidence_avg": 3.25, "excitement_avg": 3.5, "reproducibility_avg": 4.5, "correctness_avg": 3.75, "replies_avg": 14, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;", "linkedin": ";jacek-golebiowski/;michele-donini-2484734a/;carchambeau/;", "aff_unique_index": "0;0;0;0;1", "aff_unique_norm": "Amazon;Indian Institute of Science", "aff_unique_dep": "Amazon.com, Inc.;", "aff_unique_url": "https://www.amazon.com;https://www.iisc.ac.in", "aff_unique_abbr": "Amazon;IISc", "aff_campus_unique_index": "1", "aff_campus_unique": ";Bangalore", "aff_country_unique_index": "0;0;0;0;1", "aff_country_unique": "United States;India" }, { "id": "EJ4N7PX6dm", "title": "Improved Pseudo Data for Machine Translation Quality Estimation with Constrained Beam Search", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Machine translation (MT) quality estimation (QE) is a crucial task to estimate the quality of MT outputs when reference translations are unavailable. Many studies focus on generating pseudo data using large parallel corpus and achieve remarkable success in the supervised setting. 
However, pseudo data solutions are less satisfying in unsupervised scenarios because the pseudo labels are inaccurate or the pseudo translations differ from the real ones. To address these problems, we propose to generate pseudo data using the MT model with constrained beam search (CBSQE). CBSQE preserves the reference parts with high MT probabilities as correct translations, while treating the remaining parts as incorrect ones for MT generation. Therefore, CBSQE can reduce the false negative labels caused by synonyms. Overall, beam search will prefer a more realistic hypothesis with a higher MT generation likelihood. Extensive experiments demonstrate that CBSQE outperforms strong baselines in both supervised and unsupervised settings. Analyses further show the superiority of CBSQE. The code is available at https://github.com/NJUNLP/njuqe.", "keywords": "Quality Estimation;Machine Translation;Pseudo Data", "primary_area": "", "supplementary_material": "", "author": "Xiang Geng;Yu Zhang;Zhejian Lai;Shuaijie She;Wei Zou;shimin tao;Hao Yang;Jiajun Chen;Shujian Huang", "authorids": "~Xiang_Geng1;~Yu_Zhang64;~Zhejian_Lai1;~Shuaijie_She1;~Wei_Zou3;~shimin_tao1;~Hao_Yang7;~Jiajun_Chen1;~Shujian_Huang1", "gender": "M;;M;M;M;M;M;M;M", "homepage": ";;https://scholar.google.com/citations?user=yBQHI1cAAAAJ&hl=zh-CN;https://ricardokevins.github.io/;;;https://github.com/yanghaocsg;https://cs.nju.edu.cn/chenjiajun/index_en.htm;http://nlp.nju.edu.cn/huangsj/", "dblp": "222/7968;;;335/2500;;;54/4089-7;;57/8451", "google_scholar": "n6QnFS0AAAAJ;fv-HOF0AAAAJ;yBQHI1cAAAAJ;https://scholar.google.com.hk/citations?user=Lvvr-lIAAAAJ;SYqASYcAAAAJ;Q5T8jbgAAAAJ;lOsjM5sAAAAJ;https://scholar.google.com.tw/citations?user=WIF7VaoAAAAJ;HF3-E9kAAAAJ", "or_profile": "~Xiang_Geng1;~Yu_Zhang64;~Zhejian_Lai1;~Shuaijie_She1;~Wei_Zou3;~shimin_tao1;~Hao_Yang7;~Jiajun_Chen1;~Shujian_Huang1", "aff": "Nanjing University;Nanjing University;Dalian University of Technology;Nanjing University;Nanjing University;Huawei Technologies Ltd.;Huawei Technologies Ltd.;Nanjing University;Nanjing University", "aff_domain": "nju.edu.cn;nju.edu.cn;dlut.edu.cn;nju.edu.cn;nju.edu.cn;huawei.com;huawei.com;nju.edu.cn;nju.edu.cn", "position": "PhD student;MS student;Undergrad student;PhD student;PhD student;Researcher;Principal Researcher;Full Professor;Associate Professor", "bibtex": "@inproceedings{\ngeng2023improved,\ntitle={Improved Pseudo Data for Machine Translation Quality Estimation with Constrained Beam Search},\nauthor={Xiang Geng and Yu Zhang and Zhejian Lai and Shuaijie She and Wei Zou and shimin tao and Hao Yang and Jiajun Chen and Shujian Huang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=EJ4N7PX6dm}\n}", "github": "", "project": "", "reviewers": "Ehgu;Zcy9;JQ3q;tS9S", "site": "https://openreview.net/forum?id=EJ4N7PX6dm", "pdf_size": 0, "rating": "3;3;3;3", "confidence": "4;3;1;4", "excitement": "3;3;3;3", "reproducibility": "4;2;4;3", "correctness": "3;3;3;3", "rating_avg": 3.0, "confidence_avg": 3.0, "excitement_avg": 3.0, "reproducibility_avg": 3.25, "correctness_avg": 3.0, "replies_avg": 14, "authors#_avg": 9, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0000-0002-5500-9251;;;0000-0001-8861-7010;;", "linkedin": ";;;;;;;;", "aff_unique_index": "0;0;1;0;0;2;2;0;0", "aff_unique_norm": "Nanjing University;Dalian University of Technology;Huawei", "aff_unique_dep": ";;Huawei Technologies", "aff_unique_url":
"https://www.nju.edu.cn;http://www.dlut.edu.cn/;https://www.huawei.com", "aff_unique_abbr": "Nanjing U;DUT;Huawei", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "ESGY2Ftbfg", "title": "Pre-training Language Models for Comparative Reasoning", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Comparative reasoning is a process of comparing objects, concepts, or entities to draw conclusions, which constitutes a fundamental cognitive ability. In this paper, we propose a novel framework to pre-train language models for enhancing their abilities of comparative reasoning over texts. While there have been approaches for NLP tasks that require comparative reasoning, they suffer from costly manual data labeling and limited generalizability to different tasks. Our approach introduces a novel method of collecting scalable data for text-based entity comparison, which leverages both structured and unstructured data. Moreover, we present a framework of pre-training language models via three novel objectives on comparative reasoning. Evaluation on downstream tasks including comparative question answering, question generation, and summarization shows that our pre-training framework significantly improves the comparative reasoning abilities of language models, especially under low-resource conditions. This work also releases the first integrated benchmark for comparative reasoning.", "keywords": "language model pre-training;question answering;question generation;summarization;comparative reasoning", "primary_area": "", "supplementary_material": "", "author": "Mengxia Yu;Zhihan Zhang;Wenhao Yu;Meng Jiang", "authorids": "~Mengxia_Yu1;~Zhihan_Zhang2;~Wenhao_Yu2;~Meng_Jiang3", "gender": "F;;M;M", "homepage": ";;https://wyu97.github.io/;http://www.meng-jiang.com/", "dblp": ";;159/8117-2.html;69/339-1", "google_scholar": "9d9qJt8AAAAJ;;z4qSdX8AAAAJ;LZIPfCkAAAAJ", "or_profile": "~Mengxia_Yu1;~Zhihan_Zhang2;~Wenhao_Yu2;~Meng_Jiang3", "aff": "University of Notre Dame;;University of Notre Dame;University of Notre Dame", "aff_domain": "nd.edu;;nd.edu;nd.edu", "position": "PhD student;;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nyu2023pretraining,\ntitle={Pre-training Language Models for Comparative Reasoning},\nauthor={Mengxia Yu and Zhihan Zhang and Wenhao Yu and Meng Jiang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=ESGY2Ftbfg}\n}", "github": "", "project": "", "reviewers": "W4mp;5Nu8;EaYH", "site": "https://openreview.net/forum?id=ESGY2Ftbfg", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;4;4", "excitement": "4;3;3", "reproducibility": "4;4;4", "correctness": "3;3;4", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.0, "correctness_avg": 3.3333333333333335, "replies_avg": 10, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-6627-2709;;0000-0002-4075-5980;0000-0002-3009-519X", "linkedin": "mengxia-yu-bb7495206/;;;meng-jiang-94b10916/", "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Notre Dame", "aff_unique_dep": "", "aff_unique_url": "https://www.nd.edu", "aff_unique_abbr": "Notre Dame", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "ESgkAKGUJP", 
"title": "Bridging Information-Theoretic and Geometric Compression in Language Models", "track": "main", "status": "Long Main", "tldr": "", "abstract": "For a language model (LM) to faithfully model human language, it must compress vast, potentially infinite information into relatively few dimensions. We propose analyzing compression in (pre-trained) LMs from two points of view: geometric and information-theoretic. We demonstrate that the two views are highly correlated, such that the intrinsic geometric dimension of linguistic data predicts their coding length under the LM. We then show that, in turn, high compression of a linguistic dataset predicts rapid adaptation to that dataset, confirming that being able to compress linguistic information is an important part of successful LM performance. As a practical byproduct of our analysis, we evaluate a battery of intrinsic dimension estimators for the first time on linguistic data, showing that only some encapsulate the relationship between information-theoretic compression, geometric compression, and ease-of-adaptation.", "keywords": "compression;information theory;language models", "primary_area": "", "supplementary_material": "", "author": "Emily Cheng;Corentin Kervadec;Marco Baroni", "authorids": "~Emily_Cheng1;~Corentin_Kervadec2;~Marco_Baroni1", "gender": "F;M;M", "homepage": "http://www.chengemily1.github.io;https://corentinkervadec.github.io/;http://marcobaroni.org", "dblp": "348/0281;224/0222;http://dblp.uni-trier.de/pers/hd/b/Baroni:Marco", "google_scholar": ";https://scholar.google.fr/citations?user=Rx507eQAAAAJ;https://scholar.google.com/citations?hl=en", "or_profile": "~Emily_Cheng1;~Corentin_Kervadec2;~Marco_Baroni1", "aff": "Universitat Pompeu Fabra;Universitat Pompeu Fabra;Universitat Pompeu Fabra", "aff_domain": "upf.edu;upf.edu;upf.edu", "position": "PhD student;Postdoc;Full Professor", "bibtex": "@inproceedings{\ncheng2023bridging,\ntitle={Bridging Information-Theoretic and Geometric Compression in Language Models},\nauthor={Emily Cheng and Corentin Kervadec and Marco Baroni},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=ESgkAKGUJP}\n}", "github": "", "project": "", "reviewers": "LFBq;xjpS;78nc", "site": "https://openreview.net/forum?id=ESgkAKGUJP", "pdf_size": 0, "rating": "5;5;5", "confidence": "3;4;3", "excitement": "4;4;4", "reproducibility": "4;4;4", "correctness": "4;4;4", "rating_avg": 5.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 4.0, "reproducibility_avg": 4.0, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": ";;", "aff_unique_index": "0;0;0", "aff_unique_norm": "Universitat Pompeu Fabra", "aff_unique_dep": "", "aff_unique_url": "https://www.upf.edu/", "aff_unique_abbr": "UPF", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Spain" }, { "id": "ESyts8YSub", "title": "Efficiently Enhancing Zero-Shot Performance of Instruction Following Model via Retrieval of Soft Prompt", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Enhancing the zero-shot performance of instruction-following models requires heavy computation, either by scaling the total number of training datasets or the model size. 
In this work, we explore how retrieval of soft prompts obtained through prompt tuning can efficiently assist hard prompts in zero-shot task generalization. Specifically, we train soft prompt embeddings for each prompt through prompt tuning, store the samples of the training instances mapped with the prompt embeddings, and retrieve the corresponding prompt embedding of the training instance closest to the query instance during inference. While only adding 0.007% additional parameters, retrieval of soft prompt enhances the performance of T0 on unseen tasks by outperforming it on 10 out of 11 datasets as well as improving the mean accuracy of T0 on BIG-bench benchmark by 2.39% points. Also, we report an interesting finding that retrieving source embeddings trained on similar answer choice formats is more important than those on similar task types.", "keywords": "natural language processing;zeroshot language models;prompt tuning", "primary_area": "", "supplementary_material": "", "author": "Seonghyeon Ye;Joel Jang;Doyoung Kim;Yongrae Jo;Minjoon Seo", "authorids": "~Seonghyeon_Ye1;~Joel_Jang1;~Doyoung_Kim3;~Yongrae_Jo1;~Minjoon_Seo1", "gender": "M;M;M;Not Specified;M", "homepage": "https://vano1205.github.io/;https://joeljang.github.io/;https://doyoungkim-ml.github.io/;https://github.com/dreamgonfly;https://seominjoon.github.io", "dblp": "301/8927;;;252/6347;149/1367", "google_scholar": "https://scholar.google.co.kr/citations?user=JfGGjBoAAAAJ;xL-7eFEAAAAJ;https://scholar.google.co.kr/citations?user=PJR9ogMAAAAJ;https://scholar.google.com/citations?hl=en;zYze5fIAAAAJ", "or_profile": "~Seonghyeon_Ye1;~Joel_Jang1;~Doyoung_Kim3;~Yongrae_Jo1;~Minjoon_Seo1", "aff": "Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;KAIST;Korea Advanced Institute of Science & Technology;Twelve Labs", "aff_domain": "kaist.ac.kr;kaist.ac.kr;kaist.ac.kr;kaist.ac.kr;twelvelabs.io", "position": "PhD student;MS student;MS student;MS student;Chief Scientist", "bibtex": "@inproceedings{\nye2023efficiently,\ntitle={Efficiently Enhancing Zero-Shot Performance of Instruction Following Model via Retrieval of Soft Prompt},\nauthor={Seonghyeon Ye and Joel Jang and Doyoung Kim and Yongrae Jo and Minjoon Seo},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=ESyts8YSub}\n}", "github": "", "project": "", "reviewers": "dsfs;GnhD;eWKn;YaNx", "site": "https://openreview.net/forum?id=ESyts8YSub", "pdf_size": 0, "rating": "3;3;3;3", "confidence": "3;2;5;3", "excitement": "2;3;2;3", "reproducibility": "4;4;4;4", "correctness": "2;4;2;3", "rating_avg": 3.0, "confidence_avg": 3.25, "excitement_avg": 2.5, "reproducibility_avg": 4.0, "correctness_avg": 2.75, "replies_avg": 12, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;", "linkedin": ";joel-jang-1289331a5/;doyoung-kim-870a141a2/;;minjoon-seo/", "aff_unique_index": "0;0;0;0;1", "aff_unique_norm": "Korea Advanced Institute of Science and Technology;Twelve Labs", "aff_unique_dep": ";", "aff_unique_url": "https://www.kaist.ac.kr;https://twelvelabs.com", "aff_unique_abbr": "KAIST;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;1", "aff_country_unique": "South Korea;United States" }, { "id": "ETNa4Wb65J", "title": "Let's Sample Step by Step: Adaptive-Consistency for Efficient Reasoning and Coding with LLMs", "track": "main", "status": "Long Main", "tldr": "", 
"abstract": "A popular approach for improving the correctness of output from large language models (LLMs) is Self-Consistency - poll the LLM multiple times and output the most frequent solution. Existing Self-Consistency techniques always generate a constant number of samples per question, where a better approach will be to non-uniformly distribute the available budget based on the amount of agreement in the samples generated so far. In response, we introduce Adaptive-Consistency, a cost-efficient, model-agnostic technique that dynamically adjusts the number of samples per question using a lightweight stopping criterion. Our experiments over 17 reasoning and code generation datasets and three LLMs demonstrate that Adaptive-Consistency reduces sample budget by up to 7.9 times with an average accuracy drop of less than 0.1%", "keywords": "LLMs;reasoning;efficient reasoning;sampling in llms", "primary_area": "", "supplementary_material": "", "author": "Pranjal Aggarwal;Aman Madaan;Yiming Yang;Mausam .", "authorids": "~Pranjal_Aggarwal1;~Aman_Madaan1;~Yiming_Yang1;~Mausam_.1", "gender": "M;;F;M", "homepage": "https://github.com/Pranjal2041/;https://madaan.github.io;http://www.cs.cmu.edu/~yiming/;http://www.cse.iitd.ac.in/~mausam", "dblp": "163/0764;138/1043;25/1666;30/6391.html", "google_scholar": "https://scholar.google.com/citations?hl=en;jW9ts2cAAAAJ;MlZq4XwAAAAJ;https://scholar.google.co.in/citations?hl=en", "or_profile": "~Pranjal_Aggarwal1;~Aman_Madaan1;~Yiming_Yang1;~Mausam_Mausam2", "aff": "Indian Institute of Technology, Delhi;Carnegie Mellon University;School of Computer Science, Carnegie Mellon University;Indian Institute of Technology Delhi", "aff_domain": "iitd.ac.in;cmu.edu;cs.cmu.edu;iitd.ac.in", "position": "Undergrad student;PhD student;Full Professor;Full Professor", "bibtex": "@inproceedings{\naggarwal2023lets,\ntitle={Let's Sample Step by Step: Adaptive-Consistency for Efficient Reasoning and Coding with {LLM}s},\nauthor={Pranjal Aggarwal and Aman Madaan and Yiming Yang and Mausam .},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=ETNa4Wb65J}\n}", "github": "", "project": "", "reviewers": "FcEp;pwx4;tDcp;HiHf", "site": "https://openreview.net/forum?id=ETNa4Wb65J", "pdf_size": 0, "rating": "5;5;5;5", "confidence": "3;4;4;4", "excitement": "4;4;3;4", "reproducibility": "5;4;4;4", "correctness": "4;3;4;4", "rating_avg": 5.0, "confidence_avg": 3.75, "excitement_avg": 3.75, "reproducibility_avg": 4.25, "correctness_avg": 3.75, "replies_avg": 14, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-2962-1535;;0000-0001-8322-607X;0000-0003-4088-4296", "linkedin": ";amnmadaan/;yiming-yang-24100924/;", "aff_unique_index": "0;1;1;0", "aff_unique_norm": "Indian Institute of Technology Delhi;Carnegie Mellon University", "aff_unique_dep": ";", "aff_unique_url": "https://www.iitdelhi.ac.in;https://www.cmu.edu", "aff_unique_abbr": "IIT Delhi;CMU", "aff_campus_unique_index": "0;2;0", "aff_campus_unique": "Delhi;;Pittsburgh", "aff_country_unique_index": "0;1;1;0", "aff_country_unique": "India;United States" }, { "id": "EV5dNDiC7I", "title": "BLM-s/lE: A structured dataset of English spray-load verb alternations for testing generalization in LLMs", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Current NLP models appear to be achieving performance comparable to human capabilities on well-established benchmarks. 
New benchmarks are now necessary to test deeper layers of understanding of natural languages by these models.\n\nBlackbird's Language Matrices are a recently developed framework that draws inspiration from tests of human analytic intelligence. The BLM task has revealed that successful performances in previously studied linguistic problems do not yet stem from a deep understanding of the generative factors that define these problems. \n\nIn this study, we define a new BLM task for predicate-argument structure, and develop a structured dataset for its investigation, concentrating on the spray-load verb alternations in English, as a case study. The context sentences include one alternant from the spray-load alternation and the target sentence is the other alternant, to be chosen among a minimally contrastive and adversarial set of answers. We describe the generation process of the dataset and the reasoning behind the generating rules. The dataset aims to facilitate investigations into how verb information is encoded in sentence embeddings and how models generalize to the complex properties of argument structures. \n\nBenchmarking experiments conducted on the dataset and qualitative error analysis on the answer set reveal the inherent challenges associated with the problem even for current high-performing representations.", "keywords": "verb alternation;generating rules;sentence embeddings", "primary_area": "", "supplementary_material": "", "author": "Giuseppe Samo;Vivi Nastase;Chunyang Jiang;Paola Merlo", "authorids": "~Giuseppe_Samo1;~Vivi_Nastase1;~Chunyang_Jiang1;~Paola_Merlo1", "gender": "M;F;F;", "homepage": ";https://vivinastase.github.io/homepage/;;", "dblp": ";05/2290;;", "google_scholar": "TaL7sQYAAAAJ;https://scholar.google.ca/citations?user=fNzQUbsAAAAJ;VWIuHKgAAAAJ;", "or_profile": "~Giuseppe_Samo1;~Vivi_Nastase1;~Chunyang_Jiang1;~Paola_Merlo1", "aff": "Beijing Language and Culture University;University of Geneva;University of Geneva;", "aff_domain": "blcu.edu.cn;unige.ch;unige.ch;", "position": "Full Professor;Research associate;MS student;", "bibtex": "@inproceedings{\nsamo2023blmsle,\ntitle={{BLM}-s/lE: A structured dataset of English spray-load verb alternations for testing generalization in {LLM}s},\nauthor={Giuseppe Samo and Vivi Nastase and Chunyang Jiang and Paola Merlo},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=EV5dNDiC7I}\n}", "github": "", "project": "", "reviewers": "YHa3;uWko;fcXT", "site": "https://openreview.net/forum?id=EV5dNDiC7I", "pdf_size": 0, "rating": "4;4;4", "confidence": "2;3;3", "excitement": "2;4;4", "reproducibility": "4;3;3", "correctness": "3;4;3", "rating_avg": 4.0, "confidence_avg": 2.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0003-3449-8006;;0009-0008-9871-4082;", "linkedin": ";;;", "aff_unique_index": "0;1;1", "aff_unique_norm": "Beijing Language and Culture University;University of Geneva", "aff_unique_dep": ";", "aff_unique_url": "http://www.blcu.edu.cn;https://www.unige.ch", "aff_unique_abbr": "BLCU;UNIGE", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1", "aff_country_unique": "China;Switzerland" }, { "id": "EVfHUvhRra", "title": "Large Language Models Only Pass Primary School Exams in Indonesia: A 
Comprehensive Test on IndoMMLU", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Although large language models (LLMs) are often pre-trained on large-scale multilingual texts, their reasoning abilities and real-world knowledge are mainly evaluated based on English datasets. Assessing LLM capabilities beyond English is increasingly vital but hindered due to the lack of suitable datasets. In this work, we introduce IndoMMLU, the first multi-task language understanding benchmark for Indonesian culture and languages, which consists of questions from primary school to university entrance exams in Indonesia. By employing professional teachers, we obtain 14,981 questions across 64 tasks and education levels, with 46% of the questions focusing on assessing proficiency in the Indonesian language and knowledge of nine local languages and cultures in Indonesia. Our empirical evaluations show that GPT-3.5 only manages to pass the Indonesian primary school level, with limited knowledge of local Indonesian languages and culture. Other smaller models such as BLOOMZ and Falcon perform at even lower levels.", "keywords": "large language models;Indonesian school exam problems;evaluation;local languages and cultures", "primary_area": "", "supplementary_material": "", "author": "Fajri Koto;Nurul Aisyah;Haonan Li;Timothy Baldwin", "authorids": "~Fajri_Koto1;~Nurul_Aisyah1;~Haonan_Li2;~Timothy_Baldwin1", "gender": "M;;M;", "homepage": "https://fajrikoto.com/;;https://haonan-li.github.io/;https://eltimster.github.io/www/", "dblp": "160/0019;;218/7270.html;65/4863", "google_scholar": "RA9l3s4AAAAJ;;IqfgexsAAAAJ;wjBD1dkAAAAJ", "or_profile": "~Fajri_Koto1;~Nurul_Aisyah1;~Haonan_Li2;~Timothy_Baldwin1", "aff": "Mohamed bin Zayed University of Artificial Intelligence;;Mohamed bin Zayed University of Artificial Intelligence;The University of Melbourne", "aff_domain": "mbzuai.ac.ae;;mbzuai.ac.ae;unimelb.edu.au", "position": "Postdoc;;Postdoc;Full Professor", "bibtex": "@inproceedings{\nkoto2023large,\ntitle={Large Language Models Only Pass Primary School Exams in Indonesia: A Comprehensive Test on Indo{MMLU}},\nauthor={Fajri Koto and Nurul Aisyah and Haonan Li and Timothy Baldwin},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=EVfHUvhRra}\n}", "github": "", "project": "", "reviewers": "49YF;AUi2;Qs8E", "site": "https://openreview.net/forum?id=EVfHUvhRra", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;3;3", "excitement": "4;3;3", "reproducibility": "3;3;3", "correctness": "4;4;4", "rating_avg": 4.0, "confidence_avg": 3.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.0, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0001-6623-5089;0000-0003-4525-6950", "linkedin": "fajri-koto-02705860/;nuaisyah/;haonan-li-809709b9/;", "aff_unique_index": "0;0;1", "aff_unique_norm": "Mohamed bin Zayed University of Artificial Intelligence;University of Melbourne", "aff_unique_dep": ";", "aff_unique_url": "https://mbzuai.ac.ae;https://www.unimelb.edu.au", "aff_unique_abbr": "MBZUAI;UniMelb", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1", "aff_country_unique": "United Arab Emirates;Australia" }, { "id": "EY9k2x5qWB", "title": "KRLS: Improving End-to-End Response Generation in Task Oriented Dialog with Reinforced Keywords Learning", "track": "main", "status": "Long Main", 
"tldr": "", "abstract": "In task-oriented dialogs (TOD), reinforcement learning (RL) algorithms train a model to directly optimize response for task-related metrics.\nHowever, RL often needs to perform exploration, which can be time-consuming due to the slow auto-regressive sequence generation process. We investigate an approach to create a more efficient RL-based algorithm to improve TOD performance in an offline setting.\nFirst, we use a faster generation procedure that samples from independent next-word distributions after training the language model (LM) with supervised learning. We then introduce a fine-grained reward function to help the model focus on learning key information in a dialog, by measuring the importance and semantic closeness of each generated token. Experiments on the MultiWoZ dataset show our new training algorithm, Keywords Reinforcement Learning with Next-word Sampling (KRLS), achieves state-of-the-art performance on the end-to-end response generation task, with a 15% training time reduction compared to a standard RL algorithm using auto-regressive generation.", "keywords": "task-oriented dialogues;reinforcement learning", "primary_area": "", "supplementary_material": "", "author": "Xiao Yu;Qingyang Wu;Kun Qian;Zhou Yu", "authorids": "~Xiao_Yu4;~Qingyang_Wu1;~Kun_Qian2;~Zhou_Yu1", "gender": "M;M;M;F", "homepage": ";https://qywu.github.io/about.html;https://qbetterk.github.io/;http://www.cs.columbia.edu/~zhouyu/", "dblp": ";;77/2062;83/3205", "google_scholar": "QblBy88AAAAJ;https://scholar.google.ca/citations?user=HDiw-TsAAAAJ;oRKl5eoAAAAJ;https://scholar.google.com.tw/citations?user=jee2Dy0AAAAJ", "or_profile": "~Xiao_Yu4;~Qingyang_Wu1;~Kun_Qian2;~Zhou_Yu1", "aff": "Columbia University;Amazon;Columbia University;Columbia University", "aff_domain": "columbia.edu;amazon.com;columbia.edu;columbia.edu", "position": "Undergrad student;Intern;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nyu2023krls,\ntitle={{KRLS}: Improving End-to-End Response Generation in Task Oriented Dialog with Reinforced Keywords Learning},\nauthor={Xiao Yu and Qingyang Wu and Kun Qian and Zhou Yu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=EY9k2x5qWB}\n}", "github": "", "project": "", "reviewers": "uymB;vXVC;GMA2", "site": "https://openreview.net/forum?id=EY9k2x5qWB", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;5;3", "excitement": "4;4;4", "reproducibility": "5;3;4", "correctness": "4;3;4", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 4.0, "reproducibility_avg": 4.0, "correctness_avg": 3.6666666666666665, "replies_avg": 10, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0001-8255-2527;", "linkedin": ";;kun-qian-6b01b113a/;", "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Columbia University;Amazon", "aff_unique_dep": ";Amazon.com, Inc.", "aff_unique_url": "https://www.columbia.edu;https://www.amazon.com", "aff_unique_abbr": "Columbia;Amazon", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "EhPYwBBFYb", "title": "UPRISE: Universal Prompt Retrieval for Improving Zero-Shot Evaluation", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Large Language Models (LLMs) are popular for their impressive abilities, but the need for model-specific fine-tuning or task-specific prompt engineering can hinder their 
generalization. We propose UPRISE (Universal Prompt Retrieval for Improving zero-Shot Evaluation), which tunes a lightweight and versatile retriever that automatically retrieves prompts for a given zero-shot task input. Specifically, we demonstrate universality in a cross-task and cross-model scenario: the retriever is tuned on diverse tasks, but tested on unseen task types; we use a small frozen LLM, GPT-Neo-2.7B, for tuning the retriever, but test the retriever on different LLMs of much larger scales, such as BLOOM-7.1B, OPT-66B and GPT3-175B. Additionally, we show that UPRISE mitigates the hallucination problem in our experiments with ChatGPT, suggesting its potential to improve even the strongest LLMs. Our model and code are available at https://github.com/microsoft/LMOps.", "keywords": "Prompt Engineering;Large Language Model;Zero-shot Evaluation", "primary_area": "", "supplementary_material": "", "author": "Daixuan Cheng;Shaohan Huang;Junyu Bi;Yuefeng Zhan;Jianfeng Liu;Yujing Wang;Hao Sun;Furu Wei;Weiwei Deng;Qi Zhang", "authorids": "~Daixuan_Cheng1;~Shaohan_Huang1;~Junyu_Bi1;~Yuefeng_Zhan1;~Jianfeng_Liu1;~Yujing_Wang1;~Hao_Sun6;~Furu_Wei1;~Weiwei_Deng2;~Qi_Zhang19", "gender": "F;M;F;;M;F;M;M;M;M", "homepage": ";;;;https://www.jianfengliu.com;;;https://www.microsoft.com/en-us/research/people/fuwei/;;", "dblp": "289/2865;176/0380;276/3644;331/1573;;16/4075;;72/5870;311/3565.html;", "google_scholar": "https://scholar.google.com/citations?hl=en;;;dQIMlM0AAAAJ;https://scholar.google.com.tw/citations?user=NcpaZMIAAAAJ;https://scholar.google.com/citations?hl=en;OjWD_SsAAAAJ;G-V1VpwAAAAJ;;", "or_profile": "~Daixuan_Cheng1;~Shaohan_Huang1;~Junyu_Bi1;~Yuefeng_Zhan1;~Jianfeng_Liu1;~Yujing_Wang1;~Hao_Sun6;~Furu_Wei1;~Weiwei_Deng2;~Qi_Zhang19", "aff": "Beijing University of Posts and Telecommunications;Microsoft;, Chinese Academy of Sciences;Microsoft;Microsoft;Microsoft;Microsoft;Microsoft Research;Microsoft;Microsoft", "aff_domain": "bupt.edu.cn;microsoft.com;ict.ac.cn;microsoft.com;microsoft.com;microsoft.com;microsoft.com;microsoft.com;microsoft.com;microsoft.com", "position": "MS student;Researcher;MS student;Principal Applied Scientist;Researcher;Software Engineering Manager;Researcher;Distinguished Scientist;Researcher;Researcher", "bibtex": "@inproceedings{\ncheng2023uprise,\ntitle={{UPRISE}: Universal Prompt Retrieval for Improving Zero-Shot Evaluation},\nauthor={Daixuan Cheng and Shaohan Huang and Junyu Bi and Yuefeng Zhan and Jianfeng Liu and Yujing Wang and Hao Sun and Furu Wei and Weiwei Deng and Qi Zhang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=EhPYwBBFYb}\n}", "github": "", "project": "", "reviewers": "xPye;zjV3;5iUs", "site": "https://openreview.net/forum?id=EhPYwBBFYb", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;4;2", "excitement": "3;3;3", "reproducibility": "3;3;5", "correctness": "4;3;3", "rating_avg": 5.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 9, "authors#_avg": 10, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0009-0007-5902-417X;;;0009-0004-5027-7478;;0009-0001-4793-9715;", "linkedin": ";;;yuefengzhan/;;;;;;qizhang07/", "aff_unique_index": "0;1;2;1;1;1;1;1;1;1", "aff_unique_norm": "Beijing University of Posts and Telecommunications;Microsoft;Chinese Academy of Sciences", "aff_unique_dep": ";Microsoft Corporation;", 
"aff_unique_url": "http://www.bupt.edu.cn/;https://www.microsoft.com;http://www.cas.cn", "aff_unique_abbr": "BUPT;Microsoft;CAS", "aff_campus_unique_index": "0", "aff_campus_unique": "Beijing;", "aff_country_unique_index": "0;1;0;1;1;1;1;1;1;1", "aff_country_unique": "China;United States" }, { "id": "Eib6OOeVJI", "title": "Pre-training Multi-task Contrastive Learning Models for Scientific Literature Understanding", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Scientific literature understanding tasks have gained significant attention due to their potential to accelerate scientific discovery. Pre-trained language models (LMs) have shown effectiveness in these tasks, especially when tuned via contrastive learning. However, jointly utilizing pre-training data across multiple heterogeneous tasks (e.g., extreme multi-label paper classification, citation prediction, and literature search) remains largely unexplored. To bridge this gap, we propose a multi-task contrastive learning framework, SciMult, with a focus on facilitating common knowledge sharing across different scientific literature understanding tasks while preventing task-specific skills from interfering with each other. To be specific, we explore two techniques -- task-aware specialization and instruction tuning. The former adopts a Mixture-of-Experts Transformer architecture with task-aware sub-layers; the latter prepends task-specific instructions to the input text so as to produce task-aware outputs. Extensive experiments on a comprehensive collection of benchmark datasets verify the effectiveness of our task-aware specialization strategy, where we outperform state-of-the-art scientific pre-trained LMs. Code, datasets, and pre-trained models can be found at https://scimult.github.io/.", "keywords": "scientific literature understanding;multi-task learning;contrastive learning;language model pre-training", "primary_area": "", "supplementary_material": "", "author": "Yu Zhang;Hao Cheng;Zhihong Shen;Xiaodong Liu;Ye-Yi Wang;Jianfeng Gao", "authorids": "~Yu_Zhang26;~Hao_Cheng4;~Zhihong_Shen1;~Xiaodong_Liu1;~Ye-Yi_Wang1;~Jianfeng_Gao1", "gender": "M;M;F;;;M", "homepage": "https://yuzhimanhua.github.io/;https://sites.google.com/site/hcheng2site/Home;;;;https://www.microsoft.com/en-us/research/people/jfgao/", "dblp": "50/671-44;09/5158-2;55/7939;65/622;13/1228;92/5339", "google_scholar": "N0PrmgIAAAAJ;https://scholar.google.com/citations?hl=en;-yFiUGcAAAAJ;NIewcxMAAAAJ;;https://scholar.google.com/citations?hl=en", "or_profile": "~Yu_Zhang26;~Hao_Cheng4;~Zhihong_Shen1;~Xiaodong_Liu1;~Ye-Yi_Wang1;~Jianfeng_Gao1", "aff": "University of Illinois, Urbana Champaign;Microsoft Research;Microsoft;Microsoft Research;Microsoft;Microsoft Research", "aff_domain": "illinois.edu;microsoft.com;microsoft.com;microsoft.com;microsoft.com;microsoft.com", "position": "PhD student;Researcher;Principal Researcher;Researcher;Researcher;Principal Researcher", "bibtex": "@inproceedings{\nzhang2023pretraining,\ntitle={Pre-training Multi-task Contrastive Learning Models for Scientific Literature Understanding},\nauthor={Yu Zhang and Hao Cheng and Zhihong Shen and Xiaodong Liu and Ye-Yi Wang and Jianfeng Gao},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=Eib6OOeVJI}\n}", "github": "", "project": "", "reviewers": "BsFu;UpiD;keSz", "site": "https://openreview.net/forum?id=Eib6OOeVJI", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;4", "excitement": 
"3;3;4", "reproducibility": "4;3;4", "correctness": "3;3;4", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0003-0540-6758;0000-0001-7988-3149;;;;", "linkedin": ";;;;;", "aff_unique_index": "0;1;1;1;1;1", "aff_unique_norm": "University of Illinois Urbana-Champaign;Microsoft", "aff_unique_dep": ";Microsoft Research", "aff_unique_url": "https://illinois.edu;https://www.microsoft.com/en-us/research", "aff_unique_abbr": "UIUC;MSR", "aff_campus_unique_index": "0", "aff_campus_unique": "Urbana-Champaign;", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "Ek87791lcO", "title": "Prompt as Triggers for Backdoor Attack: Examining the Vulnerability in Language Models", "track": "main", "status": "Long Main", "tldr": "", "abstract": "The prompt-based learning paradigm, which bridges the gap between pre-training and fine-tuning, achieves state-of-the-art performance on several NLP tasks, particularly in few-shot settings. Despite being widely applied, prompt-based learning is vulnerable to backdoor attacks. \nTextual backdoor attacks are designed to introduce targeted vulnerabilities into models by poisoning a subset of training samples through trigger injection and label modification. However, they suffer from flaws such as abnormal natural language expressions resulting from the trigger and incorrect labeling of poisoned samples. In this study, we propose ProAttack, a novel and efficient method for performing clean-label backdoor attacks based on the prompt, which uses the prompt itself as a trigger. Our method does not require external triggers and ensures correct labeling of poisoned samples, improving the stealthy nature of the backdoor attack. With extensive experiments on rich-resource and few-shot text classification tasks, we empirically validate ProAttack's competitive performance in textual backdoor attacks. 
Notably, in the rich-resource setting, ProAttack achieves state-of-the-art attack success rates in the clean-label backdoor attack benchmark without external triggers.", "keywords": "Backdoor Attack; Prompt; Large Language Model", "primary_area": "", "supplementary_material": "", "author": "Shuai Zhao;Jinming Wen;Anh Tuan Luu;Junbo Zhao;Jie Fu", "authorids": "~Shuai_Zhao2;~Jinming_Wen1;~Anh_Tuan_Luu2;~Junbo_Zhao1;~Jie_Fu2", "gender": "M;M;M;M;M", "homepage": "https://shuaizhao95.github.io/;https://scholar.google.com/citations?user=L_ssfM4AAAAJ&hl=en;https://tuanluu.github.io/;http://jakezhao.net/;https://bigaidream.github.io/", "dblp": "116/8682-7;36/8492.html;81/8329.html;191/6665;", "google_scholar": "upbsFBAAAAAJ;L_ssfM4AAAAJ;https://scholar.google.com.sg/citations?hl=en;8ipao8MAAAAJ;66osleIAAAAJ", "or_profile": "~Shuai_Zhao2;~Jinming_Wen1;~Anh_Tuan_Luu2;~Junbo_Zhao1;~Jie_Fu1", "aff": "Nanyang Technological University;;Nanyang Technological University;Zhejiang University;Beijing Academy of Artificial Intelligence", "aff_domain": "ntu.edu.sg;;ntu.edu.sg;zju.edu.cn;baai.ac.cn", "position": "Intern;;Assistant Professor;Assistant Professor;Researcher", "bibtex": "@inproceedings{\nzhao2023prompt,\ntitle={Prompt as Triggers for Backdoor Attack: Examining the Vulnerability in Language Models},\nauthor={Shuai Zhao and Jinming Wen and Anh Tuan Luu and Junbo Zhao and Jie Fu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=Ek87791lcO}\n}", "github": "", "project": "", "reviewers": "4EYX;GgC2;3TY9", "site": "https://openreview.net/forum?id=Ek87791lcO", "pdf_size": 0, "rating": "4;4;4", "confidence": "5;5;4", "excitement": "4;3;3", "reproducibility": "4;3;4", "correctness": "4;3;4", "rating_avg": 4.0, "confidence_avg": 4.666666666666667, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0001-5174-5182;;;;0000-0002-4494-843X", "linkedin": ";;;;", "aff_unique_index": "0;0;1;2", "aff_unique_norm": "Nanyang Technological University;Zhejiang University;Beijing Academy of Artificial Intelligence", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ntu.edu.sg;https://www.zju.edu.cn;https://www.baaic.cn", "aff_unique_abbr": "NTU;ZJU;BAAI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;1", "aff_country_unique": "Singapore;China" }, { "id": "EkftL7NgtW", "title": "DNA: Denoised Neighborhood Aggregation for Fine-grained Category Discovery", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Discovering fine-grained categories from coarsely labeled data is a practical and challenging task, which can bridge the gap between the demand for fine-grained analysis and the high annotation cost. Previous works mainly focus on instance-level discrimination to learn low-level features, but ignore semantic similarities between data, which may prevent these models learning compact cluster representations. In this paper, we propose $\\textit{Denoised Neighborhood Aggregation}$ (DNA), a self-supervised framework that encodes semantic structures of data into the embedding space. 
Specifically, we retrieve the $\\textit{k}$-nearest neighbors of a query as its positive keys to capture semantic similarities between data and then aggregate information from the neighbors to learn compact cluster representations, which can make fine-grained categories more separable. However, the retrieved neighbors can be noisy and contain many false-positive keys, which can degrade the quality of learned embeddings. To cope with this challenge, we propose three principles to filter out these false neighbors for better representation learning. Furthermore, we theoretically justify that the learning objective of our framework is equivalent to a clustering loss, which can capture semantic similarities between data to form compact fine-grained clusters. Extensive experiments on three benchmark datasets show that our method can retrieve more accurate neighbors (21.31% accuracy improvement) and outperform state-of-the-art models by a large margin (average 9.96% improvement on three metrics). Our code and data are available at https://github.com/Lackel/DNA.", "keywords": "Fine-grained Category Discovery;Denoised Neighborhood Contrastive Learning", "primary_area": "", "supplementary_material": "", "author": "Wenbin An;Feng Tian;Wenkai Shi;Yan Chen;Qinghua Zheng;QianYing Wang;Ping Chen", "authorids": "~Wenbin_An1;~Feng_Tian4;~Wenkai_Shi1;~Yan_Chen16;~Qinghua_Zheng1;~QianYing_Wang1;~Ping_Chen1", "gender": "M;;M;;;F;", "homepage": ";;https://github.com/yibai-shi;;http://gr.xjtu.edu.cn/web/qhzheng;https://research.lenovo.com/webapp/view/home.html;http://www.cs.umb.edu/~pchen", "dblp": "331/2394;;;;32/1858;86/11012;", "google_scholar": "https://scholar.google.com.hk/citations?user=BpkQZGgAAAAJ;;;;;gXgWhfEAAAAJ;", "or_profile": "~Wenbin_An1;~Feng_Tian4;~Wenkai_Shi1;~Yan_Chen16;~Qinghua_Zheng1;~QianYing_Wang1;~Ping_Chen1", "aff": "Xi'an Jiaotong University;;Xi'an Jiaotong University;Xi'an Jiaotong University;Xi'an Jiaotong University;lenovo group;University of Massachusetts, Boston", "aff_domain": "xjtu.edu.cn;;xjtu.edu.cn;xjtu.edu.cn;xjtu.edu.cn;lenovo.com;umb.edu", "position": "PhD student;;MS student;Associate Professor;Full Professor;Principal Researcher;Full Professor", "bibtex": "@inproceedings{\nan2023dna,\ntitle={{DNA}: Denoised Neighborhood Aggregation for Fine-grained Category Discovery},\nauthor={Wenbin An and Feng Tian and Wenkai Shi and Yan Chen and Qinghua Zheng and QianYing Wang and Ping Chen},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=EkftL7NgtW}\n}", "github": "", "project": "", "reviewers": "Sqwo;Mz1B;1nk5", "site": "https://openreview.net/forum?id=EkftL7NgtW", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;3;3", "excitement": "4;4;4", "reproducibility": "4;4;4", "correctness": "4;4;4", "rating_avg": 5.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 4.0, "reproducibility_avg": 4.0, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0000-0003-4838-3779;;;", "linkedin": ";;;;;qianying-jane-wang-0255231/;", "aff_unique_index": "0;0;0;0;1;2", "aff_unique_norm": "Xi'an Jiao Tong University;Lenovo Group;University of Massachusetts Boston", "aff_unique_dep": ";;", "aff_unique_url": "https://www.xjtu.edu.cn;https://www.lenovo.com;https://www.umb.edu", "aff_unique_abbr": "XJTU;Lenovo;UMass Boston", "aff_campus_unique_index": "1", "aff_campus_unique": ";Boston", "aff_country_unique_index":
"0;0;0;0;0;1", "aff_country_unique": "China;United States" }, { "id": "EnUgSeghBl", "title": "Impressions: Visual Semiotics and Aesthetic Impact Understanding", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Is aesthetic impact different from beauty? Is visual salience a reflection of its capacity for effective communication? We present Impressions, a novel dataset through which to investigate the semiotics of images, and how specific visual features and design choices can elicit specific emotions, thoughts and beliefs. We posit that the impactfulness of an image extends beyond formal definitions of aesthetics, to its success as a communicative act, where style contributes as much to meaning formation as the subject matter. We also acknowledge that existing Image Captioning datasets are not designed to empower state-of-the-art architectures to model potential human impressions or interpretations of images. To fill this need, we design an annotation task heavily inspired by image analysis techniques in the Visual Arts to collect 1,440 image-caption pairs and 4,320 unique annotations exploring impact, pragmatic image description, impressions and aesthetic design choices. We show that existing multimodal image captioning and conditional generation models struggle to simulate plausible human responses to images. However, this dataset significantly improves their ability to model impressions and aesthetic evaluations of images through fine-tuning and few-shot adaptation.", "keywords": "Visual Semiotics;Stylistic Analysis;Computational Aesthetics;Image Captioning;Multimodal Datasets", "primary_area": "", "supplementary_material": "", "author": "Julia Kruk;Caleb Ziems;Diyi Yang", "authorids": "~Julia_Kruk1;~Caleb_Ziems1;~Diyi_Yang2", "gender": "F;M;F", "homepage": ";http://calebziems.com/;https://cs.stanford.edu/~diyiy/", "dblp": ";252/5058;70/11145", "google_scholar": "DOmg2LMAAAAJ;Hm4XL1AAAAAJ;j9jhYqQAAAAJ", "or_profile": "~Julia_Kruk1;~Caleb_Ziems1;~Diyi_Yang2", "aff": "Georgia Institute of Technology;Georgia Institute of Technology;Stanford University", "aff_domain": "gatech.edu;gatech.edu;stanford.edu", "position": "MS student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nkruk2023impressions,\ntitle={Impressions: Visual Semiotics and Aesthetic Impact Understanding},\nauthor={Julia Kruk and Caleb Ziems and Diyi Yang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=EnUgSeghBl}\n}", "github": "", "project": "", "reviewers": "GJAP;7iNP;p7vQ", "site": "https://openreview.net/forum?id=EnUgSeghBl", "pdf_size": 0, "rating": "5;5;5", "confidence": "3;3;4", "excitement": "3;3;4", "reproducibility": "3;4;4", "correctness": "3;4;4", "rating_avg": 5.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": "julia-kruk-854155112/;caleb-ziems-4b1283126/;", "aff_unique_index": "0;0;1", "aff_unique_norm": "Georgia Institute of Technology;Stanford University", "aff_unique_dep": ";", "aff_unique_url": "https://www.gatech.edu;https://www.stanford.edu", "aff_unique_abbr": "Georgia Tech;Stanford", "aff_campus_unique_index": "1", "aff_campus_unique": ";Stanford", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "EpBNf4Arod", 
"title": "PR-MCS: Perturbation Robust Metric for MultiLingual Image Captioning", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Vulnerability to lexical perturbation is a critical weakness of automatic evaluation metrics for image captioning. This paper proposes Perturbation Robust Multi-Lingual CLIPScore(PR-MCS), which exhibits robustness to such perturbations, as a novel reference-free image captioning metric applicable to multiple languages. To achieve perturbation robustness, we fine-tune the text encoder of CLIP with our language-agnostic method to distinguish the perturbed text from the original text. To verify the robustness of PR-MCS, we introduce a new fine-grained evaluation dataset consisting of detailed captions, critical objects, and the relationships between the objects for 3,000 images in five languages. In our experiments, PR-MCS significantly outperforms baseline metrics in capturing lexical noise of all various perturbation types in all five languages, while maintaining a strong correlation with human judgments.", "keywords": "Image Captioning metric;Perturbation Robustness", "primary_area": "", "supplementary_material": "", "author": "Yongil Kim;Yerin Hwang;Hyeongu Yun;Seunghyun Yoon;Trung Bui;Kyomin Jung", "authorids": "~Yongil_Kim1;~Yerin_Hwang1;~Hyeongu_Yun1;~Seunghyun_Yoon1;~Trung_Bui1;~Kyomin_Jung1", "gender": "M;F;M;M;M;M", "homepage": "https://yong1-kim.github.io;https://yerin-hwang49.github.io/;;https://david-yoon.github.io/;https://sites.google.com/site/trungbuistanford/;http://milab.snu.ac.kr/kjung/index.html", "dblp": "96/4712;;194/2671;68/3020-2;180/0632;48/3867", "google_scholar": "https://scholar.google.com/citations?hl=en;;;https://scholar.google.com/citations?hl=en;FpFTduYAAAAJ;https://scholar.google.co.kr/citations?user=u3uMl4MAAAAJ", "or_profile": "~Yongil_Kim1;~Yerin_Hwang1;~Hyeongu_Yun1;~Seunghyun_Yoon1;~Trung_Bui1;~Kyomin_Jung1", "aff": "Seoul National University;Seoul National University;LG Corporation;Adobe Research;Adobe Research;Seoul National University", "aff_domain": "snu.ac.kr;snu.ac.kr;lgresearch.ai;adobe.com;adobe.com;snu.ac.kr", "position": "PhD student;PhD student;Researcher;Researcher;Researcher;Full Professor", "bibtex": "@inproceedings{\nkim2023prmcs,\ntitle={{PR}-{MCS}: Perturbation Robust Metric for MultiLingual Image Captioning},\nauthor={Yongil Kim and Yerin Hwang and Hyeongu Yun and Seunghyun Yoon and Trung Bui and Kyomin Jung},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=EpBNf4Arod}\n}", "github": "", "project": "", "reviewers": "ccYd;J7jR;j2CY", "site": "https://openreview.net/forum?id=EpBNf4Arod", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;3;3", "excitement": "3;3;4", "reproducibility": "4;3;3", "correctness": "3;3;3", "rating_avg": 4.0, "confidence_avg": 3.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0000-0002-7262-3579;0000-0002-0871-349X;", "linkedin": ";;;david-s-yoon/;trung-bui-4333322/;", "aff_unique_index": "0;0;1;2;2;0", "aff_unique_norm": "Seoul National University;LG;Adobe", "aff_unique_dep": ";LG Corporation;Adobe Research", "aff_unique_url": "https://www.snu.ac.kr;https://www.lg.com;https://research.adobe.com", "aff_unique_abbr": "SNU;LG;Adobe", "aff_campus_unique_index": "", "aff_campus_unique": "", 
"aff_country_unique_index": "0;0;0;1;1;0", "aff_country_unique": "South Korea;United States" }, { "id": "EpJ7qqR0ad", "title": "MetaReVision: Meta-Learning with Retrieval for Visually Grounded Compositional Concept Acquisition", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Humans have the ability to learn novel compositional concepts by recalling primitive concepts acquired from past experience and generalizing these primitive concepts to novel compositions. \nInspired by the above human\u2019s compositional learning procedure, in this paper, we propose MetaReVision, a retrievalenhanced meta-learning model to solve the visually grounded compositional concept learning problem. \nThe proposed MetaReVision consists of a retrieval module and a meta-\nlearning module which are designed to incorporate retrieved primitive concepts as supporting set to meta-train visual-language models for grounded compositional concept recognition. \nThrough meta-learning from episodes constructed by the retriever, MetaReVision learns a generic compositional representation that can be fast updated to recognize novel composi tional concepts. \nWe create CompCOCO and CompFlickr to benchmark the grounded compositional concept learning. \nOur experimental results show MetaReVision outperforms other competitive baselines and the retrieval module does plays an important role in this compositional learning process.", "keywords": "Compositional Learning;Meta-Learning;Retrieval-enhance Learning;Visual-Language Models", "primary_area": "", "supplementary_material": "", "author": "Guangyue Xu;Parisa Kordjamshidi;Joyce Chai", "authorids": "~Guangyue_Xu1;~Parisa_Kordjamshidi1;~Joyce_Chai2", "gender": "M;F;F", "homepage": "https://xugy16.github.io/;http://www.cse.msu.edu/~kordjams/;https://web.eecs.umich.edu/~chaijy/", "dblp": "44/8494;73/3423;c/JoyceYChai", "google_scholar": "BVbyVlEAAAAJ;https://scholar.google.com.tw/citations?user=Ugo3NGgAAAAJ;", "or_profile": "~Guangyue_Xu1;~Parisa_Kordjamshidi1;~Joyce_Y_Chai1", "aff": "Michigan State University;Michigan State University;University of Michigan", "aff_domain": "msu.edu;msu.edu;umich.edu", "position": "PhD student;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nxu2023metarevision,\ntitle={MetaReVision: Meta-Learning with Retrieval for Visually Grounded Compositional Concept Acquisition},\nauthor={Guangyue Xu and Parisa Kordjamshidi and Joyce Chai},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=EpJ7qqR0ad}\n}", "github": "", "project": "", "reviewers": "6gSR;tj2B;c8Aa;SB8Y", "site": "https://openreview.net/forum?id=EpJ7qqR0ad", "pdf_size": 0, "rating": "3;3;3;3", "confidence": "2;3;4;3", "excitement": "3;3;3;3", "reproducibility": "2;3;3;3", "correctness": "3;3;3;3", "rating_avg": 3.0, "confidence_avg": 3.0, "excitement_avg": 3.0, "reproducibility_avg": 2.75, "correctness_avg": 3.0, "replies_avg": 14, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0002-9658-2230", "linkedin": ";;", "aff_unique_index": "0;0;1", "aff_unique_norm": "Michigan State University;University of Michigan", "aff_unique_dep": ";", "aff_unique_url": "https://www.msu.edu;https://www.umich.edu", "aff_unique_abbr": "MSU;UM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "EtC8wfjSw4", "title": "Human Raters Cannot Distinguish English 
Translations from Original English Texts", "track": "main", "status": "Short Main", "tldr": "", "abstract": "The term translationese describes the set of linguistic features unique to translated texts, which appear regardless of translation quality. Though automatic classifiers designed to distinguish translated texts achieve high accuracy and prior work has identified common hallmarks of translationese, human accuracy of identifying translated text is understudied. In this work, we perform a human evaluation of English original/translated texts in order to explore raters' ability to classify texts as being original or translated English and the features that lead a rater to judge text as being translated. Ultimately, we find that, regardless of the annotators' native language or the source language of the text, annotators are unable to distinguish translations from original English texts and also have low agreement. Our results provide critical insight into work in translation studies and context for assessments of translationese classifiers.", "keywords": "translationese;human evaluation;translation", "primary_area": "", "supplementary_material": "", "author": "Shira Wein", "authorids": "~Shira_Wein1", "gender": "", "homepage": "https://shirawein.github.io", "dblp": "263/2502", "google_scholar": "", "or_profile": "~Shira_Wein1", "aff": "Georgetown University", "aff_domain": "georgetown.edu", "position": "PhD student", "bibtex": "@inproceedings{\nwein2023human,\ntitle={Human Raters Cannot Distinguish English Translations from Original English Texts},\nauthor={Shira Wein},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=EtC8wfjSw4}\n}", "github": "", "project": "", "reviewers": "J1E6;xarF;p1Sh", "site": "https://openreview.net/forum?id=EtC8wfjSw4", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;3", "excitement": "2;4;3", "reproducibility": "4;4;3", "correctness": "3;4;3", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 1, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-1062-0866", "linkedin": "", "aff_unique_index": "0", "aff_unique_norm": "Georgetown University", "aff_unique_dep": "", "aff_unique_url": "https://www.georgetown.edu", "aff_unique_abbr": "GU", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "id": "EtNebdSBpe", "title": "Learning under Label Proportions for Text Classification", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "We present one of the preliminary NLP works under the challenging setup of Learning from Label Proportions (LLP), where the data is provided in an aggregate form called bags and only the proportion of samples in each class is available as the ground truth. This setup is in line with the desired characteristics of training models under privacy settings and weak supervision. By characterizing some irregularities of the most widely used baseline technique DLLP, we propose a novel formulation that is also robust. This is accompanied by a learnability result that provides a generalization bound under LLP. 
Combining this formulation with a self-supervised objective, our method achieves better results as compared to the baselines in almost 87% of the experimental configurations which include large scale models for both long and short range texts across multiple metrics.", "keywords": "Label Proportions;Privacy;Weak Supervision;Theory", "primary_area": "", "supplementary_material": "", "author": "Jatin Chauhan;Xiaoxuan Wang;Wei Wang", "authorids": "~Jatin_Chauhan3;~Xiaoxuan_Wang2;~Wei_Wang13", "gender": "M;F;F", "homepage": "https://chauhanjatin10.github.io/;;http://www.cs.ucla.edu/~weiwang", "dblp": "242/7749;;w/WeiWang.html", "google_scholar": "kTiFFPcAAAAJ;5LDKaEYAAAAJ;UedS9LQAAAAJ", "or_profile": "~Jatin_Chauhan3;~Xiaoxuan_Wang2;~Wei_Wang13", "aff": "University of California, Los Angeles;, University of California, Los Angeles;University of California, Los Angeles", "aff_domain": "ucla.edu;cs.ucla.edu;ucla.edu", "position": "MS student;PhD student;Full Professor", "bibtex": "@inproceedings{\nchauhan2023learning,\ntitle={Learning under Label Proportions for Text Classification},\nauthor={Jatin Chauhan and Xiaoxuan Wang and Wei Wang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=EtNebdSBpe}\n}", "github": "", "project": "", "reviewers": "i5AH;g6p6;qeAx", "site": "https://openreview.net/forum?id=EtNebdSBpe", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;4;3", "excitement": "3;2;4", "reproducibility": "4;3;3", "correctness": "3;3;4", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.0, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0002-8180-2886", "linkedin": ";mandy-wang-a72046192/;wei-wang-8800845/", "aff_unique_index": "0;0;0", "aff_unique_norm": "University of California, Los Angeles", "aff_unique_dep": "", "aff_unique_url": "https://www.ucla.edu", "aff_unique_abbr": "UCLA", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Los Angeles", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "EuMmDTVFjL", "title": "Dimensions of Online Conflict: Towards Modeling Agonism", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Agonism plays a vital role in democratic dialogue by fostering diverse perspectives and robust discussions. Within the realm of online conflict there is another type: hateful antagonism, which undermines constructive dialogue. Detecting conflict online is central to platform moderation and monetization. It is also vital for democratic dialogue, but only when it takes the form of agonism. To model these two types of conflict, we collected Twitter conversations related to trending controversial topics. We introduce a comprehensive annotation schema for labelling different dimensions of conflict in the conversations, such as the source of conflict, the target, and the rhetorical strategies deployed. Using this schema, we annotated approximately 4,000 conversations with multiple labels. We then train both logistic regression and transformer-based models on the dataset, incorporating context from the conversation, including the number of participants and the structure of the interactions. Results show that contextual labels are helpful in identifying conflict and make the models robust to variations in topic. 
Our research contributes a conceptualization of different dimensions of conflict, a richly annotated dataset, and promising results that can contribute to content moderation.", "keywords": "online conversations;conflict;agonism;content moderation", "primary_area": "", "supplementary_material": "", "author": "Matt Canute;Mali Jin;hannah holtzclaw;Alberto Lusoli;Philippa R Adams;Mugdha Pandya;Maite Taboada;Diana Maynard;WENDY Hui Kyong CHUN", "authorids": "~Matt_Canute1;~Mali_Jin1;~hannah_holtzclaw1;~Alberto_Lusoli1;~Philippa_R_Adams1;~Mugdha_Pandya1;~Maite_Taboada1;~Diana_Maynard1;~WENDY_Hui_Kyong_CHUN1", "gender": "M;F;Non-Binary;M;;F;F;F;F", "homepage": ";https://twitter.com/malidilei;https://hannah-holtzclaw.squarespace.com;https://labora.co/;;;http://www.sfu.ca/~mtaboada/;https://www.sheffield.ac.uk/dcs/people/research-staff/diana-maynard;https://digitaldemocracies.org", "dblp": ";277/0737;;;;226/1789;78/554;69/4767;", "google_scholar": ";Br8h1WIAAAAJ;;9UG9_VQAAAAJ;https://scholar.google.com/citations?hl=en;bzU8NIUAAAAJ;37jEMC0AAAAJ;https://scholar.google.co.uk/citations?user=YZZPJ2oAAAAJ;v_x4nFoAAAAJ", "or_profile": "~Matt_Canute1;~Mali_Jin1;~hannah_holtzclaw1;~Alberto_Lusoli1;~Philippa_R_Adams1;~Mugdha_Pandya1;~Maite_Taboada1;~Diana_Maynard1;~WENDY_Hui_Kyong_CHUN1", "aff": "Simon Fraser University;University of Sheffield;Simon Fraser University;Simon Fraser University;Simon Fraser University;University of Sheffield;Simon Fraser University;University of Sheffield;Simon Fraser University", "aff_domain": "sfu.ca;sheffield.ac.uk;sfu.ca;sfu.ca;sfu.ca;shef.ac.uk;sfu.ca;sheffield.ac.uk;sfu.ca", "position": "Researcher;PhD student;PhD student;Postdoc;PhD student;Postdoc;Full Professor;Principal Researcher;Full Professor", "bibtex": "@inproceedings{\ncanute2023dimensions,\ntitle={Dimensions of Online Conflict: Towards Modeling Agonism},\nauthor={Matt Canute and Mali Jin and hannah holtzclaw and Alberto Lusoli and Philippa R Adams and Mugdha Pandya and Maite Taboada and Diana Maynard and WENDY Hui Kyong CHUN},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=EuMmDTVFjL}\n}", "github": "", "project": "", "reviewers": "KsvZ;bZMt;APKa", "site": "https://openreview.net/forum?id=EuMmDTVFjL", "pdf_size": 0, "rating": "4;4;4", "confidence": "5;4;5", "excitement": "4;3;4", "reproducibility": "4;5;5", "correctness": "4;2;3", "rating_avg": 4.0, "confidence_avg": 4.666666666666667, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.666666666666667, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 9, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-4984-4744;;0000-0001-9125-3561;0000-0002-1134-4867;;0000-0002-6750-8891;0000-0002-1773-7020;", "linkedin": "mattcanute/;;;albertolusoli/;;mugdha-pandya-5a755414a/;maite-taboada/;dianamaynard/?originalSubdomain=uk;wendy-chun-0615923/", "aff_unique_index": "0;1;0;0;0;1;0;1;0", "aff_unique_norm": "Simon Fraser University;University of Sheffield", "aff_unique_dep": ";", "aff_unique_url": "https://www.sfu.ca;https://www.sheffield.ac.uk", "aff_unique_abbr": "SFU;Sheffield", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;0;1;0;1;0", "aff_country_unique": "Canada;United Kingdom" }, { "id": "EvVWHQ5l6X", "title": "One For All $\\&$ All For One: Bypassing Hyperparameter Tuning with Model Averaging for Cross-Lingual Transfer", "track": "main", "status": "Short Findings", "tldr": "", 
"abstract": "Multilingual language models enable zero-shot cross-lingual transfer (ZS-XLT): fine-tuned on sizable source-language task data, they perform the task in target languages without labeled instances. The effectiveness of ZS-XLT hinges on the linguistic proximity between languages and the amount of pretraining data for a language. Because of this, model selection based on source-language validation is unreliable: it picks model snapshots with suboptimal target-language performance. As a remedy, some work optimizes ZS-XLT by extensively tuning hyperparameters: the follow-up work then routinely struggles to replicate the original results. Other work searches over narrower hyperparameter grids, reporting substantially lower performance. In this work, we therefore propose an unsupervised evaluation protocol for ZS-XLT that decouples performance maximization from hyperparameter tuning. As a robust and more transparent alternative to extensive hyperparameter tuning, we propose to accumulatively average snapshots from different runs into a single model. We run broad ZS-XLT experiments on both higher-level semantic tasks (NLI, extractive QA) and a lower-level token classification task (NER) and find that conventional model selection based on source-language validation quickly plateaus to suboptimal ZS-XLT performance. On the other hand, our accumulative run-by-run averaging of models trained with different hyperparameters boosts ZS-XLT performance and closely correlates with ``oracle'' ZS-XLT, i.e., model selection based on target-language validation performance.", "keywords": "zero-shot cross-lingual transfer;multilingual representation learning", "primary_area": "", "supplementary_material": "", "author": "Fabian David Schmidt;Ivan Vuli\u0107;Goran Glava\u0161", "authorids": "~Fabian_David_Schmidt1;~Ivan_Vuli\u01071;~Goran_Glava\u01611", "gender": "M;M;M", "homepage": "https://fdschmidt93.github.io/;https://sites.google.com/site/ivanvulic/;https://sites.google.com/view/goranglavas", "dblp": "254/9181;77/9768;50/11059", "google_scholar": "U_ukcNYAAAAJ;ZX8js60AAAAJ;Ym0myOwAAAAJ", "or_profile": "~Fabian_David_Schmidt1;~Ivan_Vuli\u01071;~Goran_Glava\u01611", "aff": "Bayerische Julius-Maximilians-Universit\u00e4t W\u00fcrzburg;PolyAI Limited;Julius-Maximilians-Universit\u00e4t W\u00fcrzburg", "aff_domain": "uni-wuerzburg.de;poly-ai.com;uni-wuerzburg.de", "position": "PhD student;Senior Scientist;Full Professor", "bibtex": "@inproceedings{\nschmidt2023one,\ntitle={One For All \\${\\textbackslash}\\&\\$ All For One: Bypassing Hyperparameter Tuning with Model Averaging for Cross-Lingual Transfer},\nauthor={Fabian David Schmidt and Ivan Vuli{\\'c} and Goran Glava{\\v{s}}},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=EvVWHQ5l6X}\n}", "github": "", "project": "", "reviewers": "c3Ph;Znmg;H7uL", "site": "https://openreview.net/forum?id=EvVWHQ5l6X", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;2;5", "excitement": "3;5;3", "reproducibility": "4;5;4", "correctness": "3;3;4", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.333333333333333, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": ";ivan-vuli%C4%87-286b4a81/;goran-glava\u0161-8484b420", "aff_unique_index": "0;1;2", "aff_unique_norm": "University of 
W\u00fcrzburg;PolyAI Limited;Julius-Maximilians-Universit\u00e4t W\u00fcrzburg", "aff_unique_dep": ";;", "aff_unique_url": "https://www.uni-wuerzburg.de;https://www.poly.ai;https://www.uni-wuerzburg.de", "aff_unique_abbr": "JMU;PolyAI;JMU", "aff_campus_unique_index": "0", "aff_campus_unique": "W\u00fcrzburg;", "aff_country_unique_index": "0;1;0", "aff_country_unique": "Germany;United Kingdom" }, { "id": "EwNVh5fuRF", "title": "Select, Prompt, Filter: Distilling Large Language Models for Summarizing Conversations", "track": "main", "status": "Short Main", "tldr": "", "abstract": "Large language models (LLMs) like ChatGPT can be expensive to train, deploy, and use for specific natural language generation tasks such as text summarization and for certain domains. A promising alternative is to fine-tune relatively smaller language models (LMs) on a particular task using high-quality, in-domain datasets. However, it can be prohibitively expensive to get such high-quality training data. This issue has been mitigated by generating weakly supervised data via knowledge distillation (KD) of LLMs. We propose a three-step approach to distill ChatGPT and fine-tune smaller LMs for summarizing forum conversations. More specifically, we design a method to selectively sample a large unannotated corpus of forum conversation using a semantic similarity metric. Then, we use the same metric to retrieve suitable prompts for ChatGPT from a small annotated validation set in the same domain. The generated dataset is then filtered to remove low-quality instances. Our proposed select-prompt-filter KD approach leads to significant improvements of up to 6.6 ROUGE-2 score by leveraging sufficient in-domain pseudo-labeled data over a standard KD approach given the same size of training data.", "keywords": "Text Summarization;Generative AI;Knowledge distillation;Data filtering", "primary_area": "", "supplementary_material": "", "author": "Minh-Quang PHAM;Sathish Reddy Indurthi;Shamil Chollampatt;Marco Turchi", "authorids": "~Minh-Quang_PHAM1;~Sathish_Reddy_Indurthi2;~Shamil_Chollampatt1;~Marco_Turchi2", "gender": "M;M;M;M", "homepage": ";;https://shamil.github.io;http://marcoturchi.com", "dblp": "228/5646;223/2379;182/2351;96/4886", "google_scholar": "vXX_GLwAAAAJ;xZrGdhgAAAAJ;b1B1DpYAAAAJ;loHH3HcAAAAJ", "or_profile": "~Minh-Quang_PHAM1;~Sathish_Reddy_Indurthi2;~Shamil_Chollampatt1;~Marco_Turchi2", "aff": "Zoom Video Communications;Zoom Video Communications;Zoom Video Communications;Zoom", "aff_domain": "zoom.us;zoom.us;zoom.us;zoom.us", "position": "Researcher;Senior Research Scientist;Research Scientist;Principal Researcher", "bibtex": "@inproceedings{\npham2023select,\ntitle={Select, Prompt, Filter: Distilling Large Language Models for Summarizing Conversations},\nauthor={Minh-Quang PHAM and Sathish Reddy Indurthi and Shamil Chollampatt and Marco Turchi},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=EwNVh5fuRF}\n}", "github": "", "project": "", "reviewers": "AQP7;jSRx;8qtH", "site": "https://openreview.net/forum?id=EwNVh5fuRF", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;3;2", "excitement": "4;2;3", "reproducibility": "3;3;3", "correctness": "4;4;3", "rating_avg": 4.0, "confidence_avg": 2.6666666666666665, "excitement_avg": 3.0, "reproducibility_avg": 3.0, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": 
"0000-0003-3618-481X;;;0000-0002-5899-4496", "linkedin": "minh-quang-pham-a18600a8/;sathishindurthi/;shamilcm/;", "aff_unique_index": "0;0;0;1", "aff_unique_norm": "Zoom Video Communications;Zoom Video Communications Inc.", "aff_unique_dep": ";", "aff_unique_url": "https://zoom.us;https://zoom.us", "aff_unique_abbr": "Zoom;Zoom", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "Exh156fVSS", "title": "Exploiting Contrastive Learning and Numerical Evidence for Confusing Legal Judgment Prediction", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Given the fact description text of a legal case, legal judgment prediction (LJP) aims to predict the case's charge, applicable law article, and term of penalty. A core problem of LJP is distinguishing confusing legal cases where only subtle text differences exist. Previous studies fail to distinguish different classification errors with a standard cross-entropy classification loss and ignore the numbers in the fact description for predicting the term of penalty. To tackle these issues, in this work, first, in order to exploit the numbers in legal cases for predicting the term of penalty of certain charges, we enhance the representation of the fact description with extracted crime amounts which are encoded by a pre-trained numeracy model. Second, we propose a moco-based supervised contrastive learning to learn distinguishable representations and explore the best strategy to construct positive example pairs to benefit all three subtasks of LJP simultaneously. Extensive experiments on real-world datasets show that the proposed method achieves new state-of-the-art results, particularly for confusing legal cases. 
Ablation studies also demonstrate the effectiveness of each component.", "keywords": "legal artificial intelligence;legal judgment prediction;contrastive learning;information extraction", "primary_area": "", "supplementary_material": "", "author": "Leilei Gan;Baokui Li;Kun Kuang;Yating Zhang;Lei Wang;Anh Tuan Luu;Yi Yang;Fei Wu", "authorids": "~Leilei_Gan1;~Baokui_Li1;~Kun_Kuang1;~Yating_Zhang1;~Lei_Wang47;~Anh_Tuan_Luu2;~Yi_Yang4;~Fei_Wu1", "gender": ";M;M;F;M;M;M;M", "homepage": ";https://github.com/;http://kunkuang.github.io;;https://www.ren3ren.com;https://tuanluu.github.io/;http://reler.net/;https://person.zju.edu.cn/wufei", "dblp": ";;194/4245;29/5889;;81/8329.html;;84/3254-1", "google_scholar": ";;https://scholar.google.com.hk/citations?user=FOsNiMQAAAAJ;;;https://scholar.google.com.sg/citations?hl=en;https://scholar.google.com.au/citations?user=RMSuNFwAAAAJ;XJLn4MYAAAAJ", "or_profile": "~Leilei_Gan1;~Baokui_Li1;~Kun_Kuang1;~Yating_Zhang1;~Lei_Wang47;~Anh_Tuan_Luu2;~Yi_Yang4;~Fei_Wu1", "aff": ";Zhejiang University;Zhejiang University;;;Nanyang Technological University;Zhejiang University;Zhejiang University", "aff_domain": ";zju.edu.cn;zju.edu.cn;;;ntu.edu.sg;zju.edu.cn;zju.edu.cn", "position": ";MS student;Associate Professor;;;Assistant Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\ngan2023exploiting,\ntitle={Exploiting Contrastive Learning and Numerical Evidence for Confusing Legal Judgment Prediction},\nauthor={Leilei Gan and Baokui Li and Kun Kuang and Yating Zhang and Lei Wang and Anh Tuan Luu and Yi Yang and Fei Wu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=Exh156fVSS}\n}", "github": "", "project": "", "reviewers": "VZ2V;kzgp;zrQE", "site": "https://openreview.net/forum?id=Exh156fVSS", "pdf_size": 0, "rating": "3;3;3", "confidence": "1;5;3", "excitement": "3;3;4", "reproducibility": "3;4;4", "correctness": "4;3;3", "rating_avg": 3.0, "confidence_avg": 3.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 10, "authors#_avg": 8, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0009-0000-7528-8131;;;;;", "linkedin": ";;;;;;;", "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "Zhejiang University;Nanyang Technological University", "aff_unique_dep": ";", "aff_unique_url": "https://www.zju.edu.cn;https://www.ntu.edu.sg", "aff_unique_abbr": "ZJU;NTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0", "aff_country_unique": "China;Singapore" }, { "id": "ExpskenHdP", "title": "StereoMap: Quantifying the Awareness of Human-like Stereotypes in Large Language Models", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Large Language Models (LLMs) have been observed to encode and perpetuate harmful associations present in the training data. We propose a theoretically grounded framework called StereoMap to gain insights into their perceptions of how demographic groups have been viewed by society. The framework is grounded in the Stereotype Content Model (SCM); a well-established theory from psychology. According to SCM, stereotypes are not all alike. Instead, the dimensions of Warmth and Competence serve as the factors that delineate the nature of stereotypes. 
Based on the SCM theory, StereoMap maps LLMs' perceptions of social groups (defined by socio-demographic features) using the dimensions of Warmth and Competence. Furthermore, the framework enables the investigation of keywords and verbalizations of reasoning of LLMs' judgments to uncover underlying factors influencing their perceptions.\n\nOur results show that LLMs exhibit a diverse range of perceptions towards these groups, characterized by mixed evaluations along the dimensions of Warmth and Competence. Furthermore, analyzing the reasonings of LLMs, our findings indicate that LLMs demonstrate an awareness of social disparities, often stating statistical data and research findings to support their reasoning. This study contributes to the understanding of how LLMs perceive and represent social groups, shedding light on their potential biases and the perpetuation of harmful associations.", "keywords": "Stereotype;Bias;Social perception in language models", "primary_area": "", "supplementary_material": "", "author": "Sullam Jeoung;Yubin Ge;Jana Diesner", "authorids": "~Sullam_Jeoung1;~Yubin_Ge1;~Jana_Diesner1", "gender": ";M;", "homepage": ";;", "dblp": ";216/4408;", "google_scholar": ";Q0HQH3YAAAAJ;", "or_profile": "~Sullam_Jeoung1;~Yubin_Ge1;~Jana_Diesner1", "aff": ";University of Illinois, Urbana Champaign;", "aff_domain": ";illinois.edu;", "position": ";PhD student;", "bibtex": "@inproceedings{\njeoung2023stereomap,\ntitle={StereoMap: Quantifying the Awareness of Human-like Stereotypes in Large Language Models},\nauthor={Sullam Jeoung and Yubin Ge and Jana Diesner},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=ExpskenHdP}\n}", "github": "", "project": "", "reviewers": "5TSk;aZZz;NHSL", "site": "https://openreview.net/forum?id=ExpskenHdP", "pdf_size": 0, "rating": "4;4;4", "confidence": "5;4;4", "excitement": "3;4;3", "reproducibility": "1;3;4", "correctness": "3;4;3", "rating_avg": 4.0, "confidence_avg": 4.333333333333333, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 2.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": ";yubin-ge-6220a012a/;", "aff_unique_index": "0", "aff_unique_norm": "University of Illinois Urbana-Champaign", "aff_unique_dep": "", "aff_unique_url": "https://illinois.edu", "aff_unique_abbr": "UIUC", "aff_campus_unique_index": "0", "aff_campus_unique": "Urbana-Champaign", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "id": "F1G7y94K02", "title": "Dissecting Recall of Factual Associations in Auto-Regressive Language Models", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Transformer-based language models (LMs) are known to capture factual knowledge in their parameters. While previous work looked into where factual associations are stored, only little is known about how they are retrieved internally during inference. We investigate this question through the lens of information flow. Given a subject-relation query, we study how the model aggregates information about the subject and relation to predict the correct attribute. With interventions on attention edges, we first identify two critical points where information propagates to the prediction: one from the relation positions followed by another from the subject positions. 
Next, by analyzing the information at these points, we unveil a three-step internal mechanism for attribute extraction. First, the representation at the last-subject position goes through an enrichment process, driven by the early MLP sublayers, to encode many subject-related attributes. Second, information from the relation propagates to the prediction. Third, the prediction representation \"queries\" the enriched subject to extract the attribute. Perhaps surprisingly, this extraction is typically done via attention heads, which often encode subject-attribute mappings in their parameters. Overall, our findings introduce a comprehensive view of how factual associations are stored and extracted internally in LMs, facilitating future research on knowledge localization and editing.", "keywords": "language models;knowledge tracing;knowledge localization;interpretability", "primary_area": "", "supplementary_material": "", "author": "Mor Geva;Jasmijn Bastings;Katja Filippova;Amir Globerson", "authorids": "~Mor_Geva1;~Jasmijn_Bastings1;~Katja_Filippova1;~Amir_Globerson1", "gender": "F;F;M;F", "homepage": "https://mega002.github.io/;;http://www.cs.tau.ac.il/~gamir/;https://bastings.github.io", "dblp": "203/9159;24/5028;08/4162.html;146/3824", "google_scholar": "https://scholar.google.co.il/citations?user=GxpQbSkAAAAJ;https://scholar.google.ch/citations?user=23xz9QgAAAAJ;https://scholar.google.com.tw/citations?user=5JserkUAAAAJ;VG_wuYkAAAAJ", "or_profile": "~Mor_Geva1;~Katja_Filippova1;~Amir_Globerson1;~Jasmijn_Bastings2", "aff": "Google DeepMind;Research, Google;Tel Aviv University;Google DeepMind", "aff_domain": "google.com;research.google.com;tau.ac.il;google.com", "position": "Postdoc;Researcher;Associate Professor;Researcher", "bibtex": "@inproceedings{\ngeva2023dissecting,\ntitle={Dissecting Recall of Factual Associations in Auto-Regressive Language Models},\nauthor={Mor Geva and Jasmijn Bastings and Katja Filippova and Amir Globerson},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=F1G7y94K02}\n}", "github": "", "project": "", "reviewers": "cRFb;6DKT;Bdt7", "site": "https://openreview.net/forum?id=F1G7y94K02", "pdf_size": 0, "rating": "5;5;5", "confidence": "3;1;3", "excitement": "4;4;3", "reproducibility": "4;2;3", "correctness": "4;4;4", "rating_avg": 5.0, "confidence_avg": 2.3333333333333335, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.0, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0000-0002-5445-4417", "linkedin": "morgeva/;katja-filippova-93a2144;;jasmijn-bastings/", "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Google;Tel Aviv University", "aff_unique_dep": "Google DeepMind;", "aff_unique_url": "https://deepmind.com;https://www.tau.ac.il", "aff_unique_abbr": "DeepMind;TAU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;1;2;0", "aff_country_unique": "United Kingdom;United States;Israel" }, { "id": "F4qNZtkk3V", "title": "An Empirical Study on Multiple Knowledge from ChatGPT for Emotion Recognition in Conversations", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Multiple knowledge (e.g., co-reference, topics, emotional causes, etc) has been demonstrated effective for emotion detection. 
However, exploring this knowledge in Emotion Recognition in Conversations (ERC) is currently a blank slate due to the lack of annotated data and the high cost involved in obtaining such knowledge. Fortunately, the emergence of Large Language Models (LLMs) holds promise in filling this void. Therefore, we propose a Multiple Knowledge Fusion Model (MKFM) to effectively integrate such knowledge generated by LLMs for ERC and empirically study its impact on the model. Experimental results on three public datasets have demonstrated the effectiveness of multiple knowledge for ERC. Furthermore, we conduct a detailed analysis of the contribution and complementarity of this knowledge.", "keywords": "Emotion Recognition in Conversations;Graph Network;Supervised Contrastive Learning", "primary_area": "", "supplementary_material": "", "author": "Geng Tu;Bin Liang;Bing Qin;Kam-Fai Wong;Ruifeng Xu", "authorids": "~Geng_Tu2;~Bin_Liang6;~Bing_Qin2;~Kam-Fai_Wong2;~Ruifeng_Xu1", "gender": "M;M;;M;M", "homepage": ";https://binliang-nlp.github.io/;http://ir.hit.edu.cn/~qinb;http://www.se.cuhk.edu.hk/~kfwong;http://faculty.hitsz.edu.cn/xuruifeng", "dblp": ";71/6053-4;86/5934.html;w/KamFaiWong;93/5407-1", "google_scholar": "https://scholar.google.com.hk/citations?user=OvI-eTkAAAAJ;djpQeLEAAAAJ;LKnCub0AAAAJ;;mObXnNIAAAAJ", "or_profile": "~Geng_Tu2;~Bin_Liang6;~Bing_Qin2;~Kam-Fai_Wong2;~Ruifeng_Xu1", "aff": "Harbin Institute of Technology;The Chinese University of Hong Kong;Harbin Institute of Technology;The Chinese University of Hong Kong;Harbin Institute of Technology", "aff_domain": "hit.edu.cn;cuhk.edu.hk;hit.edu.cn;cuhk.edu.hk;hit.edu.cn", "position": "PhD student;Postdoc;Full Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\ntu2023an,\ntitle={An Empirical Study on Multiple Knowledge from Chat{GPT} for Emotion Recognition in Conversations},\nauthor={Geng Tu and Bin Liang and Bing Qin and Kam-Fai Wong and Ruifeng Xu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=F4qNZtkk3V}\n}", "github": "", "project": "", "reviewers": "9VVj;7phw;poob", "site": "https://openreview.net/forum?id=F4qNZtkk3V", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;3", "excitement": "3;4;3", "reproducibility": "3;3;3", "correctness": "3;4;3", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0001-7234-1347;0000-0002-2543-5604;0000-0002-9427-5659;0000-0002-4009-5679", "linkedin": ";;;;", "aff_unique_index": "0;1;0;1;0", "aff_unique_norm": "Harbin Institute of Technology;Chinese University of Hong Kong", "aff_unique_dep": ";", "aff_unique_url": "http://www.hit.edu.cn/;https://www.cuhk.edu.hk", "aff_unique_abbr": "HIT;CUHK", "aff_campus_unique_index": "0;1;0;1;0", "aff_campus_unique": "Harbin;Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "FAiFBfFTGZ", "title": "Accelerating Toeplitz Neural Network with Constant-time Inference Complexity", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Toeplitz Neural Networks (TNNs) have exhibited outstanding performance in various sequence modeling tasks. They outperform commonly used Transformer-based models while benefiting from log-linear space-time complexities. 
On the other hand, State Space Models (SSMs) achieve lower performance than TNNs in language modeling but offer the advantage of constant inference complexity. In this paper, we aim to combine the strengths of TNNs and SSMs by converting TNNs to SSMs during inference, thereby enabling TNNs to achieve the same constant inference complexities as SSMs.\nTo accomplish this, we formulate the conversion process as an optimization problem and provide a closed-form solution. We demonstrate how to transform the target equation into a Vandermonde linear system problem, which can be efficiently solved using the Discrete Fourier Transform (DFT).\nNotably, our method requires no training and maintains numerical stability. It can be also applied to any LongConv-based model. To assess its effectiveness, we conduct extensive experiments on language modeling tasks across various settings. Additionally, we compare our method to other gradient-descent solutions, highlighting the superior numerical stability of our approach. The source code is available at https://github.com/OpenNLPLab/ETSC-Exact-Toeplitz-to-SSM-Conversion.", "keywords": "Toeplitz Neural Network;inference;constant-time complexity", "primary_area": "", "supplementary_material": "", "author": "Zhen Qin;Yiran Zhong", "authorids": "~Zhen_Qin6;~Yiran_Zhong1", "gender": ";M", "homepage": "https://github.com/Doraemonzzz;", "dblp": ";158/9624", "google_scholar": "https://scholar.google.com.sg/citations?user=IcBRtycAAAAJ;https://scholar.google.com.sg/citations?user=E9NVOBUAAAAJ", "or_profile": "~Zhen_Qin6;~Yiran_Zhong1", "aff": "Sensetime;Shanghai AI Lab", "aff_domain": "sensetime.com;pjlab.org.cn", "position": "Researcher;PI", "bibtex": "@inproceedings{\nqin2023accelerating,\ntitle={Accelerating Toeplitz Neural Network with Constant-time Inference Complexity},\nauthor={Zhen Qin and Yiran Zhong},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=FAiFBfFTGZ}\n}", "github": "", "project": "", "reviewers": "SKym;78gu;ehMV", "site": "https://openreview.net/forum?id=FAiFBfFTGZ", "pdf_size": 0, "rating": "5;5;5", "confidence": "2;2;3", "excitement": "3;3;4", "reproducibility": "2;3;4", "correctness": "4;4;3", "rating_avg": 5.0, "confidence_avg": 2.3333333333333335, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.0, "correctness_avg": 3.6666666666666665, "replies_avg": 5, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";", "linkedin": ";", "aff_unique_index": "0;1", "aff_unique_norm": "SenseTime;Shanghai AI Lab", "aff_unique_dep": ";", "aff_unique_url": "https://www.sensetime.com;https://www.shanghaiailab.com", "aff_unique_abbr": "SenseTime;SAIL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "id": "FAimEpR9Fh", "title": "What Makes it Ok to Set a Fire? Iterative Self-distillation of Contexts and Rationales for Disambiguating Defeasible Social and Moral Situations", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Moral or ethical judgments rely heavily on the specific contexts in which they occur. 
Understanding varying shades of defeasible contextualizations (i.e., additional information that strengthens or attenuates the moral acceptability of an action) is critical to accurately represent the subtlety and intricacy of grounded human moral judgment in real-life scenarios.\n\nWe introduce defeasible moral reasoning: a task to provide grounded contexts that make an action more or less morally acceptable, along with commonsense rationales that justify the reasoning. To elicit high-quality task data, we take an iterative self-distillation approach that starts from a small amount of unstructured seed knowledge from GPT-3 and then alternates between (1) self-distillation from student models; (2) targeted filtering with a critic model trained by human judgment (to boost validity) and NLI (to boost diversity); (3) self-imitation learning (to amplify the desired data quality). This process yields a student model that produces defeasible contexts with improved validity, diversity, and defeasibility. From this model we distill a high-quality dataset, $\\delta$-Rules-of-Thumb, of 1.2M entries of contextualizations and rationales for 115K defeasible moral actions rated highly by human annotators 85.9% to 99.8% of the time. Using $\\delta$-RoT we obtain a final student model that wins over all intermediate student models by a notable margin.", "keywords": "moral reasoning;defeasible reasoning;commonsense reasoning;language groundings;knowledge distillation", "primary_area": "", "supplementary_material": "", "author": "Kavel Rao;Liwei Jiang;Valentina Pyatkin;Yuling Gu;Niket Tandon;Nouha Dziri;Faeze Brahman;Yejin Choi", "authorids": "~Kavel_Rao1;~Liwei_Jiang2;~Valentina_Pyatkin1;~Yuling_Gu1;~Niket_Tandon2;~Nouha_Dziri2;~Faeze_Brahman1;~Yejin_Choi1", "gender": "M;F;;;M;;F;F", "homepage": "http://kavelrao.dev;https://liweijiang.me;;;https://niket.tandon.info;;https://fabrahman.github.io;https://yejinc.github.io/", "dblp": ";;;194/1346;29/9923;;276/6005;89/579-1", "google_scholar": ";lcPsDgUAAAAJ;;;9uWuZkUAAAAJ;;viCG2ikAAAAJ;vhP-tlcAAAAJ", "or_profile": "~Kavel_Rao1;~Liwei_Jiang2;~Valentina_Pyatkin1;~Yuling_Gu1;~Niket_Tandon2;~Nouha_Dziri2;~Faeze_Brahman1;~Yejin_Choi1", "aff": "Department of Computer Science, University of Washington;University of Washington;;Allen Institute for Artificial Intelligence;Allen Institute for Artificial Intelligence;;Allen Institute for AI;Department of Computer Science, University of Washington", "aff_domain": "cs.washington.edu;washington.edu;;allenai.org;allenai.org;;allenai.org;cs.washington.edu", "position": "Undergrad student;PhD student;;Predoctoral Young Investigator;Researcher;;Postdoc;Full Professor", "bibtex": "@inproceedings{\nrao2023what,\ntitle={What Makes it Ok to Set a Fire? 
Iterative Self-distillation of Contexts and Rationales for Disambiguating Defeasible Social and Moral Situations},\nauthor={Kavel Rao and Liwei Jiang and Valentina Pyatkin and Yuling Gu and Niket Tandon and Nouha Dziri and Faeze Brahman and Yejin Choi},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=FAimEpR9Fh}\n}", "github": "", "project": "", "reviewers": "RfsH;7Yd4;Uk2j", "site": "https://openreview.net/forum?id=FAimEpR9Fh", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;3;4", "excitement": "3;3;5", "reproducibility": "4;4;3", "correctness": "3;4;4", "rating_avg": 5.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 8, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;;;", "linkedin": ";;;yuling-gu/;;;;", "aff_unique_index": "0;0;1;1;2;0", "aff_unique_norm": "University of Washington;Allen Institute for Artificial Intelligence;Allen Institute for AI", "aff_unique_dep": "Department of Computer Science;;", "aff_unique_url": "https://www.washington.edu;https://allenai.org;https://allenai.org", "aff_unique_abbr": "UW;AI2;AI2", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Seattle;", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "FGBEoz9WzI", "title": "Automatic Prompt Augmentation and Selection with Chain-of-Thought from Labeled Data", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Chain-of-thought (CoT) advances the reasoning abilities of large language models (LLMs) and achieves superior performance in complex reasoning tasks. However, most CoT studies rely on carefully designed human-annotated rationale chains to prompt LLMs, posing challenges for real-world applications where labeled data is available without rationale chains. This paper proposes a new strategy, Automate-CoT (Automatic Prompt Augmentation and Selection with Chain-of-Thought), that can bypass human engineering of CoT by automatically augmenting rationale chains from a small labeled dataset, and then pruning low-quality chains to construct a candidate pool of machine-generated rationale chains based on the labels. Finally, it selects the optimal combination of several rationale chains from the pool for CoT prompting by employing a variance-reduced policy gradient strategy to estimate the significance of each example. Automate-CoT enables a quick adaptation of the CoT technique to different tasks. 
Experimental results demonstrate the effectiveness of our method, where competitive results are achieved on arithmetic reasoning (+2.7%), commonsense reasoning (+3.4%), symbolic reasoning (+3.2%), and non-reasoning tasks (+2.5%).", "keywords": "large language models;chain-of-thought;prompt tuning;few-shot prompting", "primary_area": "", "supplementary_material": "", "author": "KaShun SHUM;Shizhe Diao;Tong Zhang", "authorids": "~KaShun_SHUM1;~Shizhe_Diao2;~Tong_Zhang2", "gender": "M;M;M", "homepage": "https://shumkashun.github.io;http://tongzhang-ml.org;https://shizhediao.github.io/", "dblp": "297/9971.html;07/4227-1;221/3896", "google_scholar": "JQakEawAAAAJ;LurWtuYAAAAJ;NDFQrLQAAAAJ", "or_profile": "~KaShun_SHUM1;~Tong_Zhang2;~SHIZHE_DIAO1", "aff": "Department of Computer Science and Engineering, Hong Kong University of Science and Technology;Hong Kong University of Science and Technology;Hong Kong University of Science and Technology", "aff_domain": "cse.ust.hk;ust.hk;ust.hk", "position": "PhD student;Full Professor;PhD student", "bibtex": "@inproceedings{\nshum2023automatic,\ntitle={Automatic Prompt Augmentation and Selection with Chain-of-Thought from Labeled Data},\nauthor={KaShun SHUM and Shizhe Diao and Tong Zhang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=FGBEoz9WzI}\n}", "github": "", "project": "", "reviewers": "5T7Y;rCCa;1kzZ", "site": "https://openreview.net/forum?id=FGBEoz9WzI", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;4", "excitement": "4;3;3", "reproducibility": "4;3;3", "correctness": "4;4;4", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0009-0006-5168-8345;0000-0002-5511-2558;", "linkedin": ";;", "aff_unique_index": "0;0;0", "aff_unique_norm": "Hong Kong University of Science and Technology", "aff_unique_dep": "Department of Computer Science and Engineering", "aff_unique_url": "https://www.ust.hk", "aff_unique_abbr": "HKUST", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Hong Kong SAR", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "FGBWDf7Z19", "title": "XLS-R fine-tuning on noisy word boundaries for unsupervised speech segmentation into words", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "Due to the absence of explicit word boundaries in the speech stream, the task of segmenting spoken sentences into word units without text supervision is particularly challenging. In this work, we leverage the most recent self-supervised speech models that have proved to quickly adapt to new tasks through fine-tuning, even in low resource conditions. Taking inspiration from semi-supervised learning, we fine-tune an XLS-R model to predict word boundaries themselves produced by top-tier speech segmentation systems: DPDP, VG-HuBERT and DP-Parse. Once XLS-R is fine-tuned, it is used to infer new word boundary labels that are used in turn for another fine-tuning step. Our method consistently improves the performance of each system and set a new state-of-the-art that is, on average 130% higher than the previous one as measured by the F1 score on correctly discovered word tokens on five corpora featuring different languages. 
Finally, our system can segment speech from languages unseen during fine-tuning in a zero-shot fashion.", "keywords": "unsupervised speech segmentation into words;self-supervised Learning;self-training", "primary_area": "", "supplementary_material": "", "author": "Robin Jonathan Algayres;Pablo J. Diego Simon;Beno\u00eet Sagot;Emmanuel Dupoux", "authorids": "~Robin_Jonathan_Algayres1;~Pablo_J._Diego_Simon1;~Beno\u00eet_Sagot1;~Emmanuel_Dupoux1", "gender": "M;M;M;M", "homepage": ";https://www.linkedin.com/in/pablo-j-diego-sim%C3%B3n-b3475a212/;http://pauillac.inria.fr/~sagot/;http://www.lscp.net/persons/dupoux/", "dblp": "239/8581.html;;66/1016;41/8160", "google_scholar": "Rc1SZTIAAAAJ;;https://scholar.google.fr/citations?user=HXUT9ZkAAAAJ;https://scholar.google.fr/citations?user=94c1abIAAAAJ", "or_profile": "~Robin_Jonathan_Algayres1;~Pablo_J._Diego_Simon1;~Beno\u00eet_Sagot1;~Emmanuel_Dupoux1", "aff": "INRIA;ETHZ - ETH Zurich;Inria;EHESS", "aff_domain": "inria.fr;ethz.ch;inria.fr;ehess.fr", "position": "PhD student;MS student;Research Director;Full Professor", "bibtex": "@inproceedings{\nalgayres2023xlsr,\ntitle={{XLS}-R fine-tuning on noisy word boundaries for unsupervised speech segmentation into words},\nauthor={Robin Jonathan Algayres and Pablo J. Diego Simon and Beno{\\^\\i}t Sagot and Emmanuel Dupoux},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=FGBWDf7Z19}\n}", "github": "", "project": "", "reviewers": "j6sr;gXyg;oKq6", "site": "https://openreview.net/forum?id=FGBWDf7Z19", "pdf_size": 0, "rating": "4;4;4", "confidence": "5;4;4", "excitement": "4;3;2", "reproducibility": "5;3;4", "correctness": "4;4;3", "rating_avg": 4.0, "confidence_avg": 4.333333333333333, "excitement_avg": 3.0, "reproducibility_avg": 4.0, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0002-0107-8526;0000-0002-7814-2952", "linkedin": "robin-algayres/;;beno\u00eet-sagot-4731735/;emmanuel-dupoux-18034055/", "aff_unique_index": "0;1;0;2", "aff_unique_norm": "INRIA;ETH Zurich;Ecole des Hautes Etudes en Sciences Sociales", "aff_unique_dep": ";;", "aff_unique_url": "https://www.inria.fr;https://www.ethz.ch;https://www.ehess.fr", "aff_unique_abbr": "INRIA;ETHZ;EHESS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "France;Switzerland" }, { "id": "FKNtgr0qQy", "title": "Emergence of Abstract State Representations in Embodied Sequence Modeling", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Decision making via sequence modeling aims to mimic the success of language models, where actions taken by an embodied agent are modeled as tokens to predict. Despite their promising performance, it remains unclear if embodied sequence modeling leads to the emergence of internal representations that represent the environmental state information. A model that lacks abstract state representations would be liable to make decisions based on surface statistics which fail to generalize. We take the BabyAI environment, a grid world in which language-conditioned navigation tasks are performed, and build a sequence modeling Transformer, which takes a language instruction, a sequence of actions, and environmental observations as its inputs. 
In order to investigate the emergence of abstract state representations, we design a \"blindfolded\" navigation task, where only the initial environmental layout, the language instruction, and the action sequence to complete the task are available for training. Our probing results show that intermediate environmental layouts can be reasonably reconstructed from the internal activations of a trained model, and that language instructions play a role in the reconstruction accuracy. Our results suggest that many key features of state representations can emerge via embodied sequence modeling, supporting an optimistic outlook for applications of sequence modeling objectives to more complex embodied decision-making domains.", "keywords": "Interpretability and Analysis; Decision Making via Sequence Modeling; Language Grounding to Vision and Beyond", "primary_area": "", "supplementary_material": "", "author": "Tian Yun;Zilai Zeng;Kunal Handa;Ashish V Thapliyal;Bo Pang;Ellie Pavlick;Chen Sun", "authorids": "~Tian_Yun2;~Zilai_Zeng1;~Kunal_Handa1;~Ashish_V_Thapliyal1;~Bo_Pang3;~Ellie_Pavlick1;~Chen_Sun1", "gender": "M;M;;Not Specified;;F;M", "homepage": "https://tttyuntian.github.io/;https://zilaiz.github.io;https://kunhanda.github.io/;;https://sites.google.com/site/bopang42/;http://cs.brown.edu/people/epavlick/;https://chensun.me", "dblp": "33/303;306/6661;336/6747.html;42/4147;16/6344-1;141/4059;01/6072-2", "google_scholar": "https://scholar.google.com/citations?hl=en;nyqMsxQAAAAJ;scdcthMAAAAJ;1JtHXbAAAAAJ;qCdLtIoAAAAJ;sFyrSa8AAAAJ;vQa7heEAAAAJ", "or_profile": "~Tian_Yun2;~Zilai_Zeng1;~Kunal_Handa1;~Ashish_V_Thapliyal1;~Bo_Pang3;~Ellie_Pavlick1;~Chen_Sun1", "aff": "Brown University;Brown University;Brown University;Google;Google;Brown University;Google", "aff_domain": "brown.edu;brown.edu;brown.edu;google.com;google.com;brown.edu;google.com", "position": "PhD student;MS student;Undergrad student;Research Software Engineer;Researcher;Assistant Professor;Research Scientist", "bibtex": "@inproceedings{\nyun2023emergence,\ntitle={Emergence of Abstract State Representations in Embodied Sequence Modeling},\nauthor={Tian Yun and Zilai Zeng and Kunal Handa and Ashish V Thapliyal and Bo Pang and Ellie Pavlick and Chen Sun},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=FKNtgr0qQy}\n}", "github": "", "project": "", "reviewers": "GDPh;zk2R;89YP", "site": "https://openreview.net/forum?id=FKNtgr0qQy", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;3;3", "excitement": "4;4;4", "reproducibility": "3;4;4", "correctness": "3;3;4", "rating_avg": 4.0, "confidence_avg": 3.0, "excitement_avg": 4.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0003-1671-5484;;;0000-0002-7219-0515;;;", "linkedin": "tian-yun-83b385146/;;;;;;", "aff_unique_index": "0;0;0;1;1;0;1", "aff_unique_norm": "Brown University;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.brown.edu;https://www.google.com", "aff_unique_abbr": "Brown;Google", "aff_campus_unique_index": "1;1;1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "FLSQjYmzIp", "title": "Language Guided Visual Question Answering: Elevate Your Multimodal Language Model Using Knowledge-Enriched Prompts", "track": "main", "status": "Short 
Findings", "tldr": "", "abstract": "Visual question answering (VQA) is the task of answering questions about an image. The task assumes an understanding of both the image and the question to provide a natural language answer. VQA has gained popularity in recent years due to its potential applications in a wide range of fields, including robotics, education, and healthcare. In this paper, we focus on knowledge-augmented VQA, where answering the question requires commonsense knowledge, world knowledge, and reasoning about ideas and concepts not present in the image. We propose a multimodal framework that uses language guidance (LG) in the form of rationales, image captions, scene graphs, etc to answer questions more accurately. We benchmark our method on the multi-choice question-answering task of the A-OKVQA, Science-QA, VSR, and IconQA datasets using CLIP and BLIP models. We show that the use of language guidance is a simple but powerful and effective strategy for visual question answering. Our language guidance improves the performance of CLIP by 7.6% and BLIP-2 by 4.8% in the challenging A-OKVQA dataset. We also observe consistent improvement in performance on the Science-QA, VSR, and IconQA datasets when using the proposed language guidances. The implementation of LG-VQA is publicly available at https://github.com/declare-lab/LG-VQA.", "keywords": "VQA;Multimodal Language Models;Question Answering", "primary_area": "", "supplementary_material": "", "author": "Deepanway Ghosal;Navonil Majumder;Roy Ka-Wei Lee;Rada Mihalcea;Soujanya Poria", "authorids": "~Deepanway_Ghosal1;~Navonil_Majumder1;~Roy_Ka-Wei_Lee1;~Rada_Mihalcea1;~Soujanya_Poria1", "gender": ";M;M;F;M", "homepage": ";;https://www.socialai.studio/team;https://web.eecs.umich.edu/~mihalcea/;https://soujanyaporia.github.io", "dblp": "203/9407;198/3608;139/2266;m/RadaMihalcea;116/4904", "google_scholar": "https://scholar.google.co.in/citations?user=95YiIWUAAAAJ;jPfEvuQAAAAJ;https://scholar.google.com.sg/citations?user=uQxdOlsAAAAJ;https://scholar.google.com.tw/citations?user=UetM7FgAAAAJ;https://scholar.google.co.in/citations?user=oS6gRc4AAAAJ", "or_profile": "~Deepanway_Ghosal1;~Navonil_Majumder1;~Roy_Ka-Wei_Lee1;~Rada_Mihalcea1;~Soujanya_Poria1", "aff": "Singapore University of Technology and Design;Singapore University of Technology and Design;Singapore University of Technology and Design;University of Michigan;Singapore University of Technology and Design", "aff_domain": "sutd.edu.sg;sutd.edu.sg;sutd.edu.sg;umich.edu;sutd.edu.sg", "position": "PhD student;Researcher;Assistant Professor;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nghosal2023language,\ntitle={Language Guided Visual Question Answering: Elevate Your Multimodal Language Model Using Knowledge-Enriched Prompts},\nauthor={Deepanway Ghosal and Navonil Majumder and Roy Ka-Wei Lee and Rada Mihalcea and Soujanya Poria},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=FLSQjYmzIp}\n}", "github": "", "project": "", "reviewers": "WdmU;PMF9;ssgE", "site": "https://openreview.net/forum?id=FLSQjYmzIp", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;3;4", "excitement": "2;3;4", "reproducibility": "3;4;4", "correctness": "2;3;4", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 
0.0, "orcid": ";;0000-0002-1986-7750;0000-0002-0767-6703;", "linkedin": ";;;;", "aff_unique_index": "0;0;0;1;0", "aff_unique_norm": "Singapore University of Technology and Design;University of Michigan", "aff_unique_dep": ";", "aff_unique_url": "https://www.sutd.edu.sg;https://www.umich.edu", "aff_unique_abbr": "SUTD;UM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;0", "aff_country_unique": "Singapore;United States" }, { "id": "FMWVtVct0V", "title": "Towards Anytime Fine-tuning: Continually Pre-trained Language Models with Hypernetwork Prompts", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Continual pre-training has been urgent for adapting a pre-trained model to a multitude of domains and tasks in the fast-evolving world. In practice, a continually pre-trained model is expected to demonstrate not only greater capacity when fine-tuned on pre-trained domains but also a non-decreasing performance on unseen ones. In this work, we first investigate such anytime fine-tuning effectiveness of existing continual pre-training approaches, concluding with unanimously decreased performance on unseen domains. To this end, we propose a prompt-guided continual pre-training method, where we train a hypernetwork to generate domain-specific prompts by both agreement and disagreement losses. The agreement loss maximally preserves the generalization of a pre-trained model to new domains, and the disagreement one guards the exclusiveness of the generated hidden states for each domain. Remarkably, prompts by the hypernetwork alleviate the domain identity when fine-tuning and promote knowledge transfer across domains. Our method achieved improvements of 3.57\\% and 3.4\\% on two real-world datasets (including domain shift and temporal shift), respectively, demonstrating its efficacy.", "keywords": "Continual learning; Pre-trained language model; Prompt learning", "primary_area": "", "supplementary_material": "", "author": "Gangwei Jiang;Caigao JIANG;Siqiao Xue;James Y. Zhang;JUN ZHOU;Defu Lian;Ying Wei", "authorids": "~Gangwei_Jiang1;~Caigao_JIANG2;~Siqiao_Xue1;~James_Y._Zhang1;~JUN_ZHOU6;~Defu_Lian1;~Ying_Wei1", "gender": "M;M;M;M;M;M;F", "homepage": "https://gangwJiang.github.io;;https://www.antgroup.com/en;https://scholar.google.com/citations?user=Ywakh_sAAAAJ;https://scholar.google.com/citations?user=mCVvloEAAAAJ&hl=en;https://faculty.ustc.edu.cn/liandefu/en/index.htm;https://wei-ying.net/", "dblp": "286/8533;292/3817;302/7766;151/3086;99/3847-11;87/10734;14/4899-1", "google_scholar": "https://scholar.google.com.hk/citations?hl=zh-CN;;pZqTpoEAAAAJ;Ywakh_sAAAAJ;mCVvloEAAAAJ;QW0ad4sAAAAJ;5UpFdKsAAAAJ", "or_profile": "~Gangwei_Jiang1;~Caigao_JIANG2;~Siqiao_Xue1;~James_Y._Zhang1;~JUN_ZHOU6;~Defu_Lian1;~Ying_Wei1", "aff": "University of Science and Technology of China;Alibaba Group;Alibaba;Ant Group;Ant Group;University of Science and Technology of China;City University of Hong Kong", "aff_domain": "ustc.edu.cn;alibaba-inc.com;alibaba-inc.com;alipay.com;antgroup.com;ustc.edu.cn;cityu.edu.hk", "position": "PhD student;Researcher;researcher;managing director;Researcher;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\njiang2023towards,\ntitle={Towards Anytime Fine-tuning: Continually Pre-trained Language Models with Hypernetwork Prompts},\nauthor={Gangwei Jiang and Caigao JIANG and Siqiao Xue and James Y. 
Zhang and JUN ZHOU and Defu Lian and Ying Wei},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=FMWVtVct0V}\n}", "github": "", "project": "", "reviewers": "JHzV;Q6RZ;YTnf;ke4a", "site": "https://openreview.net/forum?id=FMWVtVct0V", "pdf_size": 0, "rating": "3;3;3;3", "confidence": "2;3;3;3", "excitement": "2;3;3;4", "reproducibility": "3;2;3;4", "correctness": "3;4;4;3", "rating_avg": 3.0, "confidence_avg": 2.75, "excitement_avg": 3.0, "reproducibility_avg": 3.0, "correctness_avg": 3.5, "replies_avg": 13, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0000-0001-6519-676X;0000-0001-6033-6102;0000-0002-3507-9607;", "linkedin": ";caigao-jiang-309710194;;jamesymzhang/;;;", "aff_unique_index": "0;1;2;3;3;0;4", "aff_unique_norm": "University of Science and Technology of China;Alibaba Group;Alibaba Group Holding Limited;Ant Group;City University of Hong Kong", "aff_unique_dep": ";;;;", "aff_unique_url": "http://www.ustc.edu.cn;https://www.alibaba.com;https://www.alibaba.com;https://www.antgroup.com;https://www.cityu.edu.hk", "aff_unique_abbr": "USTC;Alibaba;Alibaba;Ant Group;CityU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "FMwflM9yVJ", "title": "CONTRASTE: Supervised Contrastive Pre-training With Aspect-based Prompts For Aspect Sentiment Triplet Extraction", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Existing works on Aspect Sentiment Triplet Extraction (ASTE) explicitly focus on developing more efficient fine-tuning techniques for the task. Instead, our motivation is to come up with a generic approach that can improve the downstream performances of multiple ABSA tasks simultaneously. Towards this, we present CONTRASTE, a novel pre-training strategy using CONTRastive learning to enhance the ASTE performance. While we primarily focus on ASTE, we also demonstrate the advantage of our proposed technique on other ABSA tasks such as ACOS, TASD, and AESC. Given a sentence and its associated (aspect, opinion, sentiment) triplets, first, we design aspect-based prompts with corresponding sentiments masked. We then (pre)train an encoder-decoder model by applying contrastive learning on the decoder-generated aspect-aware sentiment representations of the masked terms. For fine-tuning the model weights thus obtained, we then propose a novel multi-task approach where the base encoder-decoder model is combined with two complementary modules, a tagging-based Opinion Term Detector, and a regression-based Triplet Count Estimator. 
Exhaustive experiments on four benchmark datasets and a detailed ablation study establish the importance of each of our proposed components as we achieve new state-of-the-art ASTE results.", "keywords": "supervised contrastive learning;pretraining;t5;encoder-decoder;generative;aste;acos;aesc;tasd;absa", "primary_area": "", "supplementary_material": "", "author": "Rajdeep Mukherjee;Nithish Kannen;Saurabh Kumar Pandey;Pawan Goyal", "authorids": "~Rajdeep_Mukherjee1;~Nithish_Kannen1;~Saurabh_Kumar_Pandey1;~Pawan_Goyal1", "gender": "M;M;M;M", "homepage": "https://rajdeep345.github.io/;https://nitkannen.github.io/;;http://cse.iitkgp.ac.in/~pawang/", "dblp": "124/3803;;;77/2307-2", "google_scholar": "https://scholar.google.com/citations?hl=en;nPQMsWMAAAAJ;gP9uqGYAAAAJ;https://scholar.google.com.tw/citations?user=F14FHsIAAAAJ", "or_profile": "~Rajdeep_Mukherjee1;~Nithish_Kannen1;~Saurabh_Kumar_Pandey1;~Pawan_Goyal1", "aff": "Indian Institute of Technology Kharagpur;Indian Institute of Technology, Kharagpur;;IIT Kharagpur", "aff_domain": "iitkgp.ac.in;iitkgp.ac.in;;cse.iitkgp.ac.in", "position": "PhD student;MS student;;Associate Professor", "bibtex": "@inproceedings{\nmukherjee2023contraste,\ntitle={{CONTRASTE}: Supervised Contrastive Pre-training With Aspect-based Prompts For Aspect Sentiment Triplet Extraction},\nauthor={Rajdeep Mukherjee and Nithish Kannen and Saurabh Kumar Pandey and Pawan Goyal},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=FMwflM9yVJ}\n}", "github": "", "project": "", "reviewers": "RHnr;8sQy;ZRNq", "site": "https://openreview.net/forum?id=FMwflM9yVJ", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;4", "excitement": "4;3;3", "reproducibility": "4;4;3", "correctness": "4;3;2", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-2267-1695;;0000-0003-1111-8816;", "linkedin": "rajdeepmukherjee89;nithish-kannen-7a7823177/;saurabh-kumar-pandey-b8936a15b/;", "aff_unique_index": "0;1;0", "aff_unique_norm": "Indian Institute of Technology Kharagpur;Indian Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.iitkgp.ac.in;https://www.iitkgp.ac.in", "aff_unique_abbr": "IIT Kharagpur;IIT Kharagpur", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Kharagpur", "aff_country_unique_index": "0;0;0", "aff_country_unique": "India" }, { "id": "FRRlmKxuf2", "title": "Cue-CoT: Chain-of-thought Prompting for Responding to In-depth Dialogue Questions with LLMs", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Large Language Models (LLMs), such as ChatGPT, greatly empower dialogue systems with strong language understanding and generation capabilities. However, most of the previous works prompt the LLMs to directly generate a response based on the dialogue context, overlooking the underlying linguistic cues about the user status exhibited in the context. Such in-depth dialogue scenarios are challenging for existing LLMs to figure out the user's hidden needs and respond satisfactorily through a single-step inference. 
To this end, we propose a novel linguistic cue-based chain-of-thoughts (Cue-CoT), which enhances the LLMs inference with an intermediate reasoning step to find cues exhibited in the dialogue, aiming to provide a more personalized and engaging response. To evaluate the approach, we build a benchmark with in-depth dialogue questions, consisting of 6 datasets in both Chinese and English, targeting 3 major linguistic cues during the conversation: personality, emotion, and psychology. We conducted experiments on the proposed benchmark with 5 LLMs under both zero-shot and one-shot settings. Empirical results demonstrate our proposed Cue-CoT method outperforms standard prompting methods in terms of both helpfulness and acceptability on all datasets.", "keywords": "chain-of-thoughts;in-context learning;personalized dialogue system;empathetic dialogue system;large language models", "primary_area": "", "supplementary_material": "", "author": "Hongru WANG;Rui Wang;Fei Mi;Yang Deng;Zezhong WANG;Bin Liang;Ruifeng Xu;Kam-Fai Wong", "authorids": "~Hongru_WANG1;~Rui_Wang30;~Fei_Mi1;~Yang_Deng4;~Zezhong_WANG1;~Bin_Liang6;~Ruifeng_Xu1;~Kam-Fai_Wong2", "gender": "M;M;M;M;M;M;M;M", "homepage": "https://rulegreen.github.io/;;https://mifei.github.io/;https://dengyang17.github.io/;;https://binliang-nlp.github.io/;http://faculty.hitsz.edu.cn/xuruifeng;http://www.se.cuhk.edu.hk/~kfwong", "dblp": "72/1462-3;06/2293-92;161/0068;115/6282-2;217/9660.html;71/6053-4;93/5407-1;w/KamFaiWong", "google_scholar": "s6UtVYUAAAAJ;https://scholar.google.com/citations?view_op=list_works;gX3493QAAAAJ;https://scholar.google.com.hk/citations?user=OshWT3UAAAAJ;xfl6gcgAAAAJ;djpQeLEAAAAJ;mObXnNIAAAAJ;", "or_profile": "~Hongru_WANG1;~Rui_Wang30;~Fei_Mi1;~Yang_Deng4;~Zezhong_WANG1;~Bin_Liang6;~Ruifeng_Xu1;~Kam-Fai_Wong2", "aff": "University of Edinburgh;Harbin Institute of Technology;;The Chinese University of Hong Kong;The Chinese University of Hong Kong;The Chinese University of Hong Kong;Harbin Institute of Technology;The Chinese University of Hong Kong", "aff_domain": "ed.ac.uk;hit.edu.cn;;cuhk.edu.hk;cuhk.edu.hk;cuhk.edu.hk;hit.edu.cn;cuhk.edu.hk", "position": "Visiting Student;MS student;;PhD student;PhD student;Postdoc;Full Professor;Full Professor", "bibtex": "@inproceedings{\nwang2023cuecot,\ntitle={Cue-CoT: Chain-of-thought Prompting for Responding to In-depth Dialogue Questions with {LLM}s},\nauthor={Hongru WANG and Rui Wang and Fei Mi and Yang Deng and Zezhong WANG and Bin Liang and Ruifeng Xu and Kam-Fai Wong},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=FRRlmKxuf2}\n}", "github": "", "project": "", "reviewers": "cvAj;U5Qt;qL3t", "site": "https://openreview.net/forum?id=FRRlmKxuf2", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;3;3", "excitement": "3;3;4", "reproducibility": "3;4;4", "correctness": "3;3;3", "rating_avg": 3.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 8, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0001-5027-0138;;;;0000-0003-4079-0097;0000-0001-7234-1347;0000-0002-4009-5679;0000-0002-9427-5659", "linkedin": ";;;;;;;", "aff_unique_index": "0;1;2;2;2;1;2", "aff_unique_norm": "University of Edinburgh;Harbin Institute of Technology;Chinese University of Hong Kong", "aff_unique_dep": ";;", "aff_unique_url": 
"https://www.ed.ac.uk;http://www.hit.edu.cn/;https://www.cuhk.edu.hk", "aff_unique_abbr": "Edinburgh;HIT;CUHK", "aff_campus_unique_index": "1;2;2;2;1;2", "aff_campus_unique": ";Harbin;Hong Kong SAR", "aff_country_unique_index": "0;1;1;1;1;1;1", "aff_country_unique": "United Kingdom;China" }, { "id": "FS1a4CDZsP", "title": "PAC-tuning: Fine-tuning Pre-trained Language Models with PAC-driven Perturbed Gradient Descent", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Fine-tuning pretrained language models (PLMs) for downstream tasks is a large-scale optimization problem, in which the choice of the training algorithm critically determines how well the trained model can generalize to unseen test data, especially in the context of few-shot learning. To achieve good generalization performance and avoid overfitting, techniques such as data augmentation and pruning are often applied. However, adding these regularizations necessitates heavy tuning of the hyperparameters of optimization algorithms, such as the popular Adam optimizer. In this paper, we propose a two-stage fine-tuning method, PAC-tuning, to address this optimization challenge. First, based on PAC-Bayes training, PAC-tuning directly minimizes the PAC-Bayes generalization bound to learn proper parameter distribution. Second, PAC-tuning modifies the gradient by injecting noise with the variance learned in the first stage into the model parameters during training, resulting in a variant of perturbed gradient descent (PGD). In the past, the few-shot scenario posed difficulties for PAC-Bayes training because the PAC-Bayes bound, when applied to large models with limited training data, might not be stringent. Our experimental results across 5 GLUE benchmark tasks demonstrate that PAC-tuning successfully handles the challenges of fine-tuning tasks and outperforms strong baseline methods by a visible margin, further confirming the potential to apply PAC training for any other settings where the Adam optimizer is currently used for training.", "keywords": "language model;fine-tuning;pac-bayesian bound;perturbed gradient descent", "primary_area": "", "supplementary_material": "", "author": "Guangliang Liu;Zhiyu Xue;Xitong Zhang;Kristen Johnson;Rongrong Wang", "authorids": "~Guangliang_Liu2;~Zhiyu_Xue1;~Xitong_Zhang1;~Kristen_Johnson1;~Rongrong_Wang1", "gender": "M;M;M;F;", "homepage": ";https://chrisyxue.github.io/zyxue.github.com/;;;https://users.math.msu.edu/users/wangron6/", "dblp": ";271/7548;156/9687;185/1679;", "google_scholar": ";;Ci9svAcAAAAJ;iHeTIZEAAAAJ;", "or_profile": "~Guangliang_Liu2;~Zhiyu_Xue1;~Xitong_Zhang1;~Kristen_Johnson1;~Rongrong_Wang1", "aff": "Michigan State University;Michigan State University;Michigan State University;Michigan State University;Michigan State University", "aff_domain": "msu.edu;msu.edu;msu.edu;msu.edu;msu.edu", "position": "PhD student;MS student;PhD student;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nliu2023pactuning,\ntitle={{PAC}-tuning: Fine-tuning Pre-trained Language Models with {PAC}-driven Perturbed Gradient Descent},\nauthor={Guangliang Liu and Zhiyu Xue and Xitong Zhang and Kristen Johnson and Rongrong Wang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=FS1a4CDZsP}\n}", "github": "", "project": "", "reviewers": "uBJv;6Piy;msWo;NWbh", "site": "https://openreview.net/forum?id=FS1a4CDZsP", "pdf_size": 0, "rating": "4;4;4;4", "confidence": "2;4;3;4", "excitement": 
"3;4;4;4", "reproducibility": "2;4;3;4", "correctness": "3;4;4;3", "rating_avg": 4.0, "confidence_avg": 3.25, "excitement_avg": 3.75, "reproducibility_avg": 3.25, "correctness_avg": 3.5, "replies_avg": 14, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;", "linkedin": ";;xitong-zhang-70118915a/;kristenmariejohns/;", "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Michigan State University", "aff_unique_dep": "", "aff_unique_url": "https://www.msu.edu", "aff_unique_abbr": "MSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "FTiXh63BVO", "title": "Uniform Complexity for Text Generation", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Large language models (LLMs) have shown promising results in a wide array of generative NLP tasks, such as summarization and machine translation. In the context of narrative generation, however, existing models still do not capture factors that contribute to producing consistent text. For instance, it is logical that a piece of text or a story should be uniformly readable throughout and that this form of complexity should be controllable. As such, if the complexity of an input text prompt is rated first-grade reading level in the Flesch Reading Ease test, then the generated text continuing the plot should also be within this range of complexity. With this in mind, we introduce Uniform Complexity for Text Generation (UCTG), a new benchmark test which raises the challenge of making generative models observe uniform linguistic properties with respect to prompts. We experiment with over 150+ linguistically and cognitively motivated features for evaluating text complexity in humans and generative models. 
From our results, we find that models such as GPT-2 struggle to preserve the complexity of input prompts used in its generations, even if finetuned with professionally written texts.", "keywords": "text complexity;natural language generation;evaluation;narrative generation", "primary_area": "", "supplementary_material": "", "author": "Joseph Marvin Imperial;Harish Tayyar Madabushi", "authorids": "~Joseph_Marvin_Imperial1;~Harish_Tayyar_Madabushi1", "gender": "M;M", "homepage": "https://www.josephimperial.com;https://www.harishtayyarmadabushi.com/", "dblp": "246/4647;186/7335", "google_scholar": "irs_5ekAAAAJ;EHOS_5QAAAAJ", "or_profile": "~Joseph_Marvin_Imperial1;~Harish_Tayyar_Madabushi1", "aff": "University of Bath;University of Bath", "aff_domain": "bath.ac.uk;bath.ac.uk", "position": "PhD student;Lecturer", "bibtex": "@inproceedings{\nimperial2023uniform,\ntitle={Uniform Complexity for Text Generation},\nauthor={Joseph Marvin Imperial and Harish Tayyar Madabushi},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=FTiXh63BVO}\n}", "github": "", "project": "", "reviewers": "udN3;bS2i;kTEu", "site": "https://openreview.net/forum?id=FTiXh63BVO", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;4", "excitement": "4;3;2", "reproducibility": "5;3;5", "correctness": "4;2;4", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 3.0, "reproducibility_avg": 4.333333333333333, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0003-1073-6129;0000-0001-5260-3653", "linkedin": "joseph-marvin-imperial-9382b9a7/;harishtayyarmadabushi/", "aff_unique_index": "0;0", "aff_unique_norm": "University of Bath", "aff_unique_dep": "", "aff_unique_url": "https://www.bath.ac.uk", "aff_unique_abbr": "Bath", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United Kingdom" }, { "id": "FXObwPWgUc", "title": "Leveraging GPT-4 for Automatic Translation Post-Editing", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "While Neural Machine Translation (NMT) represents the leading approach to Machine Translation (MT), the outputs of NMT models still require translation post-editing to rectify errors and enhance quality under critical settings. In this work, we formalize the task of direct translation post-editing with Large Language Models (LLMs) and explore the use of GPT-4 to automatically post-edit NMT outputs across several language pairs. Our results demonstrate that GPT-4 is adept at translation post-editing, producing meaningful and trustworthy edits to translations that help improve its general quality as well as remove different classes of major errors in translations. In particular, human evaluations on assessing edit trustworthiness show that GPT-4 exhibits a large improvement over the prior state-of-the-art LLM. Notably, we improve upon state-of-the-art performance on WMT-22 English-Chinese, English-German, Chinese-English and German-English language pairs using GPT-4 based post-editing, as evaluated by state-of-the-art MT quality metrics. 
However, we also show that GPT-4 could produce hallucinated edits, thereby urging caution in its use as an expert translation post-editor.", "keywords": "automatic post editing;neural machine translation;large language models;application", "primary_area": "", "supplementary_material": "", "author": "Vikas Raunak;Amr Sharaf;Yiren Wang;Hany Hassan Awadalla;Arul Menezes", "authorids": "~Vikas_Raunak2;~Amr_Sharaf1;~Yiren_Wang1;~Hany_Hassan_Awadalla1;~Arul_Menezes1", "gender": "M;M;Unspecified;M;M", "homepage": "https://vyraun.github.io/;http://cs.umd.edu/~amr;https://publish.illinois.edu/yirenwang/;https://www.linkedin.com/in/arulmenezes;", "dblp": "205/2388;159/1156;;89/2869;83/64", "google_scholar": "25Tjnq4AAAAJ;It3Gm1EAAAAJ;wd3FbFMAAAAJ;DnhOg3YAAAAJ;", "or_profile": "~Vikas_Raunak2;~Amr_Sharaf1;~Yiren_Wang1;~Arul_Menezes1;~Hany_Hassan1", "aff": "Microsoft;Microsoft;University of Illinois, Urbana Champaign;Microsoft Research;Microsoft", "aff_domain": "microsoft.com;microsoft.com;illinois.edu;research.microsoft.com;microsoft.com", "position": "Researcher;Researcher;PhD student;Distinguished Engineer;Research Scientist", "bibtex": "@inproceedings{\nraunak2023leveraging,\ntitle={Leveraging {GPT}-4 for Automatic Translation Post-Editing},\nauthor={Vikas Raunak and Amr Sharaf and Yiren Wang and Hany Hassan Awadalla and Arul Menezes},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=FXObwPWgUc}\n}", "github": "", "project": "", "reviewers": "NRkU;Cpb4;aKRm", "site": "https://openreview.net/forum?id=FXObwPWgUc", "pdf_size": 0, "rating": "4;4;4", "confidence": "5;4;4", "excitement": "4;4;2", "reproducibility": "2;4;2", "correctness": "4;3;3", "rating_avg": 4.0, "confidence_avg": 4.333333333333333, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 2.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;", "linkedin": "vraunak;amrsharaf/;;arulmenezes;", "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "Microsoft;University of Illinois Urbana-Champaign", "aff_unique_dep": "Microsoft Corporation;", "aff_unique_url": "https://www.microsoft.com;https://illinois.edu", "aff_unique_abbr": "Microsoft;UIUC", "aff_campus_unique_index": "1", "aff_campus_unique": ";Urbana-Champaign", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "Faxkz2V56o", "title": "Noisy Self-Training with Synthetic Queries for Dense Retrieval", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Although existing neural retrieval models reveal promising results when training data is abundant and the performance keeps improving as training data increases, collecting high-quality annotated data is prohibitively costly. To this end, we introduce a novel noisy self-training framework combined with synthetic queries, showing that neural retrievers can be improved in a self-evolution manner with no reliance on any external models. Experimental results show that our method improves consistently over existing methods on both general-domain (e.g., MS-MARCO) and out-of-domain (i.e., BEIR) retrieval benchmarks. Extra analysis on low-resource settings reveals that our method is data efficient and outperforms competitive baselines, with as little as 30\\% of labelled training data. 
Further extending the framework for reranker training demonstrates that the proposed method is general and yields additional gains on tasks of diverse domains.\\footnote{Source code is available at \\url{https://github.com/Fantabulous-J/Self-Training-DPR}}", "keywords": "dense retrieval;self training;synthetic queries", "primary_area": "", "supplementary_material": "", "author": "Fan Jiang;Tom Drummond;Trevor Cohn", "authorids": "~Fan_Jiang2;~Tom_Drummond1;~Trevor_Cohn1", "gender": ";M;M", "homepage": ";;https://people.eng.unimelb.edu.au/tcohn/", "dblp": ";50/1633;66/4613", "google_scholar": ";https://scholar.google.com.au/citations?user=6sWGL5wAAAAJ;https://scholar.google.com.au/citations?user=FCom398AAAAJ", "or_profile": "~Fan_Jiang2;~Tom_Drummond1;~Trevor_Cohn1", "aff": ";University of Melbourne;The University of Melbourne", "aff_domain": ";unimelb.edu.au;unimelb.edu.au", "position": ";Full Professor;Professor", "bibtex": "@inproceedings{\njiang2023noisy,\ntitle={Noisy Self-Training with Synthetic Queries for Dense Retrieval},\nauthor={Fan Jiang and Tom Drummond and Trevor Cohn},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=Faxkz2V56o}\n}", "github": "", "project": "", "reviewers": "ugd9;uzg5;E4NL", "site": "https://openreview.net/forum?id=Faxkz2V56o", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;3;3", "excitement": "3;3;3", "reproducibility": "4;4;3", "correctness": "3;4;3", "rating_avg": 3.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0001-8204-5904;", "linkedin": ";;", "aff_unique_index": "0;0", "aff_unique_norm": "University of Melbourne", "aff_unique_dep": "", "aff_unique_url": "https://www.unimelb.edu.au", "aff_unique_abbr": "UniMelb", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Australia" }, { "id": "FgEM735i5M", "title": "Scene Graph Enhanced Pseudo-Labeling for Referring Expression Comprehension", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Referring Expression Comprehension (ReC) is a task that involves localizing objects in images based on natural language expressions. Most ReC methods typically approach the task as a supervised learning problem. However, the need for costly annotations, such as clear image-text pairs or region-text pairs, hinders the scalability of existing approaches. \nIn this work, we propose a novel scene graph-based framework that automatically generates high-quality pseudo region-query pairs. Our method harnesses scene graphs to capture the relationships between objects in images and generate expressions enriched with relation information. To ensure accurate mapping between visual regions and text, we introduce an external module that employs a calibration algorithm to filter out ambiguous queries. Additionally, we employ a rewriter module to enhance the diversity of our generated pseudo queries through rewriting. \nExtensive experiments demonstrate that our method outperforms previous pseudo-labeling methods by about 10%, 12%, and 11% on RefCOCO, RefCOCO+, and RefCOCOg, respectively. 
Furthermore, it surpasses the state-of-the-art unsupervised approach by more than 15% on the RefCOCO dataset.", "keywords": "Referring Expression Comprehension;Visual Grounding", "primary_area": "", "supplementary_material": "", "author": "Cantao Wu;Yi Cai;Liuwu Li;Jiexin Wang", "authorids": "~Cantao_Wu1;~Yi_Cai1;~Liuwu_Li1;~Jiexin_Wang1", "gender": "M;M;;M", "homepage": "https://taotaotao0412.github.io/cantaoW.github.io/;http://www2.scut.edu.cn/sse/2018/0615/c16788a270751/page.htm;https://github.com/itrues;", "dblp": ";58/3467-1.html;;", "google_scholar": ";https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com/citations?hl=zh-CN;", "or_profile": "~Cantao_Wu1;~Yi_Cai1;~Liuwu_Li1;~Jiexin_Wang1", "aff": "South China University of Technology;South China University of Technology;South China University of Technology;South China University of Technology", "aff_domain": "scut.edu.cn;scut.edu.cn;scut.edu.cn;scut.edu.cn", "position": "MS student;Full Professor;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nwu2023scene,\ntitle={Scene Graph Enhanced Pseudo-Labeling for Referring Expression Comprehension},\nauthor={Cantao Wu and Yi Cai and Liuwu Li and Jiexin Wang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=FgEM735i5M}\n}", "github": "", "project": "", "reviewers": "2nSK;G4b6;3Jqz", "site": "https://openreview.net/forum?id=FgEM735i5M", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;5;4", "excitement": "4;4;3", "reproducibility": "4;3;4", "correctness": "3;4;3", "rating_avg": 3.0, "confidence_avg": 4.333333333333333, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-1767-789X;;0000-0002-7064-6507", "linkedin": ";;;", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "South China University of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.scut.edu.cn", "aff_unique_abbr": "SCUT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "FghDWBBsIm", "title": "Target-to-Source Augmentation for Aspect Sentiment Triplet Extraction", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Aspect Sentiment Triplet Extraction (ASTE) is an important task in sentiment analysis, aiming to extract aspect-level opinions and sentiments from user-generated reviews. \nThe fine-grained nature of ASTE incurs a high annotation cost, while the scarcity of annotated data limits the performance of existing methods.\nThis paper exploits data augmentation to address this issue.\nTraditional augmentation methods typically modify the input sentences of existing samples via heuristic rules or language models, which have shown success in text classification tasks. \nHowever, applying these methods to fine-grained tasks like ASTE poses challenges in generating diverse augmented samples while maintaining alignment between modified sentences and origin labels.\nTherefore, this paper proposes a target-to-source augmentation approach for ASTE.\nOur approach focuses on learning a generator that can directly generate new sentences based on labels and syntactic templates. 
With this generator, we can generate a substantial number of diverse augmented samples by mixing labels and syntactic templates from different samples.\nBesides, to ensure the quality of the generated sentence, we introduce fluency and alignment discriminators to provide feedback on the generated sentence and then use this feedback to optimize the generator via a reinforcement learning framework.\nExperiments demonstrate that our approach significantly enhances the performance of existing ASTE models.", "keywords": "Aspect-Based Sentiment Analysis;Data Augmentation;Reinforcement Learning", "primary_area": "", "supplementary_material": "", "author": "Yice Zhang;yifan yang;Meng Li;Bin Liang;Shiwei Chen;Ruifeng Xu", "authorids": "~Yice_Zhang1;~yifan_yang7;~Meng_Li15;~Bin_Liang6;~Shiwei_Chen1;~Ruifeng_Xu1", "gender": "M;M;F;M;M;M", "homepage": ";https://github.com/yyf12047;https://mimas.top;https://binliang-nlp.github.io/;http://www.hitsz-hlt.com/chengyuanjieshao/;http://faculty.hitsz.edu.cn/xuruifeng", "dblp": "225/4508;;;71/6053-4;;93/5407-1", "google_scholar": "a4akjpYAAAAJ;;;djpQeLEAAAAJ;;mObXnNIAAAAJ", "or_profile": "~Yice_Zhang1;~yifan_yang7;~Meng_Li15;~Bin_Liang6;~Shiwei_Chen1;~Ruifeng_Xu1", "aff": "Harbin Institute of Technology;Harbin Institute of Technology;Harbin Institute of Technology(Shenzhen);The Chinese University of Hong Kong;Harbin Institute of Technology;Harbin Institute of Technology", "aff_domain": "hit.edu.cn;hit.edu.cn;hitsz.edu.cn;cuhk.edu.hk;hit.edu.cn;hit.edu.cn", "position": "PhD student;MS student;Undergrad student;Postdoc;PhD student;Full Professor", "bibtex": "@inproceedings{\nzhang2023targettosource,\ntitle={Target-to-Source Augmentation for Aspect Sentiment Triplet Extraction},\nauthor={Yice Zhang and yifan yang and Meng Li and Bin Liang and Shiwei Chen and Ruifeng Xu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=FghDWBBsIm}\n}", "github": "", "project": "", "reviewers": "NQZC;Xree;DtPr", "site": "https://openreview.net/forum?id=FghDWBBsIm", "pdf_size": 0, "rating": "5;5;5", "confidence": "3;4;3", "excitement": "3;4;4", "reproducibility": "3;4;4", "correctness": "3;4;4", "rating_avg": 5.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0000-0001-7234-1347;;0000-0002-4009-5679", "linkedin": ";;;;;", "aff_unique_index": "0;0;0;1;0;0", "aff_unique_norm": "Harbin Institute of Technology;Chinese University of Hong Kong", "aff_unique_dep": ";", "aff_unique_url": "http://www.hit.edu.cn/;https://www.cuhk.edu.hk", "aff_unique_abbr": "HIT;CUHK", "aff_campus_unique_index": "0;0;1;2;0;0", "aff_campus_unique": "Harbin;Shenzhen;Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "Fj07R03qkz", "title": "IAEval: A Comprehensive Evaluation of Instance Attribution on Natural Language Understanding", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Instance attribution (IA) aims to identify the training instances leading to the prediction of a test example, helping researchers understand the dataset better and optimize data processing. While many IA methods have been proposed recently, how to evaluate them still remains open. 
Previous evaluations of IA only focus on one or two dimensions and are not comprehensive. In this work, we introduce IAEval for IA methods, a systematic and comprehensive evaluation scheme covering four significant requirements: sufficiency, completeness, stability and plausibility. We elaborately design novel metrics to measure these requirements for the first time. Three representative IA methods are evaluated under IAEval on four natural language understanding datasets. Extensive experiments confirmed the effectiveness of IAEval and exhibited its ability to provide comprehensive comparison among IA methods. With IAEval, researchers can choose the most suitable IA methods for applications like model debugging.", "keywords": "evaluation; instance attribution", "primary_area": "", "supplementary_material": "", "author": "Peijian Gu;Yaozong Shen;Lijie Wang;Quan Wang;Hua Wu;Zhendong Mao", "authorids": "~Peijian_Gu1;~Yaozong_Shen1;~Lijie_Wang2;~Quan_Wang7;~Hua_Wu4;~Zhendong_Mao1", "gender": "M;M;;F;;", "homepage": "https://github.com/GuPeijian;http://linkedin.com/in/yaozong-shen-a50570150;;;;", "dblp": ";;;;;", "google_scholar": ";;;l2yEbhAAAAAJ;;", "or_profile": "~Peijian_Gu1;~Yaozong_Shen1;~Lijie_Wang2;~Quan_Wang7;~Hua_Wu4;~Zhendong_Mao1", "aff": "Baidu;Baidu;;Beijing University of Posts and Telecommunications;;", "aff_domain": "baidu.com;baidu.com;;bupt.edu.cn;;", "position": "Intern;Researcher;;Associate Professor;;", "bibtex": "@inproceedings{\ngu2023iaeval,\ntitle={{IAE}val: A Comprehensive Evaluation of Instance Attribution on Natural Language Understanding},\nauthor={Peijian Gu and Yaozong Shen and Lijie Wang and Quan Wang and Hua Wu and Zhendong Mao},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=Fj07R03qkz}\n}", "github": "", "project": "", "reviewers": "wyYh;sPvk;VKZW", "site": "https://openreview.net/forum?id=Fj07R03qkz", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;3;3", "excitement": "4;4;3", "reproducibility": "4;3;4", "correctness": "3;4;3", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;", "linkedin": ";http://linkedin.com/in/yaozong-shen-a50570150;;;;", "aff_unique_index": "0;0;1", "aff_unique_norm": "Baidu;Beijing University of Posts and Telecommunications", "aff_unique_dep": "Baidu, Inc.;", "aff_unique_url": "https://www.baidu.com;http://www.bupt.edu.cn/", "aff_unique_abbr": "Baidu;BUPT", "aff_campus_unique_index": "1", "aff_campus_unique": ";Beijing", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "Fm0Brp3cTS", "title": "UPTON: Preventing Authorship Leakage from Public Text Release via Data Poisoning", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Consider a scenario where an author (e.g., activist, whistle-blower) with many public writings wishes to write \u201canonymously\" when attackers may have already built an authorship attribution (AA) model based off of public writings including those of the author. 
To enable her wish, we ask a question \u201ccan one make the publicly released writings, T , unattributable so that AA models trained on T cannot attribute its authorship well?\" Toward this question, we present a novel solution, UPTON, that exploits black-box data poisoning methods to weaken the authorship features in training samples and make released texts unlearnable. It is different from previous obfuscation works (e.g., adversarial attacks that modify test samples or backdoor works that only change the model outputs when triggering words occur). Using four authorship datasets (IMDb10, IMDb64, Enron and WJO), we present empirical validation where UPTON successfully downgrades the accuracy of AA models to an impractical level (e.g., \u223c 35%) while keeping texts still readable (e.g., > 0.9 in BERTScore). UPTON remains effective against AA models that are already trained on available clean writings of authors.", "keywords": "Authorship Attribution", "primary_area": "", "supplementary_material": "", "author": "Ziyao Wang;Thai Le;Dongwon Lee", "authorids": "~Ziyao_Wang2;~Thai_Le1;~Dongwon_Lee1", "gender": "M;;M", "homepage": "https://ziyaow-about.netlify.app;https://lethaiq.github.io/tql3/;https://pike.psu.edu/dongwon", "dblp": ";03/9889;l/DongwonLee", "google_scholar": "_PdzpfAAAAAJ;Fd8K7kAAAAAJ;MzL-WnEAAAAJ", "or_profile": "~Ziyao_Wang2;~Thai_Le1;~Dongwon_Lee1", "aff": "Wuhan University;University of Mississippi;The Pennsylvania State University", "aff_domain": "whu.edu.cn;olemiss.edu;psu.edu", "position": "Undergrad student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nwang2023upton,\ntitle={{UPTON}: Preventing Authorship Leakage from Public Text Release via Data Poisoning},\nauthor={Ziyao Wang and Thai Le and Dongwon Lee},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=Fm0Brp3cTS}\n}", "github": "", "project": "", "reviewers": "6F8X;cjjY;MCWh", "site": "https://openreview.net/forum?id=Fm0Brp3cTS", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;1;5", "excitement": "5;3;2", "reproducibility": "5;4;3", "correctness": "5;3;2", "rating_avg": 3.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0001-9632-6870;0000-0001-8371-7629", "linkedin": "https://www.linkedin.cn/injobs/in/ziyao-wang-370229234;;", "aff_unique_index": "0;1;2", "aff_unique_norm": "Wuhan University;University of Mississippi;Pennsylvania State University", "aff_unique_dep": ";;", "aff_unique_url": "http://www.whu.edu.cn/;https://www.olemiss.edu;https://www.psu.edu", "aff_unique_abbr": "WHU;UM;PSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1", "aff_country_unique": "China;United States" }, { "id": "Fqv0rgvkol", "title": "Paraphrase Types for Generation and Detection", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Current approaches in paraphrase generation and detection heavily rely on a single general similarity score, ignoring the intricate linguistic properties of language. This paper introduces two new tasks to address this shortcoming by considering paraphrase types - specific linguistic perturbations at particular text positions. We name these tasks Paraphrase Type Generation and Paraphrase Type Detection. 
Our results suggest that while current techniques perform well in a binary classification scenario, i.e., paraphrased or not, the inclusion of fine-grained paraphrase types poses a significant challenge. While most approaches are good at generating and detecting general semantic similar content, they fail to understand the intrinsic linguistic variables they manipulate. Models trained in generating and identifying paraphrase types also show improvements in tasks without them. In addition, scaling these models further improves their ability to understand paraphrase types. We believe paraphrase types can unlock a new paradigm for developing paraphrase models and solving tasks in the future.", "keywords": "paraphrase generation and detection;paraphrase types;paraphrasing tasks", "primary_area": "", "supplementary_material": "", "author": "Jan Philip Wahle;Bela Gipp;Terry Ruas", "authorids": "~Jan_Philip_Wahle1;~Bela_Gipp1;~Terry_Ruas1", "gender": "M;M;", "homepage": "https://jpwahle.com;https://gipplab.org/team/prof-dr-bela-gipp/;", "dblp": "288/1075.html;12/6082;", "google_scholar": "MI0C9mAAAAAJ;No2ot2YAAAAJ;", "or_profile": "~Jan_Philip_Wahle1;~Bela_Gipp1;~Terry_Ruas1", "aff": "University of G\u00f6ttingen, Germany;Georg-August Universit\u00e4t G\u00f6ttingen;", "aff_domain": "uni-goettingen.de;uni-goettingen.de;", "position": "PhD student;Full Professor;", "bibtex": "@inproceedings{\nwahle2023paraphrase,\ntitle={Paraphrase Types for Generation and Detection},\nauthor={Jan Philip Wahle and Bela Gipp and Terry Ruas},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=Fqv0rgvkol}\n}", "github": "", "project": "", "reviewers": "qoRy;ow9w;aya4", "site": "https://openreview.net/forum?id=Fqv0rgvkol", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;5;3", "excitement": "2;4;3", "reproducibility": "4;5;3", "correctness": "3;3;3", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.0, "reproducibility_avg": 4.0, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-2116-9767;0000-0001-6522-3019;", "linkedin": "https://linkedin.com/in/jan-philip-wahle/;;", "aff_unique_index": "0;1", "aff_unique_norm": "University of G\u00f6ttingen;Georg-August Universit\u00e4t G\u00f6ttingen", "aff_unique_dep": ";", "aff_unique_url": "https://www.uni-goettingen.de;https://www.uni-goettingen.de", "aff_unique_abbr": "Georg-August-Universit\u00e4t;GAU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Germany" }, { "id": "G0ZGGpSj7i", "title": "Defining a New NLP Playground", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "The recent explosion of performance of large language models (LLMs) has changed the field of Natural Language Processing (NLP) more abruptly and seismically than any other shift in the field's 80 year history. This has resulted in concerns that the field will become homogenized and resource-intensive. This new status quo has put many academic researchers, especially PhD students, at a disadvantage. 
This paper aims to define a new NLP playground by proposing 20+ PhD-dissertation-worthy research directions, covering theoretical analysis, new and challenging problems, learning paradigms and interdisciplinary applications.", "keywords": "position paper; theme track; large language models", "primary_area": "", "supplementary_material": "", "author": "Sha Li;Chi Han;Pengfei Yu;Carl Edwards;Manling Li;Xingyao Wang;Yi Fung;Charles Yu;Joel R. Tetreault;Eduard Hovy;Heng Ji", "authorids": "~Sha_Li1;~Chi_Han1;~Pengfei_Yu1;~Carl_Edwards1;~Manling_Li1;~Xingyao_Wang1;~Yi_Fung1;~Charles_Yu1;~Joel_R._Tetreault2;~Eduard_Hovy1;~Heng_Ji3", "gender": "F;M;M;M;F;M;F;;F;M;", "homepage": ";https://glaciohound.github.io;;https://cnedwards.com/;https://limanling.github.io/;https://xwang.dev;https://mayrfung.github.io;;http://blender.cs.illinois.edu/hengji.html;http://www.cs.cmu.edu/~hovy;https://www.cs.rochester.edu/~tetreaul/academic.html", "dblp": ";255/6993;;300/1001;178/3620;264/9892;223/2782-1.html;265/1258.html;;47/2454;40/4518", "google_scholar": "OIo8J2YAAAAJ;https://scholar.google.com.sg/citations?user=DcSvbuAAAAAJ;bFbykBYAAAAJ;https://scholar.google.com/citations?hl=en;6U4SXnUAAAAJ;F7qq3YcAAAAJ;eUae2K0AAAAJ;;z7GCqT4AAAAJ;https://scholar.google.com.tw/citations?user=PUFxrroAAAAJ;Fn52EXUAAAAJ", "or_profile": "~Sha_Li1;~Chi_Han1;~Pengfei_Yu1;~Carl_Edwards1;~Manling_Li1;~Xingyao_Wang1;~Yi_Fung1;~Charles_Yu1;~Heng_Ji3;~Eduard_H_Hovy1;~Joel_R_Tetreault1", "aff": "University of Illinois, Urbana Champaign;University of Illinois, Urbana Champaign;University of Illinois, Urbana Champaign;University of Illinois, Urbana Champaign;University of Illinois, Urbana Champaign;Research, Google;University of Illinois, Urbana Champaign;University of Illinois, Urbana Champaign;University of Illinois, Urbana-Champaign;Carnegie Mellon University;Dataminr", "aff_domain": "illinois.edu;illinois.edu;illinois.edu;illinois.edu;illinois.edu;research.google.com;illinois.edu;illinois.edu;uiuc.edu;cmu.edu;dataminr.com", "position": "PhD student;PhD student;PhD student;PhD student;PhD student;Intern;PhD student;PhD student;Full Professor;Adjunct Professor;Principal Researcher", "bibtex": "@inproceedings{\nli2023defining,\ntitle={Defining a New {NLP} Playground},\nauthor={Sha Li and Chi Han and Pengfei Yu and Carl Edwards and Manling Li and Xingyao Wang and Yi Fung and Charles Yu and Joel R. 
Tetreault and Eduard Hovy and Heng Ji},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=G0ZGGpSj7i}\n}", "github": "", "project": "", "reviewers": "52jY;AhjZ;R4dS", "site": "https://openreview.net/forum?id=G0ZGGpSj7i", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;3;3", "excitement": "5;3;4", "reproducibility": "", "correctness": "5;4;4", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 4.0, "reproducibility_avg": 0, "correctness_avg": 4.333333333333333, "replies_avg": 10, "authors#_avg": 11, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0001-6235-5841;;;;0000-0002-3483-8624;;;;;", "linkedin": ";chi-han-b01a93141/;;carl-edwards-70a90592;;;;charles-t-yu/;;;joel-tetreault-67234512", "aff_unique_index": "0;0;0;0;0;1;0;0;2;3;4", "aff_unique_norm": "University of Illinois Urbana-Champaign;Google;University of Illinois;Carnegie Mellon University;Dataminr", "aff_unique_dep": ";Google Research;;;", "aff_unique_url": "https://illinois.edu;https://research.google;https://illinois.edu;https://www.cmu.edu;https://www.dataminr.com", "aff_unique_abbr": "UIUC;Google;UIUC;CMU;Dataminr", "aff_campus_unique_index": "0;0;0;0;0;1;0;0;0", "aff_campus_unique": "Urbana-Champaign;Mountain View;", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "G12y1Pz3vJ", "title": "Improving Unsupervised Relation Extraction by Augmenting Diverse Sentence Pairs", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Unsupervised relation extraction (URE) aims to extract relations between named entities from raw text without requiring manual annotations or pre-existing knowledge bases. \nIn recent studies of URE, researchers put a notable emphasis on contrastive learning strategies for acquiring relation representations. However, these studies often overlook two important aspects: the inclusion of diverse positive pairs for contrastive learning and the exploration of appropriate loss functions. \nIn this paper, we propose AugURE with both within-sentence pairs augmentation and augmentation through cross-sentence pairs extraction to increase the diversity of positive pairs and strengthen the discriminative power of contrastive learning. We also identify the limitation of noise-contrastive estimation (NCE) loss for relation representation learning and propose to apply margin loss for sentence pairs. 
Experiments on NYT-FB and TACRED datasets demonstrate that the proposed relation representation learning and a simple K-Means clustering achieve state-of-the-art performance.", "keywords": "unsupervised relation extraction;relation representation learning;contrastive learning", "primary_area": "", "supplementary_material": "", "author": "Qing Wang;Kang Zhou;Qiao Qiao;Yuepei Li;Qi Li", "authorids": "~Qing_Wang15;~Kang_Zhou2;~Qiao_Qiao1;~Yuepei_Li1;~Qi_Li14", "gender": "F;M;;M;F", "homepage": "https://www.cs.iastate.edu/people/qing-wang;https://sites.google.com/iastate.edu/kangzhou/home;;https://www.sites.google.com/view/yuepeili;https://sites.google.com/iastate.edu/qili/", "dblp": ";;;199/8882;181/2688-12", "google_scholar": "jY7bx4gAAAAJ;pNhbQq8AAAAJ;fGhPJ9IAAAAJ;or3srI0AAAAJ;Gvld0foAAAAJ", "or_profile": "~Qing_Wang15;~Kang_Zhou2;~Qiao_Qiao1;~Yuepei_Li1;~Qi_Li14", "aff": "Iowa State University;Iowa State University;Iowa State University;Iowa State University;Iowa State University", "aff_domain": "iastate.edu;iastate.edu;iastate.edu;iastate.edu;iastate.edu", "position": "PhD student;PhD student;PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nwang2023improving,\ntitle={Improving Unsupervised Relation Extraction by Augmenting Diverse Sentence Pairs},\nauthor={Qing Wang and Kang Zhou and Qiao Qiao and Yuepei Li and Qi Li},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=G12y1Pz3vJ}\n}", "github": "", "project": "", "reviewers": "TtUf;PHta;Ydii", "site": "https://openreview.net/forum?id=G12y1Pz3vJ", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;3;4", "excitement": "3;3;3", "reproducibility": "3;4;4", "correctness": "3;4;3", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;0000-0002-3136-2157", "linkedin": ";kang-zhou-5054a4142/;;yuepei-li-870037210/;", "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Iowa State University", "aff_unique_dep": "", "aff_unique_url": "https://www.iastate.edu", "aff_unique_abbr": "ISU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "G13P9iWzKc", "title": "When Language Models Fall in Love: Animacy Processing in Transformer Language Models", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Animacy\u2014whether an entity is alive and sentient\u2014is fundamental to cognitive processing, impacting areas such as memory, vision, and language. However, animacy is not always expressed directly in language: in English it often manifests indirectly, in the form of selectional constraints on verbs and adjectives. This poses a potential issue for transformer language models (LMs): they often train only on text, and thus lack access to extralinguistic information from which humans learn about animacy. We ask: how does this impact LMs' animacy processing\u2014do they still behave as humans do? We answer this question using open-source LMs. Like previous studies, we find that LMs behave much like humans when presented with entities whose animacy is typical. 
However, we also show that even when presented with stories about atypically animate entities, such as a peanut in love, LMs adapt: they treat these entities as animate, though they do not adapt as well as humans. Even when the context indicating atypical animacy is very short, LMs pick up on subtle clues and change their behavior. We conclude that despite the limited signal through which LMs can learn about animacy, they are indeed sensitive to the relevant lexical semantic nuances available in English.", "keywords": "animacy;language models;selectional constraints;semantics;discourse context", "primary_area": "", "supplementary_material": "", "author": "Michael Hanna;Yonatan Belinkov;Sandro Pezzelle", "authorids": "~Michael_Hanna1;~Yonatan_Belinkov1;~Sandro_Pezzelle1", "gender": "M;M;M", "homepage": "http://hannamw.github.io;https://www.belinkov.com;https://sandropezzelle.github.io/", "dblp": "306/9666;136/8705;182/2260", "google_scholar": "0wOdTeYAAAAJ;https://scholar.google.com/citations?authorid=K-6ujU4AAAAJ;https://scholar.google.it/citations?user=PW6eQ6YAAAAJ", "or_profile": "~Michael_Hanna1;~Yonatan_Belinkov1;~Sandro_Pezzelle1", "aff": "University of Amsterdam;Technion, Technion;University of Amsterdam", "aff_domain": "uva.nl;technion.ac.il;uva.nl", "position": "PhD student;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nhanna2023when,\ntitle={When Language Models Fall in Love: Animacy Processing in Transformer Language Models},\nauthor={Michael Hanna and Yonatan Belinkov and Sandro Pezzelle},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=G13P9iWzKc}\n}", "github": "", "project": "", "reviewers": "QnBL;w2ux;9t4f;jHyH", "site": "https://openreview.net/forum?id=G13P9iWzKc", "pdf_size": 0, "rating": "5;5;5;5", "confidence": "4;4;4;3", "excitement": "5;4;4;4", "reproducibility": "5;4;4;4", "correctness": "4;4;4;4", "rating_avg": 5.0, "confidence_avg": 3.75, "excitement_avg": 4.25, "reproducibility_avg": 4.25, "correctness_avg": 4.0, "replies_avg": 14, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0002-3969-7445", "linkedin": "michael-hanna-a29279140/;;", "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Amsterdam;Technion - Israel Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.uva.nl;https://www.technion.ac.il/en/", "aff_unique_abbr": "UvA;Technion", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "Netherlands;Israel" }, { "id": "G3IjhUERrD", "title": "CoF-CoT: Enhancing Large Language Models with Coarse-to-Fine Chain-of-Thought Prompting for Multi-domain NLU Tasks", "track": "main", "status": "Short Main", "tldr": "", "abstract": "While Chain-of-Thought prompting is popular in reasoning tasks, its application to Large Language Models (LLMs) in Natural Language Understanding (NLU) is under-explored. Motivated by multi-step reasoning of LLMs, we propose Coarse-to-Fine Chain-of-Thought (CoF-CoT) approach that breaks down NLU tasks into multiple reasoning steps where LLMs can learn to acquire and leverage essential concepts to solve tasks from different granularities. 
Moreover, we propose leveraging semantic-based Abstract Meaning Representation (AMR) structured knowledge as an intermediate step to capture the nuances and diverse structures of utterances, and to understand connections between their varying levels of granularity. Our proposed approach is demonstrated effective in assisting the LLMs adapt to the multi-grained NLU tasks under both zero-shot and few-shot multi-domain settings.", "keywords": "large language model;natural language understanding;chain-of-thought;multi-step reasoning;slot filling;intent detection;semantic parsing;abstract meaning representation", "primary_area": "", "supplementary_material": "", "author": "Hoang H Nguyen;Ye Liu;Chenwei Zhang;TAO ZHANG;Philip S. Yu", "authorids": "~Hoang_H_Nguyen1;~Ye_Liu4;~Chenwei_Zhang1;~TAO_ZHANG12;~Philip_S._Yu1", "gender": "M;F;M;F;M", "homepage": ";;https://www.cwzhang.com;;https://cs.uic.edu/profiles/philip-yu/", "dblp": "200/9071.html;96/2615-6;133/3207;https://dblp.org/rec/conf/coling/ZhangXLY20;y/PhilipSYu", "google_scholar": "https://scholar.google.com/citations?hl=en;QMKD6YMAAAAJ;u_bIiBQAAAAJ;;D0lL1r0AAAAJ", "or_profile": "~Hoang_H_Nguyen1;~Ye_Liu4;~Chenwei_Zhang1;~TAO_ZHANG12;~Philip_S._Yu1", "aff": "University of Illinois at Chicago;SalesForce.com;Amazon;University of Illinois at Chicago;University of Illinois Chicago", "aff_domain": "uic.edu;salesforce.com;amazon.com;uic.edu;uic.edu", "position": "PhD student;Researcher;Researcher;PhD student;Full Professor", "bibtex": "@inproceedings{\nnguyen2023cofcot,\ntitle={CoF-CoT: Enhancing Large Language Models with Coarse-to-Fine Chain-of-Thought Prompting for Multi-domain {NLU} Tasks},\nauthor={Hoang H Nguyen and Ye Liu and Chenwei Zhang and TAO ZHANG and Philip S. Yu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=G3IjhUERrD}\n}", "github": "", "project": "", "reviewers": "ZdgB;ajre;GkrJ", "site": "https://openreview.net/forum?id=G3IjhUERrD", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;2;2", "excitement": "3;3;3", "reproducibility": "4;4;3", "correctness": "4;4;4", "rating_avg": 4.0, "confidence_avg": 2.3333333333333335, "excitement_avg": 3.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;0000-0002-3491-5968", "linkedin": ";;;tao-zhang-a61273224/;", "aff_unique_index": "0;1;2;0;0", "aff_unique_norm": "University of Illinois at Chicago;Salesforce;Amazon", "aff_unique_dep": ";;Amazon.com, Inc.", "aff_unique_url": "https://www.uic.edu;https://www.salesforce.com;https://www.amazon.com", "aff_unique_abbr": "UIC;Salesforce;Amazon", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Chicago;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "G6E3uzABf1", "title": "Improving Consistency for Text Summarization with Energy Functions", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "Current abstractive summarization models often generate inconsistent content, i.e. texts that are not directly inferable from the source document, are not consistent with respect to world knowledge, or are self-contradictory. These inconsistencies motivate a new consistency taxonomy that we define as faithfulness, factuality, and self-supportiveness. 
However, most recent work on reducing inconsistency in document summarization only focuses on faithfulness detection and correction while ignoring other inconsistency phenomena, which limits the model's scalability. To improve the general consistency we introduce EnergySum, where we apply the Residual Energy-based Model by designing energy scorers that reflect each type of consistency. These energy scores are utilized in candidate re-ranking during the sampling process. Experiments on XSUM and CNN/DM datasets show that EnergySum mitigates the trade-off between accuracy and consistency.", "keywords": "Document Summarization;Consistent Summarization", "primary_area": "", "supplementary_material": "", "author": "Qi Zeng;Qingyu Yin;Zheng Li;Yifan Gao;Sreyashi Nag;Zhengyang Wang;Bing Yin;Heng Ji;Chao Zhang", "authorids": "~Qi_Zeng1;~Qingyu_Yin2;~Zheng_Li9;~Yifan_Gao1;~Sreyashi_Nag1;~Zhengyang_Wang1;~Bing_Yin1;~Heng_Ji3;~Chao_Zhang15", "gender": "F;M;Not Specified;F;M;M;F;;M", "homepage": "http://www.vickizeng.com/;;http://yifan-gao.github.io;;;;http://blender.cs.illinois.edu/hengji.html;http://chaozhang.org/;https://hsqmlzno1.github.io/", "dblp": "39/7992-1;179/2542;79/3190-1;;;;;94/3019-14;10/1143-18", "google_scholar": "lOEEhwgAAAAJ;P-mBKNYAAAAJ;https://scholar.google.com.hk/citations?user=erdMFJwAAAAJ;https://scholar.google.com/citations?hl=en;A4fNBtEAAAAJ;qSOxydEAAAAJ;z7GCqT4AAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com.hk/citations?user=P6fwn4AAAAAJ", "or_profile": "~Qi_Zeng1;~Qingyu_Yin2;~Yifan_Gao1;~Sreyashi_Nag1;~Zhengyang_Wang1;~Bing_Yin1;~Heng_Ji3;~Chao_Zhang15;~zheng_li4", "aff": "University of Illinois, Urbana Champaign;Amazon;Amazon;Amazon;Amazon;Amazon;University of Illinois, Urbana-Champaign;Georgia Institute of Technology;Amazon", "aff_domain": "illinois.edu;amazon.com;amazon.com;amazon.com;amazon.com;amazon.com;uiuc.edu;gatech.edu;amazon.com", "position": "PhD student;Researcher;Researcher;Applied Scientist;Researcher;Senior Science Manager;Full Professor;Assistant Professor;Researcher", "bibtex": "@inproceedings{\nzeng2023improving,\ntitle={Improving Consistency for Text Summarization with Energy Functions},\nauthor={Qi Zeng and Qingyu Yin and Zheng Li and Yifan Gao and Sreyashi Nag and Zhengyang Wang and Bing Yin and Heng Ji and Chao Zhang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=G6E3uzABf1}\n}", "github": "", "project": "", "reviewers": "2Tfi;zq8F;qsTq", "site": "https://openreview.net/forum?id=G6E3uzABf1", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;4", "excitement": "3;3;4", "reproducibility": "4;2;5", "correctness": "4;2;3", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 9, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;0000-0002-5146-2884;0000-0002-5890-0031;;0000-0003-3009-598X;", "linkedin": ";;yi-fan-gao/;sreyashi-nag/;;bingyin;;;", "aff_unique_index": "0;1;1;1;1;1;2;3;1", "aff_unique_norm": "University of Illinois Urbana-Champaign;Amazon;University of Illinois;Georgia Institute of Technology", "aff_unique_dep": ";Amazon.com, Inc.;;", "aff_unique_url": "https://illinois.edu;https://www.amazon.com;https://illinois.edu;https://www.gatech.edu", "aff_unique_abbr": "UIUC;Amazon;UIUC;Georgia Tech", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Urbana-Champaign;", 
"aff_country_unique_index": "0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "G6gj7Dydc5", "title": "HEAR: Hearing Enhanced Audio Response for Video-grounded Dialogue", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Video-grounded Dialogue (VGD) aims to answer questions regarding a given multi-modal input comprising video, audio, and dialogue history. Although there have been numerous efforts in developing VGD systems to improve the quality of their responses, existing systems are competent only to incorporate the information in the video and text and tend to struggle in extracting the necessary information from the audio when generating appropriate responses to the question. The VGD system seems to be deaf, and thus, we coin this symptom of current systems' ignoring audio data as a deaf response. To overcome the deaf response problem, Hearing Enhanced Audio Response (HEAR) framework is proposed to perform sensible listening by selectively attending to audio whenever the question requires it. The HEAR framework enhances the accuracy and audibility of VGD systems in a model-agnostic manner. HEAR is validated on VGD datasets (i.e., AVSD@DSTC7 and AVSD@DSTC8) and shows effectiveness with various VGD systems.", "keywords": "Video-grounded Dialouge;Video Scene Understanding;Open-ended Video Question Answering", "primary_area": "", "supplementary_material": "", "author": "Sunjae Yoon;DaHyun Kim;Eunseop Yoon;Hee Suk Yoon;Junyeong Kim;Chang D. Yoo", "authorids": "~Sunjae_Yoon1;~DaHyun_Kim2;~Eunseop_Yoon1;~Hee_Suk_Yoon1;~Junyeong_Kim2;~Chang_D._Yoo1", "gender": "M;M;F;M;M;M", "homepage": "https://dbstjswo505.github.io/;https://slsp.kaist.ac.kr;https://esyoon7.github.io/;https://hee-suk-yoon.github.io/;https://sites.google.com/view/junyeongkim/;https://sanctusfactory.com/family.php", "dblp": "273/3911;;331/3764;331/3851;28/9716;31/7819", "google_scholar": "2A2lRoUAAAAJ;;QbEnxx0AAAAJ;eJ_iOQEAAAAJ;pAww37cAAAAJ;gFWgUQEAAAAJ", "or_profile": "~Sunjae_Yoon1;~DaHyun_Kim2;~Eunseop_Yoon1;~Hee_Suk_Yoon1;~Junyeong_Kim2;~Chang_D._Yoo1", "aff": "Korea Advanced Institute of Science and Technology (KAIST);;KAIST;Korea Advanced Institute of Science & Technology;Chung-Ang University;Korea Advanced Institute of Science & Technology", "aff_domain": "kaist.ac.kr;;ee.kaist.ac.kr;kaist.ac.kr;cau.ac.kr;kaist.ac.kr", "position": "PhD student;;MS student;PhD student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nyoon2023hear,\ntitle={{HEAR}: Hearing Enhanced Audio Response for Video-grounded Dialogue},\nauthor={Sunjae Yoon and DaHyun Kim and Eunseop Yoon and Hee Suk Yoon and Junyeong Kim and Chang D. 
Yoo},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=G6gj7Dydc5}\n}", "github": "", "project": "", "reviewers": "Zsad;9PUj;c3vW", "site": "https://openreview.net/forum?id=G6gj7Dydc5", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;3;4", "excitement": "3;4;2", "reproducibility": "3;4;4", "correctness": "4;3;3", "rating_avg": 3.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0001-7458-5273;;0000-0002-5580-5354;0000-0003-2115-8459;0000-0002-7871-9627;0000-0002-0756-7179", "linkedin": "sunjae-yoon-133294333/;;;https://www.linkedin.com/mwlite/in/hee-suk-yoon-262935137;;", "aff_unique_index": "0;0;0;1;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology;Chung-Ang University", "aff_unique_dep": ";", "aff_unique_url": "https://www.kaist.ac.kr;http://www.cau.ac.kr", "aff_unique_abbr": "KAIST;CAU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "South Korea" }, { "id": "G7IbRKrAOE", "title": "Calc-X and Calcformers: Empowering Arithmetical Chain-of-Thought through Interaction with Symbolic Systems", "track": "main", "status": "Short Main", "tldr": "", "abstract": "Despite outstanding performance in many tasks, language models are notoriously inclined to make factual errors in tasks requiring arithmetic computation. We address this deficiency by creating Calc-X, a collection of datasets that demonstrates the appropriate use of a calculator in reasoning chains. Calc-X is suitable for teaching language models to offload computations to a symbolic system.\nWe survey and unify several existing chain-of-thought datasets into a proposed format, resulting in a standard collection of over 300,000 samples requiring arithmetic reasoning. 
Finally, we use the new Calc-X collection to train open-source calculator-using models and show that these models approximately double the accuracy of generating correct results compared to vanilla language model baselines.", "keywords": "arithmetic reasoning;multistep reasoning;dataset;generation", "primary_area": "", "supplementary_material": "", "author": "Marek Kadl\u010d\u00edk;Michal \u0160tef\u00e1nik;Ondrej Sotolar;Vlastimil Martinek", "authorids": "~Marek_Kadl\u010d\u00edk1;~Michal_\u0160tef\u00e1nik1;~Ondrej_Sotolar1;~Vlastimil_Martinek1", "gender": "M;;M;M", "homepage": "https://prompteus.dev;https://ondrejsotolar.github.io;;https://michal-stefanik.github.io", "dblp": "334/7865.html;300/9366;362/8091;255/9301", "google_scholar": ";vge6XdEAAAAJ;yzYsqL0AAAAJ;9p-110IAAAAJ", "or_profile": "~Marek_Kadl\u010d\u00edk1;~Ondrej_Sotolar1;~Vlastimil_Martinek1;~Michal_Stefanik1", "aff": "Masaryk University;Masaryk University;Masaryk University;Masaryk University", "aff_domain": "muni.cz;muni.cz;muni.cz;muni.cz", "position": "MS student;PhD student;PhD student;PhD student", "bibtex": "@inproceedings{\nkadl{\\v{c}}{\\'\\i}k2023calcx,\ntitle={Calc-X and Calcformers: Empowering Arithmetical Chain-of-Thought through Interaction with Symbolic Systems},\nauthor={Marek Kadl{\\v{c}}{\\'\\i}k and Michal {\\v{S}}tef{\\'a}nik and Ondrej Sotolar and Vlastimil Martinek},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=G7IbRKrAOE}\n}", "github": "", "project": "", "reviewers": "jPuE;yZbJ;ZPay", "site": "https://openreview.net/forum?id=G7IbRKrAOE", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;4;4", "excitement": "3;3;4", "reproducibility": "3;4;4", "correctness": "2;4;4", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-8293-7184;0000-0001-6452-2295;0000-0002-3204-1830;0000-0003-1766-5538", "linkedin": "prompteus/;ondrejsotolar/;;stefanikm", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Masaryk University", "aff_unique_dep": "", "aff_unique_url": "https://www.muni.cz", "aff_unique_abbr": "MU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Czech Republic" }, { "id": "GDPMVALXqv", "title": "Using In-Context Learning to Improve Dialogue Safety", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "While large neural-based conversational models have become increasingly proficient dialogue agents, recent work has highlighted safety issues with these systems. For example, these systems can be goaded into generating toxic content, often perpetuating social biases or stereotypes. We investigate a retrieval-based approach for reducing bias and toxicity in responses from chatbots. It uses in-context learning to steer a model towards safer generations. Concretely, to generate a response to an unsafe dialogue context, we retrieve demonstrations of safe responses to similar dialogue contexts. We find our method performs competitively with existing approaches to dialogue safety without requiring training. We also show, using automatic and human evaluation, that reductions in toxicity obtained using our approach are not at the cost engagingness or coherency. 
Finally, we note our method can be used in complement to existing dialogue safety approaches, such as RLHF.", "keywords": "Dialogue Safety;Toxicity in NLP;Bias in NLP;Dialogue Systems;In-Context Learning;Retrieval", "primary_area": "", "supplementary_material": "", "author": "Nicholas Meade;Spandana Gella;Devamanyu Hazarika;Prakhar Gupta;Di Jin;Siva Reddy;Yang Liu;Dilek Hakkani-Tur", "authorids": "~Nicholas_Meade1;~Spandana_Gella2;~Devamanyu_Hazarika1;~Prakhar_Gupta1;~Di_Jin1;~Siva_Reddy1;~Yang_Liu60;~Dilek_Hakkani-Tur1", "gender": ";F;M;M;M;M;F;F", "homepage": "https://ncmeade.github.io;https://scholar.google.com/citations?user=fChTW6MAAAAJ&hl=en&oi=ao;https://devamanyu.com;https://prakharguptaz.github.io/;https://jind11.github.io/;http://sivareddy.in;;https://siebelschool.illinois.edu/about/people/faculty/dilek", "dblp": "244/9969;146/3968.html;188/5874;121/0747;;64/8153;51/3710-4;h/DilekZHakkaniTur", "google_scholar": "-aLqCbgAAAAJ;fChTW6MAAAAJ;nUCWRZAAAAAJ;YuFcRF0AAAAJ;x5QTK9YAAAAJ;;w90wOucAAAAJ;GMcL_9kAAAAJ", "or_profile": "~Nicholas_Meade1;~Spandana_Gella2;~Devamanyu_Hazarika1;~Prakhar_Gupta1;~Di_Jin1;~Siva_Reddy1;~Yang_Liu60;~Dilek_Hakkani_Tur1", "aff": "McGill University;Amazon;Amazon Alexa AI;Carnegie Mellon University;Amazon;Mila, McGill University;Amazon;Amazon", "aff_domain": "mcgill.ca;amazon.com;amazon.com;cmu.edu;amazon.com;mila.quebec;amazon.com;amazon.com", "position": "PhD student;Research Scientist;Researcher;PhD student;Researcher;Assistant Professor;Principal Researcher;Snr Principal Scientist", "bibtex": "@inproceedings{\nmeade2023using,\ntitle={Using In-Context Learning to Improve Dialogue Safety},\nauthor={Nicholas Meade and Spandana Gella and Devamanyu Hazarika and Prakhar Gupta and Di Jin and Siva Reddy and Yang Liu and Dilek Hakkani-Tur},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=GDPMVALXqv}\n}", "github": "", "project": "", "reviewers": "QjSk;kDkA;yFp7", "site": "https://openreview.net/forum?id=GDPMVALXqv", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;5;4", "excitement": "3;3;3", "reproducibility": "4;4;4", "correctness": "4;4;4", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.0, "reproducibility_avg": 4.0, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 8, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0002-0241-7163;;;;;0000-0001-5246-2117", "linkedin": ";spandana-gella-313b7019/;devamanyu/;prakhar-gupta-100/;;;yang-liu-8555143/;dilek-hakkani-tur-9517543/", "aff_unique_index": "0;1;1;2;1;0;1;1", "aff_unique_norm": "McGill University;Amazon;Carnegie Mellon University", "aff_unique_dep": ";Amazon.com, Inc.;", "aff_unique_url": "https://www.mcgill.ca;https://www.amazon.com;https://www.cmu.edu", "aff_unique_abbr": "McGill;Amazon;CMU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Montreal", "aff_country_unique_index": "0;1;1;1;1;0;1;1", "aff_country_unique": "Canada;United States" }, { "id": "GEZW6VqQNg", "title": "Can ChatGPT Defend its Belief in Truth? Evaluating LLM Reasoning via Debate", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Large language models (LLMs) such as ChatGPT and GPT-4 have shown impressive performance in complex reasoning tasks. However, it is difficult to know whether the models are reasoning based on deep understandings of truth and logic, or leveraging their memorized patterns in a relatively superficial way. 
In this work, we explore testing LLMs' reasoning by engaging with them in a debate-like conversation, where given a question, the LLM and the user need to discuss to make the correct decision starting from opposing arguments. Upon mitigating the Clever Hans effect, our task requires the LLM to not only achieve the correct answer on its own, but also be able to hold and defend its belief instead of blindly believing or getting misled by the user's (invalid) arguments and critiques, thus testing in greater depth whether the LLM grasps the essence of the reasoning required to solve the problem. Across a range of complex reasoning benchmarks spanning math, commonsense, logic and BIG-Bench tasks, we find that despite their impressive performance as reported in existing work on generating correct step-by-step solutions in the beginning, LLMs like ChatGPT cannot maintain their beliefs in truth for a significant portion of examples when challenged by oftentimes absurdly invalid arguments. Our work points to danger zones of model alignment, and also suggests more careful treatments and interpretations of the recent findings that LLMs can improve their responses based on feedback.", "keywords": "Reasoning;Large Language Models;ChatGPT", "primary_area": "", "supplementary_material": "", "author": "Boshi Wang;Xiang Yue;Huan Sun", "authorids": "~Boshi_Wang2;~Xiang_Yue1;~Huan_Sun1", "gender": "M;;F", "homepage": "https://boshi-wang.github.io/;;https://u.osu.edu/ihudas/people/", "dblp": "216/7905;;33/2952-1.html", "google_scholar": "https://scholar.google.com/citations?hl=en;;wIFkulcAAAAJ", "or_profile": "~Boshi_Wang2;~Xiang_Yue1;~Huan_Sun1", "aff": "Ohio State University;;The Ohio State University, Columbus", "aff_domain": "osu.edu;;osu.edu", "position": "PhD student;;Associate Professor", "bibtex": "@inproceedings{\nwang2023can,\ntitle={Can Chat{GPT} Defend its Belief in Truth? Evaluating {LLM} Reasoning via Debate},\nauthor={Boshi Wang and Xiang Yue and Huan Sun},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=GEZW6VqQNg}\n}", "github": "", "project": "", "reviewers": "rAor;bfJW;mGbg", "site": "https://openreview.net/forum?id=GEZW6VqQNg", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;3;4", "excitement": "4;3;3", "reproducibility": "4;3;4", "correctness": "4;2;3", "rating_avg": 3.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.0, "replies_avg": 10, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": ";;huan-sun-81527924/?originalSubdomain=cn", "aff_unique_index": "0;0", "aff_unique_norm": "Ohio State University", "aff_unique_dep": "", "aff_unique_url": "https://www.osu.edu", "aff_unique_abbr": "OSU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Columbus", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "GFgPmhLVhC", "title": "Syntax Matters: Towards Spoken Language Understanding via Syntax-Aware Attention", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "Spoken Language Understanding (SLU), a crucial component of task-oriented dialogue systems, has consistently garnered attention from both academic and industrial communities. 
Although incorporating syntactic information into models has the potential to enhance the comprehension of user utterances and yield impressive results, its application in SLU systems remains largely unexplored. In this paper, we propose a carefully designed model termed Syntax-aware attention (SAT) to enhance SLU, where attention scopes are constrained based on relationships within the syntactic structure. Experimental results on three datasets show that our model achieves substantial improvements and excellent performance. Moreover, SAT can be integrated into other BERT-based language models to further boost their performance.", "keywords": "Spoken Language understanding;Syntactic Dependency Parsing;Feature Fusion", "primary_area": "", "supplementary_material": "", "author": "Yifeng Xie;Zhihong Zhu;Xuxin Cheng;Zhiqi Huang;Dongsheng Chen", "authorids": "~Yifeng_Xie1;~Zhihong_Zhu1;~Xuxin_Cheng3;~Zhiqi_Huang2;~Dongsheng_Chen1", "gender": "M;;;M;M", "homepage": "https://evfidiw.github.io/;;;https://zhiqi-huang.github.io/;", "dblp": "216/2644;;;;", "google_scholar": ";;;5JGMGCsAAAAJ;https://scholar.google.com.hk/citations?user=2sI1wsoAAAAJ", "or_profile": "~Yifeng_Xie1;~Zhihong_Zhu1;~Xuxin_Cheng3;~Zhiqi_Huang2;~Dongsheng_Chen1", "aff": "Guangdong University of Technology;;;Tencent Game;Peking University", "aff_domain": "gdut.edu.cn;;;tencent.com;pku.edu.cn", "position": "Undergrad student;;;Researcher;MS student", "bibtex": "@inproceedings{\nxie2023syntax,\ntitle={Syntax Matters: Towards Spoken Language Understanding via Syntax-Aware Attention},\nauthor={Yifeng Xie and Zhihong Zhu and Xuxin Cheng and Zhiqi Huang and Dongsheng Chen},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=GFgPmhLVhC}\n}", "github": "", "project": "", "reviewers": "ajUT;EFty;ifvP", "site": "https://openreview.net/forum?id=GFgPmhLVhC", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;3", "excitement": "3;3;3", "reproducibility": "5;3;4", "correctness": "3;3;4", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.0, "reproducibility_avg": 4.0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;", "linkedin": ";;;zhiqi-huang-133499142/;", "aff_unique_index": "0;1;2", "aff_unique_norm": "Guangdong University of Technology;Tencent;Peking University", "aff_unique_dep": ";Tencent Game;", "aff_unique_url": "http://www.gdut.edu.cn;https://www.tencent.com;http://www.pku.edu.cn", "aff_unique_abbr": "GDUT;Tencent;Peking U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "GLA4ablO3M", "title": "FActScore: Fine-grained Atomic Evaluation of Factual Precision in Long Form Text Generation", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Evaluating the factuality of long-form text generated by large language models (LMs) is non-trivial because (1) generations often contain a mixture of supported and unsupported pieces of information, making binary judgments of quality inadequate, and (2) human evaluation is time-consuming and costly. In this paper, we introduce FACTSCORE, a new evaluation that breaks a generation into a series of atomic facts and computes the percentage of atomic facts supported by a reliable knowledge source. 
We conduct an extensive human evaluation to obtain FACTSCOREs of people biographies generated by several state-of-the-art commercial LMs\u2014InstructGPT, ChatGPT, and the retrieval-augmented PerplexityAI\u2014and report new analysis demonstrating the need for such a fine-grained score (e.g., ChatGPT only achieves 58%). Since human evaluation is costly, we also introduce an automated model that estimates FACTSCORE using retrieval and a strong language model, with less than a 2% error rate. Finally, we use this automated metric to evaluate 6,500 generations from a new set of 13 recent LMs that would have cost $26K if evaluated by humans, with various findings: GPT-4 and ChatGPT are more factual than public models, and Vicuna and Alpaca are some of the best public models. FACTSCORE is available for public use via `pip install factscore`.", "keywords": "Text Generation;Factuality;Evaluation;Large Language Models", "primary_area": "", "supplementary_material": "", "author": "Sewon Min;Kalpesh Krishna;Xinxi Lyu;Mike Lewis;Wen-tau Yih;Pang Wei Koh;Mohit Iyyer;Luke Zettlemoyer;Hannaneh Hajishirzi", "authorids": "~Sewon_Min1;~Kalpesh_Krishna1;~Xinxi_Lyu1;~Mike_Lewis1;~Wen-tau_Yih1;~Pang_Wei_Koh1;~Mohit_Iyyer1;~Luke_Zettlemoyer1;~Hannaneh_Hajishirzi1", "gender": "F;M;M;M;M;M;M;M;F", "homepage": "https://www.sewonmin.com;http://martiansideofthemoon.github.io/;;;http://scottyih.org;http://cs.stanford.edu/~pangwei;http://cs.umass.edu/~miyyer;https://www.cs.washington.edu/people/faculty/lsz/;https://homes.cs.washington.edu/~hannaneh/", "dblp": "203/9401;207/8485;314/6814;19/6214;07/7129;10/10453;148/9178;21/6793;52/1296", "google_scholar": "https://scholar.google.ca/citations?user=jU4IZs4AAAAJ;https://scholar.google.com/citations?hl=en;;SnQnQicAAAAJ;8rDNIMsAAAAJ;Nn990CkAAAAJ;rBVA5tcAAAAJ;https://scholar.google.com.tw/citations?user=UjpbO6IAAAAJ;LOV6_WIAAAAJ", "or_profile": "~Sewon_Min1;~Kalpesh_Krishna1;~Xinxi_Lyu1;~Mike_Lewis1;~Wen-tau_Yih1;~Pang_Wei_Koh1;~Mohit_Iyyer1;~Luke_Zettlemoyer1;~Hannaneh_Hajishirzi1", "aff": "Meta Facebook;University of Massachusetts Amherst;University of Washington;Facebook AI Research;Meta Platforms, Inc.;Google;University of Massachusetts Amherst;Meta;University of Washington", "aff_domain": "fb.com;cs.umass.edu;uw.edu;fb.com;meta.com;google.com;cs.umass.edu;meta.com;uw.edu", "position": "PhD student;PhD student;MS student;Research Scientist;Research Scientist;Researcher;Assistant Professor;Researcher;Associate Professor", "bibtex": "@inproceedings{\nmin2023factscore,\ntitle={{FA}ctScore: Fine-grained Atomic Evaluation of Factual Precision in Long Form Text Generation},\nauthor={Sewon Min and Kalpesh Krishna and Xinxi Lyu and Mike Lewis and Wen-tau Yih and Pang Wei Koh and Mohit Iyyer and Luke Zettlemoyer and Hannaneh Hajishirzi},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=GLA4ablO3M}\n}", "github": "", "project": "", "reviewers": "4um9;Bgsm;4fZt", "site": "https://openreview.net/forum?id=GLA4ablO3M", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;3;4", "excitement": "3;4;4", "reproducibility": "3;3;4", "correctness": "4;4;4", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 9, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;0000-0003-4263-395X;;;;", "linkedin": 
";kalpesh-krishna-6b3827a6/;;;scottyih/;;;luke-zettlemoyer-a0109b226/;", "aff_unique_index": "0;1;2;0;0;3;1;0;2", "aff_unique_norm": "Meta;University of Massachusetts Amherst;University of Washington;Google", "aff_unique_dep": "Meta Platforms, Inc.;;;Google", "aff_unique_url": "https://meta.com;https://www.umass.edu;https://www.washington.edu;https://www.google.com", "aff_unique_abbr": "Meta;UMass Amherst;UW;Google", "aff_campus_unique_index": "1;2;1", "aff_campus_unique": ";Amherst;Mountain View", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "GOBxWdRpfz", "title": "Re-ViLM: Retrieval-Augmented Visual Language Model for Zero and Few-Shot Image Captioning", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Augmenting pretrained language models (LMs) with a vision encoder~(e.g., Flamingo) has obtained state-of-the-art results in image-to-text generation. However, these models store all the knowledge within their parameters, thus often requiring enormous model parameters to model the abundant visual concepts and very rich text descriptions. Additionally, they are inefficient in incorporating new data, requiring a computational-expensive fine-tuning process. In this work, we introduce a Retrieval-augmented Visual Language Model, Re-ViLM, built upon the Flamingo, that supports retrieving the relevant knowledge from the external database for zero and in-context few-shot image-to-text generations. By storing certain knowledge explicitly in the external database, our approach reduces the number of model parameters and can easily accommodate new data during evaluation by simply updating the database. We also construct an interleaved image and text data that facilitates in-context few-shot learning capabilities.We demonstrate that Re-ViLM significantly boosts performance for image-to-text generation tasks, especially for zero-shot and few-shot generation in out-of-domain settings with 4x less parameters compared with baseline methods.", "keywords": "Retrieval;Visual Language Model;Image-to-Text", "primary_area": "", "supplementary_material": "", "author": "Zhuolin Yang;Wei Ping;Zihan Liu;Vijay Anand Korthikanti;Weili Nie;De-An Huang;Linxi Fan;Zhiding Yu;Shiyi Lan;Bo Li;Mohammad Shoeybi;Ming-Yu Liu;Yuke Zhu;Bryan Catanzaro;Chaowei Xiao;Anima Anandkumar", "authorids": "~Zhuolin_Yang1;~Wei_Ping1;~Zihan_Liu2;~Vijay_Anand_Korthikanti1;~Weili_Nie1;~De-An_Huang1;~Linxi_Fan2;~Zhiding_Yu1;~Shiyi_Lan3;~Bo_Li19;~Mohammad_Shoeybi1;~Ming-Yu_Liu1;~Yuke_Zhu1;~Bryan_Catanzaro1;~Chaowei_Xiao2;~Anima_Anandkumar1", "gender": "M;M;M;;M;M;;;M;F;M;M;M;M;;", "homepage": "https://lucas110550.github.io/about;https://wpingnet.github.io/;https://zliucr.github.io;;https://weilinie.github.io/;http://ai.stanford.edu/~dahuang/;;;https://voidrank.github.com;http://boli.cs.illinois.edu/;;http://mingyuliu.net;https://cs.utexas.edu/~yukez/;https://ctnzr.io;;", "dblp": ";08/8399.html;46/9231;;147/4786;119/0335;154/6778;;192/1813;50/3402-26;53/9742;17/8368-1;133/1772;14/4826;;", "google_scholar": "BvSv-C0AAAAJ;6gKEYRgAAAAJ;LPabcsYAAAAJ;;zW7BH7oAAAAJ;HEY3UzgAAAAJ;sljtWIUAAAAJ;;https://scholar.google.com/citations?hl=en;K8vJkTcAAAAJ;62ElavIAAAAJ;y-f-MZgAAAAJ;mWGyYMsAAAAJ;UZ6kI2AAAAAJ;;", "or_profile": "~Zhuolin_Yang1;~Wei_Ping1;~Zihan_Liu2;~Vijay_Anand_Korthikanti1;~Weili_Nie1;~De-An_Huang1;~Linxi_Fan2;~Zhiding_Yu1;~Shiyi_Lan3;~Bo_Li19;~Mohammad_Shoeybi1;~Ming-Yu_Liu1;~Yuke_Zhu1;~Bryan_Catanzaro1;~Chaowei_Xiao2;~Anima_Anandkumar1", "aff": "University of Illinois at 
Urbana Champaign;NVIDIA;NVIDIA;NVIDIA;NVIDIA;NVIDIA;NVIDIA;;NVIDIA;University of Illinois, Urbana Champaign;NVIDIA;NVIDIA;Computer Science Department, University of Texas, Austin;NVIDIA;;", "aff_domain": "illinois.edu;nvidia.com;nvidia.com;nvidia.com;nvidia.com;nvidia.com;nvidia.com;;nvidia.com;illinois.edu;nvidia.com;nvidia.com;cs.utexas.edu;nvidia.com;;", "position": "PhD student;Principal Researcher;Researcher;Researcher;Research Scientist;Research Scientist;Researcher;;Researcher;Assistant Professor;Director of Applied Resesrch;Researcher;Assistant Professor;Vice President;;", "bibtex": "@inproceedings{\nyang2023revilm,\ntitle={Re-Vi{LM}: Retrieval-Augmented Visual Language Model for Zero and Few-Shot Image Captioning},\nauthor={Zhuolin Yang and Wei Ping and Zihan Liu and Vijay Anand Korthikanti and Weili Nie and De-An Huang and Linxi Fan and Zhiding Yu and Shiyi Lan and Bo Li and Mohammad Shoeybi and Ming-Yu Liu and Yuke Zhu and Bryan Catanzaro and Chaowei Xiao and Anima Anandkumar},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=GOBxWdRpfz}\n}", "github": "", "project": "", "reviewers": "FVoX;uG9w;ag8H", "site": "https://openreview.net/forum?id=GOBxWdRpfz", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;3", "excitement": "3;3;3", "reproducibility": "3;3;4", "correctness": "3;3;3", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.0, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 16, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;;;;;;;0000-0002-2951-2398;;0000-0003-0034-7728;;", "linkedin": ";wei-ping/;;vijay-anand-korthikanti-558a456/;;;;;;;shoeybi/;mingyuliu/;;bryancatanzaro/;;", "aff_unique_index": "0;1;1;1;1;1;1;1;0;1;1;2;1", "aff_unique_norm": "University of Illinois Urbana-Champaign;NVIDIA;University of Texas at Austin", "aff_unique_dep": ";NVIDIA Corporation;Computer Science Department", "aff_unique_url": "https://illinois.edu;https://www.nvidia.com;https://www.utexas.edu", "aff_unique_abbr": "UIUC;NVIDIA;UT Austin", "aff_campus_unique_index": "0;0;2", "aff_campus_unique": "Urbana-Champaign;;Austin", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "GQ1rtVVIy2", "title": "Identifying {Early Maladaptive Schemas} from Mental Health Question Texts", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "In Psychotherapy, {maladaptive schemas}-- negative perceptions that {an \nindividual has of the self, others, or the world that endure despite objective reality}--\noften lead to resistance to treatments and relapse of mental health issues such as depression, anxiety, panic attacks etc. Identification of early maladaptive schemas (EMS) is thus a crucial \nstep during Schema Therapy-based counseling sessions, \nwhere patients \ngo through a detailed and lengthy EMS questionnaire. However, such an approach is not practical in \"offline\" counseling scenarios, such as community QA forums which are gaining popularity for people seeking mental health support. In this paper, we \ninvestigate both LLM (Large Language Models) and non-LLM approaches for identifying EMS labels using resources from Schema Therapy. \nOur evaluation indicates that \nrecent LLMs can be effective for identifying EMS but their predictions lack explainability and are too sensitive to precise `prompts'. 
\nBoth LLM and non-LLM methods are unable to reliably address the {null} cases, i.e. cases with no EMS labels. However, we posit that the two approaches show complementary properties and together, \nthey can be used to further devise techniques\nfor EMS identification.", "keywords": "Mental Health;Schema Therapy;Early Maladaptive Schema;Personality Disorders;Classification", "primary_area": "", "supplementary_material": "", "author": "Sujatha Das Gollapalli;Beng Heng Ang;See-Kiong Ng", "authorids": "~Sujatha_Das_Gollapalli2;~Beng_Heng_Ang1;~See-Kiong_Ng1", "gender": "F;;M", "homepage": "https://sites.google.com/site/sujathadas/homepage-sujatha-das-gollapalli/publications;;https://www.comp.nus.edu.sg/~ngsk/", "dblp": "03/6702;;00/5480", "google_scholar": ";;https://scholar.google.com.tw/citations?user=_wsommYAAAAJ", "or_profile": "~Sujatha_Das_Gollapalli2;~Beng_Heng_Ang1;~See-Kiong_Ng1", "aff": "National University of Singapore;;National University of Singapore", "aff_domain": "nus.edu.sg;;nus.edu.sg", "position": "Researcher;;Full Professor", "bibtex": "@inproceedings{\ngollapalli2023identifying,\ntitle={Identifying \\{Early Maladaptive Schemas\\} from Mental Health Question Texts},\nauthor={Sujatha Das Gollapalli and Beng Heng Ang and See-Kiong Ng},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=GQ1rtVVIy2}\n}", "github": "", "project": "", "reviewers": "ikMh;14Gv;Gb3g;CaET", "site": "https://openreview.net/forum?id=GQ1rtVVIy2", "pdf_size": 0, "rating": "3;3;3;3", "confidence": "4;4;4;4", "excitement": "3;2;2;3", "reproducibility": "0;3;3;4", "correctness": "3;3;3;4", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 2.5, "reproducibility_avg": 2.5, "correctness_avg": 3.25, "replies_avg": 14, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0001-6565-7511", "linkedin": ";;seekiong/?originalSubdomain=sg", "aff_unique_index": "0;0", "aff_unique_norm": "National University of Singapore", "aff_unique_dep": "", "aff_unique_url": "https://www.nus.edu.sg", "aff_unique_abbr": "NUS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Singapore" }, { "id": "GSNoZKqHgO", "title": "Let's Synthesize Step by Step: Iterative Dataset Synthesis with Large Language Models by Extrapolating Errors from Small Models", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "*Data Synthesis* is a promising way to train a small model with very little labeled data. One approach for data synthesis is to leverage the rich knowledge from large language models to synthesize pseudo training examples for small models, making it possible to achieve both data and compute efficiency at the same time. However, a key challenge in data synthesis is that the synthesized dataset often suffers from a large distributional discrepancy from the *real task* data distribution. Thus, in this paper, we propose *Synthesis Step by Step* (**S3**), a data synthesis framework that shrinks this distribution gap by iteratively extrapolating the errors made by a small model trained on the synthesized dataset on a small real-world validation dataset using a large language model. 
Extensive experiments on multiple NLP tasks show that our approach improves the performance of a small model by reducing the gap between the synthetic dataset and the real data, resulting in significant improvement compared to several baselines: 9.48% improvement compared to ZeroGen and 2.73% compared to GoldGen, and at most 15.17% improvement compared to the small model trained on human-annotated data.", "keywords": "Dataset Synthesis;large language models;efficiency", "primary_area": "", "supplementary_material": "", "author": "Ruida WANG;Wangchunshu Zhou;Mrinmaya Sachan", "authorids": "~Ruida_WANG1;~Wangchunshu_Zhou1;~Mrinmaya_Sachan3", "gender": "M;M;M", "homepage": "https://rickyskywalker.com/;https://michaelzhouwang.github.io;https://sites.google.com/site/mrinsachan/", "dblp": "357/3060;245/8640.html;86/10440.html", "google_scholar": "SVAJKx4AAAAJ;UebIjuQAAAAJ;Tpp9ZjoAAAAJ", "or_profile": "~Ruida_WANG1;~Wangchunshu_Zhou1;~MRINMAYA_SACHAN2", "aff": "ETHZ - ETH Zurich;Department of Computer Science, ETHZ - ETH Zurich;Swiss Federal Institute of Technology", "aff_domain": "ethz.ch;inf.ethz.ch;ethz.ch", "position": "Undergrad student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nwang2023lets,\ntitle={Let's Synthesize Step by Step: Iterative Dataset Synthesis with Large Language Models by Extrapolating Errors from Small Models},\nauthor={Ruida WANG and Wangchunshu Zhou and Mrinmaya Sachan},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=GSNoZKqHgO}\n}", "github": "", "project": "", "reviewers": "fvWF;s5eU;UzEm", "site": "https://openreview.net/forum?id=GSNoZKqHgO", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;4;3", "excitement": "3;3;3", "reproducibility": "0;3;5", "correctness": "4;4;4", "rating_avg": 3.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.0, "reproducibility_avg": 2.6666666666666665, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0009-0005-1497-6914;;", "linkedin": "%E7%91%9E%E8%BE%BE-%E7%8E%8B-01961121a/;;", "aff_unique_index": "0;0;1", "aff_unique_norm": "ETH Zurich;Swiss Federal Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.ethz.ch;https://www.ethz.ch", "aff_unique_abbr": "ETHZ;ETH Zurich", "aff_campus_unique_index": "1", "aff_campus_unique": ";Zurich", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Switzerland" }, { "id": "GSnAO2qUHy", "title": "Multiview Clickbait Detection via Jointly Modeling Subjective and Objective Preference", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Clickbait posts tend to spread inaccurate or misleading information to manipulate people's attention and emotions, which greatly harms the credibility of social media. Existing clickbait detection models rely on analyzing the objective semantics in posts or correlating posts with article content only. However, these models fail to identify and exploit the manipulation intention of clickbait from a user's subjective perspective, leading to limited capability to explore comprehensive clues of clickbait. To address such an issue, we propose a multiview clickbait detection model, named MCDM, to model subjective and objective preferences simultaneously. MCDM introduces two novel complementary modules for modeling subjective feeling and objective content relevance, respectively. 
The subjective feeling module adopts a user-centric approach to capture subjective features of posts, such as language patterns and emotional inclinations. The objective module explores news elements from posts and models article content correlations to capture objective clues for clickbait detection. Extensive experimental results on two real-world datasets show that our proposed MCDM outperforms state-of-the-art approaches for clickbait detection, verifying the effectiveness of integrating subjective and objective preferences for detecting clickbait.", "keywords": "Clickbait Detection;Subjective Feeling;Objective Content Relevance;Heterogeneous Dynamic Graph Network", "primary_area": "", "supplementary_material": "", "author": "Chongyang Shi;Yijun Yin;Qi Zhang;Liang Xiao;Usman Naseem;Shoujin Wang;Liang Hu", "authorids": "~Chongyang_Shi1;~Yijun_Yin1;~Qi_Zhang25;~Liang_Xiao4;~Usman_Naseem1;~Shoujin_Wang1;~Liang_Hu1", "gender": "M;F;M;M;;M;M", "homepage": "https://cs.bit.edu.cn/szdw/jsml2/rjznyrjgcyjs2/3c137ad5c6484e8d931719b1612dd35c.htm;https://yin1jun.top;https://sites.google.com/view/qizhang-bit-uts/home;https://github.com/ElleryJallet;https://usmaann.github.io/;https://shoujinwang1.github.io/;https://sites.google.com/view/lianghu/home", "dblp": "68/7942-1.html;;52/323-20;;253/6972.html;16/8492;48/5388-4", "google_scholar": ";;8UAk1p4AAAAJ;;https://scholar.google.com.au/citations?hl=en;BQ0mBRIAAAAJ;https://scholar.google.com.au/citations?user=cj6wAgYAAAAJ", "or_profile": "~Chongyang_Shi1;~Yijun_Yin1;~Qi_Zhang25;~Liang_Xiao4;~Usman_Naseem1;~Shoujin_Wang1;~Liang_Hu1", "aff": "Beijing Institute of Technology;Beijing Institute of Technology;Tongji University;Beijing Institute of Technology;University of Sydney;University of Technology Sydney;Tongji University", "aff_domain": "bit.edu.cn;bit.edu.cn;tongji.edu.cn;bit.edu.cn;sydney.edu.au;uts.edu.au;tongji.edu.cn", "position": "Associate Professor;MS student;Researcher;MS student;PhD student;Lecturer;Full Professor", "bibtex": "@inproceedings{\nshi2023multiview,\ntitle={Multiview Clickbait Detection via Jointly Modeling Subjective and Objective Preference},\nauthor={Chongyang Shi and Yijun Yin and Qi Zhang and Liang Xiao and Usman Naseem and Shoujin Wang and Liang Hu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=GSnAO2qUHy}\n}", "github": "", "project": "", "reviewers": "7Cfx;FUSD;mx4P", "site": "https://openreview.net/forum?id=GSnAO2qUHy", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;3;4", "excitement": "4;4;4", "reproducibility": "4;4;3", "correctness": "4;3;3", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 4.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0002-1037-1361;;0000-0003-0191-7171;0000-0003-1133-9379;", "linkedin": ";;;;usman-naseem-a1568a139/;;", "aff_unique_index": "0;0;1;0;2;3;1", "aff_unique_norm": "Beijing Institute of Technology;Tongji University;University of Sydney;University of Technology Sydney", "aff_unique_dep": ";;;", "aff_unique_url": "http://www.bit.edu.cn/;https://www.tongji.edu.cn;https://www.sydney.edu.au;https://www.uts.edu.au", "aff_unique_abbr": "BIT;Tongji;USYD;UTS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;1;1;0", "aff_country_unique": "China;Australia" }, { "id": "GTqt0X2Swn", 
"title": "Affective and Dynamic Beam Search for Story Generation", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Storytelling's captivating potential makes it a fascinating research area, with implications for entertainment, education, therapy, and cognitive studies. In this paper, we propose Affective Story Generator (AffGen) for generating interesting narratives. AffGen introduces `intriguing twists' in narratives by employing two novel techniques\u2014Dynamic Beam Sizing and Affective Reranking. Dynamic Beam Sizing encourages less predictable, more captivating word choices using a contextual multi-arm bandit model. Affective Reranking prioritizes sentence candidates based on affect intensity. Our empirical evaluations, both automatic and human, demonstrate AffGen's superior performance over existing baselines in generating affectively charged and interesting narratives. Our ablation study and analysis provide insights into the strengths and weaknesses of AffGen.", "keywords": "Discourse Analysis;Sentiment Analysis;Affective Computing", "primary_area": "", "supplementary_material": "", "author": "Tenghao Huang;Ehsan Qasemi;Bangzheng Li;He Wang;Faeze Brahman;Muhao Chen;Snigdha Chaturvedi", "authorids": "~Tenghao_Huang1;~Ehsan_Qasemi1;~Bangzheng_Li1;~He_Wang14;~Faeze_Brahman1;~Muhao_Chen1;~Snigdha_Chaturvedi2", "gender": "M;M;;F;F;M;F", "homepage": ";http://ehsanqasemi.com/;;http://ealac.columbia.edu/selina-wang/;https://fabrahman.github.io;https://muhaochen.github.io/;https://sites.google.com/site/snigdhac/", "dblp": "79/11059;161/4621.html;;;276/6005;173/2608;77/8700", "google_scholar": "cZKQGyQAAAAJ;2snRpBQAAAAJ;UcegV-cAAAAJ;;viCG2ikAAAAJ;k79yEZkAAAAJ;gZD3EesAAAAJ", "or_profile": "~Tenghao_Huang1;~Ehsan_Qasemi1;~Bangzheng_Li1;~He_Wang14;~Faeze_Brahman1;~Muhao_Chen1;~Snigdha_Chaturvedi2", "aff": "University of Southern California;USC/ISI;University of Southern California;Columbia University;Allen Institute for AI;University of Southern California;Department of Computer Science, University of North Carolina, Chapel Hill", "aff_domain": "usc.edu;isi.edu;usc.edu;columbia.edu;allenai.org;usc.edu;cs.unc.edu", "position": "PhD student;Researcher;PhD student;PhD student;Postdoc;Assistant Research Professor;Assistant Professor", "bibtex": "@inproceedings{\nhuang2023affective,\ntitle={Affective and Dynamic Beam Search for Story Generation},\nauthor={Tenghao Huang and Ehsan Qasemi and Bangzheng Li and He Wang and Faeze Brahman and Muhao Chen and Snigdha Chaturvedi},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=GTqt0X2Swn}\n}", "github": "", "project": "", "reviewers": "Y6wU;pHn9;ME8S", "site": "https://openreview.net/forum?id=GTqt0X2Swn", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;4;3", "excitement": "3;3;3", "reproducibility": "4;3;2", "correctness": "4;4;2", "rating_avg": 3.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.0, "reproducibility_avg": 3.0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;0000-0003-0118-3147;", "linkedin": ";;;;;;", "aff_unique_index": "0;0;0;1;2;0;3", "aff_unique_norm": "University of Southern California;Columbia University;Allen Institute for AI;University of North Carolina", "aff_unique_dep": ";;;Department of Computer Science", "aff_unique_url": 
"https://www.usc.edu;https://www.columbia.edu;https://allenai.org;https://www.unc.edu", "aff_unique_abbr": "USC;Columbia;AI2;UNC", "aff_campus_unique_index": "0;1;0;0;3", "aff_campus_unique": "Los Angeles;ISI;;Chapel Hill", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "GWOCiRkjCF", "title": "Precedent-Enhanced Legal Judgment Prediction with LLM and Domain-Model Collaboration", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Legal Judgment Prediction (LJP) has become an increasingly crucial task in Legal AI, i.e., predicting the judgment of the case in terms of case fact description. Precedents are the previous legal cases with similar facts, which are the basis for the judgment of the subsequent case in national legal systems. Thus, it is worthwhile to explore the utilization of precedents in the LJP. Recent advances in deep learning have enabled a variety of techniques to be used to solve the LJP task. These can be broken down into two categories: large language models (LLMs) and domain-specific models. LLMs are capable of interpreting and generating complex natural language, while domain models are efficient in learning task-specific information. In this paper, we propose the precedent-enhanced LJP framework (PLJP) \u2013 a system that leverages the strength of both LLM and domain models in the context of precedents. Specifically, the domain models are designed to provide candidate labels and find the proper precedents efficiently, and the large models will make the final prediction with an in-context precedents comprehension. Experiments on the real-world dataset demonstrate the effectiveness of our PLJP. Moreover, our work shows a promising direction for LLM and domain-model collaboration that can be generalized to other vertical domains.", "keywords": "NLP;Large Language Model;Model Collaboration", "primary_area": "", "supplementary_material": "", "author": "Yiquan Wu;Siying Zhou;Yifei Liu;Weiming Lu;Xiaozhong Liu;Yating Zhang;Changlong Sun;Fei Wu;Kun Kuang", "authorids": "~Yiquan_Wu3;~Siying_Zhou1;~Yifei_Liu1;~Weiming_Lu1;~Xiaozhong_Liu2;~Yating_Zhang1;~Changlong_Sun2;~Fei_Wu2;~Kun_Kuang1", "gender": "M;F;M;;M;F;M;;M", "homepage": "https://wuyiquan.github.io/;https://github.com/JosieZhou00;;;https://www.wpi.edu/people/faculty/xliu14;;;https://person.zju.edu.cn/wufei;http://kunkuang.github.io", "dblp": ";336/0378;;;11/6389.html;29/5889;https://dblp.uni-trier.de/pers/hd/s/Sun:Changlong;84/3254-1;194/4245", "google_scholar": "ZTampvYAAAAJ;https://scholar.google.com/citations?view_op=list_works;VsSb4OoAAAAJ;;1BUByMcAAAAJ;;https://scholar.google.com/citations?;XJLn4MYAAAAJ;https://scholar.google.com.hk/citations?user=FOsNiMQAAAAJ", "or_profile": "~Yiquan_Wu3;~Siying_Zhou1;~Yifei_Liu1;~Weiming_Lu1;~Xiaozhong_Liu2;~Yating_Zhang1;~Changlong_Sun2;~Fei_Wu2;~Kun_Kuang1", "aff": "Zhejiang University;Zhejiang University;Zhejiang University;;Worcester Polytechnic Institute;;Alibaba Group;Zhejiang University;Zhejiang University", "aff_domain": "zju.edu.cn;zju.edu.cn;zju.edu.cn;;wpi.edu;;alibaba-inc.com;zju.edu.cn;zju.edu.cn", "position": "PhD student;PhD student;MS student;;Associate Professor;;Researcher;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nwu2023precedentenhanced,\ntitle={Precedent-Enhanced Legal Judgment Prediction with {LLM} and Domain-Model Collaboration},\nauthor={Yiquan Wu and Siying Zhou and Yifei Liu and Weiming Lu and Xiaozhong Liu and Yating Zhang and Changlong Sun and Fei Wu and Kun 
Kuang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=GWOCiRkjCF}\n}", "github": "", "project": "", "reviewers": "ZR8e;GcEk;9C2g", "site": "https://openreview.net/forum?id=GWOCiRkjCF", "pdf_size": 0, "rating": "4;4;4", "confidence": "5;5;4", "excitement": "4;4;4", "reproducibility": "4;3;3", "correctness": "2;4;3", "rating_avg": 4.0, "confidence_avg": 4.666666666666667, "excitement_avg": 4.0, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 9, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0009-0007-3251-0733;;;;;;;0009-0000-7528-8131", "linkedin": ";;;;;;;;", "aff_unique_index": "0;0;0;1;2;0;0", "aff_unique_norm": "Zhejiang University;Worcester Polytechnic Institute;Alibaba Group", "aff_unique_dep": ";;", "aff_unique_url": "https://www.zju.edu.cn;https://www.wpi.edu;https://www.alibaba.com", "aff_unique_abbr": "ZJU;WPI;Alibaba", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;0;0;0", "aff_country_unique": "China;United States" }, { "id": "GbApUL7sDL", "title": "Do \u201cEnglish\u201d Named Entity Recognizers Work Well on Global Englishes?", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "The vast majority of the popular English named entity recognition (NER) datasets contain American or British English data, despite the existence of many global varieties of English. As such, it is unclear whether they generalize for analyzing use of English globally. To test this, we build a newswire dataset, the Worldwide English NER Dataset, to analyze NER model performance on low-resource English variants from around the world. We test widely used NER toolkits and transformer models, including models using the pre-trained contextual models RoBERTa and ELECTRA, on three datasets: a commonly used British English newswire dataset, CoNLL 2003, a more American focused dataset OntoNotes, and our global dataset. All models trained on the CoNLL or OntoNotes datasets experienced significant performance drops---over 10 F1 in some cases---when tested on the Worldwide English dataset. Upon examination of region-specific errors, we observe the greatest performance drops for Oceania and Africa, while Asia and the Middle East had comparatively strong performance. 
Lastly, we find that a combined model trained on the Worldwide dataset and either CoNLL or OntoNotes lost only 1-2 F1 on both test sets.", "keywords": "NER;global english", "primary_area": "", "supplementary_material": "", "author": "Alexander Shan;John Bauer;Riley Carlson;Christopher D Manning", "authorids": "~Alexander_Shan1;~John_Bauer1;~Riley_Carlson1;~Christopher_D_Manning1", "gender": "M;M;F;M", "homepage": ";;;https://nlp.stanford.edu/~manning/", "dblp": ";07/7435;;m/ChristopherDManning", "google_scholar": ";;;1zmDOdwAAAAJ", "or_profile": "~Alexander_Shan1;~John_Bauer1;~Riley_Carlson1;~Christopher_D_Manning1", "aff": "Stanford University;Stanford University;Stanford University;Computer Science Department, Stanford University", "aff_domain": "stanford.edu;stanford.edu;stanford.edu;cs.stanford.edu", "position": "Undergrad student;Researcher;Undergrad student;Full Professor", "bibtex": "@inproceedings{\nshan2023do,\ntitle={Do {\\textquotedblleft}English{\\textquotedblright} Named Entity Recognizers Work Well on Global Englishes?},\nauthor={Alexander Shan and John Bauer and Riley Carlson and Christopher D Manning},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=GbApUL7sDL}\n}", "github": "", "project": "", "reviewers": "cg6A;nGXt;Mety", "site": "https://openreview.net/forum?id=GbApUL7sDL", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;3;3", "excitement": "4;3;3", "reproducibility": "4;3;4", "correctness": "4;4;3", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0000-0001-6155-649X", "linkedin": "alexander-shan-446321202/;;riley-carlson;christopher-manning-011575/", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "", "aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Stanford", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "GeFFYOCkvS", "title": "Multilingual Coarse Political Stance Classification of Media. The Editorial Line of a ChatGPT and Bard Newspaper", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "Neutrality is difficult to achieve and, in politics, subjective. Traditional media typically adopt an editorial line that can be used by their potential readers as an indicator of the media bias. Several platforms currently rate news outlets according to their political bias. The editorial line and the ratings help readers in gathering a balanced view of news. But in the advent of instruction-following language models, tasks such as writing a newspaper article can be delegated to computers. Without imposing a biased persona, where would an AI-based news outlet lie within the bias ratings? In this work, we use the ratings of authentic news outlets to create a multilingual corpus of news with coarse stance annotations (Left and Right) along with automatically extracted topic annotations. We show that classifiers trained on this data are able to identify the editorial line of most unseen newspapers in English, German, Spanish and Catalan. 
We then apply the classifiers to 101 newspaper-like articles written by ChatGPT and Bard in the 4 languages at different time periods. We observe that, similarly to traditional newspapers, ChatGPT editorial line evolves with time and, being a data-driven system, the stance of the generated articles differs among languages.", "keywords": "political bias;news;stance classification;instruction-following language models;chatGPT;Bard", "primary_area": "", "supplementary_material": "", "author": "Cristina Espa\u00f1a-Bonet", "authorids": "~Cristina_Espa\u00f1a-Bonet1", "gender": "F", "homepage": "https://www.cs.upc.edu/~cristinae/CV/cv.php", "dblp": "59/7935", "google_scholar": "", "or_profile": "~Cristina_Espa\u00f1a-Bonet1", "aff": "German Research Center for AI", "aff_domain": "dfki.de", "position": "Researcher", "bibtex": "@inproceedings{\nespa{\\~n}a-bonet2023multilingual,\ntitle={Multilingual Coarse Political Stance Classification of Media. The Editorial Line of a Chat{GPT} and Bard Newspaper},\nauthor={Cristina Espa{\\~n}a-Bonet},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=GeFFYOCkvS}\n}", "github": "", "project": "", "reviewers": "3VUk;75sN;2C5A", "site": "https://openreview.net/forum?id=GeFFYOCkvS", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;3;4", "excitement": "3;3;4", "reproducibility": "3;2;4", "correctness": "4;3;4", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.0, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 1, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0001-5414-4710", "linkedin": "", "aff_unique_index": "0", "aff_unique_norm": "German Research Center for Artificial Intelligence", "aff_unique_dep": "", "aff_unique_url": "https://www.dfki.de/", "aff_unique_abbr": "DFKI", "aff_country_unique_index": "0", "aff_country_unique": "Germany" }, { "id": "GgriuyaTZU", "title": "Ultra-Fine Entity Typing with Prior Knowledge about Labels: A Simple Clustering Based Strategy", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Ultra-fine entity typing (UFET) is the task of inferring the semantic types from a large set of fine-grained candidates that apply to a given entity mention. This task is especially challenging because we only have a small number of training examples for many types, even with distant supervision strategies. State-of-the-art models, therefore, have to rely on prior knowledge about the type labels in some way. In this paper, we show that the performance of existing methods can be improved using a simple technique: we use pre-trained label embeddings to cluster the labels into semantic domains and then treat these domains as additional types. We show that this strategy consistently leads to improved results as long as high-quality label embeddings are used. Furthermore, we use the label clusters as part of a simple post-processing technique, which results in further performance gains. 
Both strategies treat the UFET model as a black box and can thus straightforwardly be used to improve a wide range of existing models.", "keywords": "ultra-fine entity typing;word embeddings;conceptual neighbourhood", "primary_area": "", "supplementary_material": "", "author": "Na Li;Zied Bouraoui;Steven Schockaert", "authorids": "~Na_Li10;~Zied_Bouraoui1;~Steven_Schockaert2", "gender": "F;M;M", "homepage": ";;https://www.cardiff.ac.uk/people/view/133772-schockaert-steven", "dblp": "18/3173-18;134/4606;29/3972.html", "google_scholar": "JZUxMuwAAAAJ;f_6RpYEAAAAJ;https://scholar.google.co.uk/citations?user=hNCN09AAAAAJ", "or_profile": "~Na_Li10;~Zied_Bouraoui1;~Steven_Schockaert1", "aff": "School of Optical-Electrical and Computer Engineering, University of Shanghai for Science and Technology;;Cardiff University", "aff_domain": "usst.edu.cn;;cardiff.ac.uk", "position": "Assistant Professor;;Full Professor", "bibtex": "@inproceedings{\nli2023ultrafine,\ntitle={Ultra-Fine Entity Typing with Prior Knowledge about Labels: A Simple Clustering Based Strategy},\nauthor={Na Li and Zied Bouraoui and Steven Schockaert},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=GgriuyaTZU}\n}", "github": "", "project": "", "reviewers": "h56F;AV7U;QZ94", "site": "https://openreview.net/forum?id=GgriuyaTZU", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;4;4", "excitement": "3;3;3", "reproducibility": "4;3;3", "correctness": "4;3;4", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.0, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0009-0000-6776-3908;0000-0002-1662-4163;", "linkedin": ";;", "aff_unique_index": "0;1", "aff_unique_norm": "University of Shanghai for Science and Technology;Cardiff University", "aff_unique_dep": "School of Optical-Electrical and Computer Engineering;", "aff_unique_url": "https://www.usst.edu.cn;https://www.cardiff.ac.uk", "aff_unique_abbr": "USST;Cardiff", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "China;United Kingdom" }, { "id": "GnEGvlOcwr", "title": "Error Detection for Text-to-SQL Semantic Parsing", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Despite remarkable progress in text-to-SQL semantic parsing in recent years, the performance of existing parsers is still far from perfect. \nSpecifically, modern text-to-SQL parsers based on deep learning are often over-confident, thus casting doubt on their trustworthiness when deployed for real use. In this paper, we propose a parser-independent error detection model for text-to-SQL semantic parsing.\nUsing a language model of code as its bedrock, we enhance our error detection model with graph neural networks that learn structural features of both natural language questions and SQL queries. We train our model on realistic parsing errors collected from a cross-domain setting, which leads to stronger generalization ability. Experiments with three strong text-to-SQL parsers featuring different decoding mechanisms show that our approach outperforms parser-dependent uncertainty metrics. 
Our model could also effectively improve the performance and usability of text-to-SQL semantic parsers regardless of their architectures.", "keywords": "Semantic Parsing;Text-to-SQL;Error Detection", "primary_area": "", "supplementary_material": "", "author": "Shijie Chen;Ziru Chen;Huan Sun;Yu Su", "authorids": "~Shijie_Chen1;~Ziru_Chen1;~Huan_Sun1;~Yu_Su2", "gender": "M;M;F;M", "homepage": "https://chensj98.github.io/;https://ronch99.github.io/;https://u.osu.edu/ihudas/people/;http://ysu1989.github.io", "dblp": ";200/8335;33/2952-1.html;38/1070-1", "google_scholar": "KXSlX3sAAAAJ;1-pt7zMAAAAJ;wIFkulcAAAAJ;rIh5OqoAAAAJ", "or_profile": "~Shijie_Chen1;~Ziru_Chen1;~Huan_Sun1;~Yu_Su2", "aff": "Ohio State University, Columbus;Ohio State University, Columbus;The Ohio State University, Columbus;Microsoft", "aff_domain": "osu.edu;osu.edu;osu.edu;microsoft.com", "position": "PhD student;PhD student;Associate Professor;Senior Researcher", "bibtex": "@inproceedings{\nchen2023error,\ntitle={Error Detection for Text-to-{SQL} Semantic Parsing},\nauthor={Shijie Chen and Ziru Chen and Huan Sun and Yu Su},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=GnEGvlOcwr}\n}", "github": "", "project": "", "reviewers": "jWP3;nSMo;12YA", "site": "https://openreview.net/forum?id=GnEGvlOcwr", "pdf_size": 0, "rating": "2;2;2", "confidence": "4;3;4", "excitement": "4;3;2", "reproducibility": "4;3;4", "correctness": "5;3;3", "rating_avg": 2.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;", "linkedin": ";;huan-sun-81527924/?originalSubdomain=cn;", "aff_unique_index": "0;0;0;1", "aff_unique_norm": "Ohio State University;Microsoft", "aff_unique_dep": ";Microsoft Corporation", "aff_unique_url": "https://www.osu.edu;https://www.microsoft.com", "aff_unique_abbr": "OSU;Microsoft", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Columbus;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "Gp8EmdJLUj", "title": "Simplicity Level Estimate (SLE): A Learned Reference-Less Metric for Sentence Simplification", "track": "main", "status": "Short Main", "tldr": "", "abstract": "Automatic evaluation for sentence simplification remains a challenging problem. Most popular evaluation metrics require multiple high-quality references -- something not readily available for simplification -- which makes it difficult to test performance on unseen domains. Furthermore, most existing metrics conflate simplicity with correlated attributes such as fluency or meaning preservation. 
We propose a new learned evaluation metric --- SLE --- which focuses on simplicity, outperforming almost all existing metrics in terms of correlation with human judgements.", "keywords": "simplification;evaluation;quality estimation", "primary_area": "", "supplementary_material": "", "author": "Liam Cripwell;Jo\u00ebl Legrand;Claire Gardent", "authorids": "~Liam_Cripwell1;~Jo\u00ebl_Legrand1;~Claire_Gardent1", "gender": ";M;F", "homepage": ";joel-legrand.fr;https://members.loria.fr/CGardent/", "dblp": "204/0078;;71/6819", "google_scholar": "s3DDjLYAAAAJ;https://scholar.google.fr/citations?user=hY-NCjsAAAAJ;gHC1paQAAAAJ", "or_profile": "~Liam_Cripwell1;~Jo\u00ebl_Legrand1;~Claire_Gardent1", "aff": "LORIA;Swiss Federal Institute of Technology Lausanne;CNRS", "aff_domain": "loria.fr;epfl.ch;cnrs.fr", "position": "PhD student;PhD student;Principal Researcher", "bibtex": "@inproceedings{\ncripwell2023simplicity,\ntitle={Simplicity Level Estimate ({SLE}): A Learned Reference-Less Metric for Sentence Simplification},\nauthor={Liam Cripwell and Jo{\\\"e}l Legrand and Claire Gardent},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=Gp8EmdJLUj}\n}", "github": "", "project": "", "reviewers": "1Hnr;eCtA;SwXe;bXBN", "site": "https://openreview.net/forum?id=Gp8EmdJLUj", "pdf_size": 0, "rating": "4;4;4;4", "confidence": "3;1;4;4", "excitement": "4;4;4;3", "reproducibility": "4;5;4;4", "correctness": "3;4;4;4", "rating_avg": 4.0, "confidence_avg": 3.0, "excitement_avg": 3.75, "reproducibility_avg": 4.25, "correctness_avg": 3.75, "replies_avg": 14, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0002-3805-6662", "linkedin": ";;claire-gardent-70116341/?originalSubdomain=fr", "aff_unique_index": "0;1;2", "aff_unique_norm": "LORIA;Swiss Federal Institute of Technology Lausanne;Centre National de la Recherche Scientifique", "aff_unique_dep": ";;", "aff_unique_url": "https://www.loria.fr;https://www.epfl.ch;https://www.cnrs.fr", "aff_unique_abbr": ";EPFL;CNRS", "aff_campus_unique_index": "1", "aff_campus_unique": ";Lausanne", "aff_country_unique_index": "0;1;0", "aff_country_unique": "France;Switzerland" }, { "id": "GprvtTwOxy", "title": "Unlearn What You Want to Forget: Efficient Unlearning for LLMs", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Large language models (LLMs) have achieved significant progress from pre-training on and memorizing a wide range of textual data, however, this process might suffer from privacy issues and violations of data protection regulations. As a result, the ability to easily remove data related to individual users from such models while not deteriorating their predictive quality after the removal becomes increasingly important. To address these issues, in this work, we propose an efficient unlearning framework that could efficiently update LLMs without having to retrain the whole model after data removals, by introducing lightweight unlearning layers learned with a selective teacher-student objective into the transformers. In addition, we introduce a fusion mechanism to effectively combine different unlearning layers that learns to forget different sets of data to handle a sequence of forgetting operations. 
Experiments on classification and generation tasks demonstrate the effectiveness of our proposed methods compared to the state-of-the-art baselines.", "keywords": "Efficient Unlearning;Large Language Model", "primary_area": "", "supplementary_material": "", "author": "Jiaao Chen;Diyi Yang", "authorids": "~Jiaao_Chen2;~Diyi_Yang2", "gender": "M;F", "homepage": "https://cs.stanford.edu/people/jiaaoc/;https://cs.stanford.edu/~diyiy/", "dblp": "230/3663;70/11145", "google_scholar": "Pi9IVvUAAAAJ;j9jhYqQAAAAJ", "or_profile": "~Jiaao_Chen2;~Diyi_Yang2", "aff": "Georgia Institute of Technology;Stanford University", "aff_domain": "gatech.edu;stanford.edu", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\nchen2023unlearn,\ntitle={Unlearn What You Want to Forget: Efficient Unlearning for {LLM}s},\nauthor={Jiaao Chen and Diyi Yang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=GprvtTwOxy}\n}", "github": "", "project": "", "reviewers": "41eo;sR7i;wd9K", "site": "https://openreview.net/forum?id=GprvtTwOxy", "pdf_size": 0, "rating": "4;4;4", "confidence": "2;3;4", "excitement": "3;3;4", "reproducibility": "3;3;4", "correctness": "3;3;4", "rating_avg": 4.0, "confidence_avg": 3.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";", "linkedin": ";", "aff_unique_index": "0;1", "aff_unique_norm": "Georgia Institute of Technology;Stanford University", "aff_unique_dep": ";", "aff_unique_url": "https://www.gatech.edu;https://www.stanford.edu", "aff_unique_abbr": "Georgia Tech;Stanford", "aff_campus_unique_index": "1", "aff_campus_unique": ";Stanford", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "Grj9GJUcuZ", "title": "SimCSE++: Improving Contrastive Learning for Sentence Embeddings from Two Perspectives", "track": "main", "status": "Long Main", "tldr": "", "abstract": "This paper improves contrastive learning for sentence embeddings from two perspectives: handling dropout noise and addressing feature corruption. Specifically, for the first perspective, we identify that the dropout noise from negative pairs affects the model's performance. Therefore, we propose a simple yet effective method to deal with such type of noise. Secondly, we pinpoint the rank bottleneck of current solutions to feature corruption and propose a dimension-wise contrastive learning objective to address this issue. Both proposed methods are generic and can be applied to any contrastive learning based models for sentence embeddings. Experimental results on standard benchmarks demonstrate that combining both proposed methods leads to a gain of 1.8 points compared to the strong baseline SimCSE configured with BERT base. 
Furthermore, applying the proposed method to DiffCSE, another strong contrastive learning based baseline, results in a gain of 1.4 points.", "keywords": "sentence embedding;contrastive learning;dimention-wise contrastive learning", "primary_area": "", "supplementary_material": "", "author": "Jiahao Xu;Wei Shao;Lihui Chen;Lemao Liu", "authorids": "~Jiahao_Xu1;~Wei_Shao5;~Lihui_Chen1;~Lemao_Liu3", "gender": ";M;Unspecified;M", "homepage": "http://jiahao004.github.io/;;;https://lemaoliu.github.io/homepage/", "dblp": "205/4200-1;;56/1277;41/10887.html", "google_scholar": "FlsBVrIAAAAJ;4o57IEAAAAAJ;;", "or_profile": "~Jiahao_Xu1;~Wei_Shao5;~Lihui_Chen1;~lemao_liu1", "aff": "Nanyang Technological University;City University of Hong Kong;Nanyang Technological University;Tencent", "aff_domain": "ntu.edu.sg;cityu.edu.hk;ntu.edu.sg;tencent.com", "position": "PhD student;PhD student;Associate Professor;Researcher", "bibtex": "@inproceedings{\nxu2023simcse,\ntitle={Sim{CSE}++: Improving Contrastive Learning for Sentence Embeddings from Two Perspectives},\nauthor={Jiahao Xu and Wei Shao and Lihui Chen and Lemao Liu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=Grj9GJUcuZ}\n}", "github": "", "project": "", "reviewers": "RiEf;Amxx;3KpX", "site": "https://openreview.net/forum?id=Grj9GJUcuZ", "pdf_size": 0, "rating": "5;5;5", "confidence": "3;5;3", "excitement": "4;4;4", "reproducibility": "4;2;4", "correctness": "4;4;4", "rating_avg": 5.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 4.0, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0001-6776-8215;;;", "linkedin": ";;;", "aff_unique_index": "0;1;0;2", "aff_unique_norm": "Nanyang Technological University;City University of Hong Kong;Tencent", "aff_unique_dep": ";;Tencent Holdings Limited", "aff_unique_url": "https://www.ntu.edu.sg;https://www.cityu.edu.hk;https://www.tencent.com", "aff_unique_abbr": "NTU;CityU;Tencent", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;1;0;1", "aff_country_unique": "Singapore;China" }, { "id": "Gzuzpl4Jje", "title": "Continual Learning for Multilingual Neural Machine Translation via Dual Importance-based Model Division", "track": "main", "status": "Long Main", "tldr": "", "abstract": "A persistent goal of multilingual neural machine translation (MNMT) is to continually adapt the model to support new language pairs or improve some current language pairs without accessing the previous training data. To achieve this, the existing methods primarily focus on preventing catastrophic forgetting by making compromises between the original and new language pairs, leading to sub-optimal performance on both translation tasks. To mitigate this problem, we propose a dual importance-based model division method to divide the model parameters into two parts and separately model the translation of the original and new tasks. Specifically, we first remove the parameters that are negligible to the original tasks but essential to the new tasks to obtain a pruned model, which is responsible for the original translation tasks. Then we expand the pruned model with external parameters and fine-tune the newly added parameters with new training data. The whole fine-tuned model will be used for the new translation tasks. 
Experimental results show that our method can efficiently adapt the original model to various new translation tasks while retaining the performance of the original tasks. Further analyses demonstrate that our method consistently outperforms several strong baselines under different incremental translation scenarios.", "keywords": "Multilingual Neural Machine Translation;Continual Learning;Model Pruning", "primary_area": "", "supplementary_material": "", "author": "Junpeng Liu;Kaiyu Huang;Hao Yu;Jiuyi Li;Jinsong Su;Degen Huang", "authorids": "~Junpeng_Liu1;~Kaiyu_Huang1;~Hao_Yu16;~Jiuyi_Li1;~Jinsong_Su1;~Degen_Huang1", "gender": "M;M;M;F;M;M", "homepage": ";https://kaiyuhwang.github.io/;;https://github.com/lijiuy;https://cdmc.xmu.edu.cn/info/1010/1054.htm;", "dblp": "125/9435;191/2871;;273/1293;05/9013;67/5547", "google_scholar": "https://scholar.google.com/citations?hl=en;qAp-hS4AAAAJ;EnL3RtwAAAAJ;;;", "or_profile": "~Junpeng_Liu1;~Kaiyu_Huang1;~Hao_Yu16;~Jiuyi_Li1;~Jinsong_Su1;~Degen_Huang1", "aff": ";Tsinghua University;Dalian University of Technology;Dalian University of Technology;Xiamen University;", "aff_domain": ";tsinghua.edu.cn;mail.dlut.edu.cn;mail.dlut.edu.cn;xmu.edu.cn;", "position": ";Postdoc;PhD student;PhD student;Researcher;", "bibtex": "@inproceedings{\nliu2023continual,\ntitle={Continual Learning for Multilingual Neural Machine Translation via Dual Importance-based Model Division},\nauthor={Junpeng Liu and Kaiyu Huang and Hao Yu and Jiuyi Li and Jinsong Su and Degen Huang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=Gzuzpl4Jje}\n}", "github": "", "project": "", "reviewers": "eCJC;5Yj3;Tywa;5LWe", "site": "https://openreview.net/forum?id=Gzuzpl4Jje", "pdf_size": 0, "rating": "4;4;4;4", "confidence": "4;3;4;3", "excitement": "4;2;4;3", "reproducibility": "5;4;3;3", "correctness": "4;3;3;3", "rating_avg": 4.0, "confidence_avg": 3.5, "excitement_avg": 3.25, "reproducibility_avg": 3.75, "correctness_avg": 3.25, "replies_avg": 13, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0001-6779-1810;0009-0004-2060-6816;;;", "linkedin": ";;;;;", "aff_unique_index": "0;1;1;2", "aff_unique_norm": "Tsinghua University;Dalian University of Technology;Xiamen University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.tsinghua.edu.cn;http://www.dlut.edu.cn/;https://www.xmu.edu.cn", "aff_unique_abbr": "THU;DUT;XMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "H0SoE2ch5l", "title": "Context Quality Matters in Training Fusion-in-Decoder for Extractive Open-Domain Question Answering", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Retrieval-augmented generation models augment knowledge encoded in a language model by providing additional relevant external knowledge (context) during generation. Although it has been shown that the quantity and quality of context impact the performance of retrieval-augmented generation models during inference, limited research explores how these characteristics affect model training. This paper explores how context quantity and quality during model training affect the performance of Fusion-in-Decoder (FiD), the state-of-the-art retrieval-augmented generation model, in extractive open-domain question answering tasks. 
Experimental results suggest that FiD models overfit to context quality during training and show suboptimal performance when evaluated on different context quality. Through the experimental results, we also reveal FiD models trained with different context quality have different cross-attention distribution patterns. Specifically, as context quality during training increases, FiD models tend to attend more uniformly to each passage in context. Finally, based on these observations, we propose a method to mitigate overfitting to specific context quality by introducing bias to the cross-attention distribution, which we demonstrate to be effective in improving the performance of FiD models on different context quality.", "keywords": "retrieval-augmented generation models;fusion-in-decoder;question answering", "primary_area": "", "supplementary_material": "", "author": "Kosuke Akimoto;Kunihiro Takeoka;Masafumi Oyamada", "authorids": "~Kosuke_Akimoto1;~Kunihiro_Takeoka1;~Masafumi_Oyamada1", "gender": "M;M;M", "homepage": ";https://kuni88.github.io/;https://mooz.github.io/", "dblp": "224/2069;245/3596;28/11004", "google_scholar": ";https://scholar.google.co.jp/citations?user=A5N_jNsAAAAJ;https://scholar.google.com/citations?hl=en", "or_profile": "~Kosuke_Akimoto1;~Kunihiro_Takeoka1;~Masafumi_Oyamada1", "aff": "NEC Corporation;NEC;NEC", "aff_domain": "nec.com;nec.com;nec.com", "position": "Researcher;Researcher;Principal Researcher", "bibtex": "@inproceedings{\nakimoto2023context,\ntitle={Context Quality Matters in Training Fusion-in-Decoder for Extractive Open-Domain Question Answering},\nauthor={Kosuke Akimoto and Kunihiro Takeoka and Masafumi Oyamada},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=H0SoE2ch5l}\n}", "github": "", "project": "", "reviewers": "wXc7;t9yE;mQfV", "site": "https://openreview.net/forum?id=H0SoE2ch5l", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;3", "excitement": "4;3;3", "reproducibility": "4;5;4", "correctness": "3;4;4", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.333333333333333, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0002-4045-7350", "linkedin": ";;", "aff_unique_index": "0;0;0", "aff_unique_norm": "NEC Corporation", "aff_unique_dep": "", "aff_unique_url": "https://www.nec.com", "aff_unique_abbr": "NEC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Japan" }, { "id": "H5vtCpKisA", "title": "Visually-Situated Natural Language Understanding with Contrastive Reading Model and Frozen Large Language Models", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Recent advances in Large Language Models (LLMs) have stimulated a surge of research aimed at extending their applications to the visual domain. While these models exhibit promise in generating abstract image captions and facilitating natural conversations, their performance on text-rich images still requires improvement. In this paper, we introduce Contrastive Reading Model (Cream), a novel neural architecture designed to enhance the language-image understanding capability of LLMs by capturing intricate details that are often overlooked in existing methods. 
Cream combines vision and auxiliary encoders, fortified by a contrastive feature alignment technique, to achieve a more effective comprehension of language information in visually situated contexts within the images. Our approach bridges the gap between vision and language understanding, paving the way for the development of more sophisticated Document Intelligence Assistants. Through rigorous evaluations across diverse visually-situated language understanding tasks that demand reasoning capabilities, we demonstrate the compelling performance of Cream, positioning it as a prominent model in the field of visual document understanding. We provide our codebase and newly-generated datasets at https://github.com/naver-ai/cream.", "keywords": "Visual Document Understanding;OCR;Contrastive Learning;Multimodal Language Models;Document Visual Question Answering;Transformer-based Models", "primary_area": "", "supplementary_material": "", "author": "Geewook Kim;Hodong Lee;Daehee Kim;Haeji Jung;Sanghee Park;Yoonsik Kim;Sangdoo Yun;Taeho Kil;Bado Lee;Seunghyun Park", "authorids": "~Geewook_Kim1;~Hodong_Lee2;~Daehee_Kim1;~Haeji_Jung1;~Sanghee_Park1;~Yoonsik_Kim2;~Sangdoo_Yun1;~Taeho_Kil1;~Bado_Lee1;~Seunghyun_Park2", "gender": ";M;M;F;M;M;M;M;M;M", "homepage": "https://geewook.kim;;;https://letme-hj.github.io/;;;https://sangdooyun.github.io/;;https://scholar.google.com/citations?user=UAcfGOgAAAAJ&hl=en;", "dblp": "227/2171;44/4663;31/373-3;348/5971;180/6059;194/2556;124/3009.html;315/9685;83/8373;", "google_scholar": "1a2QbgEAAAAJ;XRuGyvkAAAAJ;x_tWgpsAAAAJ;wPT3kwkAAAAJ;_32kh3MAAAAJ;nuxd_BsAAAAJ;o0qtjzYAAAAJ;https://scholar.google.co.kr/citations?user=cV4h5MsAAAAJ;;iowjmTwAAAAJ", "or_profile": "~Geewook_Kim1;~Hodong_Lee2;~Daehee_Kim1;~Haeji_Jung1;~Sanghee_Park1;~Yoonsik_Kim2;~Sangdoo_Yun1;~Taeho_Kil1;~Bado_Lee1;~Seunghyun_Park2", "aff": "NAVER;NAVER;NAVER Cloud;NAVER;NAVER Cloud;NAVER Cloud;NAVER;NAVER;NAVER Cloud;NAVER Cloud", "aff_domain": "navercorp.com;navercorp.com;navercorp.com;navercorp.com;navercorp.com;navercorp.com;navercorp.com;navercorp.com;navercorp.com;navercorp.com", "position": "Researcher;Researcher;Researcher;Intern;Researcher;Researcher;Research Scientist;Researcher;Researcher;Researcher", "bibtex": "@inproceedings{\nkim2023visuallysituated,\ntitle={Visually-Situated Natural Language Understanding with Contrastive Reading Model and Frozen Large Language Models},\nauthor={Geewook Kim and Hodong Lee and Daehee Kim and Haeji Jung and Sanghee Park and Yoonsik Kim and Sangdoo Yun and Taeho Kil and Bado Lee and Seunghyun Park},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=H5vtCpKisA}\n}", "github": "", "project": "", "reviewers": "ZnEz;h5KP;oQuR", "site": "https://openreview.net/forum?id=H5vtCpKisA", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;3;4", "excitement": "4;4;4", "reproducibility": "4;4;3", "correctness": "4;3;3", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 4.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 10, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0003-1833-663X;0000-0001-9676-9604;0009-0008-8347-7432;;0000-0001-8023-8278;;0000-0003-1607-2079;;0000-0002-8509-9163", "linkedin": ";;;haeji-jung-561099240;;yoonsik-kim-b31a0b14b?originalSubdomain=kr;;;;seunghyun-park-716a1514a/?locale=en_US", "aff_unique_index": "0;0;0;0;0;0;0;0;0;0", 
"aff_unique_norm": "NAVER Corporation", "aff_unique_dep": "", "aff_unique_url": "https://www.naver.com", "aff_unique_abbr": "NAVER", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "South Korea" }, { "id": "HFbtrmefx7", "title": "FedTherapist: Mental Health Monitoring with User-Generated Linguistic Expressions on Smartphones via Federated Learning", "track": "main", "status": "Short Main", "tldr": "", "abstract": "Psychiatrists diagnose mental disorders via the linguistic use of patients. Still, due to data privacy, existing passive mental health monitoring systems use alternative features such as activity, app usage, and location via mobile devices. We propose FedTherapist, a mobile mental health monitoring system that utilizes continuous speech and keyboard input in a privacy-preserving way via federated learning. We explore multiple model designs by comparing their performance and overhead for FedTherapist to overcome the complex nature of on-device language model training on smartphones. We further propose a Context-Aware Language Learning (CALL) methodology to effectively utilize smartphones' large and noisy text for mental health signal sensing. Our IRB-approved evaluation of the prediction of self-reported depression, stress, anxiety, and mood from 46 participants shows higher accuracy of FedTherapist compared with the performance with non-language features, achieving 0.15 AUROC improvement and 8.21% MAE reduction.", "keywords": "mental health monitoring;federated learning;mobile healthcare;on-device ML;speech", "primary_area": "", "supplementary_material": "", "author": "Jaemin Shin;Hyungjun Yoon;Seungjoo Lee;Sungjoon Park;Yunxin Liu;Jinho D. Choi;Sung-Ju Lee", "authorids": "~Jaemin_Shin1;~Hyungjun_Yoon1;~Seungjoo_Lee1;~Sungjoon_Park1;~Yunxin_Liu2;~Jinho_D._Choi1;~Sung-Ju_Lee1", "gender": "M;M;M;M;;M;M", "homepage": "https://jaemin-shin.github.io/;https://www.hyungjun-yoon.com/;;https://sungjoonpark.github.io;;http://www.cs.emory.edu/~choi;https://nmsl.kaist.ac.kr/sjlee", "dblp": ";;;63/1326;;17/8156;28/1552", "google_scholar": "d94q-zQAAAAJ;https://scholar.google.com/citations?hl=en;;bDihJCQAAAAJ;;xdddblAAAAAJ;https://scholar.google.com/citations?hl=en", "or_profile": "~Jaemin_Shin1;~Hyungjun_Yoon1;~Seungjoo_Lee1;~Sungjoon_Park1;~Yunxin_Liu2;~Jinho_D._Choi1;~Sung-Ju_Lee1", "aff": "Cisco;Korea Advanced Institute of Science & Technology;KAIST;Korea Advanced Institute of Science & Technology;;Emory University;Korea Advanced Institute of Science & Technology", "aff_domain": "cisco.com;kaist.edu;ee.kaist.ac.kr;kaist.ac.kr;;emory.edu;kaist.ac.kr", "position": "Intern;PhD student;MS student;PhD student;;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nshin2023fedtherapist,\ntitle={FedTherapist: Mental Health Monitoring with User-Generated Linguistic Expressions on Smartphones via Federated Learning},\nauthor={Jaemin Shin and Hyungjun Yoon and Seungjoo Lee and Sungjoon Park and Yunxin Liu and Jinho D. 
Choi and Sung-Ju Lee},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=HFbtrmefx7}\n}", "github": "", "project": "", "reviewers": "PGBC;YJZA;Y3PY", "site": "https://openreview.net/forum?id=HFbtrmefx7", "pdf_size": 0, "rating": "5;5;5", "confidence": "1;4;3", "excitement": "3;4;3", "reproducibility": "2;3;2", "correctness": "3;3;3", "rating_avg": 5.0, "confidence_avg": 2.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 2.3333333333333335, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0009-0003-3359-3842;;;0000-0003-2693-6934;0000-0002-5518-2126", "linkedin": ";;seungjoolee99/;sungjoon-park-815b6456/;;jinho-choi/;sungjulee/", "aff_unique_index": "0;1;1;1;2;1", "aff_unique_norm": "Cisco Systems;Korea Advanced Institute of Science and Technology;Emory University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.cisco.com;https://www.kaist.ac.kr;https://www.emory.edu", "aff_unique_abbr": "Cisco;KAIST;Emory", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1;0;1", "aff_country_unique": "United States;South Korea" }, { "id": "HGLvAAKNKx", "title": "An Empirical Study of Translation Hypothesis Ensembling with Large Language Models", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Large language models (LLMs) are becoming a one-fits-many solution, but they sometimes hallucinate or produce unreliable output. In this paper, we investigate how hypothesis ensembling can improve the quality of the generated text for the specific problem of LLM-based machine translation. We experiment with several techniques for ensembling hypotheses produced by LLMs such as ChatGPT, LLaMA, and Alpaca. We provide a comprehensive study along multiple dimensions, including the method to generate hypotheses (multiple prompts, temperature-based sampling, and beam search) and the strategy to produce the final translation (instruction-based, quality-based reranking, and minimum Bayes risk (MBR) decoding). Our results show that MBR decoding is a very effective method, that translation quality can be improved using a small number of samples, and that instruction tuning has a strong impact on the relation between the diversity of the hypotheses and the sampling temperature.", "keywords": "Translation;Large Language Models;hypothesis ensembling;reranking;MBR decoding", "primary_area": "", "supplementary_material": "", "author": "Ant\u00f3nio Farinhas;Jos\u00e9 G. C. de Souza;Andre Martins", "authorids": "~Ant\u00f3nio_Farinhas1;~Jos\u00e9_G._C._de_Souza1;~Andre_Martins1", "gender": "M;M;M", "homepage": ";https://andre-martins.github.io/;", "dblp": "267/5345;m/AndreFTMartins;66/1087", "google_scholar": "yK5wIPkAAAAJ;https://scholar.google.pt/citations?user=mT7ppvwAAAAJ;20ApDosAAAAJ", "or_profile": "~Ant\u00f3nio_Farinhas1;~Andre_Martins1;~Jos\u00e9_Guilherme_Camargo_de_Souza2", "aff": "Instituto Superior T\u00e9cnico;Unbabel;Unbabel", "aff_domain": "tecnico.ulisboa.pt;unbabel.com;unbabel.com", "position": "PhD student;Research Scientist;Researcher", "bibtex": "@inproceedings{\nfarinhas2023an,\ntitle={An Empirical Study of Translation Hypothesis Ensembling with Large Language Models},\nauthor={Ant{\\'o}nio Farinhas and Jos{\\'e} G. C. 
de Souza and Andre Martins},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=HGLvAAKNKx}\n}", "github": "", "project": "", "reviewers": "UV4z;aEq9;uVRh", "site": "https://openreview.net/forum?id=HGLvAAKNKx", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;3;5", "excitement": "3;4;4", "reproducibility": "4;4;5", "correctness": "3;4;4", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.333333333333333, "correctness_avg": 3.6666666666666665, "replies_avg": 10, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0001-6344-7633", "linkedin": ";;josesouza/", "aff_unique_index": "0;1;1", "aff_unique_norm": "Instituto Superior T\u00e9cnico;Unbabel", "aff_unique_dep": ";", "aff_unique_url": "https://www.ist.utl.pt;https://www.unbabel.com", "aff_unique_abbr": "IST;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Portugal" }, { "id": "HIBDxkl5n4", "title": "Continual Event Extraction with Semantic Confusion Rectification", "track": "main", "status": "Long Main", "tldr": "", "abstract": "We study continual event extraction, which aims to extract incessantly emerging event information while avoiding forgetting. We observe that the semantic confusion on event types stems from the annotations of the same text being updated over time. The imbalance between event types even aggravates this issue. This paper proposes a novel continual event extraction model with semantic confusion rectification. We mark pseudo labels for each sentence to alleviate semantic confusion. We transfer pivotal knowledge between current and previous models to enhance the understanding of event types. Moreover, we encourage the model to focus on the semantics of long-tailed event types by leveraging other associated types. 
Experimental results show that our model outperforms state-of-the-art baselines and is proficient in imbalanced datasets.", "keywords": "Event extraction;Continual learning;Semantic confusion;Knowledge transfer", "primary_area": "", "supplementary_material": "", "author": "Zitao Wang;Xinyi Wang;Wei Hu", "authorids": "~Zitao_Wang1;~Xinyi_Wang5;~Wei_Hu7", "gender": "M;;M", "homepage": "https://github.com/njuwzt;http://ws2.nju.edu.cn/kgwiki/;http://ws.nju.edu.cn/~whu", "dblp": ";14/7249-10;https://dblp.uni-trier.de/pid/52/173-7", "google_scholar": "hntRWkIAAAAJ;;iWs168sAAAAJ", "or_profile": "~Zitao_Wang1;~Xinyi_Wang5;~Wei_Hu7", "aff": "Nanjing University;Nanjing University;Nanjing University", "aff_domain": "nju.edu.cn;nju.edu.cn;nju.edu.cn", "position": "MS student;PhD student;Full Professor", "bibtex": "@inproceedings{\nwang2023continual,\ntitle={Continual Event Extraction with Semantic Confusion Rectification},\nauthor={Zitao Wang and Xinyi Wang and Wei Hu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=HIBDxkl5n4}\n}", "github": "", "project": "", "reviewers": "ssjz;4NER;p7bH", "site": "https://openreview.net/forum?id=HIBDxkl5n4", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;4;3", "excitement": "4;4;4", "reproducibility": "3;3;3", "correctness": "3;4;4", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 4.0, "reproducibility_avg": 3.0, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0003-3635-6335", "linkedin": ";;", "aff_unique_index": "0;0;0", "aff_unique_norm": "Nanjing University", "aff_unique_dep": "", "aff_unique_url": "https://www.nju.edu.cn", "aff_unique_abbr": "Nanjing U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "HIPPG2SH3u", "title": "Unified Representation for Non-compositional and Compositional Expressions", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Accurate processing of non-compositional language relies on generating good representations for such expressions. In this work, we study the representation of language non-compositionality by proposing a language model, PIER+, that builds on BART and can create semantically meaningful and contextually appropriate representations for English potentially idiomatic expressions (PIEs). PIEs are characterized by their non-compositionality and contextual ambiguity in their literal and idiomatic interpretations. Via intrinsic evaluation on embedding quality and extrinsic evaluation on PIE processing and NLU tasks, we show that representations generated by PIER+ result in 33\\% higher homogeneity score for embedding clustering than BART, whereas 3.12\\% and 3.29\\% gains in accuracy and sequence accuracy for PIE sense classification and span detection compared to the state-of-the-art IE representation model, GIEA. 
These gains are achieved without sacrificing PIER+'s performance on NLU tasks (+/- 1\\% accuracy) compared to BART.", "keywords": "Potentially Idiomatic Expression; Non-compositionality; Phrase Embedding; Idiomatic Expression Processing", "primary_area": "", "supplementary_material": "", "author": "Ziheng Zeng;Suma Bhat", "authorids": "~Ziheng_Zeng1;~Suma_Bhat1", "gender": "M;", "homepage": ";", "dblp": ";66/9013", "google_scholar": ";https://scholar.google.com/citations?hl=en", "or_profile": "~Ziheng_Zeng1;~Suma_Bhat1", "aff": "University of Illinois, Urbana Champaign;University of Illinois, Urbana Champaign", "aff_domain": "illinois.edu;illinois.edu", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\nzeng2023unified,\ntitle={Unified Representation for Non-compositional and Compositional Expressions},\nauthor={Ziheng Zeng and Suma Bhat},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=HIPPG2SH3u}\n}", "github": "", "project": "", "reviewers": "ZgHk;uedZ;mdhy", "site": "https://openreview.net/forum?id=HIPPG2SH3u", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;3;5", "excitement": "3;3;3", "reproducibility": "3;3;5", "correctness": "2;4;4", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 3.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";", "linkedin": ";", "aff_unique_index": "0;0", "aff_unique_norm": "University of Illinois Urbana-Champaign", "aff_unique_dep": "", "aff_unique_url": "https://illinois.edu", "aff_unique_abbr": "UIUC", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Urbana-Champaign", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "HJTbcidL5a", "title": "TokenDrop + BucketSampler: Towards Efficient Padding-free Fine-tuning of Language Models", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "The great success of Language Models (LMs) for various Natural Language Processing (NLP) tasks is accompanied by computational challenges during both pre-training and fine-tuning. Pre-training has attracted significant attention due to its huge computational footprint. We focus on the fine-tuning of pre-trained LMs, which is expected to be performed much more frequently as the pre-trained models are adapted to downstream tasks. During fine-tuning, the presence of variable-length input sequences necessitates the use of padding tokens when batching sequences. These padding tokens lead to ineffectual computations, adversely impacting the efficiency of fine-tuning. We also observe that LMs memorize the limited task-specific training data despite the use of known regularization methods. Based on these insights, we present TokenDrop + BucketSampler, a framework that simultaneously improves efficiency and accuracy of LM fine-tuning. BucketSampler generates batches of samples with lower variance in sequence lengths to reduce the number of padding tokens, but does so without the accompanying accuracy drop seen in previous approaches. TokenDrop is a new regularizer that prunes a random subset of insignificant tokens from each input sequence in every epoch to prevent overfitting. TokenDrop drops more tokens from the longer sequences in each batch to further reduce variance in input lengths and the need for padding. 
TokenDrop + BucketSampler accelerates fine-tuning on diverse downstream tasks by up to 10.61X, while also producing models that are up to 1.17% more accurate compared to conventional fine-tuning. Code is available at https://github.com/amrnag/TokenDrop-BucketSampler.\n\n.", "keywords": "Efficient training of LLMs;Efficient fine-tuning of LLMs;Padding-free variable sequence length batching;Token pruning;Regularizer for LLM training;TokenDrop;BucketSampler", "primary_area": "", "supplementary_material": "", "author": "Amrit Nagarajan;Anand Raghunathan", "authorids": "~Amrit_Nagarajan1;~Anand_Raghunathan1", "gender": "M;", "homepage": ";https://engineering.purdue.edu/~araghu/", "dblp": ";74/3747.html", "google_scholar": ";OP7F8jEAAAAJ", "or_profile": "~Amrit_Nagarajan1;~Anand_Raghunathan1", "aff": "Purdue University;Purdue University", "aff_domain": "purdue.edu;purdue.edu", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\nnagarajan2023tokendrop,\ntitle={TokenDrop + BucketSampler: Towards Efficient Padding-free Fine-tuning of Language Models},\nauthor={Amrit Nagarajan and Anand Raghunathan},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=HJTbcidL5a}\n}", "github": "", "project": "", "reviewers": "4fLH;4LK5;QVce", "site": "https://openreview.net/forum?id=HJTbcidL5a", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;3", "excitement": "3;4;3", "reproducibility": "4;5;3", "correctness": "3;4;3", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.0, "correctness_avg": 3.3333333333333335, "replies_avg": 10, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";", "linkedin": "https://in.linkedin.com/in/amrit-nagarajan-8a99b0152;", "aff_unique_index": "0;0", "aff_unique_norm": "Purdue University", "aff_unique_dep": "", "aff_unique_url": "https://www.purdue.edu", "aff_unique_abbr": "Purdue", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "HKMvR1UaWH", "title": "SYMPTOMIFY: Transforming Symptom Annotations with Language Model Knowledge Harvesting", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Given the high-stakes nature of healthcare decision-making, we aim to improve the efficiency of human annotators rather than replacing them with fully automated solutions. We introduce a new comprehensive resource, SYMPTOMIFY, a dataset of annotated vaccine adverse reaction reports detailing individual vaccine reactions. The dataset, consisting of over 800k reports, surpasses previous datasets in size. Notably, it features reasoning-based explanations alongside background knowledge obtained via language model knowledge harvesting. 
We evaluate performance across various methods and learning paradigms, paving the way for future comparisons and benchmarking.", "keywords": "Symptom Recognition", "primary_area": "", "supplementary_material": "", "author": "Bosung Kim;Ndapa Nakashole", "authorids": "~Bosung_Kim1;~Ndapa_Nakashole2", "gender": "F;", "homepage": "http://bosung.github.io;", "dblp": ";", "google_scholar": "gbFNtPUAAAAJ;", "or_profile": "~Bosung_Kim1;~Ndapa_Nakashole2", "aff": "University of California, San Diego;", "aff_domain": "ucsd.edu;", "position": "PhD student;", "bibtex": "@inproceedings{\nkim2023symptomify,\ntitle={{SYMPTOMIFY}: Transforming Symptom Annotations with Language Model Knowledge Harvesting},\nauthor={Bosung Kim and Ndapa Nakashole},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=HKMvR1UaWH}\n}", "github": "", "project": "", "reviewers": "jY5d;7Q9D;Y8AL", "site": "https://openreview.net/forum?id=HKMvR1UaWH", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;3;3", "excitement": "3;4;4", "reproducibility": "3;4;4", "correctness": "3;3;4", "rating_avg": 4.0, "confidence_avg": 3.0, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";", "linkedin": ";", "aff_unique_index": "0", "aff_unique_norm": "University of California, San Diego", "aff_unique_dep": "", "aff_unique_url": "https://www.ucsd.edu", "aff_unique_abbr": "UCSD", "aff_campus_unique_index": "0", "aff_campus_unique": "San Diego", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "id": "HMVNu8oKAK", "title": "Enhancing Textbooks with Visuals from the Web for Improved Learning", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Textbooks are one of the main mediums for delivering high-quality education to students.\nIn particular, explanatory and illustrative visuals play a key role in retention, comprehension and general transfer of knowledge.\nHowever, many textbooks lack these interesting visuals to support student learning.\nIn this paper, we investigate the effectiveness of vision-language models to automatically enhance textbooks with images from the web.\nWe collect a dataset of e-textbooks in the math, science, social science and business domains.\nWe then set up a text-image matching task that involves retrieving and appropriately assigning web images to textbooks, which we frame as a matching optimization problem.\nThrough a crowd-sourced evaluation, we verify that (1) while the original textbook images are rated higher, automatically assigned ones are not far behind, and (2) the precise formulation of the optimization problem matters.\nWe release the dataset of textbooks with an associated image bank to inspire further research in this intersectional area of computer vision and NLP for education.", "keywords": "textbooks;learning;education;images", "primary_area": "", "supplementary_material": "", "author": "Janvijay Singh;Vil\u00e9m Zouhar;Mrinmaya Sachan", "authorids": "~Janvijay_Singh1;~Vil\u00e9m_Zouhar1;~Mrinmaya_Sachan3", "gender": "M;Not Specified;M", "homepage": "http://iamjanvijay.github.io;https://vilda.net;https://sites.google.com/site/mrinsachan/", "dblp": ";254/1832;86/10440.html", "google_scholar": ";2EUDwtkAAAAJ;Tpp9ZjoAAAAJ", "or_profile": "~Janvijay_Singh1;~Vil\u00e9m_Zouhar1;~MRINMAYA_SACHAN2", "aff": 
"Georgia Institute of Technology;Amazon;Swiss Federal Institute of Technology", "aff_domain": "gatech.edu;amazon.com;ethz.ch", "position": "MS student;Intern;Assistant Professor", "bibtex": "@inproceedings{\nsingh2023enhancing,\ntitle={Enhancing Textbooks with Visuals from the Web for Improved Learning},\nauthor={Janvijay Singh and Vil{\\'e}m Zouhar and Mrinmaya Sachan},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=HMVNu8oKAK}\n}", "github": "", "project": "", "reviewers": "r6d1;Ymez;XsLy", "site": "https://openreview.net/forum?id=HMVNu8oKAK", "pdf_size": 0, "rating": "5;5;5", "confidence": "3;3;3", "excitement": "4;4;4", "reproducibility": "4;4;4", "correctness": "5;3;3", "rating_avg": 5.0, "confidence_avg": 3.0, "excitement_avg": 4.0, "reproducibility_avg": 4.0, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": ";vil%C3%A9m-zouhar-192988288/;", "aff_unique_index": "0;1;2", "aff_unique_norm": "Georgia Institute of Technology;Amazon;Swiss Federal Institute of Technology", "aff_unique_dep": ";Amazon.com, Inc.;", "aff_unique_url": "https://www.gatech.edu;https://www.amazon.com;https://www.ethz.ch", "aff_unique_abbr": "Georgia Tech;Amazon;ETH Zurich", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1", "aff_country_unique": "United States;Switzerland" }, { "id": "HNfwD7QOaq", "title": "Large-scale similarity search with Optimal Transport", "track": "main", "status": "Short Main", "tldr": "", "abstract": "Wasserstein distance is a powerful tool for comparing probability distributions and is widely used for document classification and retrieval tasks in NLP. In particular, it is known as the word mover's distance (WMD) in the NLP community. WMD exhibits excellent performance for various NLP tasks; however, one of its limitations is its computational cost and thus is not useful for large-scale distribution comparisons. In this study, we propose a simple and effective nearest neighbor search based on the Wasserstein distance. Specifically, we employ the L1 embedding method based on the tree-based Wasserstein approximation and subsequently used the nearest neighbor search to efficiently find the $k$-nearest neighbors. 
Through benchmark experiments, we demonstrate that the proposed approximation has comparable performance to the vanilla Wasserstein distance and can be computed three orders of magnitude faster than the vanilla Wasserstein distance.", "keywords": "Optimal transport;Wasserstein distance;Document Classification", "primary_area": "", "supplementary_material": "", "author": "Cl\u00e9a Mehnia Laouar;Yuki Takezawa;Makoto Yamada", "authorids": "~Cl\u00e9a_Mehnia_Laouar1;~Yuki_Takezawa1;~Makoto_Yamada3", "gender": "F;M;M", "homepage": ";https://yukitakezawa.github.io/;https://groups.oist.jp/mlds", "dblp": ";284/1294;56/4937", "google_scholar": ";eaKQb8IAAAAJ;1cKNu1gAAAAJ", "or_profile": "~Cl\u00e9a_Mehnia_Laouar1;~Yuki_Takezawa1;~Makoto_Yamada3", "aff": "Okinawa Institute of Science and Technology (OIST);Kyoto University;Kyoto University", "aff_domain": "oist.jp;kyoto-u.ac.jp;kyoto-u.ac.jp", "position": "PhD student;MS student;Associate Professor", "bibtex": "@inproceedings{\nlaouar2023largescale,\ntitle={Large-scale similarity search with Optimal Transport},\nauthor={Cl{\\'e}a Mehnia Laouar and Yuki Takezawa and Makoto Yamada},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=HNfwD7QOaq}\n}", "github": "", "project": "", "reviewers": "8vBA;Dg6r;ZksN", "site": "https://openreview.net/forum?id=HNfwD7QOaq", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;4;4", "excitement": "4;3;4", "reproducibility": "3;3;5", "correctness": "3;4;4", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-8532-2775;", "linkedin": "cl\u00e9a-laouar-508a121bb/;;", "aff_unique_index": "0;1;1", "aff_unique_norm": "Okinawa Institute of Science and Technology;Kyoto University", "aff_unique_dep": ";", "aff_unique_url": "https://www.oist.jp;https://www.kyoto-u.ac.jp", "aff_unique_abbr": "OIST;Kyoto U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Japan" }, { "id": "HQfzPDZJAL", "title": "Expository Text Generation: Imitate, Retrieve, Paraphrase", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Expository documents are vital resources for conveying complex information to readers. Despite their usefulness, writing expository text by hand is a challenging process that requires careful content planning, obtaining facts from multiple sources, and the ability to clearly synthesize these facts. To ease these burdens, we propose the task of expository text generation, which seeks to automatically generate an accurate and stylistically consistent expository text for a topic by intelligently searching a knowledge source. We solve our task by developing IRP, a framework that overcomes the limitations of retrieval-augmented models and iteratively performs content planning, fact retrieval, and rephrasing. 
Through experiments on three diverse, newly-collected datasets, we show that IRP produces factual and organized expository texts that accurately inform readers.", "keywords": "Text Generation;Factuality;Style-guided Generation;Retrieval-augmented Generation", "primary_area": "", "supplementary_material": "", "author": "Nishant Balepur;Jie Huang;Kevin Chang", "authorids": "~Nishant_Balepur1;~Jie_Huang3;~Kevin_Chang1", "gender": "M;;M", "homepage": "https://nbalepur.github.io/;https://jeffhj.github.io/;https://siebelschool.illinois.edu/about/people/faculty/kcchang", "dblp": "346/4871;29/6643-9;c/KCCChang", "google_scholar": "G8_fojUAAAAJ;GIoPkMoAAAAJ;https://scholar.google.com.tw/citations?user=sugWZ6MAAAAJ", "or_profile": "~Nishant_Balepur1;~Jie_Huang3;~Kevin_Chang1", "aff": "University of Illinois, Urbana Champaign;University of Illinois, Urbana Champaign;University of Illinois, Urbana Champaign", "aff_domain": "cs.illinois.edu;illinois.edu;illinois.edu", "position": "Undergrad student;PhD student;Full Professor", "bibtex": "@inproceedings{\nbalepur2023expository,\ntitle={Expository Text Generation: Imitate, Retrieve, Paraphrase},\nauthor={Nishant Balepur and Jie Huang and Kevin Chang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=HQfzPDZJAL}\n}", "github": "", "project": "", "reviewers": "Q3e3;yjk6;7sKR", "site": "https://openreview.net/forum?id=HQfzPDZJAL", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;4;3", "excitement": "4;4;4", "reproducibility": "5;4;5", "correctness": "5;4;4", "rating_avg": 5.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 4.0, "reproducibility_avg": 4.666666666666667, "correctness_avg": 4.333333333333333, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0003-0997-6803", "linkedin": "nishant-balepur-a03818107/;jie-huang-4b0104151/;", "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Illinois Urbana-Champaign", "aff_unique_dep": "", "aff_unique_url": "https://illinois.edu", "aff_unique_abbr": "UIUC", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Urbana-Champaign", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "HR90GXVHUn", "title": "Once Upon a ${\\it Time}$ in ${\\it Graph}$: Relative-Time Pretraining for Complex Temporal Reasoning", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Our physical world is constantly evolving over time, rendering challenges for pre-trained language models to understand and reason over the temporal contexts of texts. Existing work focuses on strengthening the direct association between a piece of text and its time-stamp. However, the knowledge-time association is usually insufficient for the downstream tasks that require reasoning over temporal dependencies between knowledge. In this work, we make use of the underlying nature of time, all temporally-scoped sentences are strung together through a one-dimensional time axis, and suggest creating a graph structure based on the relative placements of events along the time axis. Inspired by the graph view, we propose \\textsc{RemeMo} ($\\underline{Re}lative Ti\\underline{me} \\underline{Mo}deling$), which explicitly connects all temporally-scoped facts by modeling the time relations between any two sentences. 
Experimental results show that \\textsc{RemeMo} outperforms the baseline T5 on multiple temporal question answering datasets under various settings. Further analysis suggests that \\textsc{RemeMo} is especially good at modeling long-range complex temporal dependencies.", "keywords": "Temporal Question Answering;Time-aware Pre-training", "primary_area": "", "supplementary_material": "", "author": "Sen Yang;Xin Li;Lidong Bing;Wai Lam", "authorids": "~Sen_Yang6;~Xin_Li40;~Lidong_Bing2;~Wai_Lam1", "gender": ";M;;M", "homepage": ";https://lixin4ever.github.io/;;http://www.se.cuhk.edu.hk/~textmine", "dblp": ";09/1365-56.html;;48/1707", "google_scholar": ";https://scholar.google.com.hk/citations?user=syD9lxQAAAAJ;;ewA4NAcAAAAJ", "or_profile": "~Sen_Yang6;~Xin_Li40;~Lidong_Bing2;~Wai_Lam1", "aff": ";Alibaba Group;;The Chinese University of Hong Kong", "aff_domain": ";alibaba-inc.com;;cuhk.edu.hk", "position": ";Researcher;;Professor", "bibtex": "@inproceedings{\nyang2023once,\ntitle={Once Upon a \\$\\{{\\textbackslash}it Time\\}\\$ in \\$\\{{\\textbackslash}it Graph\\}\\$: Relative-Time Pretraining for Complex Temporal Reasoning},\nauthor={Sen Yang and Xin Li and Lidong Bing and Wai Lam},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=HR90GXVHUn}\n}", "github": "", "project": "", "reviewers": "r1Nz;ZZGY;bSAa", "site": "https://openreview.net/forum?id=HR90GXVHUn", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;2;4", "excitement": "3;4;4", "reproducibility": "4;4;5", "correctness": "4;3;4", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.333333333333333, "correctness_avg": 3.6666666666666665, "replies_avg": 10, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;", "linkedin": ";;;", "aff_unique_index": "0;1", "aff_unique_norm": "Alibaba Group;Chinese University of Hong Kong", "aff_unique_dep": ";", "aff_unique_url": "https://www.alibaba.com;https://www.cuhk.edu.hk", "aff_unique_abbr": "Alibaba;CUHK", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "id": "HS5BWSqK5I", "title": "Efficient Cross-Task Prompt Tuning for Few-Shot Conversational Emotion Recognition", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Emotion Recognition in Conversation (ERC) has been widely studied due to its importance in developing emotion-aware empathetic machines. The rise of pre-trained language models (PLMs) has further pushed the limit of ERC performance. However, most recent works on ERC using PLMs are heavily data-driven, and requires fine-tuning the entire PLMs. To improve both sample and computational efficiency, we propose a derivative-free optimization method called Cross-Task Prompt Tuning (CTPT) for few-shot conversational emotion recognition. Unlike existing methods that learn independent knowledge from individual tasks, CTPT leverages sharable cross-task knowledge by exploiting external knowledge from other source tasks to improve learning performance under the few-shot setting. Moreover, CTPT only needs to optimize a vector under the low intrinsic dimensionality without gradient, which is highly parameter-efficient compared with existing approaches. 
Experiments on five different contextual conversation datasets demonstrate that our CTPT method has superior results on both few-shot scenarios and zero-shot transfers.", "keywords": "Prompt Tuning;Emotion Recognition in Conversation;Large Language Models", "primary_area": "", "supplementary_material": "", "author": "Yige Xu;Zhiwei Zeng;Zhiqi Shen", "authorids": "~Yige_Xu1;~Zhiwei_Zeng1;~Zhiqi_Shen2", "gender": "M;F;M", "homepage": "https://xuyige.github.io;https://www.researchgate.net/profile/Zhiwei-Zeng-2;https://dr.ntu.edu.sg/cris/rp/rp00227", "dblp": "224/1771-1;;03/1554-1.html", "google_scholar": "ZIWFQ-gAAAAJ;6eiLXmcAAAAJ;https://scholar.google.com.sg/citations?user=EA2T_lwAAAAJ", "or_profile": "~Yige_Xu1;~Zhiwei_Zeng1;~Zhiqi_Shen2", "aff": "Nanyang Technological University;Nanyang Technological University;Nanyang Technological University", "aff_domain": "ntu.edu.sg;ntu.edu.sg;ntu.edu.sg", "position": "PhD student;Postdoc;Lecturer", "bibtex": "@inproceedings{\nxu2023efficient,\ntitle={Efficient Cross-Task Prompt Tuning for Few-Shot Conversational Emotion Recognition},\nauthor={Yige Xu and Zhiwei Zeng and Zhiqi Shen},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=HS5BWSqK5I}\n}", "github": "", "project": "", "reviewers": "q8Be;ESEW;iGEX", "site": "https://openreview.net/forum?id=HS5BWSqK5I", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;4;4", "excitement": "4;2;3", "reproducibility": "4;3;4", "correctness": "4;3;4", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.6666666666666665, "replies_avg": 10, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-6718-1251;0000-0002-7787-5644;0000-0001-7626-7295", "linkedin": ";;", "aff_unique_index": "0;0;0", "aff_unique_norm": "Nanyang Technological University", "aff_unique_dep": "", "aff_unique_url": "https://www.ntu.edu.sg", "aff_unique_abbr": "NTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Singapore" }, { "id": "HUzbEPMd6v", "title": "SPT: Learning to Selectively Insert Prompts for Better Prompt Tuning", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Prompt tuning prepends a soft prompt to the input embeddings or hidden states and only optimizes the prompt to adapt pretrained models (PTMs) to downstream tasks. The previous work manually selects prompt layers which are far from optimal and failed to exploit the potential of prompt tuning. In this work, we propose a novel framework, \\underline{S}elective \\underline{P}rompt \\underline{T}uning (SPT), that learns to select the proper prompt layers by inserting a prompt controlled by a learnable probabilistic gate at each intermediate layer. We further propose a novel bi-level optimization framework, SPT-DARTS, that can better optimize the learnable gates and improve the final prompt tuning performances of the learned prompt layer settings. We conduct extensive experiments with ten benchmark datasets under the full-data and few-shot scenarios. The results demonstrate that our SPT framework can perform better than the previous state-of-the-art PETuning baselines with comparable or fewer tunable parameters. \\footnote{Codes will be publicly available upon acceptance. 
}", "keywords": "Prompt tuning;neural architecture search;parameter efficient tuning", "primary_area": "", "supplementary_material": "", "author": "Wei Zhu;Ming Tan", "authorids": "~Wei_Zhu7;~Ming_Tan3", "gender": "M;", "homepage": "https://www.researchgate.net/profile/Wei-Zhu-111;", "dblp": "83/4805-16.html;", "google_scholar": "EF5J_BYAAAAJ;https://scholar.google.com/citations?hl=en", "or_profile": "~Wei_Zhu7;~Ming_Tan3", "aff": "University of Hong Kong;", "aff_domain": "hku.hk;", "position": "Researcher;", "bibtex": "@inproceedings{\nzhu2023spt,\ntitle={{SPT}: Learning to Selectively Insert Prompts for Better Prompt Tuning},\nauthor={Wei Zhu and Ming Tan},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=HUzbEPMd6v}\n}", "github": "", "project": "", "reviewers": "yfyq;XNTB;AMiu", "site": "https://openreview.net/forum?id=HUzbEPMd6v", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;3", "excitement": "3;3;4", "reproducibility": "4;3;4", "correctness": "3;3;4", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-6389-6866;", "linkedin": ";", "aff_unique_index": "0", "aff_unique_norm": "University of Hong Kong", "aff_unique_dep": "", "aff_unique_url": "https://www.hku.hk", "aff_unique_abbr": "HKU", "aff_campus_unique_index": "0", "aff_campus_unique": "Hong Kong SAR", "aff_country_unique_index": "0", "aff_country_unique": "China" }, { "id": "HYxJoAWLgT", "title": "Decoding Stumpers: Large Language Models vs. Human Problem-Solvers", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "This paper investigates the problem-solving capabilities of Large Language Models (LLMs) by evaluating their performance on stumpers, unique single-step intuition problems that pose challenges for human solvers but are easily verifiable. We compare the performance of four state-of-the-art LLMs (Davinci-2, Davinci-3, GPT-3.5-Turbo, GPT-4) to human participants. Our findings reveal that the new-generation LLMs excel in solving stumpers and surpass human performance. However, humans exhibit superior skills in verifying solutions to the same problems. 
This research enhances our understanding of LLMs' cognitive abilities and provides insights for enhancing their problem-solving potential across various domains.", "keywords": "Large Language Models;Problem-solving abilities;Stumpers;Cognitive abilities;Human performance;Riddles", "primary_area": "", "supplementary_material": "", "author": "Alon Goldstein;Miriam Havin;Roi Reichart;Ariel Goldstein", "authorids": "~Alon_Goldstein1;~Miriam_Havin1;~Roi_Reichart1;~Ariel_Goldstein1", "gender": "M;F;M;M", "homepage": "http://www.xoltar.com;;https://roireichart.com/;https://www.deepcognitionlab.com/", "dblp": ";;96/5429;", "google_scholar": ";;https://scholar.google.co.il/citations?user=xXJIsh4AAAAJ;p8hQgVuVOTgC", "or_profile": "~Alon_Goldstein1;~Miriam_Havin1;~Roi_Reichart1;~Ariel_Goldstein1", "aff": "XOLTAR;Hebrew University of Jerusalem;Technion, Israel Institute of Technology;Hebrew University of Jerusalem", "aff_domain": "xoltar.com;huji.ac.il;technion.ac.il;huji.ac.il", "position": "Chief Behavioral Officer;Undergrad student;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\ngoldstein2023decoding,\ntitle={Decoding Stumpers: Large Language Models vs. Human Problem-Solvers},\nauthor={Alon Goldstein and Miriam Havin and Roi Reichart and Ariel Goldstein},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=HYxJoAWLgT}\n}", "github": "", "project": "", "reviewers": "WvWv;zMZm;2ch8", "site": "https://openreview.net/forum?id=HYxJoAWLgT", "pdf_size": 0, "rating": "2;2;2", "confidence": "4;3;4", "excitement": "3;3;4", "reproducibility": "3;4;5", "correctness": "2;3;3", "rating_avg": 2.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.0, "correctness_avg": 2.6666666666666665, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-8320-1670;;;", "linkedin": "alongo-phd;miriam-havin-846a49244;roi-reichart-ba2a8a7/;", "aff_unique_index": "0;1;2;1", "aff_unique_norm": "XOLTAR;Hebrew University of Jerusalem;Israel Institute of Technology", "aff_unique_dep": ";;", "aff_unique_url": ";https://www.huji.ac.il;https://www.technion.ac.il/en/", "aff_unique_abbr": ";HUJI;Technion", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Jerusalem", "aff_country_unique_index": "1;1;1", "aff_country_unique": ";Israel" }, { "id": "HaSS8a3Oe7", "title": "Can Language Models Understand Physical Concepts?", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Language models (LMs) gradually become general-purpose interfaces in the interactive and embodied world, where the understanding of physical concepts is an essential prerequisite.\nHowever, it is unclear whether LMs can understand physical concepts in the human world. \nTo investigate this, we design a benchmark VEC that covers the tasks of (i) Visual concepts, such as the shape and material of objects, and (ii) Embodied Concepts, learned from the interaction with the world such as the temperature of objects. 
\nOur zero (few)-shot prompting results show that\nthe understanding of certain visual concepts\nemerges as scaling up LMs, but there are still\nbasic concepts to which the scaling law does not apply.\nFor example, OPT-175B performs close to humans with a zero-shot accuracy of $85$\\% on the material concept, yet behaves like random guessing on the mass concept.\nInstead, vision-augmented LMs such as CLIP and BLIP achieve a human-level understanding of embodied concepts.\nAnalysis indicates that the rich semantics in visual representation can serve as a valuable source of embodied knowledge. Inspired by this, we propose a distillation method to transfer embodied knowledge from VLMs to LMs, achieving performance gain comparable with that by scaling up parameters of LMs $134\\times$. Our dataset is available at https://github.com/TobiasLee/VEC.", "keywords": "large language models;embodied concept understanding", "primary_area": "", "supplementary_material": "", "author": "Lei Li;Jingjing Xu;Qingxiu Dong;Ce Zheng;Xu Sun;Lingpeng Kong;Qi Liu", "authorids": "~Lei_Li14;~Jingjing_Xu1;~Qingxiu_Dong1;~Ce_Zheng2;~Xu_Sun1;~Lingpeng_Kong1;~Qi_Liu5", "gender": "F;F;M;M;M;M;M", "homepage": ";https://dqxiu.github.io/;;https://xusun.org/;https://ikekonglp.github.io/;http://leuchine.github.io/;https://lilei-nlp.github.io", "dblp": "25/624;284/0673;99/6967;37/1971-1;144/7656;;13/7007-39", "google_scholar": ";ibcR7VkAAAAJ;r7qFs7UAAAAJ;https://scholar.google.com/citations?hl=en;f1hBi5wAAAAJ;Y-OeKMwAAAAJ;MeV4GGsAAAAJ", "or_profile": "~Jingjing_Xu1;~Qingxiu_Dong1;~Ce_Zheng2;~Xu_Sun1;~Lingpeng_Kong1;~Qi_Liu5;~Tobias_Lee1", "aff": ";Peking University;Peking University;Peking University;Department of Computer Science, The University of Hong Kong;University of Hong Kong;Peking University", "aff_domain": ";pku.edu.cn;pku.edu.cn;pku.edu.cn;cs.hku.hk;hku.hk;pku.edu.cn", "position": ";PhD student;MS student;Associate Professor;Assistant Professor;Assistant Professor;MS student", "bibtex": "@inproceedings{\nli2023can,\ntitle={Can Language Models Understand Physical Concepts?},\nauthor={Lei Li and Jingjing Xu and Qingxiu Dong and Ce Zheng and Xu Sun and Lingpeng Kong and Qi Liu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=HaSS8a3Oe7}\n}", "github": "", "project": "", "reviewers": "MVNy;kY1c;zxLC", "site": "https://openreview.net/forum?id=HaSS8a3Oe7", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;4;4", "excitement": "4;4;4", "reproducibility": "5;4;5", "correctness": "4;4;4", "rating_avg": 5.0, "confidence_avg": 4.0, "excitement_avg": 4.0, "reproducibility_avg": 4.666666666666667, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;0000-0003-4608-5778;0009-0008-6984-5104", "linkedin": ";qingxiu-dong-a3758a199/;;;;;", "aff_unique_index": "0;0;0;1;1;0", "aff_unique_norm": "Peking University;University of Hong Kong", "aff_unique_dep": ";Department of Computer Science", "aff_unique_url": "http://www.pku.edu.cn;https://www.hku.hk", "aff_unique_abbr": "Peking U;HKU", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "Hbqsmv4jqY", "title": "Beyond Labels: Empowering Human Annotators with Natural Language Explanations through a Novel Active-Learning Architecture", "track": "main", "status": "Long Findings", "tldr": "", "abstract": 
"Real-world domain experts (e.g., doctors) rarely annotate only a decision label in their day-to-day workflow without providing explanations. Yet, existing low-resource learning techniques, such as Active Learning (AL), that aim to support human annotators mostly focus on the label while neglecting the natural language explanation of a data point. This work proposes a novel AL architecture to support experts' real-world need for label and explanation annotations in low-resource scenarios. Our AL architecture leverages an explanation-generation model to produce explanations guided by human explanations, a prediction model that utilizes generated explanations toward prediction faithfully, and a novel data diversity-based AL sampling strategy that benefits from the explanation annotations. Automated and human evaluations demonstrate the effectiveness of incorporating explanations into AL sampling and the improved human annotation efficiency and trustworthiness with our AL architecture. Additional ablation studies illustrate the potential of our AL architecture for transfer learning, generalizability, and integration with large language models (LLMs). \nWhile LLMs exhibit exceptional explanation-generation capabilities for relatively simple tasks, their effectiveness in complex real-world tasks warrants further in-depth study.", "keywords": "active learning;low-resource learning;explanation generation;natural language explanations", "primary_area": "", "supplementary_material": "", "author": "Bingsheng Yao;Ishan Jindal;Lucian Popa;Yannis Katsis;Sayan Ghosh;Lihong He;Yuxuan Lu;Shashank Srivastava;Yunyao Li;James Hendler;Dakuo Wang", "authorids": "~Bingsheng_Yao1;~Ishan_Jindal1;~Lucian_Popa1;~Yannis_Katsis1;~Sayan_Ghosh2;~Lihong_He2;~Yuxuan_Lu2;~Shashank_Srivastava1;~Yunyao_Li2;~James_Hendler1;~Dakuo_Wang1", "gender": "M;M;M;;M;F;M;M;;M;M", "homepage": ";https://ijindal.github.io/;;;https://sgdgp.github.io/;;https://yuxuan.lu;https://www.ssriva.com/;;http://www.cs.rpi.edu/~hendler/;https://www.dakuowang.com", "dblp": "256/9562;159/1866;https://dblp.dagstuhl.de/pers/hd/p/Popa_0001:Lucian;;http://dblp.uni-trier.de/pers/hd/g/Ghosh_0002:Sayan;;245/9896-3;;60/2319;h/JamesAHendler;161/3389", "google_scholar": "hJlsDfAAAAAJ;https://scholar.google.ca/citations?user=TNrWFecAAAAJ;;;https://scholar.google.com/citations?hl=en;UIgh3TkAAAAJ;t_KJvIYAAAAJ;-vKI5s0AAAAJ;;https://scholar.google.com.tw/citations?user=JNPbTdIAAAAJ;Uno8dugAAAAJ", "or_profile": "~Bingsheng_Yao1;~Ishan_Jindal1;~Lucian_Popa1;~Yannis_Katsis1;~Sayan_Ghosh2;~Lihong_He2;~Yuxuan_Lu2;~Shashank_Srivastava1;~Yunyao_Li2;~James_Hendler1;~Dakuo_Wang1", "aff": "Rensselaer Polytechnic Institute;IBM Research;International Business Machines;;Department of Computer Science, University of North Carolina, Chapel Hill;International Business Machines;Beijing University of Technology;University of North Carolina at Chapel Hill;Apple;Rensselaer Polytechnic Institute;Northeastern University", "aff_domain": "rpi.edu;ibm.com;ibm.com;;cs.unc.edu;ibm.com;bjut.edu.cn;unc.edu;apple.com;rpi.edu;northeastern.edu", "position": "PhD student;Researcher;Principal Research Staff Member;;PhD student;Researcher;Undergrad student;Assistant Professor;Head of Machine Learning;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nyao2023beyond,\ntitle={Beyond Labels: Empowering Human Annotators with Natural Language Explanations through a Novel Active-Learning Architecture},\nauthor={Bingsheng Yao and Ishan Jindal and Lucian Popa and Yannis Katsis and Sayan Ghosh and 
Lihong He and Yuxuan Lu and Shashank Srivastava and Yunyao Li and James Hendler and Dakuo Wang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=Hbqsmv4jqY}\n}", "github": "", "project": "", "reviewers": "DYu3;BmAS;LfJH", "site": "https://openreview.net/forum?id=Hbqsmv4jqY", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;3;5", "excitement": "4;2;3", "reproducibility": "3;4;3", "correctness": "4;4;2", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 3.0, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.3333333333333335, "replies_avg": 10, "authors#_avg": 11, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0009-0004-8329-4610;;;;;;0000-0002-8520-0540;;;;0000-0001-9371-9441", "linkedin": ";ishan-jindal/;;;;;yuxuan-lu;;;;dakuowang/", "aff_unique_index": "0;1;2;3;2;4;3;5;0;6", "aff_unique_norm": "Rensselaer Polytechnic Institute;IBM;International Business Machines Corporation;University of North Carolina;Beijing University of Technology;Apple;Northeastern University", "aff_unique_dep": ";IBM Research;;Department of Computer Science;;Apple Inc.;", "aff_unique_url": "https://www.rpi.edu;https://www.ibm.com/research;https://www.ibm.com;https://www.unc.edu;http://www.bjut.edu.cn;https://www.apple.com;https://www.northeastern.edu", "aff_unique_abbr": "RPI;IBM;IBM;UNC;BJUT;Apple;NEU", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Chapel Hill", "aff_country_unique_index": "0;0;0;0;0;1;0;0;0;0", "aff_country_unique": "United States;China" }, { "id": "HewtRLig9V", "title": "Cabbage Sweeter than Cake? Analysing the Potential of Large Language Models for Learning Conceptual Spaces", "track": "main", "status": "Short Main", "tldr": "", "abstract": "The theory of Conceptual Spaces is an influential cognitive-linguistic framework for representing the meaning of concepts. Conceptual spaces are constructed from a set of quality dimensions, which essentially correspond to primitive perceptual features (e.g. hue or size). These quality dimensions are usually learned from human judgements, which means that applications of conceptual spaces tend to be limited to narrow domains (e.g. modelling colour or taste). Encouraged by recent findings about the ability of Large Language Models (LLMs) to learn perceptually grounded representations, we explore the potential of such models for learning conceptual spaces. Our experiments show that LLMs can indeed be used for learning meaningful representations to some extent. However, we also find that fine-tuned models of the BERT family are able to match or even outperform the largest GPT-3 model, despite being 2 to 3 orders of magnitude smaller.", "keywords": "conceptual spaces;large language models;knowledge representation", "primary_area": "", "supplementary_material": "", "author": "Usashi Chatterjee;Amit Gajbhiye;Steven Schockaert", "authorids": "~Usashi_Chatterjee1;~Amit_Gajbhiye2;~Steven_Schockaert2", "gender": ";;", "homepage": ";;", "dblp": ";;", "google_scholar": ";;", "or_profile": "~Usashi_Chatterjee1;~Amit_Gajbhiye2;~Steven_Schockaert2", "aff": ";;", "aff_domain": ";;", "position": ";;", "bibtex": "@inproceedings{\nchatterjee2023cabbage,\ntitle={Cabbage Sweeter than Cake? 
Analysing the Potential of Large Language Models for Learning Conceptual Spaces},\nauthor={Usashi Chatterjee and Amit Gajbhiye and Steven Schockaert},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=HewtRLig9V}\n}", "github": "", "project": "", "reviewers": "bmxE;k2sy;BTQu", "site": "https://openreview.net/forum?id=HewtRLig9V", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;3", "excitement": "4;2;3", "reproducibility": "4;4;4", "correctness": "4;2;3", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.0, "reproducibility_avg": 4.0, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": ";;" }, { "id": "HhoG04UD3E", "title": "PMIndiaSum: Multilingual and Cross-lingual Headline Summarization for Languages in India", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "This paper introduces PMIndiaSum, a multilingual and massively parallel summarization corpus focused on languages in India. Our corpus provides a training and testing ground for four language families, 14 languages, and the largest to date with 196 language pairs. We detail our construction workflow including data acquisition, processing, and quality assurance. Furthermore, we publish benchmarks for monolingual, cross-lingual, and multilingual summarization by fine-tuning, prompting, as well as translate-and-summarize. Experimental results confirm the crucial role of our data in aiding summarization between Indian languages. Our dataset is publicly available and can be freely modified and re-distributed.", "keywords": "multilingual;cross-lingual;summarization;dataset;benchmark;languages in India", "primary_area": "", "supplementary_material": "", "author": "Ashok Urlana;Pinzhen Chen;Zheng Zhao;Shay B Cohen;Manish Shrivastava;Barry Haddow", "authorids": "~Ashok_Urlana1;~Pinzhen_Chen1;~Zheng_Zhao2;~Shay_B_Cohen1;~Manish_Shrivastava1;~Barry_Haddow1", "gender": "M;Not Specified;M;M;M;", "homepage": "https://ashokurlana.github.io/;https://pinzhenchen.github.io/;http://www.inf.ed.ac.uk/people/students/Zheng_Zhao.html;http://homepages.inf.ed.ac.uk/scohen;https://www.iiit.ac.in/people/faculty/m.shrivastava/;https://homepages.inf.ed.ac.uk/bhaddow/", "dblp": "210/6496;268/1225;75/6680-5;04/5629;65/3881;12/5915", "google_scholar": "v5wiUEsAAAAJ;m_HgJe0AAAAJ;UO0MJeQAAAAJ;;https://scholar.google.co.in/citations?user=sIvMnGQAAAAJ;6NqRjRYAAAAJ", "or_profile": "~Ashok_Urlana1;~Pinzhen_Chen1;~Zheng_Zhao2;~Shay_B_Cohen1;~Manish_Shrivastava1;~Barry_Haddow1", "aff": "International Institute of Information Technology, Hyderabad;University of Edinburgh;University of Edinburgh, University of Edinburgh;University of Edinburgh;International Institute of Information Technology Hyderabad, India;University of Edinburgh", "aff_domain": "ac.in;ed.ac.uk;ed.ac.uk;ed.ac.uk;iiit.ac.in;ed.ac.uk", "position": "MS student;PhD student;PhD student;Reader;Assistant Professor;Postdoc", "bibtex": "@inproceedings{\nurlana2023pmindiasum,\ntitle={{PMI}ndiaSum: Multilingual and Cross-lingual Headline Summarization for Languages in India},\nauthor={Ashok Urlana and Pinzhen Chen and Zheng Zhao and Shay B Cohen and Manish Shrivastava and Barry Haddow},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=HhoG04UD3E}\n}", "github": "", "project": "", "reviewers": 
"Jf8L;2wEo;hbSQ", "site": "https://openreview.net/forum?id=HhoG04UD3E", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;3;3", "excitement": "4;3;2", "reproducibility": "4;5;4", "correctness": "4;4;3", "rating_avg": 4.0, "confidence_avg": 3.0, "excitement_avg": 3.0, "reproducibility_avg": 4.333333333333333, "correctness_avg": 3.6666666666666665, "replies_avg": 10, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0001-7770-4354;;;0000-0003-4753-8353;0000-0001-8705-6637;", "linkedin": "ashokurlana/;pinzhenchen/;;;manishrivastava/;", "aff_unique_index": "0;1;1;1;0;1", "aff_unique_norm": "International Institute of Information Technology;University of Edinburgh", "aff_unique_dep": ";", "aff_unique_url": "https://iiit Hyderabad.ac.in;https://www.ed.ac.uk", "aff_unique_abbr": "IIIT Hyderabad;Edinburgh", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Hyderabad;", "aff_country_unique_index": "0;1;1;1;0;1", "aff_country_unique": "India;United Kingdom" }, { "id": "HickNiCqk9", "title": "Detrimental Contexts in Open-Domain Question Answering", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "For knowledge intensive NLP tasks, it has been widely accepted that accessing more information is a contributing factor to improvements in the model's end-to-end performance. However, counter-intuitively, too much context can have a negative impact on the model when evaluated on common question answering (QA) datasets. In this paper, we analyze how passages can have a detrimental effect on retrieve-then-read architectures used in question answering. Our empirical evidence indicates that the current read architecture does not fully leverage the retrieved passages and significantly degrades its performance when using the whole passages compared to utilizing subsets of them. \nOur findings demonstrate that model accuracy can be improved by 10\\% on two popular QA datasets by filtering out detrimental passages. Additionally, these outcomes are attained by utilizing existing retrieval methods without further training or data. We further highlight the challenges associated with identifying the detrimental passages. First, even with the correct context, the model can make an incorrect prediction, posing a challenge in determining which passages are most influential. Second, evaluation typically considers lexical matching, which is not robust to variations of correct answers. 
Despite these limitations, our experimental results underscore the pivotal role of identifying and removing these detrimental passages for the context-efficient retrieve-then-read pipeline.", "keywords": "Retrieval;Reasoning;Question Answering;DPR;KILT;Fusion In Decoder", "primary_area": "", "supplementary_material": "", "author": "Philhoon Oh;James Thorne", "authorids": "~Philhoon_Oh1;~James_Thorne1", "gender": "M;", "homepage": "https://github.com/philhoonoh;https://jamesthorne.com", "dblp": ";204/1380", "google_scholar": ";hao9RrgAAAAJ", "or_profile": "~Philhoon_Oh1;~James_Thorne1", "aff": "Korea Advanced Institute of Science & Technology;KAIST", "aff_domain": "kaist.edu;kaist.ac.kr", "position": "MS student;Assistant Professor", "bibtex": "@inproceedings{\noh2023detrimental,\ntitle={Detrimental Contexts in Open-Domain Question Answering},\nauthor={Philhoon Oh and James Thorne},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=HickNiCqk9}\n}", "github": "", "project": "", "reviewers": "LMuo;5NS3;RWj7", "site": "https://openreview.net/forum?id=HickNiCqk9", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;4;3", "excitement": "4;3;3", "reproducibility": "4;3;3", "correctness": "2;3;4", "rating_avg": 3.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";", "linkedin": ";", "aff_unique_index": "0;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.kaist.ac.kr", "aff_unique_abbr": "KAIST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "South Korea" }, { "id": "HjBDSop3ME", "title": "Consonant is all you need: a compact representation of English text for efficient NLP", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "In natural language processing (NLP), the representation of text plays a crucial role in various tasks such as language modeling, sentiment analysis, and machine translation. The standard approach is to represent text in the same way as we, as humans, read and write. In this paper, we propose a novel approach to represent text with only consonants which presents a compact representation of English text that offers improved efficiency without sacrificing performance. We exploit the fact that consonants are more discriminative than vowels and by representing text using consonants, we can significantly reduce the overall memory and compute footprint required for storing and processing textual data.\n\nWe present two alternative representations: 'consonants-only', where we completely remove the vowels from the text, and 'masked-vowels', where we mask all the vowels into one special symbol. To evaluate our approaches, we conducted experiments on various NLP tasks, including text classification, part-of-speech (POS) tagging, named-entity recognition (NER), and neural machine translation (NMT), in addition to language modeling. Our results demonstrate that the proposed consonant-based representation achieves comparable performance compared to the standard text representation while requiring significantly fewer computational resources. 
Furthermore, we show that our representation can be seamlessly integrated with existing NLP models and frameworks, providing a practical solution for efficient text processing.\n\nLast but not least, we present a technique to retrieve the vowel information from our processed text representation, keeping in mind the need to reproduce text in human-readable form in some NLP applications.", "keywords": "Text Representation;Tokenization;Language Modeling;Text Classification;POS Tagging;NER;NMT", "primary_area": "", "supplementary_material": "", "author": "Maged S. Al-shaibani;Irfan Ahmad", "authorids": "~Maged_S._Al-shaibani1;~Irfan_Ahmad2", "gender": "M;M", "homepage": "https://faculty.kfupm.edu.sa/ics/irfanics/;https://scholar.google.com/citations?user=Uo91Tz0AAAAJ&hl=en", "dblp": "05/6418-1;", "google_scholar": "L75sJxAAAAAJ;Uo91Tz0AAAAJ", "or_profile": "~Irfan_Ahmad2;~Maged_Saeed1", "aff": "King Fahad University of Petroleum and Minerals;King Fahad University of Petroleum and Minerals", "aff_domain": "kfupm.edu.sa;kfupm.edu.sa", "position": "Assistant Professor;MS student", "bibtex": "@inproceedings{\nal-shaibani2023consonant,\ntitle={Consonant is all you need: a compact representation of English text for efficient {NLP}},\nauthor={Maged S. Al-shaibani and Irfan Ahmad},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=HjBDSop3ME}\n}", "github": "", "project": "", "reviewers": "bMmj;6XDg;yLpJ", "site": "https://openreview.net/forum?id=HjBDSop3ME", "pdf_size": 0, "rating": "2;2;2", "confidence": "3;3;3", "excitement": "3;3;3", "reproducibility": "4;3;3", "correctness": "3;4;2", "rating_avg": 2.0, "confidence_avg": 3.0, "excitement_avg": 3.0, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0001-8311-1731;", "linkedin": "irfanahmads/;", "aff_unique_index": "0;0", "aff_unique_norm": "King Fahad University of Petroleum and Minerals", "aff_unique_dep": "", "aff_unique_url": "https://www.kfupm.edu.sa", "aff_unique_abbr": "KFUPM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Saudi Arabia" }, { "id": "HkXbOUaL4W", "title": "Back Transcription as a Method for Evaluating Robustness of Natural Language Understanding Models to Speech Recognition Errors", "track": "main", "status": "Long Main", "tldr": "", "abstract": "In a spoken dialogue system, an NLU model is preceded by a speech recognition system that can deteriorate the performance of natural language understanding.\nThis paper proposes a method for investigating the impact of speech recognition errors on the performance of natural language understanding models.\nThe proposed method combines the back transcription procedure with a fine-grained technique for categorizing the errors that affect the performance of NLU models.\nThe method relies on the usage of synthesized speech for NLU evaluation.\nWe show that the use of synthesized speech in place of audio recording does not change the outcomes of the presented technique in a significant way.", "keywords": "ASR;NLU;Speech Recognition;TTS;Back Transcription;Evaluation;Robustness", "primary_area": "", "supplementary_material": "", "author": "Marek Kubis;Pawe\u0142 Marek Sk\u00f3rzewski;Marcin Sowanski;TOMASZ ZI\u0118TKIEWICZ", "authorids": 
"~Marek_Kubis1;~Pawe\u0142_Marek_Sk\u00f3rzewski1;~Marcin_Sowanski1;~TOMASZ_ZI\u0118TKIEWICZ1", "gender": ";M;;M", "homepage": "https://marekkubis.com;https://skorzewski.github.io;;", "dblp": "15/8157;78/8936;273/5500;", "google_scholar": "FMa0NlkAAAAJ;https://scholar.google.pl/citations?user=B3ggAfoAAAAJ;KIiWs55Sm38C;", "or_profile": "~Marek_Kubis1;~Pawe\u0142_Marek_Sk\u00f3rzewski1;~Marcin_Sowanski1;~TOMASZ_ZI\u0118TKIEWICZ1", "aff": "Adam Mickiewicz University of Poznan;Adam Mickiewicz University of Poznan;Warsaw University of Technology;Samsung", "aff_domain": "amu.edu.pl;amu.edu.pl;pw.edu.pl;samsung.com", "position": "Assistant Professor;Assistant Professor;PhD student;Researcher", "bibtex": "@inproceedings{\nkubis2023back,\ntitle={Back Transcription as a Method for Evaluating Robustness of Natural Language Understanding Models to Speech Recognition Errors},\nauthor={Marek Kubis and Pawe{\\l} Marek Sk{\\'o}rzewski and Marcin Sowanski and TOMASZ ZI{\\k{E}}TKIEWICZ},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=HkXbOUaL4W}\n}", "github": "", "project": "", "reviewers": "ki6r;jqhQ;PdVj;mLMm", "site": "https://openreview.net/forum?id=HkXbOUaL4W", "pdf_size": 0, "rating": "4;4;4;4", "confidence": "4;3;4;4", "excitement": "2;4;4;2", "reproducibility": "4;3;4;3", "correctness": "2;4;4;2", "rating_avg": 4.0, "confidence_avg": 3.75, "excitement_avg": 3.0, "reproducibility_avg": 3.5, "correctness_avg": 3.0, "replies_avg": 13, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-2016-2598;0000-0002-5056-2808;;0000-0002-2594-4660", "linkedin": "marekkubis/;pawe%C5%82-sk%C3%B3rzewski-44ba9782;;tzietkiewicz/", "aff_unique_index": "0;0;1;2", "aff_unique_norm": "Adam Mickiewicz University;Warsaw University of Technology;Samsung", "aff_unique_dep": ";;Samsung", "aff_unique_url": "https://www.amu.edu.pl;https://www.pw.edu.pl;https://www.samsung.com", "aff_unique_abbr": "AMU;WUT;Samsung", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Poznan;", "aff_country_unique_index": "0;0;0;1", "aff_country_unique": "Poland;South Korea" }, { "id": "Hkj3WyR1JB", "title": "EconBERTa: Towards Robust Extraction of Named Entities in Economics", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Adapting general-purpose language models has proven to be effective in tackling downstream tasks within specific domains. In this paper, we address the task of extracting entities from the economics literature on impact evaluation. To this end, we release EconBERTa, a large language model pretrained on scientific publications in economics, and ECON-IE, a new expert-annotated dataset of economics abstracts for Named Entity Recognition (NER). We find that EconBERTa reaches state-of-the-art performance on our downstream NER task. Additionally, we extensively analyze the model's generalization capacities, finding that most errors correspond to detecting only a subspan of an entity or failure to extrapolate to longer sequences. This limitation is primarily due to an inability to detect part-of-speech sequences unseen during training, and this effect diminishes when the number of unique instances in the training set increases. 
Examining the generalization abilities of domain-specific language models paves the way towards improving the robustness of NER models for causal knowledge extraction.", "keywords": "Named Entity Recognition;Large Language Model;Domain Adaptation;Generalization", "primary_area": "", "supplementary_material": "", "author": "Karim Lasri;Pedro Vitor Quinta de Castro;Mona Schirmer;Luis Eduardo San Martin;Linxi Wang;Tom\u00e1\u0161 Dulka;Haaya Naushan;John Pougu\u00e9-Biyong;Arianna Legovini;Samuel Fraiberger", "authorids": "~Karim_Lasri1;~Pedro_Vitor_Quinta_de_Castro1;~Mona_Schirmer1;~Luis_Eduardo_San_Martin1;~Linxi_Wang1;~Tom\u00e1\u0161_Dulka1;~Haaya_Naushan1;~John_Pougu\u00e9-Biyong1;~Arianna_Legovini1;~Samuel_Fraiberger1", "gender": ";M;F;M;F;;;Not Specified;F;", "homepage": ";;https://monasch.github.io/;https://blogs.worldbank.org/team/luis-eduardo-luise-san-martin;;;;;http://www.worldbank.org;", "dblp": ";;307/2876;;;362/8681;;;;", "google_scholar": ";aoyIGdoAAAAJ;https://scholar.google.com/citations?hl=en;;;;;QR7un6AAAAAJ;;", "or_profile": "~Karim_Lasri1;~Pedro_Vitor_Quinta_de_Castro1;~Mona_Schirmer1;~Luis_Eduardo_San_Martin1;~Linxi_Wang1;~Tom\u00e1\u0161_Dulka1;~Haaya_Naushan1;~John_Pougu\u00e9-Biyong1;~Arianna_Legovini1;~Samuel_Fraiberger1", "aff": ";Universidade Federal de Goi\u00e1s;University of Amsterdam;;World Bank;World Bank;;University of Oxford;The world bank ;", "aff_domain": ";ufg.br;uva.nl;;worldbank.org;worldbank.org;;ox.ac.uk;worldbank.org;", "position": ";PhD student;PhD student;;Researcher;Researcher;;PhD student;Director;", "bibtex": "@inproceedings{\nlasri2023econberta,\ntitle={Econ{BERT}a: Towards Robust Extraction of Named Entities in Economics},\nauthor={Karim Lasri and Pedro Vitor Quinta de Castro and Mona Schirmer and Luis Eduardo San Martin and Linxi Wang and Tom{\\'a}{\\v{s}} Dulka and Haaya Naushan and John Pougu{\\'e}-Biyong and Arianna Legovini and Samuel Fraiberger},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=Hkj3WyR1JB}\n}", "github": "", "project": "", "reviewers": "Cyad;iiaL;XXaT;GfJT", "site": "https://openreview.net/forum?id=Hkj3WyR1JB", "pdf_size": 0, "rating": "3;3;3;3", "confidence": "4;4;4;4", "excitement": "3;4;3;4", "reproducibility": "4;2;4;4", "correctness": "4;3;3;4", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 3.5, "reproducibility_avg": 3.5, "correctness_avg": 3.5, "replies_avg": 14, "authors#_avg": 10, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;;;;;", "linkedin": ";;https://linkedin.com/in/mona-schirmer-731365119;;linxi-wang-35093324/;tomasdulka/;;;;", "aff_unique_index": "0;1;2;2;3;2", "aff_unique_norm": "Universidade Federal de Goi\u00e1s;University of Amsterdam;World Bank;University of Oxford", "aff_unique_dep": ";;;", "aff_unique_url": "http://www.ufg.br;https://www.uva.nl;https://www.worldbank.org;https://www.ox.ac.uk", "aff_unique_abbr": "UFG;UvA;WB;Oxford", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2;2;3;2", "aff_country_unique": "Brazil;Netherlands;United States;United Kingdom" }, { "id": "HsGirsKN5l", "title": "Addressing the Length Bias Challenge in Document-Level Neural Machine Translation", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Document-level neural machine translation (DNMT) has shown promising results by incorporating context information through increased maximum lengths of source and target sentences. 
However, this approach also introduces a length bias problem, whereby DNMT suffers from significant translation quality degradation when decoding sentences that are much shorter or longer than the maximum sentence length during training. To prevent the model from neglecting shorter sentences, we sample the training data to ensure a more uniform distribution across different sentence lengths while progressively increasing the maximum sentence length during training. Additionally, we introduce a length-normalized attention mechanism to aid the model in focusing on target information, mitigating the issue of attention divergence when processing longer sentences. Furthermore, during the decoding stage of DNMT, we propose a sliding decoding strategy that limits the length of target sentences to not exceed the maximum length encountered during training. The experimental results indicate that our method can achieve state-of-the-art results on several open datasets, and further analysis shows that our method can significantly alleviate the length bias problem.", "keywords": "Document;Machine Translation;Length Bias", "primary_area": "", "supplementary_material": "", "author": "Zhang Zhuocheng;Shuhao Gu;Min zhang;Yang Feng", "authorids": "~Zhang_Zhuocheng1;~Shuhao_Gu1;~Min_zhang14;~Yang_Feng4", "gender": "M;M;M;", "homepage": "https://github.com/salvation-z;;https://zhangmin-nlp-ai.github.io/;http://people.ucas.edu.cn/~yangfeng?language=en", "dblp": ";239/5079;83/5342-?;07/6095-4.html", "google_scholar": ";PED7pDIAAAAJ;https://scholar.google.com/citations?;https://scholar.google.com/citations?hl=en", "or_profile": "~Zhang_Zhuocheng1;~Shuhao_Gu1;~Min_zhang14;~Yang_Feng4", "aff": "Institute of Computing Technology, Chinese Academy of Sciences;, Chinese Academy of Sciences;Harbin Institute of Technology;Institute of Computing Technology, Chinese Academy of Sciences", "aff_domain": "ict.ac.cn;ict.ac.cn;hit.edu.cn;ict.ac.cn", "position": "PhD student;PhD student;Full Professor;Full Professor", "bibtex": "@inproceedings{\nzhuocheng2023addressing,\ntitle={Addressing the Length Bias Challenge in Document-Level Neural Machine Translation},\nauthor={Zhang Zhuocheng and Shuhao Gu and Min zhang and Yang Feng},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=HsGirsKN5l}\n}", "github": "", "project": "", "reviewers": "ap6j;VHhW;Wr8v;J2RQ", "site": "https://openreview.net/forum?id=HsGirsKN5l", "pdf_size": 0, "rating": "4;4;4;4", "confidence": "3;4;3;3", "excitement": "3;4;3;3", "reproducibility": "4;4;3;3", "correctness": "3;3;4;3", "rating_avg": 4.0, "confidence_avg": 3.25, "excitement_avg": 3.25, "reproducibility_avg": 3.5, "correctness_avg": 3.25, "replies_avg": 13, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0002-3895-5510;", "linkedin": ";;;", "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Chinese Academy of Sciences;Harbin Institute of Technology", "aff_unique_dep": "Institute of Computing Technology;", "aff_unique_url": "http://www.ict.ac.cn;http://www.hit.edu.cn/", "aff_unique_abbr": "CAS;HIT", "aff_campus_unique_index": "1", "aff_campus_unique": ";Harbin", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "HsvZUde6wT", "title": "Asking Clarification Questions to Handle Ambiguity in Open-Domain QA", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Ambiguous questions persist in 
open-domain question answering, because formulating a precise question with a unique answer is often challenging. Previous works have tackled this issue by asking disambiguated questions for all possible interpretations of the ambiguous question. Instead, we propose to ask a clarification question, where the user's response will help identify the interpretation that best aligns with the user's intention. We first present CAmbigNQ, a dataset consisting of 5,653 ambiguous questions, each with relevant passages, possible answers, and a clarification question. The clarification questions were efficiently created by generating them using InstructGPT and manually revising them as necessary. We then define a pipeline of three tasks---(1) ambiguity detection, (2) clarification question generation, and (3) clarification-based QA. In the process, we adopt or design appropriate evaluation metrics to facilitate sound research. Lastly, we achieve F1 of 61.3, 25.1, and 40.5 on the three tasks, demonstrating the need for further improvements while providing competitive baselines for future work.", "keywords": "Clarification Question;Open-domain Question Answering", "primary_area": "", "supplementary_material": "", "author": "Dongryeol Lee;Segwang Kim;Minwoo Lee;Hwanhee Lee;Joonsuk Park;Sang-Woo Lee;Kyomin Jung", "authorids": "~Dongryeol_Lee2;~Segwang_Kim1;~Minwoo_Lee2;~Hwanhee_Lee1;~Joonsuk_Park1;~Sang-Woo_Lee1;~Kyomin_Jung1", "gender": "M;M;M;M;M;M;M", "homepage": "https://dongryeollee96.github.io/;https://segwangkim.github.io/;;https://hwanheelee1993.github.io/;http://www.joonsuk.org;https://www.sang-woo-lee.com/;http://milab.snu.ac.kr/kjung/index.html", "dblp": ";220/3735;;218/5402;50/9717;31/5983-1;48/3867", "google_scholar": "vgTGZ10AAAAJ;;;eRM8zHkAAAAJ;3SPMM3oAAAAJ;https://scholar.google.co.kr/citations?user=TMTTMuQAAAAJ;https://scholar.google.co.kr/citations?user=u3uMl4MAAAAJ", "or_profile": "~Dongryeol_Lee2;~Segwang_Kim1;~Minwoo_Lee2;~Hwanhee_Lee1;~Joonsuk_Park1;~Sang-Woo_Lee1;~Kyomin_Jung1", "aff": "Seoul National University;;Seoul National University;Seoul National University;University of Richmond;Korea Advanced Institute of Science & Technology;Seoul National University", "aff_domain": "snu.ac.kr;;snu.ac.kr;snu.ac.kr;richmond.edu;kaist.ac.kr;snu.ac.kr", "position": "PhD student;;PhD student;Postdoc;Assistant Professor;Adjunct Professor;Full Professor", "bibtex": "@inproceedings{\nlee2023asking,\ntitle={Asking Clarification Questions to Handle Ambiguity in Open-Domain {QA}},\nauthor={Dongryeol Lee and Segwang Kim and Minwoo Lee and Hwanhee Lee and Joonsuk Park and Sang-Woo Lee and Kyomin Jung},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=HsvZUde6wT}\n}", "github": "", "project": "", "reviewers": "SP89;1HwZ;Sdsh", "site": "https://openreview.net/forum?id=HsvZUde6wT", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;3", "excitement": "3;2;4", "reproducibility": "3;4;2", "correctness": "4;3;4", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.0, "reproducibility_avg": 3.0, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;0000-0002-1182-4836;;", "linkedin": ";segwang-kim-9620a2149/;;hwanhee-lee-69a435133/?originalSubdomain=;;;", "aff_unique_index": "0;0;0;1;2;0", "aff_unique_norm": "Seoul National University;University of Richmond;Korea Advanced Institute of Science and 
Technology", "aff_unique_dep": ";;", "aff_unique_url": "https://www.snu.ac.kr;https://www.richmond.edu;https://www.kaist.ac.kr", "aff_unique_abbr": "SNU;UR;KAIST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;0;0", "aff_country_unique": "South Korea;United States" }, { "id": "HtNQXg979A", "title": "Models See Hallucinations: Evaluating the Factuality in Video Captioning", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Video captioning aims to describe events in a video with natural language. In recent years, many works have focused on improving captioning models' performance. However, like other text generation tasks, it risks introducing factual errors not supported by the input video. Factual errors can seriously affect the quality of the generated text, sometimes making it completely unusable. Although factual consistency has received much research attention in text-to-text tasks (e.g., summarization), it is less studied in vision-based text generation. In this work, we conduct the first human evaluation of the factuality in video captioning and annotate two factuality datasets. We find that 56\\% of the model-generated sentences have factual errors, indicating it is a severe problem in this field, but existing evaluation metrics show little correlation with human factuality annotation. We further propose a weakly-supervised, model-based factuality metric FactVC, which outperforms previous metrics on factuality evaluation of video captioning.", "keywords": "Hallucination;Factuality Evaluation;Video Captioning", "primary_area": "", "supplementary_material": "", "author": "Hui Liu;Xiaojun Wan", "authorids": "~Hui_Liu11;~Xiaojun_Wan1", "gender": "M;M", "homepage": "https://pkuliuhui.github.io/personal-page/;https://wanxiaojun.github.io", "dblp": "93/4010-34;07/1521", "google_scholar": ";lTTeBdkAAAAJ", "or_profile": "~Hui_Liu11;~Xiaojun_Wan1", "aff": "Peking University;Peking University", "aff_domain": "pku.edu.cn;pku.edu.cn", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\nliu2023models,\ntitle={Models See Hallucinations: Evaluating the Factuality in Video Captioning},\nauthor={Hui Liu and Xiaojun Wan},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=HtNQXg979A}\n}", "github": "", "project": "", "reviewers": "nfxG;joFz;1XK7", "site": "https://openreview.net/forum?id=HtNQXg979A", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;4;4", "excitement": "3;4;4", "reproducibility": "3;4;4", "correctness": "5;4;4", "rating_avg": 5.0, "confidence_avg": 4.0, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 4.333333333333333, "replies_avg": 11, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-3905-0668;", "linkedin": ";", "aff_unique_index": "0;0", "aff_unique_norm": "Peking University", "aff_unique_dep": "", "aff_unique_url": "http://www.pku.edu.cn", "aff_unique_abbr": "Peking U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "id": "HtQvhCRTxo", "title": "CORE: A Few-Shot Company Relation Classification Dataset for Robust Domain Adaptation.", "track": "main", "status": "Long Main", "tldr": "", "abstract": "We introduce CORE, a dataset for few-shot relation classification (RC) focused on company relations and business entities. 
CORE includes 4,708 instances of 12 relation types with corresponding textual evidence extracted from company Wikipedia pages. Company names and business entities pose a challenge for few-shot RC models due to the rich and diverse information associated with them. For example, a company name may represent the legal entity, products, people, or business divisions depending on the context. Therefore, deriving the relation type between entities is highly dependent on textual context. To evaluate the performance of state-of-the-art RC models on the CORE dataset, we conduct experiments in the few-shot domain adaptation setting. Our results reveal substantial performance gaps, confirming that models trained on different domains struggle to adapt to CORE. Interestingly, we find that models trained on CORE showcase improved out-of-domain performance, which highlights the importance of high-quality data for robust domain generalization. Specifically, the information richness embedded in business entities allows models to focus on contextual nuances, reducing their reliance on superficial clues such as relation-specific verbs. In addition to the dataset, we provide relevant code snippets to facilitate reproducibility and encourage further research in the field. The CORE dataset and code are publicly available at \\url{https://anonymous.4open.science/r/CORE-D377}.", "keywords": "Few-shot learning;Relation classification;Business application of NLP;Information extraction", "primary_area": "", "supplementary_material": "", "author": "Philipp Borchert;Jochen De Weerdt;Kristof Coussement;Arno De Caigny;Marie-Francine Moens", "authorids": "~Philipp_Borchert1;~Jochen_De_Weerdt1;~Kristof_Coussement1;~Arno_De_Caigny1;~Marie-Francine_Moens1", "gender": "M;M;M;M;F", "homepage": "https://icma.ieseg.fr/philipp-borchert/;http://www.jochendeweerdt.com/;http://www.kristofcoussement.com/;;https://people.cs.kuleuven.be/~sien.moens/", "dblp": "338/1017;41/9119.html;;;m/MarieFrancineMoens", "google_scholar": "efKKfygAAAAJ;26i8eZMAAAAJ;t-rrqeQAAAAJ;;https://scholar.google.com.tw/citations?user=O9hYMUUAAAAJ", "or_profile": "~Philipp_Borchert1;~Jochen_De_Weerdt1;~Kristof_Coussement1;~Arno_De_Caigny1;~Marie-Francine_Moens1", "aff": "KU Leuven;KU Leuven;I\u00c9SEG School of Management;I\u00c9SEG School of Management;KU Leuven, KU Leuven", "aff_domain": "kuleuven.be;kuleuven.be;ieseg.fr;ieseg.fr;cs.kuleuven.be", "position": "PhD student;Associate Professor;Full Professor;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nborchert2023core,\ntitle={{CORE}: A Few-Shot Company Relation Classification Dataset for Robust Domain Adaptation.},\nauthor={Philipp Borchert and Jochen De Weerdt and Kristof Coussement and Arno De Caigny and Marie-Francine Moens},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=HtQvhCRTxo}\n}", "github": "", "project": "", "reviewers": "kwUM;J5cv;qrfS", "site": "https://openreview.net/forum?id=HtQvhCRTxo", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;3;4", "excitement": "3;3;3", "reproducibility": "5;3;4", "correctness": "3;3;3", "rating_avg": 3.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.0, "reproducibility_avg": 4.0, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-5533-4281;0000-0001-6151-0504;0000-0003-1346-9425;;0000-0002-3732-9323", "linkedin": 
";;;arnodecaigny/;marie-francine-moens-8175a56/?originalSubdomain=be", "aff_unique_index": "0;0;1;1;2", "aff_unique_norm": "Katholieke Universiteit Leuven;I\u00c9SEG School of Management;KU Leuven", "aff_unique_dep": ";;", "aff_unique_url": "https://www.kuleuven.be;https://www.ieseg.fr;https://www.kuleuven.be", "aff_unique_abbr": "KU Leuven;I\u00c9SEG;KU Leuven", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;1;0", "aff_country_unique": "Belgium;France" }, { "id": "HvYxdKPqYt", "title": "A Frustratingly Easy Plug-and-Play Detection-and-Reasoning Module for Chinese Spelling Check", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "In recent years, Chinese Spelling Check (CSC) has been greatly improved by designing task-specific pre-training methods or introducing auxiliary tasks, which mostly solve this task in an end-to-end fashion. In this paper, we propose to decompose the CSC workflow into detection, reasoning, and searching subtasks so that the rich external knowledge about the Chinese language can be leveraged more directly and efficiently. Specifically, we design a plug-and-play detection-and-reasoning module that is compatible with existing SOTA non-autoregressive CSC models to further boost their performance. We find that the detection-and-reasoning module trained for one model can also benefit other models. We also study the primary interpretability provided by the task decomposition. Extensive experiments and detailed analyses demonstrate the effectiveness and competitiveness of the proposed module.", "keywords": "natural language processing;chinese spelling check", "primary_area": "", "supplementary_material": "", "author": "Haojing Huang;Jingheng Ye;Qingyu Zhou;Yinghui Li;Yangning Li;Feng Zhou;Hai-Tao Zheng", "authorids": "~Haojing_Huang1;~Jingheng_Ye1;~Qingyu_Zhou1;~Yinghui_Li1;~Yangning_Li1;~Feng_Zhou10;~Hai-Tao_Zheng2", "gender": ";M;M;M;M;;M", "homepage": ";https://github.com/yejh123;https://res.qyzhou.me/;https://github.com/geekjuruo;https://github.com/HUSTLyn;;https://www.sigs.tsinghua.edu.cn/fg3/105069.jhtml", "dblp": ";331/8438.html;199/2091;243/8822.html;315/0403;;20/134-2", "google_scholar": ";Zm_L_EUAAAAJ;buLOsq0AAAAJ;xTM9pKsAAAAJ;https://scholar.google.com.hk/citations?user=BmX7lQkAAAAJ;;https://scholar.google.com.hk/citations?user=7VPeORoAAAAJ", "or_profile": "~Haojing_Huang1;~Jingheng_Ye1;~Qingyu_Zhou1;~Yinghui_Li1;~Yangning_Li1;~Feng_Zhou10;~Hai-Tao_Zheng2", "aff": ";Tsinghua University;OPPO Research Institute;Tsinghua University;Tsinghua University;oppo;Tsinghua University", "aff_domain": ";mail.tsinghua.edu.cn;oppo.com;tsinghua.edu.cn;tsinghua.edu.cn;oppo.com;tsinghua.edu.cn", "position": ";MS student;Researcher;PhD student;PhD student;Researcher;Associate Professor", "bibtex": "@inproceedings{\nhuang2023a,\ntitle={A Frustratingly Easy Plug-and-Play Detection-and-Reasoning Module for Chinese Spelling Check},\nauthor={Haojing Huang and Jingheng Ye and Qingyu Zhou and Yinghui Li and Yangning Li and Feng Zhou and Hai-Tao Zheng},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=HvYxdKPqYt}\n}", "github": "", "project": "", "reviewers": "frro;cyLs;iyvk", "site": "https://openreview.net/forum?id=HvYxdKPqYt", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;4;3", "excitement": "3;2;4", "reproducibility": "4;3;4", "correctness": "3;2;3", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.0, 
"reproducibility_avg": 3.6666666666666665, "correctness_avg": 2.6666666666666665, "replies_avg": 11, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0009-0005-9366-4985;0000-0002-4389-1582;;;;0000-0001-5128-5649", "linkedin": ";;;;;https://www.linkedin.cn/incareer/in/ACoAAAZ3yxsBrldyOHYFyES7S-dIaH008xk5NLc;", "aff_unique_index": "0;1;0;0;2;0", "aff_unique_norm": "Tsinghua University;OPPO Research Institute;OPPO", "aff_unique_dep": ";;", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.oppo.com/en;https://www.oppo.com", "aff_unique_abbr": "THU;OPPO RI;Oppo", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "HzecOxOGAS", "title": "KeFVP: Knowledge-enhanced Financial Volatility Prediction", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Financial volatility prediction is vital for indicating a company's risk profile. Transcripts of companies' earnings calls are important unstructured data sources to be utilized to access companies' performance and risk profiles. However, current works ignore the role of financial metrics knowledge (such as EBIT, EPS, and ROI) in transcripts, which is crucial for understanding companies' performance, and little consideration is given to integrating text and price information. In this work, we statistic common financial metrics and make a special dataset based on these metrics. Then, we introduce a knowledge-enhanced financial volatility prediction method (KeFVP) to inject knowledge of financial metrics into text comprehension by knowledge-enhanced adaptive pre-training (KePt) and effectively incorporating text and price information by introducing a conditional time series prediction module. 
We conduct extensive experiments on three real-world public datasets, and the results indicate that KeFVP is effective and outperforms all the state-of-the-art methods.", "keywords": "Volatility forecasting;Finance;Text mining", "primary_area": "", "supplementary_material": "", "author": "Hao Niu;Yun Xiong;Xiaosu Wang;Wenjing Yu;Yao Zhang;Weizu Yang", "authorids": "~Hao_Niu2;~Yun_Xiong1;~Xiaosu_Wang1;~Wenjing_Yu1;~Yao_Zhang6;~Weizu_Yang1", "gender": ";F;M;F;M;", "homepage": "https://scholar.google.com.sg/citations?user=UHj1UuQAAAAJ&hl=zh-CN;https://dblp.org/pid/67/4330;;https://yiyayybj.github.io/;https://github.com/yzhang1918;", "dblp": "https://dblp.uni-trier.de/pid/06/10116.html?view=by-year;67/4330;;;57/3892-9;", "google_scholar": "https://scholar.google.com.sg/citations?user=UHj1UuQAAAAJ;;;;UwKOx_IAAAAJ;", "or_profile": "~Hao_Niu2;~Yun_Xiong1;~Xiaosu_Wang1;~Wenjing_Yu1;~Yao_Zhang6;~Weizu_Yang1", "aff": "Fudan University;Fudan University;Fudan University;Fudan University;Fudan University;", "aff_domain": "fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;", "position": "PhD student;Full Professor;PhD student;MS student;Postdoc;", "bibtex": "@inproceedings{\nniu2023kefvp,\ntitle={Ke{FVP}: Knowledge-enhanced Financial Volatility Prediction},\nauthor={Hao Niu and Yun Xiong and Xiaosu Wang and Wenjing Yu and Yao Zhang and Weizu Yang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=HzecOxOGAS}\n}", "github": "", "project": "", "reviewers": "8Q4Z;HZuA;QcTY", "site": "https://openreview.net/forum?id=HzecOxOGAS", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;2;4", "excitement": "3;3;4", "reproducibility": "4;4;4", "correctness": "3;4;4", "rating_avg": 3.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.0, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0003-3818-5816;0000-0002-8575-5415;0000-0002-8180-8604;;0000-0003-1481-8826;", "linkedin": ";;;;;", "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Fudan University", "aff_unique_dep": "", "aff_unique_url": "https://www.fudan.edu.cn", "aff_unique_abbr": "Fudan", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "I13VHLJjLO", "title": "Reward-Augmented Decoding: Efficient Controlled Text Generation With a Unidirectional Reward Model", "track": "main", "status": "Short Main", "tldr": "", "abstract": "While large language models have proven effective in a huge range of downstream applications, they often generate text that is problematic or lacks a desired attribute. In this paper, we introduce Reward-Augmented Decoding (RAD), a text generation procedure that uses a small unidirectional reward model to encourage a language model to generate text that has certain properties. Specifically, RAD uses the reward model to score generations as they are produced and rescales sampling probabilities to favor high-reward tokens. By using a unidirectional reward model, RAD can cache activations from prior generation steps to decrease computational overhead. 
Through experiments on generating non-toxic and sentiment-controlled text, we demonstrate that RAD performs best among methods that change only the generation procedure and matches the performance of state-of-the-art methods that involve re-training the language model. We further validate that RAD is effective on very large language models while incurring a minimal computational overhead.", "keywords": "controllable text generation;reward modeling;weighted decoding", "primary_area": "", "supplementary_material": "", "author": "Haikang Deng;Colin Raffel", "authorids": "~Haikang_Deng1;~Colin_Raffel1", "gender": "M;", "homepage": "https://www.linkedin.com/in/haikang-deng-178550179/;http://colinraffel.com", "dblp": "334/0361;149/0082", "google_scholar": "https://scholar.google.com/citations?hl=en;I66ZBYwAAAAJ", "or_profile": "~Haikang_Deng1;~Colin_Raffel1", "aff": "Department of Computer Science, University of North Carolina at Chapel Hill;University of North Carolina, Chapel Hill", "aff_domain": "cs.unc.edu;unc.edu", "position": "Undergrad student;Assistant Professor", "bibtex": "@inproceedings{\ndeng2023rewardaugmented,\ntitle={Reward-Augmented Decoding: Efficient Controlled Text Generation With a Unidirectional Reward Model},\nauthor={Haikang Deng and Colin Raffel},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=I13VHLJjLO}\n}", "github": "", "project": "", "reviewers": "Ekrw;GRYE;U1EG", "site": "https://openreview.net/forum?id=I13VHLJjLO", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;3;3", "excitement": "4;3;4", "reproducibility": "4;4;4", "correctness": "4;3;4", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.0, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";", "linkedin": "haikang-deng-178550179/;", "aff_unique_index": "0;1", "aff_unique_norm": "University of North Carolina at Chapel Hill;University of North Carolina", "aff_unique_dep": "Department of Computer Science;", "aff_unique_url": "https://www.unc.edu;https://www.unc.edu", "aff_unique_abbr": "UNC Chapel Hill;UNC", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Chapel Hill", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "I4BFSevtRv", "title": "Domain Adaptation for Sentiment Analysis Using Robust Internal Representations", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Sentiment analysis is a costly yet necessary task for enterprises to study the opinions of their customers to improve their products and to determine optimal marketing strategies. Due to the existence of a wide range of domains across different products and services, cross-domain sentiment analysis methods have received significant attention. These methods mitigate the domain gap between different applications by training cross-domain generalizable classifiers which relax the need for data annotation for each domain. We develop a domain adaptation method which induces large margins between data representations that belong to different classes in an embedding space. This embedding space is trained to be domain-agnostic by matching the data distributions across the domains. Large interclass margins in the source domain help to reduce the effect of ``domain shift'' in the target domain. 
Theoretical and empirical analyses are provided to demonstrate that the proposed method is effective.", "keywords": "domain adaptation;sentiment analysis", "primary_area": "", "supplementary_material": "", "author": "Mohammad Rostami;Digbalay Bose;Shrikanth Narayanan;Aram Galstyan", "authorids": "~Mohammad_Rostami1;~Digbalay_Bose2;~Shrikanth_Narayanan1;~Aram_Galstyan1", "gender": "M;M;M;M", "homepage": "https://viterbi.usc.edu/directory/faculty/Rostami/Mohammad;https://digbose92.github.io/;http://sail.usc.edu/people/shri.html;http://www.isi.edu/~galstyan", "dblp": "83/9890;126/2297;19/3899;16/3411", "google_scholar": "Uzx8nLoAAAAJ;https://scholar.google.co.in/citations?user=Jf6gjLUAAAAJ;8EDHmYkAAAAJ;rJTwW0MAAAAJ", "or_profile": "~Mohammad_Rostami1;~Digbalay_Bose2;~Shrikanth_Narayanan1;~Aram_Galstyan1", "aff": "USC/ISI;University of Southern California;University of Southern California;Amazon Alexa", "aff_domain": "isi.edu;usc.edu;usc.edu;amazon.com", "position": "Research Scientist;PhD student;Full Professor;Scholar", "bibtex": "@inproceedings{\nrostami2023domain,\ntitle={Domain Adaptation for Sentiment Analysis Using Robust Internal Representations},\nauthor={Mohammad Rostami and Digbalay Bose and Shrikanth Narayanan and Aram Galstyan},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=I4BFSevtRv}\n}", "github": "", "project": "", "reviewers": "Nxo2;Sndy;iCKc", "site": "https://openreview.net/forum?id=I4BFSevtRv", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;4;2", "excitement": "4;3;2", "reproducibility": "4;4;2", "correctness": "3;3;3", "rating_avg": 3.0, "confidence_avg": 3.0, "excitement_avg": 3.0, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0002-1052-6204;", "linkedin": ";digbalay-bose-11146251/;shrikanth-narayanan/;aram-galstyan-4a01373/", "aff_unique_index": "0;0;0;1", "aff_unique_norm": "University of Southern California;Amazon", "aff_unique_dep": ";Amazon Alexa", "aff_unique_url": "https://isi.usc.edu;https://www.amazon.com/alexa", "aff_unique_abbr": "USC;Amazon Alexa", "aff_campus_unique_index": "0;1;1", "aff_campus_unique": "ISI;Los Angeles;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "I5BnQIgQIM", "title": "From Parse-Execute to Parse-Execute-Refine: Improving Semantic Parser for Complex Question Answering over Knowledge Base", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Parsing questions into executable logical forms has shown impressive results for knowledge-base question answering (KBQA). However, complex KBQA is a more challenging task that requires performing complex multi-step reasoning. Recently, a new semantic parser called KoPL has been proposed to explicitly model the reasoning processes, which achieved state-of-the-art results on complex KBQA. In this paper, we further explore how to unlock the reasoning ability of semantic parsers through a simple parse-execute-refine paradigm. We refine and improve the KoPL parser by demonstrating the executed intermediate reasoning steps to the KBQA model. We show that such a simple strategy can significantly improve the ability of complex reasoning. Specifically, we propose three components: a parsing stage, an execution stage and a refinement stage, to enhance the ability of complex reasoning.
The parser uses KoPL to generate transparent logical forms. Then, the execution stage aligns and executes the logical forms over the knowledge base to obtain intermediate reasoning processes. Finally, the intermediate step-by-step reasoning processes are demonstrated to the KBQA model in the refinement stage. With the explicit reasoning processes, it is much easier to answer complex questions. Experiments on the benchmark dataset show that the proposed PER-KBQA performs significantly better than the state-of-the-art baselines on complex KBQA.", "keywords": "KBQA;Logical Form;BART", "primary_area": "", "supplementary_material": "", "author": "Wangzhen Guo;Linyin Luo;Hanjiang Lai;Jian Yin", "authorids": "~Wangzhen_Guo1;~Linyin_Luo1;~Hanjiang_Lai3;~Jian_Yin3", "gender": "M;;M;M", "homepage": "https://www.semanticscholar.org/author/Wangzhen-Guo/2187751363;https://www.semanticscholar.org/author/Linyin-Luo/2191083713;;http://sai.sysu.edu.cn/teacher/teacher01/1385356.htm", "dblp": ";;31/9937;95/578-1", "google_scholar": ";;9LkhGDgAAAAJ;", "or_profile": "~Wangzhen_Guo1;~Linyin_Luo1;~Hanjiang_Lai3;~Jian_Yin3", "aff": "SUN YAT-SEN UNIVERSITY;SUN YAT-SEN UNIVERSITY;SUN YAT-SEN UNIVERSITY;SUN YAT-SEN UNIVERSITY", "aff_domain": "sysu.edu.cn;sysu.edu.cn;sysu.edu.cn;sysu.edu.cn", "position": "MS student;Undergrad student;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nguo2023from,\ntitle={From Parse-Execute to Parse-Execute-Refine: Improving Semantic Parser for Complex Question Answering over Knowledge Base},\nauthor={Wangzhen Guo and Linyin Luo and Hanjiang Lai and Jian Yin},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=I5BnQIgQIM}\n}", "github": "", "project": "", "reviewers": "poiZ;yo9y;MbeL", "site": "https://openreview.net/forum?id=I5BnQIgQIM", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;3;5", "excitement": "4;4;3", "reproducibility": "4;5;4", "correctness": "5;4;4", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.333333333333333, "correctness_avg": 4.333333333333333, "replies_avg": 10, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;", "linkedin": ";;;", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Sun Yat-sen University", "aff_unique_dep": "", "aff_unique_url": "http://www.sysu.edu.cn", "aff_unique_abbr": "SYSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "I5NWLjXbQl", "title": "ACQUIRED: A Dataset for Answering Counterfactual Questions In Real-Life Videos", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Multimodal counterfactual reasoning is a vital yet challenging ability for AI systems. It involves predicting the outcomes of hypothetical circumstances based on vision and language inputs, which enables AI models to learn from failures and explore hypothetical scenarios. Despite its importance, there are only a few datasets targeting the counterfactual reasoning abilities of multimodal models. Moreover, they only cover reasoning over synthetic environments or specific types of events (e.g., traffic collisions), making it hard to reliably benchmark model generalization ability across diverse real-world scenarios and reasoning dimensions.
To overcome these limitations, we develop a video question answering dataset, ACQUIRED: it consists of 3.9K annotated videos, encompassing a wide range of event types and incorporating both first and third-person viewpoints, which ensures a focus on real-world diversity. In addition, each video is annotated with questions that span three distinct dimensions of reasoning, including physical, social, and temporal, which can comprehensively evaluate the model counterfactual abilities along multiple aspects. We benchmark our dataset against several state-of-the-art language-only and multimodal models and experimental results demonstrate a significant performance gap (>13%) between models and humans. The findings suggest that multimodal counterfactual reasoning remains an open challenge and ACQUIRED is a comprehensive and reliable benchmark for inspiring future research in this direction.", "keywords": "Counterfactual reasoning;Video Question Answering;Commonsense;Multimodal", "primary_area": "", "supplementary_material": "", "author": "Te-Lin Wu;Zi-Yi Dou;Qingyuan Hu;Yu Hou;Nischal Reddy Chandra;Marjorie Freedman;Ralph M. Weischedel;Nanyun Peng", "authorids": "~Te-Lin_Wu1;~Zi-Yi_Dou1;~Qingyuan_Hu2;~Yu_Hou1;~Nischal_Reddy_Chandra1;~Marjorie_Freedman1;~Ralph_M._Weischedel1;~Nanyun_Peng1", "gender": "M;;F;F;M;;M;F", "homepage": "https://telin0411.github.io/;https://zdou0830.github.io/;;https://houyu0930.github.io/;;https://www.isi.edu/people/mrf/about;;https://violetpeng.github.io/", "dblp": "166/3298;205/8985;;;;93/4232;15/4714;117/4036", "google_scholar": "Q5aezXQAAAAJ;RWogNsEAAAAJ;;2U1WiMMAAAAJ;https://scholar.google.co.in/citations?user=1Hqy7eMAAAAJ;bVN5VwEAAAAJ;guhccUcAAAAJ;XxRXvX0AAAAJ", "or_profile": "~Te-Lin_Wu1;~Zi-Yi_Dou1;~Qingyuan_Hu2;~Yu_Hou1;~Nischal_Reddy_Chandra1;~Marjorie_Freedman1;~Ralph_M._Weischedel1;~Nanyun_Peng1", "aff": "University of California, Los Angeles;University of California, Los Angeles;, University of California, Los Angeles;University of Maryland, College Park;University of California, Los Angeles;USC/ISI;USC/ISI;University of California, Los Angeles", "aff_domain": "cs.ucla.edu;ucla.edu;cs.ucla.edu;umd.edu;cs.ucla.edu;isi.edu;isi.edu;ucla.edu", "position": "PhD student;PhD student;MS student;PhD student;Researcher;Principal Researcher;Researcher;Assistant Professor", "bibtex": "@inproceedings{\nwu2023acquired,\ntitle={{ACQUIRED}: A Dataset for Answering Counterfactual Questions In Real-Life Videos},\nauthor={Te-Lin Wu and Zi-Yi Dou and Qingyuan Hu and Yu Hou and Nischal Reddy Chandra and Marjorie Freedman and Ralph M. 
Weischedel and Nanyun Peng},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=I5NWLjXbQl}\n}", "github": "", "project": "", "reviewers": "LAU8;5Kzu;sY4g", "site": "https://openreview.net/forum?id=I5NWLjXbQl", "pdf_size": 0, "rating": "5;5;5", "confidence": "3;5;3", "excitement": "4;3;4", "reproducibility": "4;5;4", "correctness": "3;3;4", "rating_avg": 5.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.333333333333333, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 8, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0000-0002-2404-7570;;;;", "linkedin": "telinwu/;;qingyuan-carol-hu;yu-hope-hou/;nischandra/;marjorie-freedman-37799722/;;", "aff_unique_index": "0;0;0;1;0;2;2;0", "aff_unique_norm": "University of California, Los Angeles;University of Maryland;University of Southern California", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ucla.edu;https://www/umd.edu;https://isi.usc.edu", "aff_unique_abbr": "UCLA;UMD;USC", "aff_campus_unique_index": "0;0;0;1;0;2;2;0", "aff_campus_unique": "Los Angeles;College Park;ISI", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "I5hTganf3z", "title": "VECHR: A Dataset for Explainable and Robust Classification of Vulnerability Type in the European Court of Human Rights", "track": "main", "status": "Short Main", "tldr": "", "abstract": "Recognizing vulnerability is crucial for understanding and implementing targeted support to empower individuals in need. This is especially important at the European Court of Human Rights (ECtHR), where the court adapts Convention standards to meet actual individual needs and thus to ensure effective human rights protection. However, the concept of vulnerability remains elusive at the ECtHR and no prior NLP research has dealt with it. To enable future research in this area, we present VECHR, a novel expert-annotated multi-label dataset comprising vulnerability type classification and explanation rationale. We benchmark the performance of state-of-the-art models on VECHR from both prediction and explainability perspectives. Our results demonstrate the challenging nature of the task, with lower prediction performance and limited agreement between models and experts. Further, we analyze the robustness of these models in dealing with out-of-domain (OOD) data and observe overall limited performance.
Our dataset poses unique challenges, offering significant room for improvement regarding performance, explainability and robustness.", "keywords": "nlp for social good;in legal domain;vulnerability classification;rationale dataset;robustness", "primary_area": "", "supplementary_material": "", "author": "Shanshan Xu;Leon Staufer;Santosh T.Y.S.S;Oana Ichim;Corina Heri;Matthias Grabmair", "authorids": "~Shanshan_Xu1;~Leon_Staufer1;~Santosh_T.Y.S.S1;~Oana_Ichim1;~Corina_Heri1;~Matthias_Grabmair2", "gender": "F;M;M;F;F;M", "homepage": "https://sxu3.github.io/;https://leon.staufer.me/;;https://www.graduateinstitute.ch/discover-institute/oana-ichim;https://www.ius.uzh.ch/en/staff/senior-assistants/pdoc-heri/person.html;https://www.cs.cit.tum.de/lt/team/matthias-grabmair/", "dblp": ";;220/2486;;;09/1651", "google_scholar": "dSDjjCEAAAAJ;bRv4XPAAAAAJ;aYytWsAAAAAJ;;;MroPEGsAAAAJ", "or_profile": "~Shanshan_Xu1;~Leon_Staufer1;~Santosh_T.Y.S.S1;~Oana_Ichim1;~Corina_Heri1;~Matthias_Grabmair2", "aff": "Technische Universit\u00e4t M\u00fcnchen;Technische Universit\u00e4t M\u00fcnchen;Adobe Systems;University of Massachusetts at Amherst;University of Zurich;Technische Universit\u00e4t M\u00fcnchen", "aff_domain": "tum.de;tum.de;adobe.com;umass.edu;uzh.ch;tum.de", "position": "PhD student;Undergrad student;Research Intern;Postdoc;Postdoc;Assistant Professor", "bibtex": "@inproceedings{\nxu2023vechr,\ntitle={{VECHR}: A Dataset for Explainable and Robust Classification of Vulnerability Type in the European Court of Human Rights},\nauthor={Shanshan Xu and Leon Staufer and Santosh T.Y.S.S and Oana Ichim and Corina Heri and Matthias Grabmair},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=I5hTganf3z}\n}", "github": "", "project": "", "reviewers": "6aqx;DQPj;tTjb;S2UF", "site": "https://openreview.net/forum?id=I5hTganf3z", "pdf_size": 0, "rating": "4;4;4;4", "confidence": "4;3;3;4", "excitement": "4;3;4;4", "reproducibility": "4;3;4;3", "correctness": "4;3;4;4", "rating_avg": 4.0, "confidence_avg": 3.5, "excitement_avg": 3.75, "reproducibility_avg": 3.5, "correctness_avg": 3.75, "replies_avg": 13, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0009-0005-1203-1634;0000-0002-2938-4090;;;;", "linkedin": ";https://linkedin.com/in/leonstaufer;;;;matthias-grabmair-38216350/", "aff_unique_index": "0;0;1;2;3;0", "aff_unique_norm": "Technische Universit\u00e4t M\u00fcnchen;Adobe;University of Massachusetts Amherst;University of Zurich", "aff_unique_dep": ";Adobe Systems Incorporated;;", "aff_unique_url": "https://www.tum.de;https://www.adobe.com;https://www.umass.edu;https://www.unizh.ch", "aff_unique_abbr": "TUM;Adobe;UMass Amherst;UZH", "aff_campus_unique_index": "1", "aff_campus_unique": ";Amherst", "aff_country_unique_index": "0;0;1;1;2;0", "aff_country_unique": "Germany;United States;Switzerland" }, { "id": "I8VTNsq5eB", "title": "CESAR: Automatic Induction of Compositional Instructions for Multi-turn Dialogs", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Instruction-based multitasking has played a critical role in the success of large language models (LLMs) in multi-turn dialog applications. While publicly available LLMs have shown promising performance, when exposed to complex instructions with multiple constraints, they lag behind state-of-the-art models like ChatGPT.
In this work, we hypothesize that the availability of large-scale complex demonstrations is crucial in bridging this gap. Focusing on dialog applications, we propose a novel framework, CESAR, that unifies a large number of dialog tasks in the same format and allows programmatic induction of complex instructions without any manual effort.\n\nWe apply CESAR on InstructDial, a benchmark for instruction-based dialog tasks. We further enhance InstructDial with new datasets and tasks and utilize CESAR to induce complex tasks with compositional instructions. This results in a new benchmark called InstructDial++, which includes 63 datasets with 86 basic tasks and 68 composite tasks. Through rigorous experiments, we demonstrate the scalability of CESAR in providing rich instructions. Models trained on InstructDial++ can follow compositional prompts, such as prompts that ask for multiple stylistic constraints.", "keywords": "Instruction Tuning;Open Domain Dialog;Controlled text generation;Task unification;Unified grounding;Compositional learning;Compositional instructions;CESAR", "primary_area": "", "supplementary_material": "", "author": "Taha Aksu;Devamanyu Hazarika;Shikib Mehri;Seokhwan Kim;Dilek Hakkani-Tur;Yang Liu;Mahdi Namazifar", "authorids": "~Taha_Aksu1;~Devamanyu_Hazarika1;~Shikib_Mehri1;~Seokhwan_Kim2;~Dilek_Hakkani-Tur1;~Yang_Liu6;~Mahdi_Namazifar1", "gender": "M;M;M;M;F;M;F", "homepage": "https://devamanyu.com;http://shikib.com/;http://seokhwankim.github.io/;;https://siebelschool.illinois.edu/about/people/faculty/dilek;https://cuthalionn.github.io/;", "dblp": "188/5874;212/0069;02/2980;;h/DilekZHakkaniTur;286/8684.html;51/3710-4", "google_scholar": "nUCWRZAAAAAJ;H_OmqikAAAAJ;ygG3iXQAAAAJ;;GMcL_9kAAAAJ;9-rgPvgAAAAJ;w90wOucAAAAJ", "or_profile": "~Devamanyu_Hazarika1;~Shikib_Mehri1;~Seokhwan_Kim2;~Mahdi_Namazifar1;~Dilek_Hakkani_Tur1;~Ibrahim_Taha_Aksu1;~Yang_Liu60", "aff": "Amazon Alexa AI;Amazon;Amazon;Amazon;Amazon;National University of Singapore;Amazon", "aff_domain": "amazon.com;amazon.com;amazon.com;amazon.com;amazon.com;nus.edu.sg;amazon.com", "position": "Researcher;Researcher;Principal Scientist;Researcher;Snr Principal Scientist;PhD student;Principal Researcher", "bibtex": "@inproceedings{\naksu2023cesar,\ntitle={{CESAR}: Automatic Induction of Compositional Instructions for Multi-turn Dialogs},\nauthor={Taha Aksu and Devamanyu Hazarika and Shikib Mehri and Seokhwan Kim and Dilek Hakkani-Tur and Yang Liu and Mahdi Namazifar},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=I8VTNsq5eB}\n}", "github": "", "project": "", "reviewers": "5Uzf;8BXP;dek5;gUjh;Lkwr", "site": "https://openreview.net/forum?id=I8VTNsq5eB", "pdf_size": 0, "rating": "4;4;4;4;4", "confidence": "3;3;4;2;4", "excitement": "3;4;3;4;4", "reproducibility": "3;4;2;3;4", "correctness": "3;2;2;4;4", "rating_avg": 4.0, "confidence_avg": 3.2, "excitement_avg": 3.6, "reproducibility_avg": 3.2, "correctness_avg": 3.0, "replies_avg": 15, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-0241-7163;;;;0000-0001-5246-2117;;", "linkedin": "devamanyu/;shikib/;;namazifar/;dilek-hakkani-tur-9517543/;ibrahim-taha-aksu-b86647114/;yang-liu-8555143/", "aff_unique_index": "0;0;0;0;0;1;0", "aff_unique_norm": "Amazon;National University of Singapore", "aff_unique_dep": "Amazon Alexa AI;", "aff_unique_url": "https://www.amazon.com;https://www.nus.edu.sg", "aff_unique_abbr": "Amazon;NUS", 
"aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;1;0", "aff_country_unique": "United States;Singapore" }, { "id": "I9DVeu8XKa", "title": "CodeFusion: A Pre-trained Diffusion Model for Code Generation", "track": "main", "status": "Short Main", "tldr": "", "abstract": "Imagine a developer who can only change their last line of code\u2014how often would they have to start writing a function from scratch before it is correct? Auto-regressive models for code generation from natural language have a similar limitation: they do not easily allow reconsidering earlier tokens generated. We introduce CodeFusion, a pre-trained diffusion code generation model that addresses this limitation by iteratively denoising a complete program conditioned on the encoded natural language. We evaluate CodeFusion on the task of natural language to code generation for Bash, Python, and Microsoft Excel conditional formatting (CF) rules. Experiments show that CodeFusion (75M parameters) performs on par with state-of-the-art auto-regressive systems (350M-175B parameters) in top-1 accuracy and outperforms them in top-3 and top-5 accuracy due to its better balance in diversity versus quality.", "keywords": "Text-to-code generation;Diffusion models;Program synthesis;Language models", "primary_area": "", "supplementary_material": "", "author": "Mukul Singh;Jos\u00e9 Cambronero;Sumit Gulwani;Vu Le;Carina Suzana Negreanu;Gust Verbruggen", "authorids": "~Mukul_Singh1;~Jos\u00e9_Cambronero1;~Sumit_Gulwani1;~Vu_Le2;~Carina_Suzana_Negreanu1;~Gust_Verbruggen1", "gender": "M;M;M;M;F;M", "homepage": "https://www.microsoft.com/research/people/singhmukul;https://www.josecambronero.com;https://www.microsoft.com/en-us/research/people/sumitg/;https://www.vuminhle.com/;;", "dblp": "291/1609;;g/SumitGulwani;00/2651-2.html;276/1629;", "google_scholar": "3O7KjiIAAAAJ;;fZinJ_AAAAAJ;mijlpU4AAAAJ;63f9xyYAAAAJ;TmU3sKMAAAAJ", "or_profile": "~Mukul_Singh1;~Jos\u00e9_Cambronero1;~Sumit_Gulwani1;~Vu_Le2;~Carina_Suzana_Negreanu1;~Gust_Verbruggen1", "aff": "Microsoft;Microsoft;Microsoft Research;Microsoft;Microsoft;KU Leuven", "aff_domain": "microsoft.com;microsoft.com;research.microsoft.com;microsoft.com;microsoft.com;kuleuven.be", "position": "Researcher;Researcher;Researcher;Researcher;Researcher;PhD student", "bibtex": "@inproceedings{\nsingh2023codefusion,\ntitle={CodeFusion: A Pre-trained Diffusion Model for Code Generation},\nauthor={Mukul Singh and Jos{\\'e} Cambronero and Sumit Gulwani and Vu Le and Carina Suzana Negreanu and Gust Verbruggen},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=I9DVeu8XKa}\n}", "github": "", "project": "", "reviewers": "XjFk;ADn5;9CNw", "site": "https://openreview.net/forum?id=I9DVeu8XKa", "pdf_size": 0, "rating": "4;4;4", "confidence": "5;4;4", "excitement": "4;4;3", "reproducibility": "5;4;3", "correctness": "4;3;3", "rating_avg": 4.0, "confidence_avg": 4.333333333333333, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0001-9510-4512;;0000-0002-9226-9634;0000-0003-3727-3291;;", "linkedin": "mukulsingh105/;;sumit-gulwani/;;;", "aff_unique_index": "0;0;0;0;0;1", "aff_unique_norm": "Microsoft;Katholieke Universiteit Leuven", "aff_unique_dep": "Microsoft Corporation;", "aff_unique_url": 
"https://www.microsoft.com;https://www.kuleuven.be", "aff_unique_abbr": "Microsoft;KU Leuven", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;1", "aff_country_unique": "United States;Belgium" }, { "id": "IEH9YsR5Ty", "title": "mAggretriever: A Simple yet Effective Approach to Zero-Shot Multilingual Dense Retrieval", "track": "main", "status": "Short Main", "tldr": "", "abstract": "Multilingual information retrieval (MLIR) is a crucial yet challenging task due to the need for human annotations in multiple languages, making training data creation labor-intensive. \nIn this paper, we introduce mAggretriever, which effectively leverages semantic and lexical features from pre-trained multilingual transformers (e.g., mBERT and XLM-R) for dense retrieval. \nTo enhance training and inference efficiency, we employ approximate masked-language modeling prediction for computing lexical features, reducing 70--85\\% GPU memory requirement for mAggretriever fine-tuning. \nEmpirical results demonstrate that mAggretriever, fine-tuned solely on English training data, surpasses existing state-of-the-art multilingual dense retrieval models that undergo further training on large-scale MLIR training data.\nOur code is available at url.", "keywords": "Multilingual Dense Retrieval;Zero-Shot Language Transferability;Lexical and Semantic Matching", "primary_area": "", "supplementary_material": "", "author": "Sheng-Chieh Lin;Amin Ahmad;Jimmy Lin", "authorids": "~Sheng-Chieh_Lin1;~Amin_Ahmad1;~Jimmy_Lin2", "gender": "M;Not Specified;", "homepage": "https://jacklin64.github.io/about_me/;;https://cs.uwaterloo.ca/~jimmylin/", "dblp": "61/10361;;00/7739", "google_scholar": "https://scholar.google.com/citations?hl=en;;", "or_profile": "~Sheng-Chieh_Lin1;~Amin_Ahmad1;~Jimmy_Lin2", "aff": "Meta Platforms, Inc.;Vectara;University of Waterloo", "aff_domain": "meta.com;vectara.com;waterloo.ca", "position": "Intern;Researcher;Full Professor", "bibtex": "@inproceedings{\nlin2023maggretriever,\ntitle={mAggretriever: A Simple yet Effective Approach to Zero-Shot Multilingual Dense Retrieval},\nauthor={Sheng-Chieh Lin and Amin Ahmad and Jimmy Lin},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=IEH9YsR5Ty}\n}", "github": "", "project": "", "reviewers": "MRVN;rgyC;xtfQ", "site": "https://openreview.net/forum?id=IEH9YsR5Ty", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;4", "excitement": "3;3;3", "reproducibility": "3;4;3", "correctness": "4;4;3", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.0, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.6666666666666665, "replies_avg": 10, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-7989-9703;;", "linkedin": "jack-lin-716a61127/;aminahmad/;", "aff_unique_index": "0;1;2", "aff_unique_norm": "Meta;Vectara;University of Waterloo", "aff_unique_dep": "Meta Platforms, Inc.;;", "aff_unique_url": "https://www.meta.com;;https://uwaterloo.ca", "aff_unique_abbr": "Meta;;UW", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;2", "aff_country_unique": "United States;;Canada" }, { "id": "IFNbElsnCi", "title": "Generating Summaries with Controllable Readability Levels", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Readability refers to how easily a reader can understand a written text. 
Several factors affect the readability level, such as the complexity of the text, its subject matter, and the reader's background knowledge. Generating summaries based on different readability levels is critical for enabling knowledge consumption by diverse audiences. However, current text generation approaches lack refined control, resulting in texts that are not customized to readers' proficiency levels. In this work, we bridge this gap and study techniques to generate summaries at specified readability levels. Unlike previous methods that focus on a specific readability level (e.g., lay summarization), we generate summaries with fine-grained control over their readability. We develop three text generation techniques for controlling readability: (1) instruction-based readability control, (2) reinforcement learning to minimize the gap between requested and observed readability and (3) a decoding approach that uses lookahead to estimate the readability of upcoming decoding steps. We show that our generation methods significantly improve readability control on news summarization (CNN/DM dataset), as measured by various readability metrics and human judgement, establishing strong baselines for controllable readability in summarization.", "keywords": "readability;abstractive summarization;controllable text generation", "primary_area": "", "supplementary_material": "", "author": "Leonardo F. R. Ribeiro;Mohit Bansal;Markus Dreyer", "authorids": "~Leonardo_F._R._Ribeiro1;~Mohit_Bansal2;~Markus_Dreyer1", "gender": "M;;M", "homepage": "https://www.cs.unc.edu/~mbansal/;https://markusdreyer.org/;http://leoribeiro.github.io/", "dblp": "32/5243.html;37/4227;245/8769", "google_scholar": "DN8QtscAAAAJ;0a1AxxQAAAAJ;https://scholar.google.com.br/citations?user=92j4_4wAAAAJ", "or_profile": "~Mohit_Bansal2;~Markus_Dreyer1;~Leonardo_Filipe_Rodrigues_Ribeiro1", "aff": "University of North Carolina at Chapel Hill;Amazon;Amazon", "aff_domain": "unc.edu;amazon.com;amazon.com", "position": "Full Professor;Principal Researcher;Applied Scientist", "bibtex": "@inproceedings{\nribeiro2023generating,\ntitle={Generating Summaries with Controllable Readability Levels},\nauthor={Leonardo F. R. 
Ribeiro and Mohit Bansal and Markus Dreyer},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=IFNbElsnCi}\n}", "github": "", "project": "", "reviewers": "NGJK;MNBZ;RnEB", "site": "https://openreview.net/forum?id=IFNbElsnCi", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;5;4", "excitement": "4;4;3", "reproducibility": "3;4;4", "correctness": "3;4;4", "rating_avg": 5.0, "confidence_avg": 4.333333333333333, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0003-2639-942X", "linkedin": ";mdreyer/;leonardofribeiro/", "aff_unique_index": "0;1;1", "aff_unique_norm": "University of North Carolina;Amazon", "aff_unique_dep": ";Amazon.com, Inc.", "aff_unique_url": "https://www.unc.edu;https://www.amazon.com", "aff_unique_abbr": "UNC;Amazon", "aff_campus_unique_index": "0", "aff_campus_unique": "Chapel Hill;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "IIfdKVyeVh", "title": "Vicarious Offense and Noise Audit of Offensive Speech Classifiers: Unifying Human and Machine Disagreement on What is Offensive", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Offensive speech detection is a key component of content moderation. However, what is offensive can be highly subjective. This paper investigates how machine and human moderators disagree on what is offensive when it comes to real-world social web political discourse. We show that (1) there is extensive disagreement among the moderators (humans and machines); and (2) human and large-language-model classifiers are unable to predict how other human raters will respond, based on their political leanings. For (1), we conduct a ***noise audit*** at an unprecedented scale that combines both machine and human responses. For (2), we introduce a first-of-its-kind dataset of ***vicarious offense***. Our noise audit reveals that moderation outcomes vary wildly across different machine moderators. Our experiments with human moderators suggest that political leanings combined with sensitive issues affect both first-person and vicarious offense. The dataset is available through https://github.com/Homan-Lab/voiced.", "keywords": "human annotation;fairness;noise audit", "primary_area": "", "supplementary_material": "", "author": "Tharindu Cyril Weerasooriya;Sujan Dutta;Tharindu Ranasinghe;Marcos Zampieri;Christopher M Homan;Ashiqur R. 
KhudaBukhsh", "authorids": "~Tharindu_Cyril_Weerasooriya1;~Sujan_Dutta1;~Tharindu_Ranasinghe1;~Marcos_Zampieri1;~Christopher_M_Homan1;~Ashiqur_R._KhudaBukhsh1", "gender": "M;M;M;;M;M", "homepage": "https://cyrilw.com;;https://tharindu.co.uk/;https://mzampieri.com/;https://www.cs.cmu.edu/~akhudabu/;https://www.cs.rit.edu/~cmh/", "dblp": "261/3085;325/1366.html;242/4755;47/7983;29/7442;h/VPless", "google_scholar": ";https://scholar.google.ca/citations?user=Fy5SnlgAAAAJ;https://scholar.google.co.uk/citations?user=9t7WhIIAAAAJ;https://scholar.google.com.au/citations?user=vAx7VsoAAAAJ;mWyMp38AAAAJ;https://scholar.google.com.tw/citations?user=kU6puLcAAAAJ", "or_profile": "~Tharindu_Cyril_Weerasooriya1;~Sujan_Dutta1;~Tharindu_Ranasinghe1;~Marcos_Zampieri1;~Ashiqur_R._KhudaBukhsh1;~Christopher_Homan1", "aff": "Amazon;Rochester Institute of Technology;Aston University;George Mason University;Rochester Institute of Technology;Rochester Institute of Technology", "aff_domain": "amazon.com;rit.edu;aston.ac.uk;gmu.edu;rit.edu;rit.edu", "position": "Intern;PhD student;Lecturer;Assistant Professor;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nweerasooriya2023vicarious,\ntitle={Vicarious Offense and Noise Audit of Offensive Speech Classifiers: Unifying Human and Machine Disagreement on What is Offensive},\nauthor={Tharindu Cyril Weerasooriya and Sujan Dutta and Tharindu Ranasinghe and Marcos Zampieri and Christopher M Homan and Ashiqur R. KhudaBukhsh},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=IIfdKVyeVh}\n}", "github": "", "project": "", "reviewers": "G1Gg;Vd66;Gr8n", "site": "https://openreview.net/forum?id=IIfdKVyeVh", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;4", "excitement": "4;4;4", "reproducibility": "4;3;4", "correctness": "4;4;3", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 4.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0003-4647-3164;;0000-0003-3207-3821;;;", "linkedin": "cyrilw/;;tharinduranasinghe/;;;christopher-homan-039862/", "aff_unique_index": "0;1;2;3;1;1", "aff_unique_norm": "Amazon;Rochester Institute of Technology;Aston University;George Mason University", "aff_unique_dep": "Amazon.com, Inc.;;;", "aff_unique_url": "https://www.amazon.com;https://www.rit.edu;https://www.aston.ac.uk;https://www.gmu.edu", "aff_unique_abbr": "Amazon;RIT;Aston;GMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0;0", "aff_country_unique": "United States;United Kingdom" }, { "id": "IKz1dWj0I5", "title": "Physician Detection of Clinical Harm in Machine Translation: Quality Estimation Aids in Reliance and Backtranslation Identifies Critical Errors", "track": "main", "status": "Long Main", "tldr": "", "abstract": "A major challenge in the practical use of Machine Translation (MT) is that users lack information on translation quality to make informed decisions about how to rely on outputs. Progress in quality estimation research provides techniques to automatically assess MT quality, but these techniques have primarily been evaluated in vitro by comparison against human judgments outside of a specific context of use. This paper evaluates quality estimation feedback in vivo with a human study in realistic high-stakes medical settings. 
Using Emergency Department discharge instructions, we study how interventions based on quality estimation versus backtranslation assist physicians in deciding whether to show MT outputs to a patient. We find that quality estimation improves appropriate reliance on MT, but backtranslation helps physicians detect more clinically harmful errors that QE alone often misses.", "keywords": "medical machine translation;clinical harm;human-centered NLP", "primary_area": "", "supplementary_material": "", "author": "Nikita Mehandru;Sweta Agrawal;Yimin Xiao;Ge Gao;Elaine C Khoong;Marine Carpuat;Niloufar Salehi", "authorids": "~Nikita_Mehandru1;~Sweta_Agrawal1;~Yimin_Xiao2;~Ge_Gao3;~Elaine_C_Khoong1;~Marine_Carpuat1;~Niloufar_Salehi1", "gender": ";F;;;;F;F", "homepage": ";https://sweta20.github.io/;;;https://profiles.ucsf.edu/elaine.khoong;http://www.cs.umd.edu/~marine/;https://niloufar.org", "dblp": ";210/7863.html;;16/7040;;71/1827;", "google_scholar": ";Avsw9IkAAAAJ;;xBfgUNoAAAAJ;s1i8Y9MAAAAJ;iPAX6jcAAAAJ;E0Zbuu8AAAAJ", "or_profile": "~Nikita_Mehandru1;~Sweta_Agrawal1;~Yimin_Xiao2;~Ge_Gao3;~Elaine_C_Khoong1;~Marine_Carpuat1;~Niloufar_Salehi1", "aff": ";University of Maryland, College Park;;University of Maryland, College Park;University of California, San Francisco;University of Maryland, College Park;University of California, Berkeley", "aff_domain": ";umd.edu;;umd.edu;ucsf.edu;umd.edu;berkeley.edu", "position": ";PhD student;;Assistant Professor;Assistant Professor;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\nmehandru2023physician,\ntitle={Physician Detection of Clinical Harm in Machine Translation: Quality Estimation Aids in Reliance and Backtranslation Identifies Critical Errors},\nauthor={Nikita Mehandru and Sweta Agrawal and Yimin Xiao and Ge Gao and Elaine C Khoong and Marine Carpuat and Niloufar Salehi},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=IKz1dWj0I5}\n}", "github": "", "project": "", "reviewers": "6psY;fR6t;m7Bm", "site": "https://openreview.net/forum?id=IKz1dWj0I5", "pdf_size": 0, "rating": "5;5;5", "confidence": "3;4;4", "excitement": "3;4;5", "reproducibility": "3;5;3", "correctness": "4;4;5", "rating_avg": 5.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 4.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 4.333333333333333, "replies_avg": 11, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;0000-0002-2514-3572;;", "linkedin": ";;;;;;", "aff_unique_index": "0;0;1;0;2", "aff_unique_norm": "University of Maryland;University of California, San Francisco;University of California, Berkeley", "aff_unique_dep": ";;", "aff_unique_url": "https://www/umd.edu;https://www.ucsf.edu;https://www.berkeley.edu", "aff_unique_abbr": "UMD;UCSF;UC Berkeley", "aff_campus_unique_index": "0;0;1;0;2", "aff_campus_unique": "College Park;San Francisco;Berkeley", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "ILQnct9H4H", "title": "TRIGO: Benchmarking Formal Mathematical Proof Reduction for Generative Language Models", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Automated theorem proving (ATP) has become an appealing domain for exploring the reasoning ability of the recent successful \ngenerative language models. 
However, current ATP benchmarks mainly focus on symbolic inference, but rarely involve the understanding of complex number combination reasoning. In this work, we propose TRIGO, an ATP benchmark that not only requires a model to reduce a trigonometric expression with step-by-step proof but also evaluates a generative LM's reasoning ability on formulas and capability to manipulate, group, and factor number terms. We gather trigonometric expressions and their reduced forms from the web, annotate the simplification process manually, and translate it into the ``Lean'' formal language system. We then automatically generate additional examples from the annotated samples to expand the dataset. Furthermore, we also create three automatically generated training and testing datasets of varying difficulty and distributions. Our extensive experiments show that our proposed TRIGO poses a new challenge for advanced generative LMs, including GPT-4, which is pre-trained on a considerable amount of open-source formal theorem-proving language data, and provide a new tool to study generative LMs' ability in both formal and mathematical reasoning.", "keywords": "Trigonometric Expression Reduction;Automated Theorem Proving;Formal Mathematical Proof Reduction;Complex Number Combination Reasoning;Generative Language Models", "primary_area": "", "supplementary_material": "", "author": "Jing Xiong;Jianhao Shen;Ye Yuan;Haiming Wang;Yichun Yin;Zhengying Liu;Lin Li;Zhijiang Guo;Qingxing Cao;Yinya Huang;Chuanyang Zheng;Xiaodan Liang;Ming Zhang;Qun Liu", "authorids": "~Jing_Xiong4;~Jianhao_Shen1;~Ye_Yuan12;~Haiming_Wang1;~Yichun_Yin2;~Zhengying_Liu2;~Lin_Li13;~Zhijiang_Guo2;~Qingxing_Cao1;~Yinya_Huang1;~Chuanyang_Zheng3;~Xiaodan_Liang2;~Ming_Zhang5;~Qun_Liu1", "gender": "M;M;M;M;M;M;M;;M;F;F;M;M;", "homepage": ";https://github.com/yuanyehome;;;;https://cartus.github.io/;;https://eleanor-h.github.io/;https://chuanyang-zheng.github.io/;https://www.sysu-hcp.net/;https://cs.pku.edu.cn/info/1080/1371.htm;http://liuquncn.github.io/;https://menik1126.github.io/;", "dblp": "217/2324;33/6315-16;97/604;180/5934;241/1782;43/6147;149/7615;282/1562;;;73/1844-4;75/4402-1;;", "google_scholar": "9fppVAUAAAAJ;h8WQaTkAAAAJ;zDPqP6AAAAAJ;x3Mz21gAAAAJ;http:// DFme0joAAAAJ;8b-u3icAAAAJ;flOBrd8AAAAJ;dWStaRIAAAAJ;LWwh7K4AAAAJ;voxznZAAAAAJ;LbzoQBsAAAAJ;2HhiGzcAAAAJ;https://scholar.google.com.hk/citations?user=dFX1hXkAAAAJ;https://scholar.google.com/citations?view_op=list_works", "or_profile": "~Jianhao_Shen1;~Ye_Yuan12;~Haiming_Wang1;~Yichun_Yin2;~Zhengying_Liu2;~Zhijiang_Guo2;~Qingxing_Cao1;~Yinya_Huang1;~Chuanyang_Zheng3;~Xiaodan_Liang2;~Ming_Zhang5;~Qun_Liu1;~jing_xiong3;~Linxuan_Li1", "aff": "Peking University;Peking University;SUN YAT-SEN UNIVERSITY;Huawei Noah's Ark Lab;Huawei Technologies Ltd.;University of Cambridge;SUN YAT-SEN UNIVERSITY, Tsinghua University;SUN YAT-SEN UNIVERSITY;The Chinese University of Hong Kong;SUN YAT-SEN UNIVERSITY;Peking University;Huawei Noah's Ark Lab;Sun Yat-Sen University;", "aff_domain": "pku.edu.cn;pku.edu.cn;sysu.edu.cn;huawei.com;huawei.com;cam.ac.uk;sysu.edu.cn;sysu.edu.cn;cse.cuhk.edu.hk;sysu.edu.cn;pku.edu.cn;huawei.com;sysu.edu.cn;", "position": "PhD student;PhD student;PhD student;Researcher;Researcher;Postdoc;Postdoc;PhD student;PhD student;Associate Professor;Full Professor;Chief Scientist of Speech and Language Computing;MS student;", "bibtex": "@inproceedings{\nxiong2023trigo,\ntitle={{TRIGO}: Benchmarking Formal Mathematical Proof Reduction for Generative Language Models},\nauthor={Jing Xiong
and Jianhao Shen and Ye Yuan and Haiming Wang and Yichun Yin and Zhengying Liu and Lin Li and Zhijiang Guo and Qingxing Cao and Yinya Huang and Chuanyang Zheng and Xiaodan Liang and Ming Zhang and Qun Liu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=ILQnct9H4H}\n}", "github": "", "project": "", "reviewers": "ET7v;W12Q;Gc1T", "site": "https://openreview.net/forum?id=ILQnct9H4H", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;2;3", "excitement": "4;4;4", "reproducibility": "3;4;2", "correctness": "4;3;3", "rating_avg": 4.0, "confidence_avg": 3.0, "excitement_avg": 4.0, "reproducibility_avg": 3.0, "correctness_avg": 3.3333333333333335, "replies_avg": 9, "authors#_avg": 14, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;;;0000-0002-0686-0832;;;0000-0002-9809-3430;0000-0002-7000-1792;0000-0003-2986-6978;", "linkedin": ";%E9%87%8E-%E8%A2%81-0641241a4/;;;;;;;;;;qunliu/;;", "aff_unique_index": "0;0;1;2;2;3;1;1;4;1;0;2;1", "aff_unique_norm": "Peking University;Sun Yat-sen University;Huawei;University of Cambridge;Chinese University of Hong Kong", "aff_unique_dep": ";;Noah's Ark Lab;;", "aff_unique_url": "http://www.pku.edu.cn;http://www.sysu.edu.cn;https://www.huawei.com;https://www.cam.ac.uk;https://www.cuhk.edu.hk", "aff_unique_abbr": "Peking U;SYSU;Huawei;Cambridge;CUHK", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Cambridge;Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0;1;0;0;0;0;0;0;0", "aff_country_unique": "China;United Kingdom" }, { "id": "ILxXKWHkIB", "title": "BioFEG: Generate Latent Features for Biomedical Entity Linking", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Biomedical entity linking is an essential task in biomedical text processing, which aims to map entity mentions in biomedical text, such as clinical notes, to standard terms in a given knowledge base. However, this task is challenging due to the rarity of many biomedical entities in real-world scenarios, which often leads to a lack of annotated data for them. Limited by understanding these unseen entities, traditional biomedical entity linking models suffer from multiple types of linking errors. In this paper, we propose a novel latent feature generation framework BioFEG to address these challenges. Specifically, our BioFEG leverages domain knowledge to train a generative adversarial network, which generates latent semantic features of corresponding mentions for unseen entities. Utilizing these features, we fine-tune our entity encoder to capture fine-grained coherence information of unseen entities and better understand them. This allows models to make linking decisions more accurately, particularly for ambiguous mentions involving rare entities. 
Extensive experiments on the two benchmark datasets demonstrate the superiority of our proposed framework.", "keywords": "Biomedical entity linking;Unseen entities", "primary_area": "", "supplementary_material": "", "author": "Xuhui Sui;Ying Zhang;Xiangrui Cai;Kehui Song;Baohang Zhou;Xiaojie Yuan;Wensheng Zhang", "authorids": "~Xuhui_Sui1;~Ying_Zhang7;~Xiangrui_Cai1;~Kehui_Song1;~Baohang_Zhou1;~Xiaojie_Yuan1;~Wensheng_Zhang5", "gender": ";F;M;F;M;;M", "homepage": "https://www.linkedin.com/in/%E6%97%AD%E8%BE%89-%E9%9A%8B-0305b334b/;https://dbis.nankai.edu.cn/2023/0322/c12139a506904/page.htm;https://dbis.nankai.edu.cn/2023/0322/c12139a506911/page.htm;;https://scholar.google.com/citations?user=U_-raXAAAAAJ;https://dbis.nankai.edu.cn/2023/0322/c12139a506919/page.htm;https://people.ucas.ac.cn/~wenshengzhang", "dblp": "321/6900.html;13/6769-15;137/0504;197/1051.html;284/1471.html;79/2280;94/6627-2.html/", "google_scholar": ";;Y9vuweEAAAAJ;;U_-raXAAAAAJ;;", "or_profile": "~Xuhui_Sui1;~Ying_Zhang7;~Xiangrui_Cai1;~Kehui_Song1;~Baohang_Zhou1;~Xiaojie_Yuan1;~Wensheng_Zhang5", "aff": "Nankai University;Nankai University;Nankai University;Nankai University;Nankai University;Nankai University;Institute of Automation, Chinese Academy of Sciences", "aff_domain": "nankai.edu.cn;nankai.edu.cn;nankai.edu.cn;nankai.edu.cn;nankai.edu.cn;nankai.edu.cn;ia.ac.cn", "position": "PhD student;Full Professor;Associate Professor;Postdoc;PhD student;Full Professor;Full Professor", "bibtex": "@inproceedings{\nsui2023biofeg,\ntitle={Bio{FEG}: Generate Latent Features for Biomedical Entity Linking},\nauthor={Xuhui Sui and Ying Zhang and Xiangrui Cai and Kehui Song and Baohang Zhou and Xiaojie Yuan and Wensheng Zhang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=ILxXKWHkIB}\n}", "github": "", "project": "", "reviewers": "msFt;v9F6;gxqK", "site": "https://openreview.net/forum?id=ILxXKWHkIB", "pdf_size": 0, "rating": "5;5;5", "confidence": "5;3;3", "excitement": "4;3;4", "reproducibility": "4;4;4", "correctness": "5;4;5", "rating_avg": 5.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.0, "correctness_avg": 4.666666666666667, "replies_avg": 11, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0001-5386-9912;0000-0003-4906-5828;;;;0000-0002-5876-6856;0000-0003-0752-941X", "linkedin": ";;;;;;", "aff_unique_index": "0;0;0;0;0;0;1", "aff_unique_norm": "Nankai University;Chinese Academy of Sciences", "aff_unique_dep": ";Institute of Automation", "aff_unique_url": "http://www.nankai.edu.cn;http://www.ia.cas.cn", "aff_unique_abbr": "NKU;CAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "IPPURnxK2S", "title": "Improving generalization in large langue model by learning prefix subspaces", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "This article focuses on large language models (LLMs) fine-tuning in the scarce data regime (also known as \"few-shot learning setting\"). We propose a method to increase the generalization capabilities of LLMs based on neural network subspaces. 
\n\nThis optimization method, recently introduced in computer vision, aims to improve model generalization by identifying wider local optima through the joint optimization of an entire simplex of models in parameter space.\n\nAlthough this property would be highly beneficial in the context of training large language models in the \u201cfew-shot learning\u201d setting, its adaptation to massive, pretrained transformers poses some challenges. First, their considerable number of parameters makes it difficult to train several models jointly, and second, their deterministic parameter initialisation schemes make them unfit for the subspace method as originally proposed.\nWe show in this paper that its application to "Parameter Efficient Fine-Tuning" (PEFT) methods, however, is relatively natural, and we propose to apply it to prefix-tuning, by learning entire simplexes of continuous prefixes. \n\nWe test our method on a variant of the GLUE benchmark adapted to the few-shot learning setting, and show that both our contributions (learning prefix simplexes, and non-deterministic validation metric inference) jointly lead to a gain in average performance compared to state-of-the-art methods.", "keywords": "Deep learning;parameter efficient fine-tuning;prefix-tuning;subspace learning;natural language processing", "primary_area": "", "supplementary_material": "", "author": "LOUIS FALISSARD;Vincent Guigue;Laure Soulier", "authorids": "~LOUIS_FALISSARD1;~Vincent_Guigue1;~Laure_Soulier1", "gender": "M;M;", "homepage": ";https://vguigue.github.io;", "dblp": ";;", "google_scholar": ";VvFT0nAAAAAJ;", "or_profile": "~LOUIS_FALISSARD1;~Vincent_Guigue1;~Laure_Soulier1", "aff": "Universit\u00e9 Pierre et Marie Curie - Paris 6, Sorbonne Universit\u00e9 - Facult\u00e9 des Sciences (Paris VI);AgroParisTech;", "aff_domain": "isir.upmc.fr;agroparistech.fr;", "position": "Postdoc;Full Professor;", "bibtex": "@inproceedings{\nfalissard2023improving,\ntitle={Improving generalization in large langue model by learning prefix subspaces},\nauthor={LOUIS FALISSARD and Vincent Guigue and Laure Soulier},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=IPPURnxK2S}\n}", "github": "", "project": "", "reviewers": "bmEc;1Q35;vny6", "site": "https://openreview.net/forum?id=IPPURnxK2S", "pdf_size": 0, "rating": "4;4;4", "confidence": "1;4;3", "excitement": "2;5;2", "reproducibility": "4;3;4", "correctness": "3;5;2", "rating_avg": 4.0, "confidence_avg": 2.6666666666666665, "excitement_avg": 3.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0001-5461-8330;;", "linkedin": ";;", "aff_unique_index": "0;1", "aff_unique_norm": "Universit\u00e9 Pierre et Marie Curie - Paris 6;AgroParisTech", "aff_unique_dep": "Facult\u00e9 des Sciences;", "aff_unique_url": "https://www.upmc.fr;https://www.agroparistech.fr", "aff_unique_abbr": "UPMC;AgroParisTech", "aff_campus_unique_index": "0", "aff_campus_unique": "Paris;", "aff_country_unique_index": "0;0", "aff_country_unique": "France" }, { "id": "IRUGqnZQwt", "title": "Diversifying language models for lesser-studied languages and language-usage contexts: A case of second language Korean", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "This study investigates the extent to which currently available morpheme parsers/taggers apply to
lesser-studied languages and language-usage contexts, with a focus on second language (L2) Korean. We pursue this inquiry by (1) training a neural-network model (pre-trained on first language [L1] Korean data) on varying L2 datasets and (2) measuring its morpheme parsing/POS tagging performance on L2 test sets from both the same and different sources of the L2 train sets. Results show that the L2 trained models generally excel in domain-specific tokenization and POS tagging compared to the L1 pre-trained baseline model. Interestingly, increasing the size of the L2 training data does not lead to improving model performance consistently.", "keywords": "Multilinguality;DEI;NLP applications;L2 Korean;Morpheme parsing/tagging", "primary_area": "", "supplementary_material": "", "author": "Hakyung Sung;Gyu-Ho Shin", "authorids": "~Hakyung_Sung1;~Gyu-Ho_Shin1", "gender": ";M", "homepage": "https://hksung.github.io;https://gyuhoshin.weebly.com/", "dblp": ";", "google_scholar": "o-mWVF4AAAAJ;", "or_profile": "~Hakyung_Sung1;~Gyu-Ho_Shin1", "aff": "University of Oregon;Palack\u00fd University Olomouc", "aff_domain": "uoregon.edu;upol.cz", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\nsung2023diversifying,\ntitle={Diversifying language models for lesser-studied languages and language-usage contexts: A case of second language Korean},\nauthor={Hakyung Sung and Gyu-Ho Shin},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=IRUGqnZQwt}\n}", "github": "", "project": "", "reviewers": "i3cX;qhvV;prvE", "site": "https://openreview.net/forum?id=IRUGqnZQwt", "pdf_size": 0, "rating": "2;2;2", "confidence": "2;4;4", "excitement": "3;3;3", "reproducibility": "5;5;3", "correctness": "4;3;2", "rating_avg": 2.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.0, "reproducibility_avg": 4.333333333333333, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-8157-7148", "linkedin": ";", "aff_unique_index": "0;1", "aff_unique_norm": "University of Oregon;Palack\u00fd University", "aff_unique_dep": ";", "aff_unique_url": "https://www.uoregon.edu;https://www.upol.cz", "aff_unique_abbr": "UO;UP", "aff_campus_unique_index": "1", "aff_campus_unique": ";Olomouc", "aff_country_unique_index": "0;1", "aff_country_unique": "United States;Czech Republic" }, { "id": "IT2bT8UigY", "title": "Enhancing Accessible Communication: from European Portuguese to Portuguese Sign Language", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "Portuguese Sign Language (LGP) is the official language in deaf education in Portugal. Current approaches in developing a translation system between European Portuguese and LGP rely on hand-crafted rules. In this paper, we present a fully automatic corpora-driven rule-based machine translation system between European Portuguese and LGP glosses, and also two neural machine translation models. We also contribute with the LGP-5-Domain corpus, composed of five different text domains, built with the help of our rule-based system, and used to train the neural models. In addition, we provide a gold collection, annotated by LGP experts, that can be used for future evaluations. 
Compared with the only similar available translation system, PE2LGP, results are always improved with the new rule-based model, which competes for the highest scores with one of the neural models.", "keywords": "Portuguese Sign Language;Machine Translation;Computational Linguistics;Natural Language Processing;Deep Learning", "primary_area": "", "supplementary_material": "", "author": "Catarina Sousa;Luisa Coheur;Mara Moita", "authorids": "~Catarina_Sousa1;~Luisa_Coheur1;~Mara_Moita1", "gender": "F;;F", "homepage": "https://github.com/catasofia;;", "dblp": ";;", "google_scholar": ";;", "or_profile": "~Catarina_Sousa1;~Luisa_Coheur1;~Mara_Moita1", "aff": "Instituto Superior T\u00e9cnico;;Universidade Nova de Lisboa", "aff_domain": "tecnico.ulisboa.pt;;unl.pt", "position": "MS student;;Researcher", "bibtex": "@inproceedings{\nsousa2023enhancing,\ntitle={Enhancing Accessible Communication: from European Portuguese to Portuguese Sign Language},\nauthor={Catarina Sousa and Luisa Coheur and Mara Moita},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=IT2bT8UigY}\n}", "github": "", "project": "", "reviewers": "9TWC;Ue8D;LV1i", "site": "https://openreview.net/forum?id=IT2bT8UigY", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;3;4", "excitement": "3;3;4", "reproducibility": "2;3;3", "correctness": "3;2;3", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 2.6666666666666665, "correctness_avg": 2.6666666666666665, "replies_avg": 10, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0003-3767-4028", "linkedin": ";;", "aff_unique_index": "0;1", "aff_unique_norm": "Instituto Superior T\u00e9cnico;Universidade Nova de Lisboa", "aff_unique_dep": ";", "aff_unique_url": "https://www.ist.utl.pt;https://www.unl.pt", "aff_unique_abbr": "IST;UNL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Portugal" }, { "id": "IUKw6SyCxv", "title": "DiffS2UT: A Semantic Preserving Diffusion Model for Textless Direct Speech-to-Speech Translation", "track": "main", "status": "Long Main", "tldr": "", "abstract": "While Diffusion Generative Models have achieved great success on image generation tasks, how to efficiently and effectively incorporate them into speech generation especially translation tasks remains a non-trivial problem. Specifically, due to the low information density of speech data, the transformed discrete speech unit sequence is much longer than the corresponding text transcription, posing significant challenges to existing auto-regressive models. Furthermore, it is not optimal to brutally apply discrete diffusion on the speech unit sequence while disregarding the continuous space structure, which will degrade the generation performance significantly. In this paper, we propose a novel diffusion model by applying the diffusion forward process in the continuous speech representation space, while employing the diffusion backward process in the discrete speech unit space. In this way, we preserve the semantic structure of the continuous speech representation space in the diffusion process and integrate the continuous and discrete diffusion models. 
We conduct extensive experiments on the textless direct speech-to-speech translation task, where the proposed method achieves comparable results to the computationally intensive auto-regressive baselines (500 steps on average) with significantly fewer decoding steps (50 steps).", "keywords": "Speech to Speech Translation;Diffusion Models", "primary_area": "", "supplementary_material": "", "author": "Yongxin Zhu;Zhujin Gao;Xinyuan Zhou;Ye Zhongyi;Linli Xu", "authorids": "~Yongxin_Zhu1;~Zhujin_Gao1;~Xinyuan_Zhou1;~Ye_Zhongyi1;~Linli_Xu1", "gender": "M;;M;;", "homepage": "https://youngsheen.github.io;;https://www.researchgate.net/profile/Xinyuan-Zhou-8;;", "dblp": "27/3343-3;336/4920;210/1336;;", "google_scholar": "https://scholar.google.com/citations?view_op=list_works;;;;", "or_profile": "~Yongxin_Zhu1;~Zhujin_Gao1;~Xinyuan_Zhou1;~Ye_Zhongyi1;~Linli_Xu1", "aff": "University of Science and Technology of China;University of Science and Technology of China;IFlyTek;;", "aff_domain": "ustc.edu;ustc.edu.cn;iflytek.com;;", "position": "PhD student;MS student;Researcher;;", "bibtex": "@inproceedings{\nzhu2023diffsut,\ntitle={DiffS2{UT}: A Semantic Preserving Diffusion Model for Textless Direct Speech-to-Speech Translation},\nauthor={Yongxin Zhu and Zhujin Gao and Xinyuan Zhou and Ye Zhongyi and Linli Xu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=IUKw6SyCxv}\n}", "github": "", "project": "", "reviewers": "2aZv;787n;n2co;1oEp", "site": "https://openreview.net/forum?id=IUKw6SyCxv", "pdf_size": 0, "rating": "4;4;4;4", "confidence": "3;4;2;4", "excitement": "4;2;4;4", "reproducibility": "4;3;4;4", "correctness": "4;2;4;4", "rating_avg": 4.0, "confidence_avg": 3.25, "excitement_avg": 3.5, "reproducibility_avg": 3.75, "correctness_avg": 3.5, "replies_avg": 14, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;", "linkedin": ";;xinyuan-zhou-8a8a13191;;", "aff_unique_index": "0;0;1", "aff_unique_norm": "University of Science and Technology of China;iFLYTEK", "aff_unique_dep": ";", "aff_unique_url": "http://www.ustc.edu.cn;https://www.iflytek.com", "aff_unique_abbr": "USTC;IFlyTek", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "IXuCeFnnxU", "title": "Noisy Pair Corrector for Dense Retrieval", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Most dense retrieval models contain an implicit assumption: the training query-document pairs are exactly matched. Since it is expensive to annotate the corpus manually, training pairs in real-world applications are usually collected automatically, which inevitably introduces mismatched-pair noise. In this paper, we explore an interesting and challenging problem in dense retrieval, how to train an effective model with mismatched-pair noise. To solve this problem, we propose a novel approach called Noisy Pair Corrector (NPC), which consists of a detection module and a correction module. The detection module estimates noise pairs by calculating the perplexity between annotated positive and easy negative documents. The correction module utilizes an exponential moving average (EMA) model to provide a soft supervised signal, aiding in mitigating the effects of noise. We conduct experiments on text-retrieval benchmarks Natural Question and TriviaQA, code-search benchmarks StaQC and SO-DS. 
Experimental results show that NPC achieves excellent performance in handling both synthetic and realistic noise.", "keywords": "Dense Retrieval;Noisy Pair", "primary_area": "", "supplementary_material": "", "author": "Hang Zhang;Yeyun Gong;Xingwei He;Dayiheng Liu;Daya Guo;Jiancheng Lv;Jian Guo", "authorids": "~Hang_Zhang6;~Yeyun_Gong2;~Xingwei_He1;~Dayiheng_Liu1;~Daya_Guo2;~Jiancheng_Lv2;~Jian_Guo2", "gender": "M;M;M;M;M;M;M", "homepage": ";;https://scholar.google.com/citations?user=p1a5WXIAAAAJ&hl=zh-CN;https://dayihengliu.github.io/;https://guoday.github.io/;https://cs.scu.edu.cn/info/1303/13767.htm;https://idea.edu.cn/person/guojian/", "dblp": "49/6156-29;06/10400.html;18/8988-3;https://dblp.uni-trier.de/pers/hd/l/Liu:Dayiheng;225/5494.html;;96/2596-2", "google_scholar": "https://scholar.google.com.hk/citations?hl=zh-CN;piUkwMYAAAAJ;p1a5WXIAAAAJ;pPLQrX4AAAAJ;gCG4cPYAAAAJ;https://scholar.google.com/citations?hl=zh-CN;", "or_profile": "~Hang_Zhang6;~Yeyun_Gong2;~Xingwei_He1;~Dayiheng_Liu1;~Daya_Guo2;~Jiancheng_Lv2;~Jian_Guo2", "aff": "Sichuan University;Microsoft;The University of Hong Kong;Alibaba Group;SUN YAT-SEN UNIVERSITY, Tsinghua University;Sichuan University;International Digital Economy Academy, International Digital Economy Academy", "aff_domain": "scu.edu.cn;microsoft.com;hku.hk;alibaba-inc.com;sysu.edu.cn;scu.edu.cn;idea.edu.cn", "position": "PhD student;Researcher;Postdoc;Researcher;PhD student;Full Professor;Researcher", "bibtex": "@inproceedings{\nzhang2023noisy,\ntitle={Noisy Pair Corrector for Dense Retrieval},\nauthor={Hang Zhang and Yeyun Gong and Xingwei He and Dayiheng Liu and Daya Guo and Jiancheng Lv and Jian Guo},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=IXuCeFnnxU}\n}", "github": "", "project": "", "reviewers": "YGVa;7QqM;8vRk", "site": "https://openreview.net/forum?id=IXuCeFnnxU", "pdf_size": 0, "rating": "3;3;3", "confidence": "5;4;3", "excitement": "4;4;3", "reproducibility": "4;2;4", "correctness": "4;3;3", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0000-0002-8755-8941;;;", "linkedin": ";;;;;;", "aff_unique_index": "0;1;2;3;4;0;5", "aff_unique_norm": "Sichuan University;Microsoft;University of Hong Kong;Alibaba Group;Sun Yat-sen University;International Digital Economy Academy", "aff_unique_dep": ";Microsoft Corporation;;;;", "aff_unique_url": "https://www.scu.edu.cn;https://www.microsoft.com;https://www.hku.hk;https://www.alibaba.com;http://www.sysu.edu.cn;", "aff_unique_abbr": "SCU;Microsoft;HKU;Alibaba;SYSU;", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;1;0;0;0;0", "aff_country_unique": "China;United States;" }, { "id": "IZjyMygbw4", "title": "Eyes Show the Way: Modelling Gaze Behaviour for Hallucination Detection", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Detecting hallucinations in natural language processing (NLP) is a critical undertaking that demands a deep understanding of both the semantic and pragmatic aspects of languages. Cognitive approaches that leverage users\u2019 behavioural signals, such as gaze, have demonstrated effectiveness in addressing NLP tasks with similar linguistic complexities. 
However, their potential in the context of hallucination detection remains largely unexplored. In this paper, we propose a novel cognitive approach for hallucination detection that leverages gaze signals from humans. We first collect and introduce an eye tracking corpus (IITB-HGC: IITB-Hallucination Gaze corpus) consisting of 500 instances, annotated by five annotators for hallucination detection. Our analysis reveals that humans selectively attend to relevant parts of the text based on distributional similarity, similar to the attention bias phenomenon in psychology. We identify two attention strategies employed by humans: global attention, which focuses on the most informative sentence, and local attention, which focuses on important words within a sentence. Leveraging these insights, we propose a novel cognitive framework for hallucination detection that incorporates these attention biases. Experimental evaluations on the FactCC dataset demonstrate the efficacy of our approach, obtaining a balanced accuracy of 87.1%. Our study highlights the potential of gaze-based approaches in addressing the task of hallucination detection and sheds light on the cognitive processes employed by humans in identifying inconsistencies.", "keywords": "Cognition;Hallucination;NLG;Gaze;Human Attention", "primary_area": "", "supplementary_material": "", "author": "Kishan Maharaj;Ashita Saxena;Raja Kumar;Abhijit Mishra;Pushpak Bhattacharyya", "authorids": "~Kishan_Maharaj1;~Ashita_Saxena1;~Raja_Kumar3;~Abhijit_Mishra2;~Pushpak_Bhattacharyya1", "gender": "M;F;M;M;M", "homepage": "https://www.cse.iitb.ac.in/~kishan/;;https://abhijitmishra.github.io;https://www.cse.iitb.ac.in/~pb/;https://raja-7-c.github.io/", "dblp": ";362/8674;;p/PushpakBhattacharyya;248/0662", "google_scholar": "bzBkpcQAAAAJ;rA-qKjAAAAAJ;bcbJ0jQAAAAJ;https://scholar.google.com.tw/citations?user=vvg-pAkAAAAJ;KQuqOvMAAAAJ", "or_profile": "~Kishan_Maharaj1;~Ashita_Saxena1;~Abhijit_Mishra2;~Pushpak_Bhattacharyya1;~RAJA_KUMAR1", "aff": "Indian Institute of Technology, Bombay;Indian Institute of Technology Bombay, Indian Institute of Technology, Bombay;Apple;Indian Institute of Technology, Bombay, Dhirubhai Ambani Institute Of Information and Communication Technology;Indian Institute of Technology Bombay", "aff_domain": "iitb.ac.in;cse.iitb.ac.in;apple.com;iitb.ac.in;iitb.ac.in", "position": "MS student;MS student;Researcher;Full Professor;Undergrad student", "bibtex": "@inproceedings{\nmaharaj2023eyes,\ntitle={Eyes Show the Way: Modelling Gaze Behaviour for Hallucination Detection},\nauthor={Kishan Maharaj and Ashita Saxena and Raja Kumar and Abhijit Mishra and Pushpak Bhattacharyya},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=IZjyMygbw4}\n}", "github": "", "project": "", "reviewers": "WXZX;Qrwq;EMNe", "site": "https://openreview.net/forum?id=IZjyMygbw4", "pdf_size": 0, "rating": "1;1;1", "confidence": "3;3;2", "excitement": "3;4;3", "reproducibility": "4;3;3", "correctness": "3;3;3", "rating_avg": 1.0, "confidence_avg": 2.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;", "linkedin": "kishan-maharaj/;;;pushpakbh/?originalSubdomain=in;raja-kumar-156a12172/", "aff_unique_index": "0;0;1;2;0", "aff_unique_norm": "Indian Institute of Technology Bombay;Apple;Indian 
Institute of Technology, Bombay", "aff_unique_dep": ";Apple Inc.;", "aff_unique_url": "https://www.iitb.ac.in;https://www.apple.com;https://www.iitb.ac.in", "aff_unique_abbr": "IIT Bombay;Apple;IIT Bombay", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Bombay;", "aff_country_unique_index": "0;0;1;0;0", "aff_country_unique": "India;United States" }, { "id": "IZzZnp7IUs", "title": "Crystal: Introspective Reasoners Reinforced with Self-Feedback", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Extensive work has shown that the performance and interpretability of commonsense reasoning can be improved via knowledge-augmented reasoning methods, where the knowledge that underpins the reasoning process is explicitly verbalized and utilized. However, existing implementations, including \"chain-of-thought\" and its variants, fall short in capturing the *introspective* nature of knowledge required in commonsense reasoning, and in accounting for the mutual adaptation between the generation and utilization of knowledge. We propose a novel method to develop an introspective commonsense reasoner, **Crystal**. To tackle commonsense problems, it first introspects for knowledge statements related to the given question, and subsequently makes an informed prediction that is grounded in the previously introspected knowledge. The knowledge introspection and knowledge-grounded reasoning modes of the model are tuned via reinforcement learning to mutually adapt, where the reward derives from the feedback given by the model itself. Experiments show that Crystal significantly outperforms both the standard supervised finetuning and chain-of-thought distilled methods, and enhances the transparency of the commonsense reasoning process. Our work ultimately validates the feasibility and potential of reinforcing a neural model with self-feedback.", "keywords": "introspective reasoning;commonsense reasoning;question answering;reinforcement learning", "primary_area": "", "supplementary_material": "", "author": "Jiacheng Liu;Ramakanth Pasunuru;Hannaneh Hajishirzi;Yejin Choi;Asli Celikyilmaz", "authorids": "~Jiacheng_Liu2;~Ramakanth_Pasunuru2;~Hannaneh_Hajishirzi1;~Yejin_Choi1;~Asli_Celikyilmaz1", "gender": "M;M;F;F;F", "homepage": "https://github.com/liujch1998;http://rama-kanth.com;https://homes.cs.washington.edu/~hannaneh/;https://yejinc.github.io/;https://asli.us", "dblp": "289/6273;199/1748;52/1296;89/579-1;15/3724", "google_scholar": "GJfoBZAAAAAJ;https://scholar.google.com/citations?hl=en;LOV6_WIAAAAJ;vhP-tlcAAAAJ;https://scholar.google.com/citations?hl=en", "or_profile": "~Jiacheng_Liu2;~Ramakanth_Pasunuru2;~Hannaneh_Hajishirzi1;~Yejin_Choi1;~Asli_Celikyilmaz1", "aff": "Meta Facebook;Meta Platforms Inc;University of Washington;Department of Computer Science, University of Washington;FAIR ", "aff_domain": "meta.com;meta.com;uw.edu;cs.washington.edu;meta.com", "position": "Intern;Researcher;Associate Professor;Full Professor;Principal Researcher", "bibtex": "@inproceedings{\nliu2023crystal,\ntitle={Crystal: Introspective Reasoners Reinforced with Self-Feedback},\nauthor={Jiacheng Liu and Ramakanth Pasunuru and Hannaneh Hajishirzi and Yejin Choi and Asli Celikyilmaz},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=IZzZnp7IUs}\n}", "github": "", "project": "", "reviewers": "s1EV;Mz6D;PuFC", "site": "https://openreview.net/forum?id=IZzZnp7IUs", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;3", 
"excitement": "3;3;4", "reproducibility": "4;4;4", "correctness": "3;4;4", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.0, "correctness_avg": 3.6666666666666665, "replies_avg": 10, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0003-3308-2869;;;;", "linkedin": "liujch1998/;;;;aslicelikyilmaz/", "aff_unique_index": "0;0;1;1;0", "aff_unique_norm": "Meta;University of Washington", "aff_unique_dep": "Meta Platforms, Inc.;", "aff_unique_url": "https://meta.com;https://www.washington.edu", "aff_unique_abbr": "Meta;UW", "aff_campus_unique_index": "1", "aff_campus_unique": ";Seattle", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "IaBBd8Fod8", "title": "Breaking through Deterministic Barriers: Randomized Pruning Mask Generation and Selection", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "It is widely acknowledged that large and sparse models have higher accuracy than small and dense models under the same model size constraints. This motivates us to train a large model and then remove its redundant neurons or weights by pruning. Most existing works pruned the networks in a deterministic way, the performance of which solely depends on a single pruning criterion and thus lacks variety. Instead, in this paper, we propose a model pruning strategy that first generates several pruning masks in a designed random way. Subsequently, along with an effective mask-selection rule, the optimal mask is chosen from the pool of mask candidates. To further enhance efficiency, we introduce an early mask evaluation strategy, mitigating the overhead associated with training multiple masks. 
Our extensive experiments demonstrate that this approach achieves state-of-the-art performance across eight datasets from GLUE, particularly excelling at high levels of sparsity.", "keywords": "Pruning;Random;Language Models", "primary_area": "", "supplementary_material": "", "author": "Jianwei Li;Weizhi Gao;Qi Lei;Dongkuan Xu", "authorids": "~Jianwei_Li8;~Weizhi_Gao1;~Qi_Lei1;~Dongkuan_Xu2", "gender": "M;M;F;M", "homepage": "https://jianwei.gatsbyjs.io/;https://weizhigao.github.io/;https://cecilialeiqi.github.io/;https://dongkuanx27.github.io/", "dblp": ";335/0844;;142/8139", "google_scholar": ";8DMz6dUAAAAJ;kGOgaowAAAAJ;https://scholar.google.com/citations?hl=en", "or_profile": "~Jianwei_Li8;~Weizhi_Gao1;~Qi_Lei1;~Dongkuan_Xu2", "aff": "North Carolina State University;University of Chinese Academy of Sciences;New York University;North Carolina State University", "aff_domain": "ncsu.edu;ucas.edu.cn;nyu.edu;ncsu.edu", "position": "PhD student;MS student;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nli2023breaking,\ntitle={Breaking through Deterministic Barriers: Randomized Pruning Mask Generation and Selection},\nauthor={Jianwei Li and Weizhi Gao and Qi Lei and Dongkuan Xu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=IaBBd8Fod8}\n}", "github": "", "project": "", "reviewers": "gFqB;xLoZ;cTJA", "site": "https://openreview.net/forum?id=IaBBd8Fod8", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;3;3", "excitement": "3;3;3", "reproducibility": "3;4;3", "correctness": "3;3;4", "rating_avg": 3.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.0, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-5372-223X;;;0000-0002-1456-9658", "linkedin": "fourteenljw/;weizhi-gao-888052254/;;dongkuan-dk-xu-%F0%9F%87%BA%F0%9F%87%A6-05038087/", "aff_unique_index": "0;1;2;0", "aff_unique_norm": "North Carolina State University;University of Chinese Academy of Sciences;New York University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ncsu.edu;http://www.ucas.ac.cn;https://www.nyu.edu", "aff_unique_abbr": "NCSU;UCAS;NYU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "United States;China" }, { "id": "IadylMsom5", "title": "Beyond Detection: A Defend-and-Summarize Strategy for Robust and Interpretable Rumor Analysis on Social Media", "track": "main", "status": "Long Main", "tldr": "", "abstract": "As the impact of social media gradually escalates, people are more likely to be exposed to indistinguishable fake news. Therefore, numerous studies have attempted to detect rumors on social media by analyzing the textual content and propagation paths. However, fewer works on rumor detection tasks consider the malicious attacks commonly observed at response level. Moreover, existing detection models have poor interpretability. To address these issues, we propose a novel framework named **D**efend-**A**nd-**S**ummarize (DAS) based on the concept that responses sharing similar opinions should exhibit similar features. Specifically, DAS filters out the attack responses and summarizes the responsive posts of each conversation thread in both extractive and abstractive ways to provide multi-perspective prediction explanations. 
Furthermore, we enhance our detection architecture with the transformer and Bi-directional Graph Convolutional Networks. Experiments on three public datasets, *i.e.*, RumorEval2019, Twitter15, and Twitter16, demonstrate that our DAS defends against malicious attacks and provides prediction explanations, and the proposed detection model achieves state-of-the-art.", "keywords": "rumor detection;social network analysis;adversarial attack;interpretability;summarization;unsupervised learning;self-supervised learning", "primary_area": "", "supplementary_material": "", "author": "Yi-Ting Chang;Yun-Zhu Song;Yi-Syuan Chen;Hong-Han Shuai", "authorids": "~Yi-Ting_Chang1;~Yun-Zhu_Song1;~Yi-Syuan_Chen1;~Hong-Han_Shuai1", "gender": "M;F;M;M", "homepage": ";;;http://basiclab.lab.nycu.edu.tw/", "dblp": "30/437;258/0863;286/0820.html;86/10294", "google_scholar": ";https://scholar.google.com.tw/citations?user=WWXqc2YAAAAJ;https://scholar.google.com.tw/citations?hl=zh-TW;https://scholar.google.com.tw/citations?user=MSWL2noAAAAJ", "or_profile": "~Yi-Ting_Chang1;~Yun-Zhu_Song1;~Yi-Syuan_Chen1;~Hong-Han_Shuai1", "aff": "National Yang Ming Chiao Tung University;National Yang Ming Chiao Tung University;National Yang Ming Chiao Tung University;National Yang Ming Chiao Tung University", "aff_domain": "nycu.edu.tw;nycu.edu.tw;nycu.edu.tw;nycu.edu.tw", "position": "MS student;PhD student;PhD student;Associate Professor", "bibtex": "@inproceedings{\nchang2023beyond,\ntitle={Beyond Detection: A Defend-and-Summarize Strategy for Robust and Interpretable Rumor Analysis on Social Media},\nauthor={Yi-Ting Chang and Yun-Zhu Song and Yi-Syuan Chen and Hong-Han Shuai},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=IadylMsom5}\n}", "github": "", "project": "", "reviewers": "v8Le;PMQb;4y82", "site": "https://openreview.net/forum?id=IadylMsom5", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;3;4", "excitement": "3;3;3", "reproducibility": "4;4;3", "correctness": "4;4;4", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0000-0003-2216-077X", "linkedin": "joshchang0111/;yun-zhu-song-8a3b221a1/;;", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "National Yang Ming Chiao Tung University", "aff_unique_dep": "", "aff_unique_url": "https://www.nycu.edu.tw", "aff_unique_abbr": "NYCU", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Taiwan", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "IdSrFSqhHl", "title": "Self-Polish: Enhance Reasoning in Large Language Models via Problem Refinement", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "To enhance the multi-step reasoning capabilities of large language models, researchers have extensively explored prompting methods, notably the Chain-of-Thought (CoT) method which explicitly elicits human-like rationales. However, they have inadvertently overlooked the potential of enhancing model reasoning performance by formulating higher-quality problems \\footnote{A reasoning problem often consists of two parts: the context and the final question \\cite{DBLP:journals/corr/abs-2205-09712}.}. 
In this work, we start from the problem side and propose Self-Polish (SP), a novel method that facilitates the model's reasoning by guiding it to progressively refine the given problems to be more comprehensible and solvable. We also explore several automatic prompting variants and propose the Self-Polish prompt bank for the community. SP is orthogonal to all other prompting methods on the answer/reasoning side, such as CoT, allowing for seamless integration with state-of-the-art techniques for further improvement. Thorough experiments show that the proposed method attains notable and consistent effectiveness on five reasoning benchmarks across different models. Furthermore, our method also showcases impressive performance on robustness evaluation. Codes and prompts are available at https://github.com/WooooDyy/Self-Polish.", "keywords": "Reasoning;Large Language Models;Mathematical Problems", "primary_area": "", "supplementary_material": "", "author": "Zhiheng Xi;Senjie Jin;Yuhao Zhou;Rui Zheng;Songyang Gao;Jia Liu;Tao Gui;Qi Zhang;Xuanjing Huang", "authorids": "~Zhiheng_Xi1;~Senjie_Jin1;~Yuhao_Zhou3;~Rui_Zheng1;~Songyang_Gao1;~Jia_Liu11;~Tao_Gui1;~Qi_Zhang8;~Xuanjing_Huang1", "gender": ";M;M;M;M;M;M;M;F", "homepage": "https://woooodyy.github.io/;;https://ciaran.top;https://github.com/ruizheng20;;https://software.nju.edu.cn/liujia/index.html;;http://qizhang.info;https://xuanjing-huang.github.io/", "dblp": "333/4268;348/5674.html;;;314/6067;49/1245-15;135/6973;52/323-1;05/6735-1", "google_scholar": "https://scholar.google.com.hk/citations?user=zSVLkqAAAAAJ;https://scholar.google.com.hk/citations?user=kMP_SiUAAAAJ;qHHExWgAAAAJ;https://scholar.google.com.hk/citations?user=7Z0V_SoAAAAJ;O42mLrsAAAAJ;;;XfqR3yYAAAAJ;RGsMgZA4H78C", "or_profile": "~Zhiheng_Xi1;~Senjie_Jin1;~Yuhao_Zhou3;~Rui_Zheng1;~Songyang_Gao1;~Jia_Liu11;~Tao_Gui1;~Qi_Zhang8;~Xuanjing_Huang1", "aff": "Fudan University;Fudan University;Fudan University;Fudan University;Fudan University;Nanjing University;Fudan University;Fudan University;Fudan University", "aff_domain": "fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;nju.edu.cn;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn", "position": "PhD student;MS student;MS student;PhD student;MS student;Associate Professor;Assistant Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nxi2023selfpolish,\ntitle={Self-Polish: Enhance Reasoning in Large Language Models via Problem Refinement},\nauthor={Zhiheng Xi and Senjie Jin and Yuhao Zhou and Rui Zheng and Songyang Gao and Jia Liu and Tao Gui and Qi Zhang and Xuanjing Huang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=IdSrFSqhHl}\n}", "github": "", "project": "", "reviewers": "Y5Nk;Wq3x;8rFo", "site": "https://openreview.net/forum?id=IdSrFSqhHl", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;4", "excitement": "2;3;3", "reproducibility": "4;3;5", "correctness": "3;3;4", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 2.6666666666666665, "reproducibility_avg": 4.0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 9, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;0000-0001-8368-4898;;;0000-0001-9197-9426", "linkedin": ";;;;;;;;", "aff_unique_index": "0;0;0;0;0;1;0;0;0", "aff_unique_norm": "Fudan University;Nanjing University", "aff_unique_dep": ";", "aff_unique_url": "https://www.fudan.edu.cn;https://www.nju.edu.cn", "aff_unique_abbr": "Fudan;Nanjing U", 
"aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "IdXpzsTWRs", "title": "StoryAnalogy: Deriving Story-level Analogies from Large Language Models to Unlock Analogical Understanding", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Analogy-making between narratives is crucial for human reasoning.\nIn this paper, we evaluate the ability to identify and generate analogies by constructing a first-of-its-kind large-scale story-level analogy corpus, \\textsc{StoryAnalogy}, which contains 24K story pairs from diverse domains with human annotations on two similarities from the extended Structure-Mapping Theory.\nWe design a set of tests on \\textsc{StoryAnalogy}, presenting the first evaluation of story-level analogy identification and generation.\nInterestingly, we find that the analogy identification tasks are incredibly difficult not only for sentence embedding models but also for the recent large language models (LLMs) such as ChatGPT and LLaMa.\nChatGPT, for example, only achieved around 30\\% accuracy in multiple-choice questions (compared to over 85\\% accuracy for humans). \nFurthermore, we observe that the data in \\textsc{StoryAnalogy} can improve the quality of analogy generation in LLMs, where a fine-tuned FlanT5-xxl model achieves comparable performance to zero-shot ChatGPT.", "keywords": "Analogy;Semantic similarity", "primary_area": "", "supplementary_material": "", "author": "Cheng Jiayang;Lin Qiu;Tsz Ho CHAN;Tianqing Fang;Weiqi Wang;Chunkit Chan;Dongyu Ru;Qipeng Guo;Hongming Zhang;Yangqiu Song;Yue Zhang;Zheng Zhang", "authorids": "~Cheng_Jiayang1;~Lin_Qiu2;~Tsz_Ho_CHAN2;~Tianqing_Fang1;~Weiqi_Wang1;~Chunkit_Chan1;~Dongyu_Ru1;~Qipeng_Guo1;~Hongming_Zhang2;~Yangqiu_Song1;~Yue_Zhang7;~Zheng_Zhang1", "gender": ";M;M;M;M;;;M;M;M;M;M", "homepage": ";;;http://fangtq.com/;https://mighty-weaver.github.io/;;;;http://www.cse.ust.hk/~hzhangal/;https://www.cse.ust.hk/~yqsong/;http://frcchang.github.io;https://shanghai.nyu.edu/academics/faculty/directory/zheng-zhang", "dblp": ";;;283/4921;51/5775-1;;;172/1046;;86/2159;47/722-4;", "google_scholar": ";U4GJuPIAAAAJ;DqYZj7sAAAAJ;https://scholar.google.com.hk/citations?user=Tb3rc34AAAAJ;https://scholar.google.com/citations?hl=zh-CN;;;k3mPGKgAAAAJ;i5ETuuQAAAAJ;MdQZ-q8AAAAJ;;https://scholar.google.com.hk/citations?user=k0KiE4wAAAAJ", "or_profile": "~Cheng_Jiayang1;~Lin_Qiu2;~Tsz_Ho_CHAN2;~Tianqing_Fang1;~Weiqi_Wang1;~Chunkit_Chan1;~Dongyu_Ru1;~Qipeng_Guo1;~Hongming_Zhang2;~Yangqiu_Song1;~Yue_Zhang7;~Zheng_Zhang1", "aff": ";Amazon;Hong Kong University of Science and Technology;Hong Kong University of Science and Technology;Hong Kong University of Science and Technology;;;Amazon;Tencent AI Lab Seattle;Hong Kong University of Science and Technology;Westlake University;Amazon", "aff_domain": ";amazon.com;hkust.edu;ust.hk;ust.hk;;;amazon.com;tencent.com;ust.hk;westlake.edu.cn;amazon.com", "position": ";Researcher;MS student;PhD student;PhD student;;;Researcher;Researcher;Associate Professor;Full Professor;Senior Principal Scientist", "bibtex": "@inproceedings{\njiayang2023storyanalogy,\ntitle={StoryAnalogy: Deriving Story-level Analogies from Large Language Models to Unlock Analogical Understanding},\nauthor={Cheng Jiayang and Lin Qiu and Tsz Ho CHAN and Tianqing Fang and Weiqi Wang and Chunkit Chan and Dongyu Ru and Qipeng Guo and Hongming Zhang and Yangqiu Song and Yue Zhang and Zheng Zhang},\nbooktitle={The 2023 Conference on Empirical 
Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=IdXpzsTWRs}\n}", "github": "", "project": "", "reviewers": "2Qdd;WoNd;yNU4;8L5d", "site": "https://openreview.net/forum?id=IdXpzsTWRs", "pdf_size": 0, "rating": "4;4;4;4", "confidence": "4;4;4;4", "excitement": "4;4;3;3", "reproducibility": "4;5;2;3", "correctness": "4;4;3;4", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.5, "reproducibility_avg": 3.5, "correctness_avg": 3.75, "replies_avg": 14, "authors#_avg": 12, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;0000-0002-1617-9805;;;;;0000-0002-7818-6090;0000-0002-5214-2268;", "linkedin": ";;;;weiqi-wang-a49b5019a/;;;;;yqsong/;;", "aff_unique_index": "0;1;1;1;0;2;1;3;0", "aff_unique_norm": "Amazon;Hong Kong University of Science and Technology;Tencent;Westlake University", "aff_unique_dep": "Amazon.com, Inc.;;Tencent AI Lab;", "aff_unique_url": "https://www.amazon.com;https://www.ust.hk;https://ai.tencent.com;https://www.westlake.edu.cn", "aff_unique_abbr": "Amazon;HKUST;Tencent AI Lab;WU", "aff_campus_unique_index": "1;1;1;2;1", "aff_campus_unique": ";Hong Kong SAR;Seattle", "aff_country_unique_index": "0;1;1;1;0;0;1;1;0", "aff_country_unique": "United States;China" }, { "id": "Ie040B4nFm", "title": "Integrating Language Models into Direct Speech Translation: An Inference-Time Solution to Control Gender Inflection", "track": "main", "status": "Short Main", "tldr": "", "abstract": "When translating words referring to the speaker, speech translation (ST) systems should not resort to default masculine generics nor rely on potentially misleading vocal traits. Rather, they should assign gender according to the speakers' preference. The existing solutions to do so, though effective, are hardly feasible in practice as they involve dedicated model re-training on gender-labeled ST data. To overcome these limitations, we propose the first inference-time solution to control speaker-related gender inflections in ST. Our approach partially replaces the (biased) internal language model (LM) implicitly learned by the ST decoder with gender-specific external LMs. Experiments on en\u2192es/fr/it show that our solution outperforms the base models and the best training-time mitigation strategy by up to 31.0 and 1.6 points in gender accuracy, respectively, for feminine forms. 
The gains are even larger (up to 32.0 and 3.4) in the challenging condition where speakers' vocal traits conflict with their gender.", "keywords": "Speech translation;Gender Bias;Language Model", "primary_area": "", "supplementary_material": "", "author": "Dennis Fucci;Marco Gaido;Sara Papi;Mauro Cettolo;Matteo Negri;Luisa Bentivogli", "authorids": "~Dennis_Fucci1;~Marco_Gaido1;~Sara_Papi1;~Mauro_Cettolo1;~Matteo_Negri1;~Luisa_Bentivogli1", "gender": "M;M;F;M;M;F", "homepage": ";;https://sarapapi.github.io/;https://mt.fbk.eu/author/cettolo/;https://ict.fbk.eu/people/detail/matteo-negri/;https://mt.fbk.eu/author/bentivogli/", "dblp": "319/9730;266/8121;277/3949;67/5075;95/3678;50/1445", "google_scholar": "https://scholar.google.com/citations?hl=en;Ojc1LRYAAAAJ;E8_v8NUAAAAJ;5uWajM4AAAAJ;NTTQbJsAAAAJ;https://scholar.google.com/citations?hl=en", "or_profile": "~Dennis_Fucci1;~Marco_Gaido1;~Sara_Papi1;~Mauro_Cettolo1;~Matteo_Negri1;~Luisa_Bentivogli1", "aff": "University of Trento;Fondazione Bruno Kessler;Microsoft;Fondazione Bruno Kessler;Fondazione Bruno Kessler;Fondazione Bruno Kessler", "aff_domain": "unitn.it;fbk.eu;microsoft.com;fbk.eu;fbk.eu;fbk.eu", "position": "PhD student;PhD student;Intern;Researcher;Senior researcher;Researcher", "bibtex": "@inproceedings{\nfucci2023integrating,\ntitle={Integrating Language Models into Direct Speech Translation: An Inference-Time Solution to Control Gender Inflection},\nauthor={Dennis Fucci and Marco Gaido and Sara Papi and Mauro Cettolo and Matteo Negri and Luisa Bentivogli},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=Ie040B4nFm}\n}", "github": "", "project": "", "reviewers": "maVW;jwAu;6X26", "site": "https://openreview.net/forum?id=Ie040B4nFm", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;2;4", "excitement": "3;4;4", "reproducibility": "4;3;4", "correctness": "3;3;4", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-0940-5595;0000-0003-4217-1396;0000-0002-4494-8886;0000-0001-8388-497X;0000-0002-8811-4330;0000-0001-7480-2231", "linkedin": ";;sara-papi;mauro-cettolo-3928aa10/;negrimatteo/;luisa-bentivogli-89577587/", "aff_unique_index": "0;1;2;1;1;1", "aff_unique_norm": "University of Trento;Fondazione Bruno Kessler;Microsoft", "aff_unique_dep": ";;Microsoft Corporation", "aff_unique_url": "https://www.unitn.it;https://www.fbk.eu;https://www.microsoft.com", "aff_unique_abbr": "UniTN;FBK;Microsoft", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0;0", "aff_country_unique": "Italy;United States" }, { "id": "IgPf3oLp6B", "title": "Query2Triple: Unified Query Encoding for Answering Diverse Complex Queries over Knowledge Graphs", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Complex Query Answering (CQA) is a challenge task of Knowledge Graph (KG). 
Due to the incompleteness of KGs, query embedding (QE) methods have been proposed to encode queries and entities into the same embedding space, and treat logical operators as neural set operators to obtain answers.\nHowever, these methods train KG embeddings and neural set operators concurrently on both simple (one-hop) and complex (multi-hop and logical) queries, which causes performance degradation on simple queries and low training efficiency.\nIn this paper, we propose Query to Triple (Q2T), a novel approach that decouples the training for simple and complex queries.\nQ2T divides the training into two stages:\n(1) Pre-training the neural link predictor on simple queries to predict tail entities based on the head entity and relation.\n(2) Training the query encoder on complex queries to encode diverse complex queries into a unified triple form that can be efficiently solved by the pretrained link predictor. \nOur proposed Q2T is not only efficient to train, but also modular, thus easily adaptable to various neural link predictors that have been studied well.\nExtensive experiments demonstrate that, even without explicit modeling for neural set operators, Q2T still achieves state-of-the-art performance on diverse complex queries over three public benchmarks.", "keywords": "Knowledge Graph;Complex Query Answering;CQA", "primary_area": "", "supplementary_material": "", "author": "Yao Xu;Shizhu He;Cunguang Wang;Li Cai;Kang Liu;Jun Zhao", "authorids": "~Yao_Xu3;~Shizhu_He2;~Cunguang_Wang1;~Li_Cai2;~Kang_Liu1;~Jun_Zhao4", "gender": "M;M;;M;M;M", "homepage": "https://github.com/YaooXu/;https://heshizhu.github.io/;;;http://www.nlpr.ia.ac.cn/cip/~liukang/index.html;http://nlpr-web.ia.ac.cn/cip/english/~junzhao/index.html", "dblp": ";136/8650;;20/2971;42/4903.html;https://dblp.uni-trier.de/pid/47/2026-1.html", "google_scholar": ";zBPIt3QAAAAJ;CxiCFsUAAAAJ;;DtZCfl0AAAAJ;https://scholar.google.com.hk/citations?user=HljRttwAAAAJ", "or_profile": "~Yao_Xu3;~Shizhu_He2;~Cunguang_Wang1;~Li_Cai2;~Kang_Liu1;~Jun_Zhao4", "aff": "Institute of Automation, Chinese Academy of Sciences;Institute of Automation, Chinese Academy of Sciences;;;Institute of Automation, Chinese Academy of Sciences;Institute of automation, Chinese academy of science", "aff_domain": "ia.ac.cn;ia.ac.cn;;;ia.ac.cn;nlpr.ia.ac.cn", "position": "PhD student;Associate Researcher;;;Professor;Full Professor", "bibtex": "@inproceedings{\nxu2023querytriple,\ntitle={Query2Triple: Unified Query Encoding for Answering Diverse Complex Queries over Knowledge Graphs},\nauthor={Yao Xu and Shizhu He and Cunguang Wang and Li Cai and Kang Liu and Jun Zhao},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=IgPf3oLp6B}\n}", "github": "", "project": "", "reviewers": "EuCv;hmQo;Tnh9", "site": "https://openreview.net/forum?id=IgPf3oLp6B", "pdf_size": 0, "rating": "4;4;4", "confidence": "5;5;5", "excitement": "4;4;3", "reproducibility": "5;4;2", "correctness": "4;4;2", "rating_avg": 4.0, "confidence_avg": 5.0, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;", "linkedin": ";;;;;", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Chinese Academy of Sciences", "aff_unique_dep": "Institute of Automation", "aff_unique_url": "http://www.ia.cas.cn", "aff_unique_abbr": "CAS", 
"aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "IhHB1l1mwp", "title": "Seq2seq is All You Need for Coreference Resolution", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Existing works on coreference resolution suggest that task-specific models are necessary to achieve state-of-the-art performance. In this work, we present compelling evidence that such models are not necessary. We finetune a pretrained seq2seq transformer to map an input document to a tagged sequence encoding the coreference annotation. Despite the extreme simplicity, our model outperforms or closely matches the best coreference systems in the literature on an array of datasets. We consider an even simpler version of seq2seq that generates only the tagged spans and find it highly performant. Our analysis shows that the model size, the amount of supervision, and the choice of sequence representations are key factors in performance.", "keywords": "coreference resolution; sequence-to-sequence models;", "primary_area": "", "supplementary_material": "", "author": "Wenzheng Zhang;Sam Wiseman;Karl Stratos", "authorids": "~Wenzheng_Zhang1;~Sam_Wiseman1;~Karl_Stratos2", "gender": "M;M;M", "homepage": "https://wenzhengzhang.github.io/;https://swiseman.github.io;http://karlstratos.com/", "dblp": ";149/1260;07/11293", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;SDavuPAAAAAJ;Fx8-1JMAAAAJ", "or_profile": "~Wenzheng_Zhang1;~Sam_Wiseman1;~Karl_Stratos1", "aff": "Rutgers University;Department of Computer Science, Duke University;Rutgers University", "aff_domain": "cs.rutgers.edu;cs.duke.edu;rutgers.edu", "position": "PhD student;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nzhang2023seqseq,\ntitle={Seq2seq is All You Need for Coreference Resolution},\nauthor={Wenzheng Zhang and Sam Wiseman and Karl Stratos},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=IhHB1l1mwp}\n}", "github": "", "project": "", "reviewers": "msDa;SffD;Jhxb", "site": "https://openreview.net/forum?id=IhHB1l1mwp", "pdf_size": 0, "rating": "4;4;4", "confidence": "5;4;4", "excitement": "5;4;3", "reproducibility": "3;3;3", "correctness": "4;4;3", "rating_avg": 4.0, "confidence_avg": 4.333333333333333, "excitement_avg": 4.0, "reproducibility_avg": 3.0, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": ";;", "aff_unique_index": "0;1;0", "aff_unique_norm": "Rutgers University;Duke University", "aff_unique_dep": ";Department of Computer Science", "aff_unique_url": "https://www.rutgers.edu;https://www.duke.edu", "aff_unique_abbr": "Rutgers;Duke", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "Ihgea6IIWo", "title": "Countering Misinformation via Emotional Response Generation", "track": "main", "status": "Long Main", "tldr": "", "abstract": "The proliferation of misinformation on social media platforms (SMPs) poses a significant danger to public health, social cohesion and ultimately democracy. Previous research has shown how social correction can be an effective way to curb misinformation, by engaging directly in a constructive dialogue with users who spread \u2013 often in good faith \u2013 misleading messages. 
Although professional fact-checkers are crucial to debunking viral claims, they usually do not engage in conversations on social media. Thereby, significant effort has been made to automate the use of fact-checker material in social correction; however, no previous work has tried to integrate it with the style and pragmatics that are commonly employed in social media communication. To fill this gap, we present VerMouth, the first large-scale dataset comprising roughly 12 thousand claim-response pairs (linked to debunking articles), accounting for both SMP-style and basic emotions, two factors which have a significant role in misinformation credibility and spreading. To collect this dataset we used a technique based on an author-reviewer pipeline, which efficiently combines LLMs and human annotators to obtain high-quality data. We also provide comprehensive experiments showing how models trained on our proposed dataset have significant improvements in terms of output quality and generalization capabilities.", "keywords": "Misinformation Countering;Automated Fact-Checking;Knowledge-Driven NLG;Automatic Text Summarization;Human-Machine Collaboration;Data Collection", "primary_area": "", "supplementary_material": "", "author": "Daniel Russo;Shane Peter Kaszefski-Yaschuk;Jacopo Staiano;Marco Guerini", "authorids": "~Daniel_Russo3;~Shane_Peter_Kaszefski-Yaschuk1;~Jacopo_Staiano2;~Marco_Guerini1", "gender": "M;M;;", "homepage": ";;http://www.staiano.net;https://www.marcoguerini.eu/", "dblp": "10/9946-4;325/5528;94/2681;68/2913", "google_scholar": "pAJzA38AAAAJ;;https://scholar.google.com/citations?hl=en;https://scholar.google.it/citations?user=dt-Fys0AAAAJ", "or_profile": "~Daniel_Russo3;~Shane_Peter_Kaszefski-Yaschuk1;~Jacopo_Staiano2;~Marco_Guerini1", "aff": "University of Trento;University of Trento;University of Trento;Fondazione Bruno Kessler", "aff_domain": "unitn.it;unitn.it;unitn.it;fbk.eu", "position": "PhD student;MS student;Senior Assistant Professor;Principal Researcher", "bibtex": "@inproceedings{\nrusso2023countering,\ntitle={Countering Misinformation via Emotional Response Generation},\nauthor={Daniel Russo and Shane Peter Kaszefski-Yaschuk and Jacopo Staiano and Marco Guerini},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=Ihgea6IIWo}\n}", "github": "", "project": "", "reviewers": "NTyP;LBT1;Ysmd;QBDx;Emnp", "site": "https://openreview.net/forum?id=Ihgea6IIWo", "pdf_size": 0, "rating": "5;5;5;5;5", "confidence": "4;4;4;4;3", "excitement": "4;3;3;4;4", "reproducibility": "4;2;2;4;2", "correctness": "4;3;3;3;4", "rating_avg": 5.0, "confidence_avg": 3.8, "excitement_avg": 3.6, "reproducibility_avg": 2.8, "correctness_avg": 3.4, "replies_avg": 17, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0009-0006-9123-5316;;0000-0002-1260-4640;0000-0003-1582-6617", "linkedin": "daniel-russo-0b4913241;kaszefski;;", "aff_unique_index": "0;0;0;1", "aff_unique_norm": "University of Trento;Fondazione Bruno Kessler", "aff_unique_dep": ";", "aff_unique_url": "https://www.unitn.it;https://www.fbk.eu", "aff_unique_abbr": "UniTN;FBK", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Italy" }, { "id": "IksoHnq4rC", "title": "End-to-end Adversarial Sample Generation for Data Augmentation", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Adversarial samples pose a significant 
challenge to neural inference models.\nIn this paper, we propose A3, a novel approach for enhancing the robustness of neural NLP models that combines adversarial training and data augmentation.\nWe propose an adversarial sample generator that consists of a conditioned paraphrasing model and a condition generator.\nThe latter aims to generate conditions that guide the paraphrasing model to generate adversarial samples.\nA pretrained discriminator is introduced to help the adversarial sample generator adapt to the data characteristics for different tasks. \nWe adopt a weighted loss to incorporate the generated adversarial samples with the original samples for augmented training.\nCompared to existing methods, our approach is more efficient since the generation process is independent of the target model and the generated samples are reusable for different models.\nExperimental results on several tasks show that our approach improves the overall performance of the trained model. In particular, the enhanced model is robust to various attack techniques.", "keywords": "adversarial sample;data augmentation", "primary_area": "", "supplementary_material": "", "author": "Tianyuan Liu;YUQING SUN", "authorids": "~Tianyuan_Liu1;~YUQING_SUN1", "gender": "M;", "homepage": ";", "dblp": ";58/5011-1", "google_scholar": "eeptaO4AAAAJ;", "or_profile": "~Tianyuan_Liu1;~YUQING_SUN1", "aff": "Shandong University;Shandong University", "aff_domain": "sdu.edu.cn;sdu.edu.cn", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\nliu2023endtoend,\ntitle={End-to-end Adversarial Sample Generation for Data Augmentation},\nauthor={Tianyuan Liu and YUQING SUN},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=IksoHnq4rC}\n}", "github": "", "project": "", "reviewers": "CHih;ue2L;m7F9", "site": "https://openreview.net/forum?id=IksoHnq4rC", "pdf_size": 0, "rating": "3;3;3", "confidence": "5;4;4", "excitement": "2;4;3", "reproducibility": "2;3;2", "correctness": "3;3;3", "rating_avg": 3.0, "confidence_avg": 4.333333333333333, "excitement_avg": 3.0, "reproducibility_avg": 2.3333333333333335, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-0625-6096", "linkedin": ";", "aff_unique_index": "0;0", "aff_unique_norm": "Shandong University", "aff_unique_dep": "", "aff_unique_url": "http://www.sdu.edu.cn", "aff_unique_abbr": "SDU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "id": "IlgpELdUeK", "title": "Axiomatic Preference Modeling for Longform Question Answering", "track": "main", "status": "Long Main", "tldr": "", "abstract": "The remarkable abilities of large language models (LLMs) like ChatGPT and GPT-4 partially stem from the post-training processes involving human preferences encoded within a reward model as part of a Reinforcement Learning from Human Feedback (RLHF) regimen. \nThese reward models (RMs) often lack direct knowledge of why, or under what principles, the preference annotations were made. In this study, we identify principles that guide RMs to better align with human preferences, and then develop an axiomatic framework to generate a rich variety of preference signals to uphold them. We use these axiomatic signals to train a model for scoring answers to longform questions. 
Our approach yields a \\textbf{Preference Model} with only about 220M parameters that agrees with gold human-annotated preference labels more often than GPT-4.\nThe contributions of this work include: training a standalone preference model that can score human- and LLM-generated answers on the same scale; developing an axiomatic framework for generating training data pairs tailored to certain principles; and showing that a small amount of axiomatic signals can help small models outperform GPT-4 in preference scoring. We intend to release our axiomatic data and model.", "keywords": "reward modeling;preference modeling;RLHF;Large Language Models;long form question answering", "primary_area": "", "supplementary_material": "", "author": "Corby Rosset;Guoqing Zheng;Victor Dibia;Ahmed Hassan Awadallah;Paul N. Bennett", "authorids": "~Corby_Rosset2;~Guoqing_Zheng1;~Victor_Dibia1;~Ahmed_Hassan_Awadallah1;~Paul_N._Bennett1", "gender": ";M;M;;M", "homepage": "https://www.microsoft.com/en-us/research/people/zheng/;http://victordibia.com/;https://www.microsoft.com/en-us/research/people/hassanam/publications/;https://www.microsoft.com/en-us/research/people/pauben/publications/;http://corbyrosset.com/", "dblp": "https://dblp.org/pers/z/Zheng:Guoqing.html;147/6528;147/9148;33/6188;", "google_scholar": "aMhUcoMAAAAJ;;sNGk-9MAAAAJ;AIncPrIAAAAJ;Y2YBgCsAAAAJ", "or_profile": "~Guoqing_Zheng1;~Victor_Dibia1;~Ahmed_Hassan_Awadallah1;~Paul_N._Bennett1;~Corbin_L_Rosset1", "aff": "Microsoft Research;Microsoft Research;Microsoft Research;Microsoft;Microsoft Research", "aff_domain": "microsoft.com;research.microsoft.com;microsoft.com;microsoft.com;research.microsoft.com", "position": "Researcher;Principal Researcher;Principal Researcher;Researcher;Researcher", "bibtex": "@inproceedings{\nrosset2023axiomatic,\ntitle={Axiomatic Preference Modeling for Longform Question Answering},\nauthor={Corby Rosset and Guoqing Zheng and Victor Dibia and Ahmed Hassan Awadallah and Paul N. Bennett},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=IlgpELdUeK}\n}", "github": "", "project": "", "reviewers": "k1y7;83C4;sQQz", "site": "https://openreview.net/forum?id=IlgpELdUeK", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;4;4", "excitement": "3;5;4", "reproducibility": "3;3;4", "correctness": "3;4;5", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 4.0, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 4.0, "replies_avg": 10, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0009-0006-7852-9651;", "linkedin": ";;ahmed-hassan-awadallah-a355a27/;paulnbennett/;", "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Microsoft", "aff_unique_dep": "Microsoft Research", "aff_unique_url": "https://www.microsoft.com/en-us/research", "aff_unique_abbr": "MSR", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "In4L79U5n7", "title": "$\\textit{``Don't Take This Out of Context!''}$ On the Need for Contextual Models and Evaluations for Stylistic Rewriting", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Most existing stylistic text rewriting methods and evaluation metrics operate on a sentence level, but ignoring the broader context of the text can lead to preferring generic, ambiguous, and incoherent rewrites. 
In this paper, we investigate integrating the preceding textual context into both the $\\textit{rewriting}$ and $\\textit{evaluation}$ stages of stylistic text rewriting, and introduce a new composite contextual evaluation metric $\\texttt{CtxSimFit}$ that combines similarity to the original sentence with contextual cohesiveness.\nWe comparatively evaluate non-contextual and contextual rewrites in formality, toxicity, and sentiment transfer tasks. Our experiments show that humans significantly prefer contextual rewrites as more fitting and natural over non-contextual ones, yet existing sentence-level automatic metrics (e.g., ROUGE, SBERT) correlate poorly with human preferences ($\\rho$=0--0.3). In contrast, human preferences are much better reflected by both our novel $\\texttt{CtxSimFit}$ ($\\rho$=0.7--0.9) as well as proposed context-infused versions of common metrics ($\\rho$=0.4--0.7). Overall, our findings highlight the importance of integrating context into the generation and especially the evaluation stages of stylistic text rewriting.", "keywords": "stylistic rewriting;contextual evaluation;contextual generation", "primary_area": "", "supplementary_material": "", "author": "Akhila Yerukola;Xuhui Zhou;Elizabeth Clark;Maarten Sap", "authorids": "~Akhila_Yerukola1;~Xuhui_Zhou1;~Elizabeth_Clark2;~Maarten_Sap1", "gender": ";M;;M", "homepage": "https://akhila-yerukola.github.io/;https://xuhuizhou.github.io/;https://eaclark07.github.io/;http://maartensap.com", "dblp": "249/5606.html;;148/6935;153/9519", "google_scholar": "Y7j60UQAAAAJ;CKyX_Y8AAAAJ;;gFN4QUYAAAAJ", "or_profile": "~Akhila_Yerukola1;~Xuhui_Zhou1;~Elizabeth_Clark2;~Maarten_Sap1", "aff": "Carnegie Mellon University;Carnegie Mellon University;Google;Carnegie Mellon University", "aff_domain": "cmu.edu;andrew.cmu.edu;google.com;cmu.edu", "position": "PhD student;PhD student;Researcher;Assistant Professor", "bibtex": "@inproceedings{\nyerukola2023textitdont,\ntitle={\\${\\textbackslash}textit\\{``Don't Take This Out of Context!''\\}\\$ On the Need for Contextual Models and Evaluations for Stylistic Rewriting},\nauthor={Akhila Yerukola and Xuhui Zhou and Elizabeth Clark and Maarten Sap},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=In4L79U5n7}\n}", "github": "", "project": "", "reviewers": "fCPE;joB3;XbBa;5HXe", "site": "https://openreview.net/forum?id=In4L79U5n7", "pdf_size": 0, "rating": "4;4;4;4", "confidence": "3;3;2;4", "excitement": "4;4;4;4", "reproducibility": "4;4;3;4", "correctness": "4;4;4;4", "rating_avg": 4.0, "confidence_avg": 3.0, "excitement_avg": 4.0, "reproducibility_avg": 3.75, "correctness_avg": 4.0, "replies_avg": 13, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;", "linkedin": "akhilayerukola;;;", "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Carnegie Mellon University;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.cmu.edu;https://www.google.com", "aff_unique_abbr": "CMU;Google", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "InhYJzIuBi", "title": "Aspect-Category Enhanced Learning with a Neural Coherence Model for Implicit Sentiment Analysis", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Aspect-based sentiment analysis (ABSA) has been widely studied since the explosive growth of social networking services. 
However, the recognition of implicit sentiments that do not contain obvious opinion words remains less explored. In this paper, we propose aspect-category enhanced learning with a neural coherence model (ELCoM). It captures document-level coherence by using contrastive learning, and sentence-level by a hypergraph to mine opinions from explicit sentences to aid implicit sentiment classification. To address the issue of sentences with different sentiment polarities in the same category, we perform cross-category enhancement to offset the impact of anomalous nodes in the hypergraph and obtain sentence representations with enhanced aspect-category. Extensive experiments on benchmark datasets show that the ELCoM achieves state-of-the-art performance. Our source codes and data are released at \\url{https://github.com/cuijin-23/ELCoM}.", "keywords": "Aspect-Based Sentiment Analysis;Implicit Sentiment;Coherence", "primary_area": "", "supplementary_material": "", "author": "Jin Cui;Fumiyo Fukumoto;Xinfeng Wang;Yoshimi Suzuki;Jiyi Li;Wanzeng Kong", "authorids": "~Jin_Cui1;~Fumiyo_Fukumoto2;~Xinfeng_Wang2;~Yoshimi_Suzuki2;~Jiyi_Li1;~Wanzeng_Kong1", "gender": "F;F;M;M;;M", "homepage": ";http://cl.cs.yamanashi.ac.jp/index_e.html;https://wangxfng.github.io/;http://www.ircl.yamanashi.ac.jp/~ysuzuki/;http://bit.ly/jiyili;", "dblp": ";57/1318.html;94/6801;03/897.html;69/2797;04/10544", "google_scholar": "5dVugBQAAAAJ;BTZeUrQAAAAJ;l-ye3qgAAAAJ;g7-QywEAAAAJ;https://scholar.google.co.jp/citations?user=4upavkAAAAAJ;", "or_profile": "~Jin_Cui1;~Fumiyo_Fukumoto2;~Xinfeng_Wang2;~Yoshimi_Suzuki2;~Jiyi_Li1;~Wanzeng_Kong1", "aff": "University of Yamanashi;Yamanashi University;University of Yamanashi ;Yamanashi University;University of Yamanashi;Hangzhou Dianzi University", "aff_domain": "yamanashi.ac.jp;yamanashi.ac.jp;yamanashi.ac.jp;yamanashi.ac.jp;yamanashi.ac.jp;hdu.edu.cn", "position": "PhD student;Full Professor;PhD student;Full Professor;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\ncui2023aspectcategory,\ntitle={Aspect-Category Enhanced Learning with a Neural Coherence Model for Implicit Sentiment Analysis},\nauthor={Jin Cui and Fumiyo Fukumoto and Xinfeng Wang and Yoshimi Suzuki and Jiyi Li and Wanzeng Kong},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=InhYJzIuBi}\n}", "github": "", "project": "", "reviewers": "3M1A;3JpN;mTUy", "site": "https://openreview.net/forum?id=InhYJzIuBi", "pdf_size": 0, "rating": "4;4;4", "confidence": "5;5;3", "excitement": "3;4;3", "reproducibility": "4;4;4", "correctness": "4;4;2", "rating_avg": 4.0, "confidence_avg": 4.333333333333333, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0001-9575-3678;0000-0001-7858-6206;0000-0003-4491-8369;0000-0001-5466-7351;0000-0003-4997-3850;", "linkedin": ";;;yoshimi-suzuki-395400120/;;", "aff_unique_index": "0;1;0;1;0;2", "aff_unique_norm": "University of Yamanashi;Yamanashi University;Hangzhou Dianzi University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.u-yamanashi.ac.jp;https://www.yamanashi-u.ac.jp;http://www.hdu.edu.cn/", "aff_unique_abbr": "UoY;Yamanashi U;HGHDU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;1", "aff_country_unique": "Japan;China" }, { "id": "IpJ5rAFLv7", "title": "Scaling 
Vision-Language Models with Sparse Mixture of Experts", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "The field of natural language processing (NLP) has made significant strides in recent years, particularly in the development of large-scale vision-language models (VLMs). These models aim to bridge the gap between text and visual information, enabling a more comprehensive understanding of multimedia data. However, as these models become larger and more complex, they also become more challenging to train and deploy. One approach to addressing this challenge is the use of sparsely-gated mixture-of-experts (MoE) techniques, which divide the model into smaller, specialized sub-models that can jointly solve a task. In this paper, we explore the effectiveness of MoE in scaling vision-language models, demonstrating its potential to achieve state-of-the-art performance on a range of benchmarks over dense models of equivalent computational cost. Our research offers valuable insights into stabilizing the training of MoE models, understanding the impact of MoE on model interpretability, and balancing the trade-offs between compute performance when scaling VLMs. We hope our work will inspire further research into the use of MoE for scaling large-scale vision-language models and other multimodal machine learning applications.", "keywords": "vision language;scaling", "primary_area": "", "supplementary_material": "", "author": "Sheng Shen;Zhewei Yao;Chunyuan Li;Trevor Darrell;Kurt Keutzer;Yuxiong He", "authorids": "~Sheng_Shen2;~Zhewei_Yao1;~Chunyuan_Li1;~Trevor_Darrell2;~Kurt_Keutzer1;~Yuxiong_He1", "gender": "M;M;;M;;M", "homepage": "https://sincerass.github.io;;http://chunyuan.li/;https://people.eecs.berkeley.edu/~keutzer/;;https://people.eecs.berkeley.edu/~trevor/", "dblp": "138/5764-1.html;195/2887;64/9590;k/KurtKeutzer.html;https://dblp.org/pers/hd/h/He:Yuxiong;d/TrevorDarrell", "google_scholar": "https://scholar.google.com/citations?hl=en;gpSeMjYAAAAJ;Zd7WmXUAAAAJ;ID9QePIAAAAJ;SB3_eb0AAAAJ;https://scholar.google.com.tw/citations?user=bh-uRFMAAAAJ", "or_profile": "~Sheng_Shen2;~Zhewei_Yao1;~Chunyuan_Li1;~Kurt_Keutzer1;~Yuxiong_He1;~trevor_darrell1", "aff": "University of California, Berkeley;Microsoft;Microsoft Research;University of California, Berkeley;Microsoft;Electrical Engineering & Computer Science Department", "aff_domain": "berkeley.edu;microsoft.com;microsoft.com;berkeley.edu;microsoft.com;eecs.berkeley.edu", "position": "PhD student;Researcher;Principal Researcher;Full Professor;Researcher;Professor", "bibtex": "@inproceedings{\nshen2023scaling,\ntitle={Scaling Vision-Language Models with Sparse Mixture of Experts},\nauthor={Sheng Shen and Zhewei Yao and Chunyuan Li and Trevor Darrell and Kurt Keutzer and Yuxiong He},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=IpJ5rAFLv7}\n}", "github": "", "project": "", "reviewers": "rELj;sswr;htd9", "site": "https://openreview.net/forum?id=IpJ5rAFLv7", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;3;3", "excitement": "3;4;3", "reproducibility": "3;3;3", "correctness": "3;4;2", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.0, "correctness_avg": 3.0, "replies_avg": 10, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0000-0003-3868-8501;;", "linkedin": "sheng-s-ab198a174/;;;kurtkeutzer/;;", "aff_unique_index": 
"0;1;1;0;1;2", "aff_unique_norm": "University of California, Berkeley;Microsoft;Electrical Engineering & Computer Science Department", "aff_unique_dep": ";Microsoft Corporation;Electrical Engineering & Computer Science", "aff_unique_url": "https://www.berkeley.edu;https://www.microsoft.com;", "aff_unique_abbr": "UC Berkeley;Microsoft;", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Berkeley;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States;" }, { "id": "Ipo264MKyt", "title": "PersonaLM: Language Model Personalization via Domain-distributed Span Aggregated K-Nearest N-gram Retrieval Augmentation", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "We introduce PersonaLM - Domain-distributed Span-Aggregated K-nearest N-gram retrieval augmentation to improve language modeling for Automatic Speech Recognition (ASR) personalization. PersonaLM leverages contextually similar n-gram word frequencies for recognizing rare word patterns associated with unseen domains. It aggregates the next-word probability distribution based on the relative importance of different domains to the input query. To achieve this, we propose a Span Aggregated Group-Contrastive Neural (SCAN) retriever that learns to rank external domains/users by utilizing a group-wise contrastive span loss that pulls together span representations belonging to the same group while pushing away spans from unrelated groups in the semantic space. We propose ASAP benchmark for ASR LM personalization that consists of three user-specific speech-to-text tasks for meetings, TED talks, and financial earnings calls. Extensive experiments show that PersonaLM significantly outperforms strong baselines with a 10-16% improvement in perplexity and a 5-8% reduction in Word Error Rates on popular Wikitext-103, UserLibri, and our ASAP dataset. 
We further demonstrate the usefulness of the SCAN retriever for improving user-personalized text generation and classification by retrieving relevant context for zero-shot prompting and few-shot fine-tuning of LLMs by 7-12% on the LAMP benchmark.", "keywords": "speech recognition;language modeling;personalization;domain adaptation;retrieval augmentation", "primary_area": "", "supplementary_material": "", "author": "Puneet Mathur;Zhe Liu;Ke Li;Yingyi Ma;Gil Keren;Zeeshan Ahmed;Dinesh Manocha;Xuedong Zhang", "authorids": "~Puneet_Mathur1;~Zhe_Liu4;~Ke_Li22;~Yingyi_Ma1;~Gil_Keren1;~Zeeshan_Ahmed1;~Dinesh_Manocha3;~Xuedong_Zhang1", "gender": ";M;F;F;M;;M;M", "homepage": ";https://zhejosephliu.github.io/index.html;https://scholar.google.com/citations?user=i31osuAAAAAJ&hl=en;;;;https://www.cs.umd.edu/people/dmanocha;https://www.linkedin.com/in/xuedongzhang/", "dblp": ";;;242/3346;;;m/DineshManocha;", "google_scholar": ";;;eWGwAEwAAAAJ;;;X08l_4IAAAAJ;", "or_profile": "~Puneet_Mathur1;~Zhe_Liu4;~Ke_Li22;~Yingyi_Ma1;~Gil_Keren1;~Zeeshan_Ahmed1;~Dinesh_Manocha3;~Xuedong_Zhang1", "aff": ";Meta;Meta;Meta Facebook;;;University of Maryland, College Park;Meta Facebook", "aff_domain": ";meta.com;facebook.com;facebook.com;;;umd.edu;facebook.com", "position": ";Researcher;Researcher;Researcher;;;Professor;Researcher", "bibtex": "@inproceedings{\nmathur2023personalm,\ntitle={Persona{LM}: Language Model Personalization via Domain-distributed Span Aggregated K-Nearest N-gram Retrieval Augmentation},\nauthor={Puneet Mathur and Zhe Liu and Ke Li and Yingyi Ma and Gil Keren and Zeeshan Ahmed and Dinesh Manocha and Xuedong Zhang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=Ipo264MKyt}\n}", "github": "", "project": "", "reviewers": "cwNA;FqGg;n2GV", "site": "https://openreview.net/forum?id=Ipo264MKyt", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;3;1", "excitement": "3;3;3", "reproducibility": "3;4;2", "correctness": "3;4;3", "rating_avg": 3.0, "confidence_avg": 2.3333333333333335, "excitement_avg": 3.0, "reproducibility_avg": 3.0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 8, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;;0000-0001-7047-9801;", "linkedin": ";;;;;;dinesh-manocha-2311846;", "aff_unique_index": "0;0;0;1;0", "aff_unique_norm": "Meta;University of Maryland", "aff_unique_dep": "Meta Platforms, Inc.;", "aff_unique_url": "https://meta.com;https://www/umd.edu", "aff_unique_abbr": "Meta;UMD", "aff_campus_unique_index": "1", "aff_campus_unique": ";College Park", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "IpzCUvade7", "title": "Interventional Rationalization", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Selective rationalizations improve the explainability of neural networks by selecting a subsequence of the input (i.e., rationales) to explain the prediction results. Although existing methods have achieved promising results, they still suffer from adopting the spurious correlations in data (aka., shortcuts) to compose rationales and make predictions. Inspired by the causal theory, in this paper, we develop an interventional rationalization (Inter-RAT) to discover the causal rationales. Specifically, we first analyse the causalities among the input, rationales and results with a structural causal model. 
Then, we discover spurious correlations between the input and rationales, and between rationales and results, respectively, by identifying the confounder in the causalities. Next, based on the backdoor adjustment, we propose a causal intervention method to remove the spurious correlations between input and rationales. Further, we discuss reasons why spurious correlations between the selected rationales and results exist by analysing the limitations of the sparsity constraint in the rationalization, and employ the causal intervention method to remove these correlations. Extensive experimental results on three real-world datasets clearly validate the effectiveness of our proposed method. The source code of Inter-RAT is available at https://github.com/yuelinan/Codes-of-Inter-RAT.", "keywords": "Rationalization;Causal intervention", "primary_area": "", "supplementary_material": "", "author": "Linan Yue;Qi Liu;Li Wang;Yanqing An;Yichao Du;Zhenya Huang", "authorids": "~Linan_Yue1;~Qi_Liu3;~Li_Wang18;~Yanqing_An1;~Yichao_Du1;~Zhenya_Huang2", "gender": "M;M;F;M;M;M", "homepage": "https://yuelinan.github.io/;http://staff.ustc.edu.cn/~qiliuql/;;http://home.ustc.edu.cn/~anyq/;http://staff.ustc.edu.cn/~huangzhy/;", "dblp": "297/1080;95/2446-3;;296/9937;178/8690;271/6727", "google_scholar": "https://scholar.google.com.hk/citations?user=XDaNgG4AAAAJ;5EoHAFwAAAAJ;poE7k1wAAAAJ;gjj3AZ4AAAAJ;dVZuU90AAAAJ;UC4wSP0AAAAJ", "or_profile": "~Linan_Yue1;~Qi_Liu3;~Li_Wang18;~Yanqing_An1;~Zhenya_Huang2;~Du_Yichao1", "aff": "University of Science and Technology of China;University of Science and Technology of China;;University of Science and Technology of China;University of Science and Technology of China;University of Science and Technology of China", "aff_domain": "ustc.edu.cn;ustc.edu.cn;;mail.ustc.edu.cn;ustc.edu.cn;ustc.edu.cn", "position": "PhD student;Full Professor;;MS student;Associate Professor;PhD student", "bibtex": "@inproceedings{\nyue2023interventional,\ntitle={Interventional Rationalization},\nauthor={Linan Yue and Qi Liu and Li Wang and Yanqing An and Yichao Du and Zhenya Huang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=IpzCUvade7}\n}", "github": "", "project": "", "reviewers": "gxKG;sFn6;fKwz;YC9Z", "site": "https://openreview.net/forum?id=IpzCUvade7", "pdf_size": 0, "rating": "4;4;4;4", "confidence": "4;4;4;4", "excitement": "4;3;4;4", "reproducibility": "4;4;4;4", "correctness": "3;3;5;4", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.75, "reproducibility_avg": 4.0, "correctness_avg": 3.75, "replies_avg": 14, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-5980-6098;0000-0001-6956-5550;;0000-0001-7977-775X;0000-0003-1661-0420;", "linkedin": ";;;;;", "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "University of Science and Technology of China", "aff_unique_dep": "", "aff_unique_url": "http://www.ustc.edu.cn", "aff_unique_abbr": "USTC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "IqEy2fbpt5", "title": "Do LLMs Understand Social Knowledge? Evaluating the Sociability of Large Language Models with SocKET Benchmark", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Large language models (LLMs) have been shown to perform well at a variety of syntactic, discourse, and reasoning tasks. 
While LLMs are increasingly deployed in many forms including conversational agents that interact with humans, we lack a grounded benchmark to measure how well LLMs understand social language. Here, we introduce a new theory-driven benchmark, SocKET, that contains 58 NLP tasks testing social knowledge which we group into five categories: humor \\& sarcasm, offensiveness, sentiment \\& emotion, and trustworthiness. In tests on the benchmark, we demonstrate that current models attain only moderate performance but reveal significant potential for task transfer among different types and categories of tasks, which were predicted from theory. Through zero-shot evaluations, we show that pretrained models already possess some innate but limited capabilities of social language understanding and training on one category of tasks can improve zero-shot testing on others. Our benchmark provides a systematic way to analyze model performance on an important dimension of language and points to clear room for improvement to build more socially-aware LLMs. The resources are released at https://github.com/minjechoi/SOCKET.", "keywords": "large language models;social information;benchmark", "primary_area": "", "supplementary_material": "", "author": "Minje Choi;Jiaxin Pei;Sagar Kumar;Chang Shu;David Jurgens", "authorids": "~Minje_Choi1;~Jiaxin_Pei1;~Sagar_Kumar1;~Chang_Shu5;~David_Jurgens1", "gender": "M;;Not Specified;M;M", "homepage": "https://minjechoi.github.io/;;;https://ciaranshu.github.io;http://jurgens.people.si.umich.edu", "dblp": "257/3348;228/5526;;;48/4613.html", "google_scholar": "crAyusoAAAAJ;bfPz_-8AAAAJ;NhJMwocAAAAJ;SxQjvCUAAAAJ;https://scholar.google.com/citations?hl=en", "or_profile": "~Minje_Choi1;~Jiaxin_Pei1;~Sagar_Kumar1;~Chang_Shu5;~David_Jurgens1", "aff": "University of Michigan;University of Michigan;Northeastern University;University of Cambridge;University of Michigan - Ann Arbor", "aff_domain": "umich.edu;umich.edu;northeastern.edu;cam.ac.uk;umich.edu", "position": "PhD student;PhD student;PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nchoi2023do,\ntitle={Do {LLM}s Understand Social Knowledge? 
Evaluating the Sociability of Large Language Models with Soc{KET} Benchmark},\nauthor={Minje Choi and Jiaxin Pei and Sagar Kumar and Chang Shu and David Jurgens},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=IqEy2fbpt5}\n}", "github": "", "project": "", "reviewers": "ytnT;CEQi;egYJ", "site": "https://openreview.net/forum?id=IqEy2fbpt5", "pdf_size": 0, "rating": "5;5;5", "confidence": "3;4;4", "excitement": "3;4;4", "reproducibility": "4;3;2", "correctness": "3;4;3", "rating_avg": 5.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0003-0157-4615;;0000-0002-2135-9878", "linkedin": ";;;;", "aff_unique_index": "0;0;1;2;0", "aff_unique_norm": "University of Michigan;Northeastern University;University of Cambridge", "aff_unique_dep": ";;", "aff_unique_url": "https://www.umich.edu;https://www.northeastern.edu;https://www.cam.ac.uk", "aff_unique_abbr": "UM;NEU;Cambridge", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Cambridge;Ann Arbor", "aff_country_unique_index": "0;0;0;1;0", "aff_country_unique": "United States;United Kingdom" }, { "id": "IsDxBXUEd8", "title": "GRI: Graph-based Relative Isomorphism of Word Embedding Spaces", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Automated construction of bi-lingual dictionaries using monolingual embedding spaces is a\ncore challenge in machine translation. The end performance of these dictionaries relies on the\ngeometric similarity of individual spaces, i.e., their degree of isomorphism. Existing attempts\naimed at controlling the relative isomorphism of different spaces fail to incorporate the \nimpact of lexically different but semantically related words in the training objective. To \naddress this, we propose GRI that combines the distributional training objectives with attentive\ngraph convolutions to unanimously consider the impact of lexical variations of semantically\nsimilar words required to define/compute the relative isomorphism of multiple spaces. 
Experimental evaluation shows that GRI outperforms the existing research by improving the average P@1 by a relative score of up to 63.6%.", "keywords": "Attentive graph Convolutions;Isomorphism;Bi-lingual Induction", "primary_area": "", "supplementary_material": "", "author": "Muhammad Asif Ali;Yan HU;Jianbin Qin;Di Wang", "authorids": "~Muhammad_Asif_Ali1;~Yan_HU2;~Jianbin_Qin1;~Di_Wang1", "gender": "M;;M;", "homepage": ";;http://qinjianbin.com/;", "dblp": "130/2551;;01/9727;", "google_scholar": "https://scholar.google.com.au/citations?user=Kj0S5aYAAAAJ;;;", "or_profile": "~Muhammad_Asif_Ali1;~Yan_HU2;~Jianbin_Qin1;~Di_Wang1", "aff": "King Abdullah University of Science and Technology;;Shenzhen University;", "aff_domain": "kaust.edu.sa;;szu.edu.cn;", "position": "Postdoc;;Full Professor;", "bibtex": "@inproceedings{\nali2023gri,\ntitle={{GRI}: Graph-based Relative Isomorphism of Word Embedding Spaces},\nauthor={Muhammad Asif Ali and Yan HU and Jianbin Qin and Di Wang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=IsDxBXUEd8}\n}", "github": "", "project": "", "reviewers": "spUb;AjeL;oHhd", "site": "https://openreview.net/forum?id=IsDxBXUEd8", "pdf_size": 0, "rating": "3;3;3", "confidence": "1;4;4", "excitement": "3;3;4", "reproducibility": "3;3;4", "correctness": "4;3;4", "rating_avg": 3.0, "confidence_avg": 3.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;", "linkedin": ";;;", "aff_unique_index": "0;1", "aff_unique_norm": "King Abdullah University of Science and Technology;Shenzhen University", "aff_unique_dep": ";", "aff_unique_url": "https://www.kast.kau.edu.sa;https://www.szu.edu.cn", "aff_unique_abbr": "KAUST;SZU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "Saudi Arabia;China" }, { "id": "Itnbse9MMW", "title": "An Integrative Survey on Mental Health Conversational Agents to Bridge Computer Science and Medical Perspectives", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Mental health conversational agents (a.k.a. chatbots) are widely studied for their potential to offer accessible support to those experiencing mental health challenges. Previous surveys on the topic primarily consider papers published in either computer science or medicine, leading to a divide in understanding and hindering the sharing of beneficial knowledge between both domains. To bridge this gap, we conduct a comprehensive literature review using the PRISMA framework, reviewing 534 papers published in both computer science and medicine. Our systematic review reveals 136 key papers on building mental health-related conversational agents with diverse characteristics of modeling and experimental design techniques. We find that computer science papers focus on LLM techniques and evaluating response quality using automated metrics with little attention to the application while medical papers use rule-based conversational agents and outcome metrics to measure the health outcomes of participants. 
Based on our findings on transparency, ethics, and cultural heterogeneity in this review, we provide a few recommendations to help bridge the disciplinary divide and enable the cross-disciplinary development of mental health conversational agents.", "keywords": "Conversational Agent;Chatbot;Mental health", "primary_area": "", "supplementary_material": "", "author": "Young Min Cho;Sunny Rai;Lyle Ungar;Jo\u00e3o Sedoc;Sharath Chandra Guntuku", "authorids": "~Young_Min_Cho1;~Sunny_Rai1;~Lyle_Ungar1;~Jo\u00e3o_Sedoc1;~Sharath_Chandra_Guntuku2", "gender": "M;F;M;M;M", "homepage": "https://jeffreych0.github.io/;https://raisunny.com/;http://www.cis.upenn.edu/~ungar/;;http://chandrasg.github.io/", "dblp": ";161/3908;u/LyleHUngar;;", "google_scholar": "BjdxOB4AAAAJ;C-gi0v8AAAAJ;https://scholar.google.com.tw/citations?user=KCiDjbkAAAAJ;vv355NgAAAAJ;https://scholar.google.com/citations?hl=en", "or_profile": "~Young_Min_Cho1;~Sunny_Rai1;~Lyle_Ungar1;~Jo\u00e3o_Sedoc1;~Sharath_Chandra_Guntuku2", "aff": "University of Pennsylvania;School of Engineering and Applied Science, University of Pennsylvania;University of Pennsylvania;New York University;University of Pennsylvania", "aff_domain": "seas.upenn.edu;upenn.edu;upenn.edu;nyu.edu;upenn.edu", "position": "Researcher;Postdoc;Full Professor;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\ncho2023an,\ntitle={An Integrative Survey on Mental Health Conversational Agents to Bridge Computer Science and Medical Perspectives},\nauthor={Young Min Cho and Sunny Rai and Lyle Ungar and Jo{\\~a}o Sedoc and Sharath Chandra Guntuku},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=Itnbse9MMW}\n}", "github": "", "project": "", "reviewers": "2fQL;5Bp6;e2C5", "site": "https://openreview.net/forum?id=Itnbse9MMW", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;5", "excitement": "5;3;4", "reproducibility": "4;4;0", "correctness": "5;4;3", "rating_avg": 4.0, "confidence_avg": 4.333333333333333, "excitement_avg": 4.0, "reproducibility_avg": 2.6666666666666665, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-3793-7257;0000-0002-0677-3747;;;0000-0002-2929-0035", "linkedin": "jeffrey-young-min-cho-888105180/;sunnyrai-nlp;;joao-sedoc-9085714/;", "aff_unique_index": "0;0;0;1;0", "aff_unique_norm": "University of Pennsylvania;New York University", "aff_unique_dep": ";", "aff_unique_url": "https://www.upenn.edu;https://www.nyu.edu", "aff_unique_abbr": "UPenn;NYU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "IvwcvJHLpc", "title": "IdealGPT: Iteratively Decomposing Vision and Language Reasoning via Large Language Models", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "The field of vision-and-language (VL) understanding has made unprecedented progress with end-to-end large pre-trained VL models (VLMs). However, they still fall short in zero-shot reasoning tasks that require multi-step inferencing. To achieve this goal, previous works resort to a divide-and-conquer pipeline. In this paper, we argue that previous efforts have several inherent shortcomings: 1) They rely on domain-specific sub-question decomposing models. 2) They force models to predict the final answer even if the sub-questions or sub-answers provide insufficient information. 
We address these limitations via IdealGPT, a framework that iteratively decomposes VL reasoning using large language models (LLMs). Specifically, IdealGPT utilizes an LLM to generate sub-questions, a VLM to provide corresponding sub-answers, and another LLM to reason to achieve the final answer. These three modules perform the divide-and-conquer procedure iteratively until the model is confident about the final answer to the main question. We evaluate IdealGPT on multiple challenging VL reasoning tasks under a zero-shot setting. In particular, our IdealGPT outperforms the best existing GPT-4-like models by an absolute 10% on VCR and 15% on SNLI-VE. Code is available at https://github.com/Hxyou/IdealGPT.", "keywords": "Vision-Language Learning;Vision-Language Model;Large Language Model;Zero-Shot Evaluation", "primary_area": "", "supplementary_material": "", "author": "Haoxuan You;Rui Sun;Zhecan Wang;Long Chen;Gengyu Wang;Hammad Ayyubi;Kai-Wei Chang;Shih-Fu Chang", "authorids": "~Haoxuan_You1;~Rui_Sun10;~Zhecan_Wang2;~Long_Chen8;~Gengyu_Wang2;~Hammad_Ayyubi1;~Kai-Wei_Chang1;~Shih-Fu_Chang3", "gender": "M;M;M;M;;;M;M", "homepage": "https://hxyou.github.io/;https://threesr.github.io/;https://www.zhecanwang.com/;https://zjuchenlong.github.io/;http://wanggengyu.com;https://hammad001.github.io/;http://kwchang.net;http://www.ee.columbia.edu/~sfchang/", "dblp": "210/2628;;167/4251;64/5725-16;218/7459;251/5554;18/2428;c/ShihFuChang", "google_scholar": "BhysChMAAAAJ;;uqHPnmgAAAAJ;https://scholar.google.com.sg/citations?user=-gtmMpIAAAAJ;KMmeC-sAAAAJ;;fqDBtzYAAAAJ;OMVTRscAAAAJ", "or_profile": "~Haoxuan_You1;~Rui_Sun10;~Zhecan_Wang2;~Long_Chen8;~Gengyu_Wang2;~Hammad_Ayyubi1;~Kai-Wei_Chang1;~Shih-Fu_Chang3", "aff": "Columbia University;Columbia University;Columbia University;Columbia University;International Business Machines;Google;Amazon;Columbia University", "aff_domain": "columbia.edu;columbia.edu;columbia.edu;columbia.edu;ibm.com;google.com;amazon.com;ee.columbia.edu", "position": "PhD student;MS student;PhD student;Postdoc;Researcher;Intern;Researcher;Full Professor", "bibtex": "@inproceedings{\nyou2023idealgpt,\ntitle={Ideal{GPT}: Iteratively Decomposing Vision and Language Reasoning via Large Language Models},\nauthor={Haoxuan You and Rui Sun and Zhecan Wang and Long Chen and Gengyu Wang and Hammad Ayyubi and Kai-Wei Chang and Shih-Fu Chang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=IvwcvJHLpc}\n}", "github": "", "project": "", "reviewers": "duK1;NAya;MWQz", "site": "https://openreview.net/forum?id=IvwcvJHLpc", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;3", "excitement": "3;4;3", "reproducibility": "5;4;2", "correctness": "3;3;3", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.0, "replies_avg": 5, "authors#_avg": 8, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0001-9927-8392;0009-0003-7785-4637;0000-0001-6148-9709;;;0000-0001-5365-0072;", "linkedin": ";rui-sun-three/;jameszhecanwang/;;;hammad-ayyubi-56380688/;kai-wei-chang-41239040;", "aff_unique_index": "0;0;0;0;1;2;3;0", "aff_unique_norm": "Columbia University;International Business Machines Corporation;Google;Amazon", "aff_unique_dep": ";;Google;Amazon.com, Inc.", "aff_unique_url": "https://www.columbia.edu;https://www.ibm.com;https://www.google.com;https://www.amazon.com", "aff_unique_abbr": 
"Columbia;IBM;Google;Amazon", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "IwI7Wpkzm7", "title": "Deciphering Stereotypes in Pre-Trained Language Models", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Warning: This paper contains content that is stereotypical and may be upsetting.\n\nThis paper addresses the issue of demographic stereotypes present in Transformer-based pre-trained language models (PLMs) and aims to deepen our understanding of how these biases are encoded in these models. To accomplish this, we introduce an easy-to-use framework for examining the stereotype-encoding behavior of PLMs through a combination of model probing and textual analyses. Our findings reveal that a small subset of attention heads within PLMs are primarily responsible for encoding stereotypes and that stereotypes toward specific minority groups can be identified using attention maps on these attention heads. Leveraging these insights, we propose an attention-head pruning method as a viable approach for debiasing PLMs, without compromising their language modeling capabilities or adversely affecting their performance on downstream tasks.", "keywords": "Stereotype Examination;Stereotype Dataset Construction;Probing and Other Interpretations", "primary_area": "", "supplementary_material": "", "author": "Weicheng Ma;Henry Scheible;Brian C Wang;Goutham Veeramachaneni;Pratim Chowdhary;Alan Sun;Andrew Koulogeorge;Lili Wang;Diyi Yang;Soroush Vosoughi", "authorids": "~Weicheng_Ma2;~Henry_Scheible1;~Brian_C_Wang1;~Goutham_Veeramachaneni1;~Pratim_Chowdhary1;~Alan_Sun1;~Andrew_Koulogeorge1;~Lili_Wang2;~Diyi_Yang2;~Soroush_Vosoughi1", "gender": "M;M;;M;;M;;F;;M", "homepage": "https://www.linkedin.com/in/weicheng-ma-83a2b11a1/;;;https://cpratim.github.io;https://alansun17904.github.io/;https://andrew-koulogeorge.github.io;;https://cs.stanford.edu/~diyiy/;https://www.cs.dartmouth.edu/~soroush/;https://github.com/henryscheible", "dblp": "127/3100;;;;;;;70/11145;01/1709;", "google_scholar": "https://scholar.google.com/citations?view_op=list_works;;;;QRJoXDQAAAAJ;;vJXfvigAAAAJ;j9jhYqQAAAAJ;45DAXkwAAAAJ;", "or_profile": "~Weicheng_Ma2;~Brian_C_Wang1;~Goutham_Veeramachaneni1;~Pratim_Chowdhary1;~Alan_Sun1;~Andrew_Koulogeorge1;~Lili_Wang2;~Diyi_Yang2;~Soroush_Vosoughi1;~Henry_Jackson_Scheible1", "aff": "Dartmouth College;Dartmouth College;;Dartmouth College;Dartmouth College;Dartmouth College;Dartmouth College;Stanford University;Dartmouth College;Dartmouth College", "aff_domain": "dartmouth.edu;dartmouth.edu;;dartmouth.edu;dartmouth.edu;dartmouth.edu;dartmouth.edu;stanford.edu;dartmouth.edu;dartmouth.edu", "position": "PhD student;Undergrad student;;Undergrad student;Undergrad student;Undergrad student;PhD student;Assistant Professor;Assistant Professor;Undergrad student", "bibtex": "@inproceedings{\nma2023deciphering,\ntitle={Deciphering Stereotypes in Pre-Trained Language Models},\nauthor={Weicheng Ma and Henry Scheible and Brian C Wang and Goutham Veeramachaneni and Pratim Chowdhary and Alan Sun and Andrew Koulogeorge and Lili Wang and Diyi Yang and Soroush Vosoughi},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=IwI7Wpkzm7}\n}", "github": "", "project": "", "reviewers": "gaT7;SpU2;MzAY", "site": "https://openreview.net/forum?id=IwI7Wpkzm7", "pdf_size": 0, "rating": "5;5;5", "confidence": 
"4;4;4", "excitement": "5;4;4", "reproducibility": "5;3;4", "correctness": "4;5;4", "rating_avg": 5.0, "confidence_avg": 4.0, "excitement_avg": 4.333333333333333, "reproducibility_avg": 4.0, "correctness_avg": 4.333333333333333, "replies_avg": 11, "authors#_avg": 10, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0001-7494-9874;;;;;;;;0000-0002-2564-8909;", "linkedin": "weicheng-ma-83a2b11a1/;https://linkedin.com/in/brianwang2023;;;;;lili-wang-752552a5/;;;", "aff_unique_index": "0;0;0;0;0;0;1;0;0", "aff_unique_norm": "Dartmouth College;Stanford University", "aff_unique_dep": ";", "aff_unique_url": "https://www.dartmouth.edu;https://www.stanford.edu", "aff_unique_abbr": "Dartmouth;Stanford", "aff_campus_unique_index": "1", "aff_campus_unique": ";Stanford", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "J2l0R8N3ks", "title": "Zero-shot Sharpness-Aware Quantization for Pre-trained Language Models", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Quantization is a promising approach for reducing memory overhead and accelerating inference, especially in large pre-trained language model (PLM) scenarios. While having no access to original training data due to security and privacy concerns has emerged the demand for zero-shot quantization. Most of the cutting-edge zero-shot quantization methods primarily 1) apply to computer vision tasks, and 2) neglect of overfitting problem in the generative adversarial learning process, leading to sub-optimal performance. Motivated by this, we propose a novel zero-shot sharpness-aware quantization (ZSAQ) framework for the zero-shot quantization of various PLMs. The key algorithm in solving ZSAQ is the SAM-SGA optimization, which aims to improve the quantization accuracy and model generalization via optimizing a minimax problem. We theoretically prove the convergence rate for the minimax optimization problem and this result can be applied to other nonconvex-PL minimax optimization frameworks. Extensive experiments on 11 tasks demonstrate that our method brings consistent and significant performance gains on both discriminative and generative PLMs, i.e., up to +6.98 average score. 
Furthermore, we empirically validate that our method can effectively improve the model generalization.", "keywords": "zero-shot quantization;minimax optimization;pre-trained language model", "primary_area": "", "supplementary_material": "", "author": "Miaoxi Zhu;Qihuang Zhong;Li Shen;Liang Ding;Juhua Liu;Bo Du;Dacheng Tao", "authorids": "~Miaoxi_Zhu2;~Qihuang_Zhong1;~Li_Shen1;~Liang_Ding3;~Juhua_Liu2;~Bo_Du3;~Dacheng_Tao1", "gender": ";M;M;M;M;;", "homepage": ";https://www.qihuangzhong.top/;https://sites.google.com/site/mathshenli/home;http://liamding.cc/;http://jszy.whu.edu.cn/liujuhua1/zh_CN/index.htm;;", "dblp": "344/1052;272/6439.html;91/3680-8;88/3340-6.html;122/1682;;", "google_scholar": ";https://scholar.google.com/citations?hl=zh-CN;yVhgENIAAAAJ;lFCLvOAAAAAJ;wN-rIgIAAAAJ;;", "or_profile": "~Miaoxi_Zhu2;~Qihuang_Zhong1;~Li_Shen1;~Liang_Ding3;~Juhua_Liu2;~Bo_Du3;~Dacheng_Tao1", "aff": "Wuhan University;Wuhan University;JD Explore Academy;JD Explore Academy, JD.com Inc.;Wuhan University;;", "aff_domain": "whu.edu.cn;whu.edu.cn;jd.com;jd.com;whu.edu.cn;;", "position": "MS student;PhD student;Researcher;Research Scientist;Full Professor;;", "bibtex": "@inproceedings{\nzhu2023zeroshot,\ntitle={Zero-shot Sharpness-Aware Quantization for Pre-trained Language Models},\nauthor={Miaoxi Zhu and Qihuang Zhong and Li Shen and Liang Ding and Juhua Liu and Bo Du and Dacheng Tao},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=J2l0R8N3ks}\n}", "github": "", "project": "", "reviewers": "vWQy;FTK8;2wRB;6MGP", "site": "https://openreview.net/forum?id=J2l0R8N3ks", "pdf_size": 0, "rating": "4;4;4;4", "confidence": "4;3;4;4", "excitement": "3;3;3;4", "reproducibility": "4;3;3;4", "correctness": "3;3;3;4", "rating_avg": 4.0, "confidence_avg": 3.75, "excitement_avg": 3.25, "reproducibility_avg": 3.5, "correctness_avg": 3.25, "replies_avg": 14, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;0000-0002-3907-8820;;", "linkedin": "https://www.linkedin.cn/incareer/in/%E5%98%BB-%E5%98%BB-0a078423a;;;;;;", "aff_unique_index": "0;0;1;2;0", "aff_unique_norm": "Wuhan University;JD;JD.com Inc.", "aff_unique_dep": ";JD Explore Academy;JD Explore Academy", "aff_unique_url": "http://www.whu.edu.cn/;;https://www.jd.com", "aff_unique_abbr": "WHU;;JD.com", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China;" }, { "id": "J5FFUHZjNx", "title": "SteerLM: Attribute Conditioned SFT as an (User-Steerable) Alternative to RLHF", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Model alignment with human preferences is an essential step in making Large Language Models (LLMs) helpful and consistent with human values. It typically consists of supervised fine-tuning (SFT) and reinforcement learning from human feedback (RLHF) stages. However, RLHF faces inherent limitations stemming from a complex training setup and its tendency to align the model with implicit values that end users cannot control at run-time. Moreover, reward models in RLHF stage commonly rely on single-dimensional feedback as opposed to explicit, multifaceted signals that indicate attributes such as helpfulness, humor, and toxicity. To address these limitations, we propose SteerLM, a supervised fine-tuning method that empowers end-users to control responses during inference. 
SteerLM conditions responses to conform to an explicitly defined multi-dimensional set of attributes, thereby empowering a steerable AI capable of generating helpful and high-quality responses while maintaining customizability. Experiments show that SteerLM trained on open source datasets generates responses that are preferred by human and automatic evaluators to many state-of-the-art baselines trained with RLHF while being much easier to train. Try SteerLM at https://huggingface.co/nvidia/SteerLM-llama2-13B", "keywords": "Steerable AI;Large Language Model;Alignment;Supervised Fine-tuning", "primary_area": "", "supplementary_material": "", "author": "Yi Dong;Zhilin Wang;Makesh Narsimhan Sreedhar;Xianchao Wu;Oleksii Kuchaiev", "authorids": "~Yi_Dong4;~Zhilin_Wang2;~Makesh_Narsimhan_Sreedhar1;~Xianchao_Wu1;~Oleksii_Kuchaiev1", "gender": "M;;;M;", "homepage": ";;;https://sites.google.com/site/xianchaowu2012/home;http://www.kuchaev.com", "dblp": ";53/10643;;https://dblp.org/pers/hd/w/Wu:Xianchao;", "google_scholar": ";OmMgSQsAAAAJ;;0cP7RfUAAAAJ;qmmIGnwAAAAJ", "or_profile": "~Yi_Dong4;~Zhilin_Wang2;~Makesh_Narsimhan_Sreedhar1;~Xianchao_Wu1;~Oleksii_Kuchaiev1", "aff": "NVIDIA;NVIDIA;;NVIDIA;NVIDIA", "aff_domain": "nvidia.com;nvidia.com;;nvidia.com;nvidia.com", "position": "Researcher;Applied Scientist;;Senior Data Scientist;Principal Researcher", "bibtex": "@inproceedings{\ndong2023steerlm,\ntitle={Steer{LM}: Attribute Conditioned {SFT} as an (User-Steerable) Alternative to {RLHF}},\nauthor={Yi Dong and Zhilin Wang and Makesh Narsimhan Sreedhar and Xianchao Wu and Oleksii Kuchaiev},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=J5FFUHZjNx}\n}", "github": "", "project": "", "reviewers": "QL8N;PUsH;kdwP", "site": "https://openreview.net/forum?id=J5FFUHZjNx", "pdf_size": 0, "rating": "4;4;4", "confidence": "5;4;3", "excitement": "4;3;3", "reproducibility": "2;3;3", "correctness": "4;3;3", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 2.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 10, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;", "linkedin": "yi-dong-04057b18;;;xianchao-wu-6239101a/;oleksiikuchaiev/", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "NVIDIA", "aff_unique_dep": "NVIDIA Corporation", "aff_unique_url": "https://www.nvidia.com", "aff_unique_abbr": "NVIDIA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "J6pq6AcmbE", "title": "A Zero-Shot Language Agent for Computer Control with Structured Reflection", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Large language models (LLMs) have shown increasing capacity at planning and executing a high-level goal in a live computer environment (e.g. MiniWoB++). To perform a task, recent works often require a model to learn from trace examples of the task via either supervised learning or few/many-shot prompting. Without these trace examples, it remains a challenge how an agent can autonomously learn and improve its control on a computer, which limits the ability of an agent to perform a new task. We approach this problem with a zero-shot agent that requires no given expert traces. 
Our agent plans for executable actions on a partially observed environment, and iteratively progresses a task by identifying and learning from its mistakes via self-reflection and structured thought management. On the easy tasks of MiniWoB++, we show that our zero-shot agent often outperforms recent SoTAs, with more efficient reasoning. For tasks with more complexity, our reflective agent performs on par with prior best models, even though previous works had the advantages of accessing expert traces or additional screen information.", "keywords": "planning;reflection;action;grounding", "primary_area": "", "supplementary_material": "", "author": "Tao Li;Gang Li;Zhiwei Deng;Bryan Wang;Yang Li", "authorids": "~Tao_Li11;~Gang_Li13;~Zhiwei_Deng3;~Bryan_Wang1;~Yang_Li2", "gender": "M;;M;;M", "homepage": "https://www.cs.utah.edu/~tli/;;http://www.zhiweideng.com;http://www.dgp.toronto.edu/~bryanw/;http://yangl.org", "dblp": "75/4601-39;62/2655-21;160/3578;187/9736;37/4190-58", "google_scholar": "C1-ACVEAAAAJ;gmBt9v8AAAAJ;tWBPUHwAAAAJ;2s6wkyYAAAAJ;ZZdB48QAAAAJ", "or_profile": "~Tao_Li11;~Gang_Li13;~Zhiwei_Deng3;~Bryan_Wang1;~Yang_Li2", "aff": "Google DeepMind;Google;Google Deepmind;Department of Computer Science, University of Toronto;Google", "aff_domain": "google.com;google.com;google.com;cs.toronto.edu;google.com", "position": "Researcher;Software Engineer;Research Scientist;PhD student;Research Scientist", "bibtex": "@inproceedings{\nli2023a,\ntitle={A Zero-Shot Language Agent for Computer Control with Structured Reflection},\nauthor={Tao Li and Gang Li and Zhiwei Deng and Bryan Wang and Yang Li},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=J6pq6AcmbE}\n}", "github": "", "project": "", "reviewers": "HJRY;7sGp;wto2;zQff", "site": "https://openreview.net/forum?id=J6pq6AcmbE", "pdf_size": 0, "rating": "3;3;3;3", "confidence": "3;3;4;2", "excitement": "3;4;3;2", "reproducibility": "3;3;2;4", "correctness": "3;4;4;2", "rating_avg": 3.0, "confidence_avg": 3.0, "excitement_avg": 3.0, "reproducibility_avg": 3.0, "correctness_avg": 3.25, "replies_avg": 14, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-9490-2990;;;", "linkedin": ";;;;yang-li-127a2a41/", "aff_unique_index": "0;0;1;2;0", "aff_unique_norm": "Google;DeepMind;University of Toronto", "aff_unique_dep": "Google DeepMind;DeepMind;Department of Computer Science", "aff_unique_url": "https://deepmind.com;https://deepmind.com;https://www.utoronto.ca", "aff_unique_abbr": "DeepMind;DeepMind;U of T", "aff_campus_unique_index": "1;2;1", "aff_campus_unique": ";Mountain View;Toronto", "aff_country_unique_index": "0;1;0;2;1", "aff_country_unique": "United Kingdom;United States;Canada" }, { "id": "J6uWPjukdR", "title": "Data Similarity is Not Enough to Explain Language Model Performance", "track": "main", "status": "Short Main", "tldr": "", "abstract": "Large language models achieve high performance on many but not all downstream tasks. The interaction between pretraining data and task data is commonly assumed to determine this variance: a task with data that is more similar to a model's pretraining data is assumed to be easier for that model. We test whether distributional and example-specific similarity measures (embedding-, token- and model-based) correlate with language model performance through a large-scale comparison of the Pile and C4 pretraining datasets with downstream benchmarks. 
Similarity correlates with performance for multilingual datasets, but in other benchmarks, we surprisingly find that similarity metrics are not correlated with accuracy or even each other. This suggests that the relationship between pretraining data and downstream tasks is more complex than often assumed.", "keywords": "similarity;dataset difficulty;pretraining data analysis", "primary_area": "", "supplementary_material": "", "author": "Gregory Yauney;Emily Reif;David Mimno", "authorids": "~Gregory_Yauney1;~Emily_Reif2;~David_Mimno1", "gender": ";F;M", "homepage": "http://cs.cornell.edu/~gyauney;;https://mimno.infosci.cornell.edu/", "dblp": "212/5939;;39/5487", "google_scholar": "https://scholar.google.com/citations?hl=en;J1hMgtAAAAAJ;uBFV6SUAAAAJ", "or_profile": "~Gregory_Yauney1;~Emily_Reif2;~David_Mimno1", "aff": "Cornell University;Google;Cornell University", "aff_domain": "cornell.edu;google.com;cornell.edu", "position": "PhD student;Researcher;Associate Professor", "bibtex": "@inproceedings{\nyauney2023data,\ntitle={Data Similarity is Not Enough to Explain Language Model Performance},\nauthor={Gregory Yauney and Emily Reif and David Mimno},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=J6uWPjukdR}\n}", "github": "", "project": "", "reviewers": "pNy4;ra9x;tQTb", "site": "https://openreview.net/forum?id=J6uWPjukdR", "pdf_size": 0, "rating": "5;5;5", "confidence": "5;5;4", "excitement": "4;3;4", "reproducibility": "4;4;4", "correctness": "4;4;4", "rating_avg": 5.0, "confidence_avg": 4.666666666666667, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.0, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": ";emily-reif-4b995884;", "aff_unique_index": "0;1;0", "aff_unique_norm": "Cornell University;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.cornell.edu;https://www.google.com", "aff_unique_abbr": "Cornell;Google", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "J8iaZda5aG", "title": "Words, Subwords, and Morphemes: What Really Matters in the Surprisal-Reading Time Relationship?", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "An important assumption that comes with using LLMs on psycholinguistic data has gone unverified. LLM-based predictions are based on subword tokenization, not decomposition of words into morphemes. Does that matter? We carefully test this by comparing surprisal estimates using orthographic, morphological, and BPE tokenization against reading time data. Our results replicate previous findings and provide evidence that *in the aggregate*, predictions using BPE tokenization do not suffer relative to morphological and orthographic segmentation. 
However, a finer-grained analysis points to potential issues with relying on BPE-based tokenization, as well as providing promising results involving morphologically-aware surprisal estimates and suggesting a new method for evaluating morphological prediction.", "keywords": "psycholinguistics;sentence processing;tokenization", "primary_area": "", "supplementary_material": "", "author": "Sathvik Nair;Philip Resnik", "authorids": "~Sathvik_Nair1;~Philip_Resnik1", "gender": ";M", "homepage": "https://sathvikn.github.io/;http://www.umiacs.umd.edu/~resnik/", "dblp": "277/5045;p/PhilipResnik", "google_scholar": "zT7BU8AAAAAJ;https://scholar.google.com.tw/citations?user=71BFWc0AAAAJ", "or_profile": "~Sathvik_Nair1;~Philip_Resnik1", "aff": "University of Maryland, College Park;University of Maryland, College Park", "aff_domain": "umd.edu;umd.edu", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\nnair2023words,\ntitle={Words, Subwords, and Morphemes: What Really Matters in the Surprisal-Reading Time Relationship?},\nauthor={Sathvik Nair and Philip Resnik},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=J8iaZda5aG}\n}", "github": "", "project": "", "reviewers": "MAqK;jJ46;aGqM", "site": "https://openreview.net/forum?id=J8iaZda5aG", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;4", "excitement": "4;3;2", "reproducibility": "5;4;4", "correctness": "4;4;3", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.0, "reproducibility_avg": 4.333333333333333, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-4467-9049;0000-0002-6130-8602", "linkedin": "sathvik-nair/;", "aff_unique_index": "0;0", "aff_unique_norm": "University of Maryland", "aff_unique_dep": "", "aff_unique_url": "https://www/umd.edu", "aff_unique_abbr": "UMD", "aff_campus_unique_index": "0;0", "aff_campus_unique": "College Park", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "J9Vx7eTuWb", "title": "TATA: Stance Detection via Topic-Agnostic and Topic-Aware Embeddings", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Stance detection is important for understanding different attitudes and beliefs on the Internet. However, given that a passage's stance toward a given topic is often highly dependent on that topic, building a stance detection model that generalizes to unseen topics is difficult. In this work, we propose using contrastive learning as well as an unlabeled dataset of news articles that cover a variety of different topics to train topic-agnostic/TAG and topic-aware/TAW embeddings for use in downstream stance detection. Combining these embeddings in our full TATA model, we achieve state-of-the-art performance across several public stance detection datasets (0.771 $F_1$-score on the Zero-shot VAST dataset). 
We release our code and data at https://github.com/hanshanley/tata.", "keywords": "Zero-shot;Few-shot;Stance Detection;Topic-Agnostic;Topic-Aware", "primary_area": "", "supplementary_material": "", "author": "Hans William Alexander Hanley;Zakir Durumeric", "authorids": "~Hans_William_Alexander_Hanley1;~Zakir_Durumeric1", "gender": "M;", "homepage": "https://www.hanshanley.com/;https://zakird.com", "dblp": "296/1485;143/5673", "google_scholar": "ewdWfOoAAAAJ;TxPSRHIAAAAJ", "or_profile": "~Hans_William_Alexander_Hanley1;~Zakir_Durumeric1", "aff": "Stanford University;Stanford University", "aff_domain": "stanford.edu;stanford.edu", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\nhanley2023tata,\ntitle={{TATA}: Stance Detection via Topic-Agnostic and Topic-Aware Embeddings},\nauthor={Hans William Alexander Hanley and Zakir Durumeric},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=J9Vx7eTuWb}\n}", "github": "", "project": "", "reviewers": "dEgC;TUpb;epNc", "site": "https://openreview.net/forum?id=J9Vx7eTuWb", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;3;4", "excitement": "2;3;3", "reproducibility": "3;3;4", "correctness": "2;3;4", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 2.6666666666666665, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-4291-5896;", "linkedin": "hans-hanley-0694a180;", "aff_unique_index": "0;0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "", "aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Stanford", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "J9vgDEDjAw", "title": "UDAPDR: Unsupervised Domain Adaptation via LLM Prompting and Distillation of Rerankers", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Many information retrieval tasks require large labeled datasets for fine-tuning. However, such datasets are often unavailable, and their utility for real-world applications can diminish quickly due to domain shifts. To address this challenge, we develop and motivate a method for using large language models (LLMs) to generate large numbers of synthetic queries cheaply. The method begins by generating a small number of synthetic queries using an expensive LLM. After that, a much less expensive one is used to create large numbers of synthetic queries, which are used to fine-tune a family of reranker models. These rerankers are then distilled into a single efficient retriever for use in the target domain. 
We show that this technique boosts zero-shot accuracy in long-tail domains and achieves substantially lower latency than standard reranking methods.", "keywords": "Natural Language Processing;Information Retrieval;Domain Adaptation", "primary_area": "", "supplementary_material": "", "author": "Jon Saad-Falcon;Omar Khattab;Keshav Santhanam;Radu Florian;Martin Franz;Salim Roukos;Avirup Sil;Md Arafat Sultan;Christopher Potts", "authorids": "~Jon_Saad-Falcon1;~Omar_Khattab1;~Keshav_Santhanam1;~Radu_Florian1;~Martin_Franz1;~Salim_Roukos1;~Avirup_Sil1;~Md_Arafat_Sultan1;~Christopher_Potts1", "gender": "M;M;M;M;;M;;M;M", "homepage": "https://jonsaadfalcon.com/;https://scholar.google.com/citations?hl=en&user=Lwr5ozgAAAAJ;https://cs.stanford.edu/~keshav2;;;;http://ibm.biz/avirupsil;https://ma-sultan.github.io/;http://web.stanford.edu/~cgpotts/", "dblp": "267/2373.html;129/7815;221/1812.html;91/663;81/6493;01/1417;07/10489;77/11514;13/2617", "google_scholar": "zCVmjboAAAAJ;;bAyZGdAAAAAJ;NvIcXEYAAAAJ;https://scholar.google.com/citations?hl=en;1S7VwIcAAAAJ;;lDB1ul4AAAAJ;3j08YoAAAAAJ", "or_profile": "~Jon_Saad-Falcon1;~Omar_Khattab1;~Keshav_Santhanam1;~Radu_Florian1;~Martin_Franz1;~Salim_Roukos1;~Avirup_Sil1;~Md_Arafat_Sultan1;~Christopher_Potts1", "aff": "Computer Science Department, Stanford University;Stanford University;Stanford University;International Business Machines;International Business Machines;International Business Machines;International Business Machines;International Business Machines;Stanford University", "aff_domain": "cs.stanford.edu;stanford.edu;stanford.edu;ibm.com;ibm.com;ibm.com;ibm.com;ibm.com;stanford.edu", "position": "PhD student;PhD student;PhD student;Researcher;Researcher/Engineer;Principal Researcher;Principal Researcher;Researcher;Full Professor", "bibtex": "@inproceedings{\nsaad-falcon2023udapdr,\ntitle={{UDAPDR}: Unsupervised Domain Adaptation via {LLM} Prompting and Distillation of Rerankers},\nauthor={Jon Saad-Falcon and Omar Khattab and Keshav Santhanam and Radu Florian and Martin Franz and Salim Roukos and Avirup Sil and Md Arafat Sultan and Christopher Potts},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=J9vgDEDjAw}\n}", "github": "", "project": "", "reviewers": "LCfk;8wrV;1Fo5;H5VJ", "site": "https://openreview.net/forum?id=J9vgDEDjAw", "pdf_size": 0, "rating": "4;4;4;4", "confidence": "4;4;3;4", "excitement": "4;3;4;3", "reproducibility": "4;3;4;4", "correctness": "4;3;3;3", "rating_avg": 4.0, "confidence_avg": 3.75, "excitement_avg": 3.5, "reproducibility_avg": 3.75, "correctness_avg": 3.25, "replies_avg": 13, "authors#_avg": 9, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0001-5939-7944;0000-0002-3670-1576;;;;;0000-0002-7978-6055", "linkedin": "jonsaadfalcon/;;;;martin-franz-138a7810/;salim-roukos-55a3871/;;;", "aff_unique_index": "0;0;0;1;1;1;1;1;0", "aff_unique_norm": "Stanford University;International Business Machines Corporation", "aff_unique_dep": "Computer Science Department;", "aff_unique_url": "https://www.stanford.edu;https://www.ibm.com", "aff_unique_abbr": "Stanford;IBM", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Stanford;", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "JC7uPaMwpW", "title": "KBioXLM: A Knowledge-anchored Biomedical Multilingual Pretrained Language Model", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Most 
biomedical pretrained language models are monolingual and cannot handle the growing cross-lingual requirements. The scarcity of non-English domain corpora, not to mention parallel data, poses a significant hurdle in training multilingual biomedical models. Since knowledge forms the core of domain-specific corpora and can be translated into various languages accurately, we propose a model called KBioXLM, which transforms the multilingual pretrained model XLM-R into the biomedical domain using a knowledge-anchored approach. We achieve a biomedical multilingual corpus by incorporating three granularity knowledge alignments (entity, fact, and passage levels) into monolingual corpora. Then we design three corresponding training tasks (entity masking, relation masking, and passage relation prediction) and continue training on top of the XLM-R model to enhance its domain cross-lingual ability. To validate the effectiveness of our model, we translate the English benchmarks of multiple tasks into Chinese. Experimental results demonstrate that our model significantly outperforms monolingual and multilingual pretrained models in cross-lingual zero-shot and few-shot scenarios, achieving improvements of up to 10+ points.", "keywords": "Biomedical;Cross-lingual;Multi-lingual;Pretrained Language Model", "primary_area": "", "supplementary_material": "", "author": "Lei Geng;Xu Yan;Ziqiang Cao;Juntao Li;Wenjie Li;Sujian Li;Xinjie Zhou;Yang Yang;Jun Zhang", "authorids": "~Lei_Geng2;~Xu_Yan5;~Ziqiang_Cao2;~Juntao_Li2;~Wenjie_Li1;~Sujian_Li1;~Xinjie_Zhou1;~Yang_Yang60;~Jun_Zhang29", "gender": "M;M;M;F;F;M;;M;F", "homepage": "https://github.com/yyxx1997/yanxu.github.io;;https://lijuntaopku.github.io/;https://web.comp.polyu.edu.hk/cswjli/;https://pku-tangent.github.io/;https://scholar.google.com/citations?user=O-ODfkkAAAAJ&hl=en;;;https://github.com/ngwlh-gl?tab=repositories", "dblp": ";148/4447;;33/3999-2.html;05/4288;;;;", "google_scholar": ";https://scholar.google.com/citations?hl=zh-CN;sZSygsYAAAAJ;Rx5swD4AAAAJ;https://scholar.google.com.tw/citations?user=RvBDhSwAAAAJ;O-ODfkkAAAAJ;;LIKas5AAAAAJ;", "or_profile": "~Xu_Yan5;~Ziqiang_Cao2;~Juntao_Li2;~Wenjie_Li1;~Sujian_Li1;~Xinjie_Zhou1;~Yang_Yang60;~Jun_Zhang29;~lei_Geng1", "aff": "Suzhou University;Soochow University, China;Soochow University, China;The Hong Kong Polytechnic University, The Hong Kong Polytechnic University;Peking University;;PharmCube;Changping Laboratory;Soochow University", "aff_domain": "suda.edu.cn;suda.edu.cn;suda.edu.cn;comp.polyu.edu.hk;pku.edu.cn;;pharmcube.com;cpl.ac.cn;suda.edu.cn", "position": "MS student;Associate Professor;Associate Professor;Full Professor;Associate Professor;;Researcher;Principal Researcher;MS student", "bibtex": "@inproceedings{\ngeng2023kbioxlm,\ntitle={{KB}io{XLM}: A Knowledge-anchored Biomedical Multilingual Pretrained Language Model},\nauthor={Lei Geng and Xu Yan and Ziqiang Cao and Juntao Li and Wenjie Li and Sujian Li and Xinjie Zhou and Yang Yang and Jun Zhang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=JC7uPaMwpW}\n}", "github": "", "project": "", "reviewers": "eLNp;yywL;VPvR", "site": "https://openreview.net/forum?id=JC7uPaMwpW", "pdf_size": 0, "rating": "5;5;5", "confidence": "2;4;3", "excitement": "4;3;4", "reproducibility": "2;4;3", "correctness": "3;4;4", "rating_avg": 5.0, "confidence_avg": 3.0, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.0, "correctness_avg": 3.6666666666666665, 
"replies_avg": 11, "authors#_avg": 9, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-1077-9033;0000-0002-6286-7529;0000-0002-7360-8864;;;;0000-0002-8760-6747;", "linkedin": ";;;;;;;;", "aff_unique_index": "0;1;1;2;3;4;5;1", "aff_unique_norm": "Suzhou University;Soochow University;Hong Kong Polytechnic University;Peking University;PharmCube;Changping Laboratory", "aff_unique_dep": ";;;;;", "aff_unique_url": "https://www.suda.edu.cn;https://www.soochow.edu.cn;https://www.polyu.edu.hk;http://www.pku.edu.cn;;", "aff_unique_abbr": "Suda;Soochow U;PolyU;Peking U;;", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China;" }, { "id": "JHd4FSJSC5", "title": "Anchoring Fine-tuning of Sentence Transformer with Semantic Label Information for Efficient Truly Few-shot Classification", "track": "main", "status": "Short Main", "tldr": "", "abstract": "Few-shot classification is a powerful technique,\nbut training requires substantial computing\npower and data. We propose an efficient\nmethod with small model sizes and less training\ndata with only 2-8 training instances per class.\nOur proposed method, AncSetFit, targets low data\nscenarios by anchoring the task and label\ninformation through sentence embeddings in\nfine-tuning a Sentence Transformer model. It\nuses contrastive learning and a triplet loss to enforce\ntraining instances of a class to be closest\nto its own textual semantic label information\nin the embedding space - and thereby learning\nto embed different class instances more distinct.\nAncSetFit obtains strong performance\nin data-sparse scenarios compared to existing\nmethods across SST-5, Emotion detection, and\nAG News data, even with just two examples\nper class.", "keywords": "few-shot;sentence transformer;classification;efficiency", "primary_area": "", "supplementary_material": "", "author": "Amalie Brogaard Pauli;Leon Derczynski;Ira Assent", "authorids": "~Amalie_Brogaard_Pauli1;~Leon_Derczynski1;~Ira_Assent1", "gender": ";M;F", "homepage": ";https://www.derczynski.com/itu/;https://cs.au.dk/contact/people/show/person/ira@cs.au.dk", "dblp": ";66/8157;a/IraAssent", "google_scholar": ";https://scholar.google.dk/citations?user=d8iwqa8AAAAJ;https://scholar.google.com.tw/citations?user=w2n5LhUAAAAJ", "or_profile": "~Amalie_Brogaard_Pauli1;~Leon_Derczynski1;~Ira_Assent1", "aff": ";University of Washington;Aarhus University", "aff_domain": ";uw.edu;au.dk", "position": ";Visiting Professor;Full Professor", "bibtex": "@inproceedings{\npauli2023anchoring,\ntitle={Anchoring Fine-tuning of Sentence Transformer with Semantic Label Information for Efficient Truly Few-shot Classification},\nauthor={Amalie Brogaard Pauli and Leon Derczynski and Ira Assent},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=JHd4FSJSC5}\n}", "github": "", "project": "", "reviewers": "Dzxn;qm4T;3p6y", "site": "https://openreview.net/forum?id=JHd4FSJSC5", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;3;4", "excitement": "2;3;3", "reproducibility": "5;4;4", "correctness": "3;4;3", "rating_avg": 3.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 2.6666666666666665, "reproducibility_avg": 4.333333333333333, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": 
";0000-0002-8656-3431;0000-0002-1091-9948", "linkedin": ";leon-derczynski/;ira-assent-954b2431/", "aff_unique_index": "0;1", "aff_unique_norm": "University of Washington;Aarhus University", "aff_unique_dep": ";", "aff_unique_url": "https://www.washington.edu;https://au.dk", "aff_unique_abbr": "UW;AU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "United States;Denmark" }, { "id": "JI5lhPHVbK", "title": "Battle of the Large Language Models: Dolly vs LLaMA vs Vicuna vs Guanaco vs Bard vs ChatGPT - A Text-to-SQL Parsing Comparison", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "The success of ChatGPT has ignited an AI race, with researchers striving to develop new large language models (LLMs) that can match or surpass the language understanding and generation abilities of commercial ones. In recent times, a number of models have emerged, claiming performance near that of GPT-3.5 or GPT-4 through various instruction-tuning methods. As practitioners of Text-to-SQL parsing, we are grateful for their valuable contributions to open-source research. However, it is important to approach these claims with a sense of scrutiny and ascertain the actual effectiveness of these models. Therefore, we pit six popular large language models against each other, systematically evaluating their Text-to-SQL parsing capability on nine benchmark datasets with five different prompting strategies, covering both zero-shot and few-shot scenarios. Regrettably, the open-sourced models fell significantly short of the performance achieved by closed-source models like GPT-3.5, highlighting the need for further work to bridge the performance gap between these models.", "keywords": "Text-to-SQL;large language model", "primary_area": "", "supplementary_material": "", "author": "Shuo Sun;Yuchen Zhang;Jiahuan Yan;Yuze GAO;Donovan Ong;Bin Chen;Jian Su", "authorids": "~Shuo_Sun1;~Yuchen_Zhang9;~Jiahuan_Yan2;~Yuze_GAO1;~Donovan_Ong1;~Bin_Chen13;~Jian_Su1", "gender": "M;F;M;M;;M;", "homepage": ";https://www.linkedin.com/in/yuchen-zhang-nus/;https://github.com/YanJiaHuan;https://gyyz.github.io/;;;", "dblp": "04/4493;;;;;;", "google_scholar": ";;https://scholar.google.com.hk/citations?user=_qGKRv8AAAAJ;ube3oF0AAAAJ;;lel6kMAAAAAJ;", "or_profile": "~Shuo_Sun1;~Yuchen_Zhang9;~Jiahuan_Yan2;~Yuze_GAO1;~Donovan_Ong1;~Bin_Chen13;~Jian_Su1", "aff": ", A*STAR;A*STAR;National University of Singapore;A*STAR;;Institute for Infocomm Research, A*STAR;", "aff_domain": "i2r.a-star.edu.sg;a-star.edu.sg;u.nus.edu;a-star.edu.sg;;i2r.a-star.edu.sg;", "position": "Researcher;Researcher;MS student;Researcher;;Researcher;", "bibtex": "@inproceedings{\nsun2023battle,\ntitle={Battle of the Large Language Models: Dolly vs {LL}a{MA} vs Vicuna vs Guanaco vs Bard vs Chat{GPT} - A Text-to-{SQL} Parsing Comparison},\nauthor={Shuo Sun and Yuchen Zhang and Jiahuan Yan and Yuze GAO and Donovan Ong and Bin Chen and Jian Su},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=JI5lhPHVbK}\n}", "github": "", "project": "", "reviewers": "AYeh;9kjy;A359;tPZP", "site": "https://openreview.net/forum?id=JI5lhPHVbK", "pdf_size": 0, "rating": "3;3;3;3", "confidence": "4;4;4;5", "excitement": "2;4;2;2", "reproducibility": "3;5;3;3", "correctness": "2;4;2;3", "rating_avg": 3.0, "confidence_avg": 4.25, "excitement_avg": 2.5, "reproducibility_avg": 3.5, "correctness_avg": 2.75, "replies_avg": 13, 
"authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;0009-0002-1411-7915;", "linkedin": ";;jiahuan-yan-416757185/;;;bin-chen-297b0519/;", "aff_unique_index": "0;1;2;1;3", "aff_unique_norm": "A*STAR;Agency for Science, Technology and Research;National University of Singapore;Institute for Infocomm Research", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.a-star.edu.sg;https://www.a-star.edu.sg;https://www.nus.edu.sg;https://www.i2r.a-star.edu.sg", "aff_unique_abbr": "A*STAR;A*STAR;NUS;I2R", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "Singapore" }, { "id": "JIrP8CIvx6", "title": "Improving Sequential Model Editing with Fact Retrieval", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "The task of sequential model editing is to fix erroneous knowledge in Pre-trained Language Models (PLMs) efficiently, precisely and continuously. Although existing methods can deal with a small number of modifications, these methods experience a performance decline or require additional annotated data, when the number of edits increases. \n\nIn this paper, we propose a $\\textbf{R}$etrieval $\\textbf{A}$ugmented $\\textbf{S}$equential Model $\\textbf{E}$diting framework ($\\textbf{RASE}$) that leverages factual information to enhance editing generalization and to guide the identification of edits by retrieving related facts from the fact-patch memory we constructed.\nOur main findings are: \n(i) State-of-the-art models can hardly correct massive mistakes stably and efficiently;\n(ii) Even if we scale up to thousands of edits, RASE can significantly enhance editing generalization and maintain consistent performance and efficiency;\n(iii) RASE can edit large-scale PLMs and increase the performance of different editors.\n Moreover, it can integrate with ChatGPT and further improve performance. Our code and data are available at: https://github.com/sev777/RASE.", "keywords": "Model Editing; Sequential Model Editing; Pre-trained Language model", "primary_area": "", "supplementary_material": "", "author": "XiaoQi Han;Ru Li;Hongye Tan;Wang Yuanlong;Qinghua Chai;Jeff Z. Pan", "authorids": "~XiaoQi_Han1;~Ru_Li2;~Hongye_Tan2;~Wang_Yuanlong1;~Qinghua_Chai2;~Jeff_Z._Pan1", "gender": "M;F;F;M;M;M", "homepage": "https://scholar.google.com.sg/citations?hl=zh-CN&user=9K5bqZAAAAAJ;http://cs.sxu.edu.cn/faculty/professor/1448/index.htm;http://cs.sxu.edu.cn/faculty/professor/1469/index.htm;http://cs.sxu.edu.cn/faculty/associate_professor/4173/index.htm;http://wy.sxu.edu.cn/szdw/js/5ac6fecf387a45878396fe319e6921ef.htm;https://knowledge-representation.org/j.z.pan/", "dblp": "297/8914;90/3813-1;19/397;;;59/6490", "google_scholar": "https://scholar.google.com.sg/citations?hl=zh-CN;;;;;https://scholar.google.co.uk/citations?hl=en", "or_profile": "~XiaoQi_Han1;~Ru_Li2;~Hongye_Tan2;~Wang_Yuanlong1;~Qinghua_Chai2;~Jeff_Z._Pan1", "aff": "Shanxi University;Shanxi University;Shanxi University;University of ShanXi;Shanxi University;University of Edinburgh, University of Edinburgh", "aff_domain": "sxu.edu.cn;sxu.edu.cn;sxu.edu.cn;umass.edu;sxu.edu.cn;ed.ac.uk", "position": "PhD student;Full Professor;Full Professor;Associate Professor;Lecturer;Full Professor", "bibtex": "@inproceedings{\nhan2023improving,\ntitle={Improving Sequential Model Editing with Fact Retrieval},\nauthor={XiaoQi Han and Ru Li and Hongye Tan and Wang Yuanlong and Qinghua Chai and Jeff Z. 
Pan},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=JIrP8CIvx6}\n}", "github": "", "project": "", "reviewers": "Gyuf;Eqru;z2e8", "site": "https://openreview.net/forum?id=JIrP8CIvx6", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;4;3", "excitement": "3;3;5", "reproducibility": "4;4;5", "correctness": "4;3;4", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.333333333333333, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-8827-8474;0000-0003-1545-5553;0000-0002-5858-899X;;;0000-0002-9779-2088", "linkedin": ";;;;;", "aff_unique_index": "0;0;0;0;0;1", "aff_unique_norm": "Shanxi University;University of Edinburgh", "aff_unique_dep": ";", "aff_unique_url": "http://www.sxu.edu.cn;https://www.ed.ac.uk", "aff_unique_abbr": "SXU;Edinburgh", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;1", "aff_country_unique": "China;United Kingdom" }, { "id": "JKmsjKJ0Q8", "title": "Long-Horizon Dialogue Understanding for Role Identification in the Game of Avalon with Large Language Models", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Deception and persuasion play a critical role in long-horizon dialogues between multiple parties, especially when the interests, goals, and motivations of the participants are not aligned. Such complex tasks pose challenges for current Large Language Models (LLM) as deception and persuasion can easily mislead them, especially in long-horizon multi-party dialogues. To this end, we explore the game of Avalon: The Resistance, a social deduction game in which players must determine each other's hidden identities to complete their team\u2019s objective. We introduce an online testbed and a dataset containing 20 carefully collected and labeled games among human players that exhibit long-horizon deception in a cooperative-competitive setting. We discuss the capabilities of LLMs to utilize deceptive long-horizon conversations between six human players to determine each player's goal and motivation. Particularly, we discuss the multimodal integration of the chat between the players and the game's state that grounds the conversation, providing further insights into the true player identities. We find that even current state-of-the-art LLMs do not reach human performance, making our dataset a compelling benchmark to investigate the decision-making and language-processing capabilities of LLMs. Our dataset and online testbed can be found at our project website: https://sstepput.github.io/Avalon-NLU/", "keywords": "Dialogue Understanding;Multimodal Reasoning;Long-Horizon Games", "primary_area": "", "supplementary_material": "", "author": "Simon Stepputtis;Joseph Campbell;Yaqi Xie;Zhengyang Qi;Wenxin Sharon Zhang;Ruiyi Wang;Sanketh Rangreji;Charles Michael Lewis;Katia P. 
Sycara", "authorids": "~Simon_Stepputtis1;~Joseph_Campbell1;~Yaqi_Xie1;~Zhengyang_Qi1;~Wenxin_Sharon_Zhang1;~Ruiyi_Wang1;~Sanketh_Rangreji1;~Charles_Michael_Lewis1;~Katia_P._Sycara1", "gender": ";;F;M;F;F;M;M;F", "homepage": "https://simonstepputtis.com/;;https://yaqi-xie.me/;;;https://ruiyiw.github.io/;;http://www.pitt.edu/~cmlewis;", "dblp": "192/7092;179/2732;237/8691;;;;;;s/KatiaPSycara", "google_scholar": "WUQgzsAAAAAJ;1NmM6OUAAAAJ;lBCCo0EAAAAJ;4WAn1gwAAAAJ;;;;BBS25qkAAAAJ;VWv6a9kAAAAJ", "or_profile": "~Simon_Stepputtis1;~Joseph_Campbell1;~Yaqi_Xie1;~Zhengyang_Qi1;~Wenxin_Sharon_Zhang1;~Ruiyi_Wang1;~Sanketh_Rangreji1;~Charles_Michael_Lewis1;~Katia_P._Sycara1", "aff": "Carnegie Mellon University;Carnegie Mellon University;National University of Singapore;School of Computer Science, Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University;University of Pittsburgh;Carnegie Mellon University", "aff_domain": "cmu.edu;cmu.edu;nus.edu;cs.cmu.edu;andrew.cmu.edu;cmu.edu;andrew.cmu.edu;pitt.edu;cmu.edu", "position": "Postdoc;Postdoc;PhD student;MS student;MS student;MS student;MS student;Full Professor;Full Professor", "bibtex": "@inproceedings{\nstepputtis2023longhorizon,\ntitle={Long-Horizon Dialogue Understanding for Role Identification in the Game of Avalon with Large Language Models},\nauthor={Simon Stepputtis and Joseph Campbell and Yaqi Xie and Zhengyang Qi and Wenxin Sharon Zhang and Ruiyi Wang and Sanketh Rangreji and Charles Michael Lewis and Katia P. Sycara},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=JKmsjKJ0Q8}\n}", "github": "", "project": "", "reviewers": "f9MG;n5L7;YFpd", "site": "https://openreview.net/forum?id=JKmsjKJ0Q8", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;5;5", "excitement": "3;3;3", "reproducibility": "4;4;4", "correctness": "3;4;4", "rating_avg": 5.0, "confidence_avg": 4.666666666666667, "excitement_avg": 3.0, "reproducibility_avg": 4.0, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 9, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0009-0003-0519-3454;;0009-0005-0458-9419;;;;;0000-0002-1013-9482;", "linkedin": "simon-stepputtis/;;yaqi-xie/;zhengyang-jason-qi/;sharon-zhang-48690504/;ruiyi-wang-a43a32183/;sanketh-rangreji/;;", "aff_unique_index": "0;0;1;0;0;0;0;2;0", "aff_unique_norm": "Carnegie Mellon University;National University of Singapore;University of Pittsburgh", "aff_unique_dep": ";;", "aff_unique_url": "https://www.cmu.edu;https://www.nus.edu.sg;https://www.pitt.edu", "aff_unique_abbr": "CMU;NUS;Pitt", "aff_campus_unique_index": "1", "aff_campus_unique": ";Pittsburgh", "aff_country_unique_index": "0;0;1;0;0;0;0;0;0", "aff_country_unique": "United States;Singapore" }, { "id": "JMSkoIYFSn", "title": "Improving Span Representation by Efficient Span-Level Attention", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "High-quality span representations are crucial to natural language processing tasks involving span prediction and classification. Most existing methods derive a span representation by aggregation of token representations within the span. In contrast, we aim to improve span representations by considering span-span interactions as well as more comprehensive span-token interactions. Specifically, we introduce layers of span-level attention on top of a normal token-level transformer encoder. 
Given that attention between all span pairs results in $O(n^4)$ complexity ($n$ being the sentence length) and not all span interactions are intuitively meaningful, we restrict the range of spans that a given span could attend to, thereby reducing overall complexity to $O(n^3)$. We conduct experiments on various span-related tasks and show superior performance of our model surpassing baseline models. Our code is publicly available at \\url{https://github.com/jipy0222/Span-Level-Attention}.", "keywords": "representation learning;efficient methods", "primary_area": "", "supplementary_material": "", "author": "Pengyu Ji;Songlin Yang;Kewei Tu", "authorids": "~Pengyu_Ji1;~Songlin_Yang1;~Kewei_Tu1", "gender": "M;F;M", "homepage": "https://jipy0222.github.io;https://sustcsonglin.github.io;https://faculty.sist.shanghaitech.edu.cn/faculty/tukw/", "dblp": ";;22/918", "google_scholar": "XScFHEMAAAAJ;1chlis0AAAAJ;5gi3Pm0AAAAJ", "or_profile": "~Pengyu_Ji1;~Songlin_Yang1;~Kewei_Tu1", "aff": "ShanghaiTech University;ShanghaiTech University;ShanghaiTech University", "aff_domain": "shanghaitech.edu.cn;shanghaitech.edu.cn;shanghaitech.edu.cn", "position": "Undergrad student;MS student;Associate Professor", "bibtex": "@inproceedings{\nji2023improving,\ntitle={Improving Span Representation by Efficient Span-Level Attention},\nauthor={Pengyu Ji and Songlin Yang and Kewei Tu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=JMSkoIYFSn}\n}", "github": "", "project": "", "reviewers": "Qvm2;T2H8;iZpj", "site": "https://openreview.net/forum?id=JMSkoIYFSn", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;5;5", "excitement": "2;3;3", "reproducibility": "3;3;5", "correctness": "3;3;4", "rating_avg": 3.0, "confidence_avg": 4.333333333333333, "excitement_avg": 2.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": ";;", "aff_unique_index": "0;0;0", "aff_unique_norm": "ShanghaiTech University", "aff_unique_dep": "", "aff_unique_url": "https://www.shanghaitech.edu.cn", "aff_unique_abbr": "ShanghaiTech", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "JMbJeMTFos", "title": "Improving word mover's distance by leveraging self-attention matrix", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Measuring the semantic similarity between two sentences is still an important task. The word mover's distance (WMD) computes the similarity via the optimal alignment between the sets of word embeddings. However, WMD does not utilize word order, making it challenging to distinguish sentences with significant overlaps of similar words, even if they are semantically very different. Here, we attempt to improve WMD by incorporating the sentence structure represented by BERT's self-attention matrix (SAM). The proposed method is based on the Fused Gromov-Wasserstein distance, which simultaneously considers the similarity of the word embedding and the SAM for calculating the optimal transport between two sentences. 
Experiments demonstrate the proposed method enhances WMD and its variants in paraphrase identification with near-equivalent performance in semantic textual similarity.", "keywords": "word embeddings;word mover's distance;optimal transport;Gromov-Wasserstein distance;Fused Gromov-Wasserstein distance;Self-Attention;paraphrase identification;semantic textual similarity", "primary_area": "", "supplementary_material": "", "author": "Hiroaki Yamagiwa;Sho Yokoi;Hidetoshi Shimodaira", "authorids": "~Hiroaki_Yamagiwa1;~Sho_Yokoi1;~Hidetoshi_Shimodaira1", "gender": "M;;M", "homepage": "https://ymgw55.github.io/;http://www.cl.ecei.tohoku.ac.jp/~yokoi/;http://stat.sys.i.kyoto-u.ac.jp/", "dblp": "333/0809;184/8316;19/3393", "google_scholar": "k5m5X-EAAAAJ;https://scholar.google.co.jp/citations?user=EW2QPKoAAAAJ;LvoIaIsAAAAJ", "or_profile": "~Hiroaki_Yamagiwa1;~Sho_Yokoi1;~Hidetoshi_Shimodaira1", "aff": "Kyoto University, Kyoto University;Tohoku University;RIKEN", "aff_domain": "i.kyoto-u.ac.jp;tohoku.ac.jp;riken.jp", "position": "PhD student;Assistant Professor;Researcher", "bibtex": "@inproceedings{\nyamagiwa2023improving,\ntitle={Improving word mover's distance by leveraging self-attention matrix},\nauthor={Hiroaki Yamagiwa and Sho Yokoi and Hidetoshi Shimodaira},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=JMbJeMTFos}\n}", "github": "", "project": "", "reviewers": "RMgW;ycfD;YoLW", "site": "https://openreview.net/forum?id=JMbJeMTFos", "pdf_size": 0, "rating": "3;3;3", "confidence": "2;4;3", "excitement": "3;4;3", "reproducibility": "4;4;4", "correctness": "3;3;4", "rating_avg": 3.0, "confidence_avg": 3.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.0, "correctness_avg": 3.3333333333333335, "replies_avg": 10, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0009-0002-4437-5245;0000-0002-3371-7724", "linkedin": ";shoyokoi/;shimo/", "aff_unique_index": "0;1;2", "aff_unique_norm": "Kyoto University;Tohoku University;RIKEN", "aff_unique_dep": ";;", "aff_unique_url": "https://www.kyoto-u.ac.jp;https://www.tohoku.ac.jp;https://www.riken.jp", "aff_unique_abbr": "Kyoto U;Tohoku U;RIKEN", "aff_campus_unique_index": "0", "aff_campus_unique": "Kyoto;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Japan" }, { "id": "JNd6XPdaXj", "title": "Polyglot or Not? Measuring Multilingual Encyclopedic Knowledge in Foundation Models", "track": "main", "status": "Short Main", "tldr": "", "abstract": "In this work, we assess the ability of foundation models to recall encyclopedic knowledge across a wide range of linguistic contexts. To support this, we: 1) produce a 20-language dataset that contains 303k factual associations paired with counterfactuals, 2) evaluate 5 models in a multilingual test, and 3) benchmark a diverse set of 24 models in an English-only test. Meta's LLaMA achieves the highest scores in both multilingual and English-only evaluations. Yet, an analysis of LLaMA's errors reveals significant limitations in its ability to recall facts in languages other than English, plus difficulties related to the location and gender of fact subjects. 
Overall, our findings suggest that today's foundation models are far from polyglots.", "keywords": "NLP;machine learning;LLMs;language modeling;multilingual;datasets;benchmarks", "primary_area": "", "supplementary_material": "", "author": "Tim Schott;Daniel Ryan Furman;Shreshta Bhat", "authorids": "~Tim_Schott1;~Daniel_Ryan_Furman1;~Shreshta_Bhat1", "gender": "M;M;", "homepage": "https://timschott.com;https://daniel-furman.github.io/;", "dblp": ";;", "google_scholar": "DUBNB7YAAAAJ;phb-R4cAAAAJ;", "or_profile": "~Tim_Schott1;~Daniel_Ryan_Furman1;~Shreshta_Bhat1", "aff": "University of California, Berkeley;University of California, Berkeley;University of California, Berkeley", "aff_domain": "berkeley.edu;berkeley.edu;berkeley.edu", "position": "MS student;MS student;MS student", "bibtex": "@inproceedings{\nschott2023polyglot,\ntitle={Polyglot or Not? Measuring Multilingual Encyclopedic Knowledge in Foundation Models},\nauthor={Tim Schott and Daniel Ryan Furman and Shreshta Bhat},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=JNd6XPdaXj}\n}", "github": "", "project": "", "reviewers": "vbS2;CaEc;JE4L", "site": "https://openreview.net/forum?id=JNd6XPdaXj", "pdf_size": 0, "rating": "5;5;5", "confidence": "3;3;4", "excitement": "4;3;4", "reproducibility": "5;4;5", "correctness": "4;3;4", "rating_avg": 5.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.666666666666667, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": ";daniel-ryan-furman/;bhat-shreshta", "aff_unique_index": "0;0;0", "aff_unique_norm": "University of California, Berkeley", "aff_unique_dep": "", "aff_unique_url": "https://www.berkeley.edu", "aff_unique_abbr": "UC Berkeley", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Berkeley", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "JPUx2nVgWa", "title": "How Many Demonstrations Do You Need for In-context Learning?", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Large language models (LLMs) are capable to perform complex reasoning by in-context learning (ICL) when provided with a few input-output demonstrations (demos) and more powerful when intermediate reasoning steps (chain of thoughts (CoT)) of the demos are given. Is it necessary to use multi-demo in ICL? In this paper, we study ICL using fewer demos for each test query on the tasks in~\\cite{wei2022chain}. Surprisingly, we do not observe significant degradation when using only one randomly chosen demo. To study this phenomenon, for each test query, we categorize demos into ''positive demos'' leading to the correct answer, and ``negative demos'' resulting in wrong answers. Our analysis reveals an inherent bias in those widely studied datasets and the redundancy of demos: most demos are positive for a majority of test queries, which explains the good performance of ICL with one random demo. Moreover, ICL (with and w/o CoT) using only one positive demo significantly outperforms multi-demo ICL adopted by most previous works, indicating the weakness of LLMs in finding positive demo(s) for input queries, which is difficult to evaluate on the biased datasets. 
Furthermore, we observe a counterintuitive behavior of ICL using multi-demo, i.e., its accuracy degrades(improves) when given more positive(negative) demos. This implies that ICL can be easily misguided by interference among demos and their spurious correlations. Our analyses highlight several fundamental challenges that need to be addressed in LLMs training, ICL, and benchmark design.", "keywords": "In-context learning: Large language model", "primary_area": "", "supplementary_material": "", "author": "Jiuhai Chen;Lichang Chen;Chen Zhu;Tianyi Zhou", "authorids": "~Jiuhai_Chen1;~Lichang_Chen2;~Chen_Zhu2;~Tianyi_Zhou1", "gender": "M;M;M;M", "homepage": "https://www.linkedin.com/in/jiuhai-chen-6a486715a/;http://www.cs.umd.edu/~chenzhu/;https://tianyizhou.github.io/;", "dblp": ";59/10522-1.html;88/8205-1;151/6212", "google_scholar": ";m-om5O8AAAAJ;OKvgizMAAAAJ;https://scholar.google.com/citations?hl=zh-CN", "or_profile": "~Jiuhai_Chen1;~Chen_Zhu2;~Tianyi_Zhou1;~LICHANG_CHEN1", "aff": "University of Maryland, College Park;Google;University of Maryland, College Park;Department of Computer Science, University of Maryland, College Park", "aff_domain": "umd.edu;google.com;umd.edu;cs.umd.edu", "position": "PhD student;Research Scientist;Assistant Professor;PhD student", "bibtex": "@inproceedings{\nchen2023how,\ntitle={How Many Demonstrations Do You Need for In-context Learning?},\nauthor={Jiuhai Chen and Lichang Chen and Chen Zhu and Tianyi Zhou},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=JPUx2nVgWa}\n}", "github": "", "project": "", "reviewers": "k4LP;byCC;XyZd", "site": "https://openreview.net/forum?id=JPUx2nVgWa", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;3;3", "excitement": "3;3;4", "reproducibility": "4;3;4", "correctness": "2;3;4", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0001-5348-0632;", "linkedin": ";;tianyizhou;lichang-chen-b7a506173/", "aff_unique_index": "0;1;0;2", "aff_unique_norm": "University of Maryland;Google;University of Maryland, College Park", "aff_unique_dep": ";Google;Department of Computer Science", "aff_unique_url": "https://www/umd.edu;https://www.google.com;https://www/umd.edu", "aff_unique_abbr": "UMD;Google;UMD", "aff_campus_unique_index": "0;1;0;0", "aff_campus_unique": "College Park;Mountain View", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "JRHhpw77q3", "title": "Steering Large Language Models for Machine Translation with Finetuning and In-Context Learning", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "Large language models (LLMs) are a promising avenue for machine translation (MT). However, current LLM-based MT systems are brittle: their effectiveness highly depends on the choice of few-shot examples and they often require extra post-processing due to overgeneration. Alternatives such as finetuning on translation instructions are computationally expensive and may weaken in-context learning capabilities, due to overspecialization. In this paper, we provide a closer look at this problem. We start by showing that adapter-based finetuning with LoRA matches the performance of traditional finetuning while reducing the number of training parameters by a factor of 50. 
This method also outperforms few-shot prompting and eliminates the need for post-processing or in-context examples. However, we show that finetuning generally degrades few-shot performance, hindering adaptation capabilities. Finally, to obtain the best of both worlds, we propose a simple approach that incorporates few-shot examples during finetuning. Experiments on 10 language pairs show that our proposed approach recovers the original few-shot capabilities while keeping the added benefits of finetuning.", "keywords": "Machine Translation; In-Context Learning; Efficient Finetuning", "primary_area": "", "supplementary_material": "", "author": "Duarte Miguel Alves;Nuno M Guerreiro;Jo\u00e3o Alves;Jos\u00e9 Pombal;Ricardo Rei;Jos\u00e9 G. C. de Souza;Pierre Colombo;Andre Martins", "authorids": "~Duarte_Miguel_Alves1;~Nuno_M_Guerreiro1;~Jo\u00e3o_Alves2;~Jos\u00e9_Pombal1;~Ricardo_Rei1;~Jos\u00e9_G._C._de_Souza1;~Pierre_Colombo2;~Andre_Martins1", "gender": "M;;M;;M;M;M;M", "homepage": ";https://nunonmg.github.io/;;;https://pierrecolombo.github.io/;https://andre-martins.github.io/;;", "dblp": "329/4709;267/0265;;;;m/AndreFTMartins;66/1087;72/3176", "google_scholar": ";268m9FgAAAAJ;;;yPoMt8gAAAAJ;https://scholar.google.pt/citations?user=mT7ppvwAAAAJ;20ApDosAAAAJ;jf4S4tsAAAAJ", "or_profile": "~Duarte_Miguel_Alves1;~Nuno_M_Guerreiro1;~Jo\u00e3o_Alves2;~Jos\u00e9_Pombal1;~Pierre_Colombo2;~Andre_Martins1;~Jos\u00e9_Guilherme_Camargo_de_Souza2;~Ricardo_Costa_Dias_Rei1", "aff": "Instituto Superior T\u00e9cnico;Instituto Superior T\u00e9cnico;Unbabel;Feedzai;CentraleSupelec;Unbabel;Unbabel;INESC-ID", "aff_domain": "tecnico.ulisboa.pt;tecnico.ulisboa.pt;unbabel.com;feedzai.com;centralesupelec.fr;unbabel.com;unbabel.com;inesc-id.pt", "position": "PhD student;PhD student;Researcher;Intern;Assistant Professor;Research Scientist;Researcher;Researcher", "bibtex": "@inproceedings{\nalves2023steering,\ntitle={Steering Large Language Models for Machine Translation with Finetuning and In-Context Learning},\nauthor={Duarte Miguel Alves and Nuno M Guerreiro and Jo{\\~a}o Alves and Jos{\\'e} Pombal and Ricardo Rei and Jos{\\'e} G. C. 
de Souza and Pierre Colombo and Andre Martins},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=JRHhpw77q3}\n}", "github": "", "project": "", "reviewers": "Rre2;G2rv;mF6X;3izU;ceRx", "site": "https://openreview.net/forum?id=JRHhpw77q3", "pdf_size": 0, "rating": "3;3;3;3;3", "confidence": "4;5;3;3;3", "excitement": "4;3;3;2;4", "reproducibility": "3;4;3;3;4", "correctness": "4;3;3;3;4", "rating_avg": 3.0, "confidence_avg": 3.6, "excitement_avg": 3.2, "reproducibility_avg": 3.4, "correctness_avg": 3.4, "replies_avg": 16, "authors#_avg": 8, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0009-0007-2109-9555;;;;;;0000-0001-6344-7633;0000-0001-8265-1939", "linkedin": "duarte-alves;;jmc-alves/;jos%C3%A9-maria-prc-pombal/;;;josesouza/;ricardo-rei-159154172/", "aff_unique_index": "0;0;1;2;3;1;1;4", "aff_unique_norm": "Instituto Superior T\u00e9cnico;Unbabel;Feedzai;CentraleSup\u00e9lec;INESC-ID", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.ist.utl.pt;https://www.unbabel.com;https://www.feedzai.com;https://www.centralesupelec.fr;https://www.inesc-id.pt", "aff_unique_abbr": "IST;;Feedzai;CS;INESC-ID", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;1;0;0;0", "aff_country_unique": "Portugal;France" }, { "id": "JW3UKn4bmG", "title": "Is ChatGPT a Good Causal Reasoner? A Comprehensive Evaluation", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Causal reasoning ability is crucial for numerous NLP applications. Despite the impressive emerging ability of ChatGPT in various NLP tasks, it is unclear how well ChatGPT performs in causal reasoning.\nIn this paper, we conduct the first comprehensive evaluation of the ChatGPT's causal reasoning capabilities.\nExperiments show that ChatGPT is not a good causal reasoner, but a good causal interpreter. Besides, ChatGPT has a serious hallucination on causal reasoning, possibly due to the reporting biases between causal and non-causal relationships in natural language, as well as ChatGPT's upgrading processes, such as RLHF. The In-Context Learning (ICL) and Chain-of-Thought (COT) techniques can further exacerbate such causal hallucination. Additionally, the causal reasoning ability of ChatGPT is sensitive to the words used to express the causal concept in prompts, and close-ended prompts perform better than open-ended prompts. 
For events in sentences, ChatGPT excels at capturing explicit causality rather than implicit causality, and performs better in sentences with lower event density and smaller lexical distance between events.", "keywords": "Causal Reasoning;Large Language Models;Performance Evaluation", "primary_area": "", "supplementary_material": "", "author": "Jinglong Gao;Xiao Ding;Bing Qin;Ting Liu", "authorids": "~Jinglong_Gao1;~Xiao_Ding1;~Bing_Qin2;~Ting_Liu2", "gender": "M;M;;M", "homepage": "https://arrogantl.github.io;http://ir.hit.edu.cn/~xding/index_english.htm;http://ir.hit.edu.cn/~qinb;", "dblp": "200/1083;;86/5934.html;52/5150-1", "google_scholar": "jaVvkHIAAAAJ;Mi9XXuAAAAAJ;LKnCub0AAAAJ;zyMJ1V0AAAAJ", "or_profile": "~Jinglong_Gao1;~Xiao_Ding1;~Bing_Qin2;~Ting_Liu2", "aff": "Research Center for Social Computing and Information Retrieval;Harbin Institute of Technology;Harbin Institute of Technology;Harbin Institute of Technology", "aff_domain": "ir.hit.edu.cn;hit.edu.cn;hit.edu.cn;hit.edu.cn", "position": "MS student;Full Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\ngao2023is,\ntitle={Is Chat{GPT} a Good Causal Reasoner? A Comprehensive Evaluation},\nauthor={Jinglong Gao and Xiao Ding and Bing Qin and Ting Liu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=JW3UKn4bmG}\n}", "github": "", "project": "", "reviewers": "vWs8;SCUQ;MYMZ", "site": "https://openreview.net/forum?id=JW3UKn4bmG", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;3;5", "excitement": "3;3;3", "reproducibility": "3;4;4", "correctness": "3;4;4", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-5838-0320;0000-0002-2543-5604;", "linkedin": ";;;", "aff_unique_index": "0;1;1;1", "aff_unique_norm": "Research Center for Social Computing and Information Retrieval;Harbin Institute of Technology", "aff_unique_dep": "Research Center;", "aff_unique_url": ";http://www.hit.edu.cn/", "aff_unique_abbr": ";HIT", "aff_campus_unique_index": "1;1;1", "aff_campus_unique": ";Harbin", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "JWMIm1EyaE", "title": "Explaining with Contrastive Phrasal Highlighting: A Case Study in Assisting Humans to Detect Translation Differences", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Explainable NLP techniques primarily explain by answering \"Which tokens in the input are responsible for this prediction?\". We argue that for NLP models that make predictions by comparing two input texts, it is more useful to explain by answering \"What differences between the two inputs explain this prediction?\". We introduce a technique to generate contrastive phrasal highlights that explain the predictions of a semantic divergence model via phrase alignment guided erasure. 
We show that the resulting highlights match human rationales of cross-lingual semantic differences better than popular post-hoc saliency techniques and that they successfully help people detect fine-grained meaning differences in human translations and critical machine translation errors.", "keywords": "explainability;human-centered evaluation;machine translation evaluation;cross-lingual semantics;contrastive highlights", "primary_area": "", "supplementary_material": "", "author": "Eleftheria Briakou;Navita Goyal;Marine Carpuat", "authorids": "~Eleftheria_Briakou1;~Navita_Goyal1;~Marine_Carpuat1", "gender": "F;F;F", "homepage": "https://elbria.github.io;https://navitagoyal.github.io/;http://www.cs.umd.edu/~marine/", "dblp": "217/4858;277/1584;71/1827", "google_scholar": "bxqqNFEAAAAJ;YrvZ2E0AAAAJ;iPAX6jcAAAAJ", "or_profile": "~Eleftheria_Briakou1;~Navita_Goyal1;~Marine_Carpuat1", "aff": "Department of Computer Science, University of Maryland, College Park;University of Maryland, College Park;University of Maryland, College Park", "aff_domain": "cs.umd.edu;umd.edu;umd.edu", "position": "PhD student;PhD student;Associate Professor", "bibtex": "@inproceedings{\nbriakou2023explaining,\ntitle={Explaining with Contrastive Phrasal Highlighting: A Case Study in Assisting Humans to Detect Translation Differences},\nauthor={Eleftheria Briakou and Navita Goyal and Marine Carpuat},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=JWMIm1EyaE}\n}", "github": "", "project": "", "reviewers": "WUTA;w8E1;uUif", "site": "https://openreview.net/forum?id=JWMIm1EyaE", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;2", "excitement": "4;4;4", "reproducibility": "3;3;2", "correctness": "3;3;3", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 4.0, "reproducibility_avg": 2.6666666666666665, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": ";;", "aff_unique_index": "0;1;1", "aff_unique_norm": "University of Maryland, College Park;University of Maryland", "aff_unique_dep": "Department of Computer Science;", "aff_unique_url": "https://www/umd.edu;https://www/umd.edu", "aff_unique_abbr": "UMD;UMD", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "College Park", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "JaP8ZnOxmi", "title": "Mitigating Framing Bias with Polarity Minimization Loss", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "Framing bias plays a significant role in exacerbating political polarization by distorting the perception of actual events.\nMedia outlets with divergent political stances often use polarized language in their reporting of the same event.\nWe propose a new loss function that encourages the model to minimize the polarity difference between the polarized input articles to reduce framing bias. Specifically, our loss is designed to jointly optimize the model to map polarity ends bidirectionally.\nOur experimental results demonstrate that incorporating the proposed polarity minimization loss leads to a substantial reduction in framing bias when compared to a BART-based multi-document summarization model. 
Notably, we find that the effectiveness of this approach is most pronounced when the model is trained to minimize the polarity loss associated with informational framing bias (i.e., skewed selection of information to report).", "keywords": "framing bias", "primary_area": "", "supplementary_material": "", "author": "Yejin Bang;Nayeon Lee;Pascale Fung", "authorids": "~Yejin_Bang1;~Nayeon_Lee1;~Pascale_Fung1", "gender": ";F;F", "homepage": ";;http://pascale.home.ece.ust.hk/", "dblp": "261/2805;;29/4187", "google_scholar": "https://scholar.google.com.hk/citations?user=s2bVuXEAAAAJ;https://scholar.google.com.hk/citations?user=HN6Y7z0AAAAJ;", "or_profile": "~Yejin_Bang1;~Nayeon_Lee1;~Pascale_Fung1", "aff": "Hong Kong University of Science and Technology;Hong Kong University of Science and Technology;HKUST", "aff_domain": "ust.hk;ust.hk;ece.ust.hk", "position": "PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nbang2023mitigating,\ntitle={Mitigating Framing Bias with Polarity Minimization Loss},\nauthor={Yejin Bang and Nayeon Lee and Pascale Fung},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=JaP8ZnOxmi}\n}", "github": "", "project": "", "reviewers": "rWMe;xt1k;X96S", "site": "https://openreview.net/forum?id=JaP8ZnOxmi", "pdf_size": 0, "rating": "3;3;3", "confidence": "2;4;4", "excitement": "3;3;3", "reproducibility": "5;4;3", "correctness": "3;3;3", "rating_avg": 3.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.0, "reproducibility_avg": 4.0, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": ";;", "aff_unique_index": "0;0;0", "aff_unique_norm": "Hong Kong University of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.ust.hk", "aff_unique_abbr": "HKUST", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Hong Kong SAR", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "Jc0sVyM0JP", "title": "Semantic Parsing by Large Language Models for Intricate Updating Strategies of Zero-Shot Dialogue State Tracking", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "Zero-shot Dialogue State Tracking (DST) addresses the challenge of acquiring and annotating task-oriented dialogues, which can be time-consuming and costly. \nHowever, DST extends beyond simple slot-filling and requires effective updating strategies for tracking dialogue state as conversations progress. \nIn this paper, we propose ParsingDST, a new In-Context Learning (ICL) method, to introduce additional intricate updating strategies in zero-shot DST. \nOur approach reformulates the DST task by leveraging powerful Large Language Models (LLMs) and translating the original dialogue text to JSON through semantic parsing as an intermediate state. \nWe also design a novel framework that includes more modules to ensure the effectiveness of updating strategies in the text-to-JSON process. 
\nExperimental results demonstrate that our approach outperforms existing zero-shot DST methods on MultiWOZ, exhibiting significant improvements in Joint Goal Accuracy (JGA) and slot accuracy compared to existing ICL methods.", "keywords": "Dialogue State Tracking;Zero-Shot;Large Language Models;In-Context Learning;Semantic Parsing", "primary_area": "", "supplementary_material": "", "author": "Yuxiang Wu;Guanting Dong;Weiran Xu", "authorids": "~Yuxiang_Wu3;~Guanting_Dong1;~Weiran_Xu1", "gender": ";M;M", "homepage": "https://www.zhihu.com/people/newera-86;https://dongguanting.github.io/;", "dblp": ";;41/5448", "google_scholar": ";amozZDkAAAAJ;https://scholar.google.com/citations?view_op=list_works", "or_profile": "~Yuxiang_Wu3;~Guanting_Dong1;~Weiran_Xu1", "aff": "Beijing University of Posts and Telecommunications;Beijing University of Posts and Telecommunications;Beijing University of Post and Telecommunication", "aff_domain": "bupt.edu.cn;bupt.edu.cn;bupt.edu.cn", "position": "MS student;MS student;Associate Professor", "bibtex": "@inproceedings{\nwu2023semantic,\ntitle={Semantic Parsing by Large Language Models for Intricate Updating Strategies of Zero-Shot Dialogue State Tracking},\nauthor={Yuxiang Wu and Guanting Dong and Weiran Xu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=Jc0sVyM0JP}\n}", "github": "", "project": "", "reviewers": "Qxvg;Zu2U;9Gsy", "site": "https://openreview.net/forum?id=Jc0sVyM0JP", "pdf_size": 0, "rating": "3;3;3", "confidence": "5;4;4", "excitement": "3;3;3", "reproducibility": "3;4;3", "correctness": "2;3;4", "rating_avg": 3.0, "confidence_avg": 4.333333333333333, "excitement_avg": 3.0, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0002-9416-7666", "linkedin": ";;", "aff_unique_index": "0;0;0", "aff_unique_norm": "Beijing University of Posts and Telecommunications", "aff_unique_dep": "", "aff_unique_url": "http://www.bupt.edu.cn/", "aff_unique_abbr": "BUPT", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Beijing", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "JhC3lwWDhZ", "title": "Treepiece: Faster Semantic Parsing via Tree Tokenization", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "\\emph{Autoregressive} (AR) encoder-decoder neural networks have proved successful in many NLP problems, including \\emph{Semantic Parsing} -- a task that translates natural language to machine-readable \\emph{parse trees}. However, the sequential prediction process of AR models can be slow. To accelerate AR for semantic parsing, we introduce a new technique called \\emph{TreePiece} that tokenizes a parse tree into subtrees and generates one subtree per decoding step. 
On TOPv2 benchmark, TreePiece shows $4.6$ times faster decoding speed than standard AR, and comparable speed but significantly higher accuracy compared to \\emph{Non-Autoregressive} (NAR).", "keywords": "semantic parsing;decoding;tokenization algorithm;parse tree", "primary_area": "", "supplementary_material": "", "author": "Sid Wang;Akshat Shrivastava;Aleksandr Livshits", "authorids": "~Sid_Wang1;~Akshat_Shrivastava1;~Aleksandr_Livshits1", "gender": "M;M;M", "homepage": ";https://akshatsh.github.io/;https://www.linkedin.com/in/alivshits", "dblp": "340/6784;;", "google_scholar": "https://scholar.google.com/citations?hl=en;ecQt6m4AAAAJ;https://scholar.google.com/citations?hl=en", "or_profile": "~Sid_Wang1;~Akshat_Shrivastava1;~Aleksandr_Livshits1", "aff": "Meta platforms inc;Meta Facebook;Meta", "aff_domain": "meta.com;facebook.com;meta.com", "position": "Researcher;Researcher;Researcher", "bibtex": "@inproceedings{\nwang2023treepiece,\ntitle={Treepiece: Faster Semantic Parsing via Tree Tokenization},\nauthor={Sid Wang and Akshat Shrivastava and Aleksandr Livshits},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=JhC3lwWDhZ}\n}", "github": "", "project": "", "reviewers": "JKxP;7MJZ;5cvo", "site": "https://openreview.net/forum?id=JhC3lwWDhZ", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;4;2", "excitement": "3;2;4", "reproducibility": "4;3;3", "correctness": "4;3;3", "rating_avg": 4.0, "confidence_avg": 3.0, "excitement_avg": 3.0, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": ";akshatsh/;", "aff_unique_index": "0;0;0", "aff_unique_norm": "Meta", "aff_unique_dep": "Meta Platforms, Inc.", "aff_unique_url": "https://www.meta.com", "aff_unique_abbr": "Meta", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "JhzzvJnL9t", "title": "Out-of-Domain Intent Detection Considering Multi-turn Dialogue Contexts", "track": "main", "status": "Reject", "tldr": "", "abstract": "Out-of-Domain (OOD) intent detection is vital for practical dialogue systems, and it usually requires considering multi-turn dialogue contexts.\nHowever, most previous OOD intent detection approaches are limited to single dialogue turns.\nIn this paper, we introduce a context-aware OOD intent detection (Caro) framework to model multi-turn contexts in OOD intent detection tasks.\nSpecifically, we follow the information bottleneck principle to extract robust representations from multi-turn dialogue contexts.\nTwo different views are constructed for each input sample and the superfluous information not related to intent detection is removed using a multi-view information bottleneck loss.\nMoreover, we also explore utilizing unlabeled data in Caro.\nA two-stage training process is introduced to mine OOD samples from these unlabeled data,\nand these OOD samples are used to train the resulting model with a bootstrapping approach.\nComprehensive experiments demonstrate that Caro establishes state-of-the-art performances on multi-turn OOD detection tasks by improving the F1-OOD score of over 29% compared to the previous best method.", "keywords": "OOD Detection;Multi-turn Dialogue Contexts", "primary_area": "", "supplementary_material": "", "author": "Hao Lang;Yinhe Zheng;Binyuan Hui;Fei 
Huang;Yongbin Li", "authorids": "~Hao_Lang1;~Yinhe_Zheng1;~Binyuan_Hui1;~Fei_Huang1;~Yongbin_Li2", "gender": "M;;F;;M", "homepage": "https://github.com/langhaobeijing;;https://huybery.github.io/;;https://yongbin-li.github.io/", "dblp": "71/6934.html;;246/4699;;", "google_scholar": "0UGQL9QAAAAJ;;RBb3ItMAAAAJ;;xF5VrokAAAAJ", "or_profile": "~Hao_Lang1;~Yinhe_Zheng1;~Binyuan_Hui1;~Fei_Huang1;~Yongbin_Li2", "aff": "Tongyi Lab, Alibaba Group;;Alibaba Group;;Alibaba Group", "aff_domain": "alibaba-inc.com;;alibaba-inc.com;;alibaba-inc.com", "position": "Researcher;;Researcher;;Researcher", "bibtex": "@misc{\nlang2023outofdomain,\ntitle={Out-of-Domain Intent Detection Considering Multi-turn Dialogue Contexts},\nauthor={Hao Lang and Yinhe Zheng and Binyuan Hui and Fei Huang and Yongbin Li},\nyear={2023},\nurl={https://openreview.net/forum?id=JhzzvJnL9t}\n}", "github": "", "project": "", "reviewers": "H5LW;dLzu;j6vP", "site": "https://openreview.net/forum?id=JhzzvJnL9t", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;3;2", "excitement": "3;3;3", "reproducibility": "2;4;4", "correctness": "2;3;3", "rating_avg": 3.0, "confidence_avg": 2.6666666666666665, "excitement_avg": 3.0, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 2.6666666666666665, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;", "linkedin": ";;;;", "aff_unique_index": "0;0;0", "aff_unique_norm": "Alibaba Group", "aff_unique_dep": "Tongyi Lab", "aff_unique_url": "https://www.alibaba.com", "aff_unique_abbr": "Alibaba", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "JiUTJJrkL4", "title": "clembench: Using Game Play to Evaluate Chat-Optimized Language Models as Conversational Agents", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Recent work has proposed a methodology for the systematic evaluation of \"Situated Language Understanding Agents\" --- agents that operate in rich linguistic and non-linguistic contexts --- through testing them in carefully constructed interactive settings. Other recent work has argued that Large Language Models (LLMs), if suitably set up, can be understood as (simulators of) such agents. A connection suggests itself, which this paper explores: Can LLMs be evaluated meaningfully by exposing them to constrained game-like settings that are built to challenge specific capabilities? As a proof of concept, this paper investigates five interaction settings, showing that current chat-optimised LLMs are, to an extent, capable of following game-play instructions. Both this capability and the quality of the game play, measured by how well the objectives of the different games are met, follows the development cycle, with newer models generally performing better. 
The metrics even for the comparatively simple example games are far from being saturated, suggesting that the proposed instrument will remain to have diagnostic value.", "keywords": "large language models; evaluation; dialogue; dialogue games; interaction", "primary_area": "", "supplementary_material": "", "author": "Kranti CH;Jana G\u00f6tze;Sherzod Hakimov;Brielen Madureira;Philipp Sadler;David Schlangen", "authorids": "~Kranti_CH1;~Jana_G\u00f6tze1;~Sherzod_Hakimov1;~Brielen_Madureira1;~Philipp_Sadler1;~David_Schlangen1", "gender": ";;;F;M;M", "homepage": ";;https://sherzod-hakimov.github.io/;https://briemadu.github.io/;;http://www.ling.uni-potsdam.de/~das", "dblp": ";49/11429;117/6023;270/7933;228/8385;11/1189", "google_scholar": ";;7cm4SVgAAAAJ;;;https://scholar.google.com.tw/citations?user=QoDgwZYAAAAJ", "or_profile": "~Kranti_CH1;~Jana_G\u00f6tze1;~Sherzod_Hakimov1;~Brielen_Madureira1;~Philipp_Sadler1;~David_Schlangen1", "aff": ";Universit\u00e4t Potsdam;Universit\u00e4t Potsdam;Universidade de S\u00e3o Paulo;University of Potsdam;University of Potsdam", "aff_domain": ";uni-potsdam.de;uni-potsdam.de;usp.br;uni-potsdam.de;uni-potsdam.de", "position": ";Postdoc;Postdoc;Undergrad student;PhD student;Full Professor", "bibtex": "@inproceedings{\nch2023clembench,\ntitle={clembench: Using Game Play to Evaluate Chat-Optimized Language Models as Conversational Agents},\nauthor={Kranti CH and Jana G{\\\"o}tze and Sherzod Hakimov and Brielen Madureira and Philipp Sadler and David Schlangen},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=JiUTJJrkL4}\n}", "github": "", "project": "", "reviewers": "LsBC;6HrB;dwAD", "site": "https://openreview.net/forum?id=JiUTJJrkL4", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;2;4", "excitement": "3;4;4", "reproducibility": "3;5;5", "correctness": "3;4;4", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.333333333333333, "correctness_avg": 3.6666666666666665, "replies_avg": 9, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-7829-5561;0000-0002-7421-6213;;;0000-0002-2686-6887", "linkedin": ";;;;;", "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "University of Potsdam;Universidade de S\u00e3o Paulo", "aff_unique_dep": ";", "aff_unique_url": "https://www.uni-potsdam.de;https://www.usp.br", "aff_unique_abbr": "UP;USP", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0", "aff_country_unique": "Germany;Brazil" }, { "id": "Jk6LA0NGOU", "title": "Explicit Planning Helps Language Models in Logical Reasoning", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Language models have been shown to perform remarkably well on a wide range of natural language processing tasks. In this paper, we propose LEAP, a novel system that uses language models to perform multi-step logical reasoning and incorporates explicit planning into the inference procedure. Explicit planning enables the system to make more informed reasoning decisions at each step by looking ahead into their future effects. Moreover, we propose a training strategy that safeguards the planning process from being led astray by spurious features. Our full system significantly outperforms other competing methods on multiple standard datasets. 
When using small T5 models as its core selection and deduction components, our system performs competitively compared to GPT-3 despite having only about 1B parameters (i.e., 175 times smaller than GPT-3). When using GPT-3.5, it significantly outperforms chain-of-thought prompting on the challenging PrOntoQA dataset. We have conducted extensive empirical studies to demonstrate that explicit planning plays a crucial role in the system's performance.", "keywords": "logical reasoning;large language model;planning", "primary_area": "", "supplementary_material": "", "author": "Hongyu Zhao;Kangrui Wang;Mo Yu;Hongyuan Mei", "authorids": "~Hongyu_Zhao4;~Kangrui_Wang2;~Mo_Yu1;~Hongyuan_Mei1", "gender": "M;M;M;M", "homepage": "https://hzhao.ttic.edu/;https://jameskrw.github.io/;http://researcher.ibm.com/researcher/view.php?person=us-yum;http://www.cs.jhu.edu/~hmei/", "dblp": ";216/9159;32/7445.html;164/5576", "google_scholar": ";;vC8DssQAAAAJ;g_zaiVIAAAAJ", "or_profile": "~Hongyu_Zhao4;~Kangrui_Wang2;~Mo_Yu1;~Hongyuan_Mei1", "aff": "University of Chicago;;WeChat AI, Tencent;Toyota Technological Institute at Chicago", "aff_domain": "uchicago.edu;;tencent.com;ttic.edu", "position": "MS student;;Principal Researcher;Research Assistant Professor", "bibtex": "@inproceedings{\nzhao2023explicit,\ntitle={Explicit Planning Helps Language Models in Logical Reasoning},\nauthor={Hongyu Zhao and Kangrui Wang and Mo Yu and Hongyuan Mei},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=Jk6LA0NGOU}\n}", "github": "", "project": "", "reviewers": "661C;Mxjp;u2em", "site": "https://openreview.net/forum?id=Jk6LA0NGOU", "pdf_size": 0, "rating": "5;5;5", "confidence": "3;3;4", "excitement": "4;4;4", "reproducibility": "4;3;4", "correctness": "4;4;4", "rating_avg": 5.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 4.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;", "linkedin": ";wang-kangrui-8b9a37257/;;hongyuan-mei-57687858?trk=nav_responsive_tab_profile_pic", "aff_unique_index": "0;1;2", "aff_unique_norm": "University of Chicago;Tencent;Toyota Technological Institute at Chicago", "aff_unique_dep": ";WeChat AI;", "aff_unique_url": "https://www.uchicago.edu;https://www.tencent.com;https://www.tti-chicago.org", "aff_unique_abbr": "UChicago;Tencent;TTI Chicago", "aff_campus_unique_index": "1", "aff_campus_unique": ";Chicago", "aff_country_unique_index": "0;1;0", "aff_country_unique": "United States;China" }, { "id": "JnJsaXfVte", "title": "Architectural Sweet Spots for Modeling Human Label Variation by the Example of Argument Quality: It\u2019s Best to Relate Perspectives!", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Many annotation tasks in natural language processing are highly subjective in that there can be different valid and justified perspectives on what is a proper label for a given example.\nThis also applies to the judgment of argument quality, where the assignment of a single ground truth is often questionable.\nAt the same time, there are generally accepted concepts behind argumentation that form a common ground.\nTo best represent the interplay of individual and shared perspectives, we consider a continuum of approaches ranging from models that fully aggregate perspectives into a majority label to ``share nothing''-architectures in which each annotator is 
considered in isolation from all other annotators. \nIn between these extremes, inspired by models used in the field of recommender systems, we investigate the extent to which architectures that predict labels for single annotators but include layers that model the relations between different annotators are beneficial.\nBy means of two tasks of argument quality classification (argument concreteness and validity/novelty of conclusions), we show that recommender architectures increase the averaged annotator-individual F1-scores up to 43% over a majority-label model.\nOur findings indicate that approaches to subjectivity can benefit from relating individual perspectives.", "keywords": "argument quality;perspectivism;inter-annotator-disagreement;LLM;recommender;argument mining", "primary_area": "", "supplementary_material": "", "author": "Philipp Heinisch;Matthias Orlikowski;Julia Romberg;Philipp Cimiano", "authorids": "~Philipp_Heinisch1;~Matthias_Orlikowski1;~Julia_Romberg1;~Philipp_Cimiano1", "gender": "M;M;F;M", "homepage": "https://philippheinisch.de/;https://orlikow.ski;https://juliaromberg.github.io/;https://ekvv.uni-bielefeld.de/pers_publ/publ/PersonDetail.jsp?personId=15020699", "dblp": "294/1423;226/2136;180/3140;12/1983", "google_scholar": "https://scholar.google.de/citations?user=ooL1D6gAAAAJ;https://scholar.google.de/citations?user=hjdzJq8AAAAJ;T-EZRH8AAAAJ;https://scholar.google.com.tw/citations?user=ZyR3798AAAAJ", "or_profile": "~Philipp_Heinisch1;~Matthias_Orlikowski1;~Julia_Romberg1;~Philipp_Cimiano1", "aff": "Universit\u00e4t Bielefeld;Universit\u00e4t Bielefeld;Heinrich Heine University D\u00fcsseldorf ;Bielefeld University", "aff_domain": "uni-bielefeld.de;uni-bielefeld.de;hhu.de;uni-bielefeld.de", "position": "PhD student;PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nheinisch2023architectural,\ntitle={Architectural Sweet Spots for Modeling Human Label Variation by the Example of Argument Quality: It{\\textquoteright}s Best to Relate Perspectives!},\nauthor={Philipp Heinisch and Matthias Orlikowski and Julia Romberg and Philipp Cimiano},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=JnJsaXfVte}\n}", "github": "", "project": "", "reviewers": "kkvu;XbzC;QBW2", "site": "https://openreview.net/forum?id=JnJsaXfVte", "pdf_size": 0, "rating": "4;4;4", "confidence": "5;5;3", "excitement": "3;4;3", "reproducibility": "4;4;4", "correctness": "4;4;3", "rating_avg": 4.0, "confidence_avg": 4.333333333333333, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.0, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0009-0002-8079-5570;;0000-0003-0033-9963;0000-0002-4771-441X", "linkedin": ";matthias-orlikowski-1952b133a/;;", "aff_unique_index": "0;0;1;2", "aff_unique_norm": "Universit\u00e4t Bielefeld;Heinrich Heine University;Bielefeld University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.uni-bielefeld.de/;https://www.hhu.de;https://www.uni-bielefeld.de/", "aff_unique_abbr": "Uni Bielefeld;HHU;Uni Bielefeld", "aff_campus_unique_index": "1", "aff_campus_unique": ";D\u00fcsseldorf", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Germany" }, { "id": "Jo9P7hrDdy", "title": "SpEL: Structured Prediction for Entity Linking", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Entity linking is a prominent thread of research focused 
on structured data creation by linking spans of text to an ontology or knowledge source. We revisit the use of structured prediction for entity linking which classifies each individual input token as an entity, and aggregates the token predictions. Our system, called SpEL (Structured prediction for Entity Linking) is a state-of-the-art entity linking system that uses some new ideas to apply structured prediction to the task of entity linking including: two refined fine-tuning steps; a context sensitive prediction aggregation strategy; reduction of the size of the model's output vocabulary, and; we address a common problem in entity-linking systems where there is a training vs. inference tokenization mismatch. Our experiments show that we can outperform the state-of-the-art on the commonly used AIDA benchmark dataset for entity linking to Wikipedia. Our method is also very compute efficient in terms of number of parameters and speed of inference.", "keywords": "Structured Prediction;Entity Linking;AIDA dataset;AIDA/testc;SpEL", "primary_area": "", "supplementary_material": "", "author": "Hassan Shavarani;Anoop Sarkar", "authorids": "~Hassan_Shavarani1;~Anoop_Sarkar1", "gender": "M;M", "homepage": "https://shavarani.github.io/;http://anoopsarkar.github.io/", "dblp": "222/9910;s/AnoopSarkar", "google_scholar": "axBhCW0AAAAJ;https://scholar.google.ca/citations?user=KhJJchQAAAAJ", "or_profile": "~Hassan_Shavarani1;~Anoop_Sarkar1", "aff": "Simon Fraser University;Simon Fraser University", "aff_domain": "sfu.ca;sfu.ca", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\nshavarani2023spel,\ntitle={Sp{EL}: Structured Prediction for Entity Linking},\nauthor={Hassan Shavarani and Anoop Sarkar},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=Jo9P7hrDdy}\n}", "github": "", "project": "", "reviewers": "FVrb;8QvA;8Rzo", "site": "https://openreview.net/forum?id=Jo9P7hrDdy", "pdf_size": 0, "rating": "4;4;4", "confidence": "2;5;3", "excitement": "4;5;3", "reproducibility": "5;5;3", "correctness": "4;5;3", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 4.0, "reproducibility_avg": 4.333333333333333, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0009-0009-8992-2864;0000-0002-4795-9361", "linkedin": "shavarani/?originalSubdomain=ca;", "aff_unique_index": "0;0", "aff_unique_norm": "Simon Fraser University", "aff_unique_dep": "", "aff_unique_url": "https://www.sfu.ca", "aff_unique_abbr": "SFU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Canada" }, { "id": "JotVdrvFtJ", "title": "Collaborative Generative AI: Integrating GPT-k for Efficient Editing in Text-to-Image Generation", "track": "main", "status": "Short Main", "tldr": "", "abstract": "The field of text-to-image (T2I) generation has garnered significant attention both within the research community and among everyday users. Despite the advancements of T2I models, a common issue encountered by users is the need for repetitive editing of input prompts in order to receive a satisfactory image, which is time-consuming and labor-intensive. Given the demonstrated text generation power of large-scale language models, such as GPT-k, we investigate the potential of utilizing such models to improve the prompt editing process for T2I generation. 
We conduct a series of experiments to compare the common edits made by humans and GPT-k, evaluate the performance of GPT-k in prompting T2I, and examine factors that may influence this process. We found that GPT-k models focus more on inserting modifiers while humans tend to replace words and phrases, which includes changes to the subject matter. Experimental results show that GPT-k are more effective in adjusting modifiers rather than predicting spontaneous changes in the primary subject matters. Adopting the edit suggested by GPT-k models may reduce the percentage of remaining edits by 20-30%.", "keywords": "Text-to-image;LLM;Efficiency", "primary_area": "", "supplementary_material": "", "author": "Wanrong Zhu;Xinyi Wang;Yujie Lu;Tsu-Jui Fu;Xin Eric Wang;Miguel Eckstein;William Yang Wang", "authorids": "~Wanrong_Zhu1;~Xinyi_Wang2;~Yujie_Lu1;~Tsu-Jui_Fu2;~Xin_Eric_Wang2;~Miguel_Eckstein1;~William_Yang_Wang2", "gender": ";F;;M;M;M;", "homepage": ";https://wangxinyilinda.github.io/;https://yujielu10.github.io/;https://tsujuifu.github.io;https://eric-xw.github.io;https://psych.ucsb.edu/people/faculty/miguel-eckstein;", "dblp": ";;;218/5366.html;10/5630-61;56/975;", "google_scholar": ";3vvbplcAAAAJ;pcmr6GMAAAAJ;https://scholar.google.com.tw/citations?user=7QRDcC0AAAAJ;YjqluE0AAAAJ;G5dQztgAAAAJ;", "or_profile": "~Wanrong_Zhu1;~Xinyi_Wang2;~Yujie_Lu1;~Tsu-Jui_Fu2;~Xin_Eric_Wang2;~Miguel_Eckstein1;~William_Yang_Wang2", "aff": ";Microsoft;UC Santa Barbara;UC Santa Barbara;University of California, Santa Cruz;;", "aff_domain": ";microsoft.com;ucsb.edu;ucsb.edu;ucsc.edu;;", "position": ";Intern;PhD student;PhD student;Assistant Professor;;", "bibtex": "@inproceedings{\nzhu2023collaborative,\ntitle={Collaborative Generative {AI}: Integrating {GPT}-k for Efficient Editing in Text-to-Image Generation},\nauthor={Wanrong Zhu and Xinyi Wang and Yujie Lu and Tsu-Jui Fu and Xin Eric Wang and Miguel Eckstein and William Yang Wang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=JotVdrvFtJ}\n}", "github": "", "project": "", "reviewers": "zSe4;bKnd;AHM3", "site": "https://openreview.net/forum?id=JotVdrvFtJ", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;4", "excitement": "4;4;4", "reproducibility": "4;4;3", "correctness": "4;4;4", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 4.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 4.0, "replies_avg": 10, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;0000-0003-2605-5504;;", "linkedin": ";xinyi-wang-444385133/;;tsujuifu1996;;;", "aff_unique_index": "0;1;1;2", "aff_unique_norm": "Microsoft;University of California, Santa Barbara;University of California, Santa Cruz", "aff_unique_dep": "Microsoft Corporation;;", "aff_unique_url": "https://www.microsoft.com;https://www.ucsb.edu;https://www.ucsc.edu", "aff_unique_abbr": "Microsoft;UCSB;UCSC", "aff_campus_unique_index": "1;1;2", "aff_campus_unique": ";Santa Barbara;Santa Cruz", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "JrlSX4nHTv", "title": "Natural Response Generation for Chinese Reading Comprehension", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Machine reading comprehension (MRC) is an important area of conversation agents and draws a lot of attention. 
However, there is a notable limitation to current MRC benchmarks: The labeled answers are mostly either spans extracted from the target corpus or the choices of the given candidates, ignoring the natural aspect of high-quality responses. As a result,\nMRC models trained on these datasets can not generate human-like responses in real QA scenarios.\nTo this end, we construct a new dataset called \\textbf{Penguin} to promote the research of MRC, providing a training and test bed for natural response generation to real scenarios.\nConcretely, Penguin consists of 200k training data with high-quality fluent, and well-informed responses. Penguin is the first benchmark towards natural response generation in Chinese MRC on a relatively large scale. To address the challenges in Penguin, we develop two strong baselines: end-to-end and two-stage frameworks. Following that, we further design \\textit{Prompt-BART}: fine-tuning the pre-trained generative language models with a mixture of prefix prompts in Penguin. Extensive experiments validated the effectiveness of this design.", "keywords": "machine reading comprehension;dataset", "primary_area": "", "supplementary_material": "", "author": "Nuo Chen;Hongguang Li;Yinan Bao;Baoyuan Wang;Jia Li", "authorids": "~Nuo_Chen1;~Hongguang_Li2;~Yinan_Bao1;~Baoyuan_Wang3;~Jia_Li4", "gender": "M;;;M;M", "homepage": "https://jerrynchen.github.io/;;;;https://sites.google.com/view/lijia", "dblp": "135/5622-1;;;41/8869;23/6950-9", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;;;https://scholar.google.co.jp/citations?user=OWa5rOEAAAAJ;1gSbcYoAAAAJ", "or_profile": "~Nuo_Chen1;~Hongguang_Li2;~Yinan_Bao1;~Baoyuan_Wang3;~Jia_Li4", "aff": "Hong Kong University of Science and Technology;;;Xiaobing.ai;Hong Kong University of Science and Technology (Guangzhou)", "aff_domain": "hkust.edu;;;xiaobing.ai;ust.hk", "position": "PhD student;;;Researcher;Assistant Professor", "bibtex": "@inproceedings{\nchen2023natural,\ntitle={Natural Response Generation for Chinese Reading Comprehension},\nauthor={Nuo Chen and Hongguang Li and Yinan Bao and Baoyuan Wang and Jia Li},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=JrlSX4nHTv}\n}", "github": "", "project": "", "reviewers": "7YEK;qhwR;DxeM", "site": "https://openreview.net/forum?id=JrlSX4nHTv", "pdf_size": 0, "rating": "2;2;2", "confidence": "4;4;5", "excitement": "2;2;4", "reproducibility": "2;2;5", "correctness": "2;2;4", "rating_avg": 2.0, "confidence_avg": 4.333333333333333, "excitement_avg": 2.6666666666666665, "reproducibility_avg": 3.0, "correctness_avg": 2.6666666666666665, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;0000-0002-6362-4385", "linkedin": ";;;;", "aff_unique_index": "0;1;0", "aff_unique_norm": "Hong Kong University of Science and Technology;Xiaobing.AI", "aff_unique_dep": ";", "aff_unique_url": "https://www.ust.hk;https://www.xiaobing.ai", "aff_unique_abbr": "HKUST;Xiaobing.ai", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "Js80TDwMfY", "title": "Argument-based Detection and Classification of Fallacies in Political Debates", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Fallacies are arguments that employ faulty reasoning. 
Given their persuasive and seemingly valid nature, fallacious arguments are often used in political debates. Employing these misleading arguments in politics can have detrimental consequences for society, since they can lead to inaccurate conclusions and invalid inferences from the public opinion and the policymakers. \nAutomatically detecting and classifying fallacious arguments represents therefore a crucial challenge to limit the spread of misleading or manipulative claims and promote a more informed and healthier political discourse. \nOur contribution to address this challenging task is twofold. \nFirst, we extend the ElecDeb60To16 dataset of U.S. presidential debates annotated with fallacious arguments, by incorporating the most recent Trump-Biden presidential debate. We include updated token-level annotations, incorporating argumentative components (i.e., claims and premises), the relations between these components (i.e., support and attack), and six categories of fallacious arguments (i.e., Ad Hominem, Appeal to Authority, Appeal to Emotion, False Cause, Slippery Slope, and Slogans). \nSecond, we perform the twofold task of fallacious argument detection and classification by defining neural network architectures based on Transformers models, combining text, argumentative features, and engineered features. \nOur results show the advantages of complementing transformer-generated text representations with non-text features.", "keywords": "Fallacy Detection;NLP;Token Classification;Political Debates;Machine Learning;Transformers;Argumantation", "primary_area": "", "supplementary_material": "", "author": "Pierpaolo Goffredo;Mariana Chaves Espinoza;Serena Villata;Elena Cabrio", "authorids": "~Pierpaolo_Goffredo1;~Mariana_Chaves_Espinoza1;~Serena_Villata1;~Elena_Cabrio1", "gender": "M;F;F;F", "homepage": ";https://m-chaves.github.io/;http://www.i3s.unice.fr/~villata/;https://www-sop.inria.fr/members/Elena.Cabrio/", "dblp": "325/0952.html;;84/5009;35/7561", "google_scholar": "9jQ3S3AAAAAJ;;;hEP0YzwAAAAJ", "or_profile": "~Pierpaolo_Goffredo1;~Mariana_Chaves_Espinoza1;~Serena_Villata1;~Elena_Cabrio1", "aff": "Universit\u00e9 de Nice-Sophia Antipolis;INRIA;CNRS;Universit\u00e9 C\u00f4te d'Azur", "aff_domain": "unice.fr;inria.fr;cnrs.fr;univ-cotedazur.fr", "position": "PhD student;Researcher;Researcher;Full Professor", "bibtex": "@inproceedings{\ngoffredo2023argumentbased,\ntitle={Argument-based Detection and Classification of Fallacies in Political Debates},\nauthor={Pierpaolo Goffredo and Mariana Chaves Espinoza and Serena Villata and Elena Cabrio},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=Js80TDwMfY}\n}", "github": "", "project": "", "reviewers": "fe3c;YLEV;CQr9", "site": "https://openreview.net/forum?id=Js80TDwMfY", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;4;4", "excitement": "4;3;4", "reproducibility": "4;1;3", "correctness": "4;4;4", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 2.6666666666666665, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0001-8685-694X;0009-0001-8086-9034;;0000-0001-7124-8300", "linkedin": "pierpaologoffredo/;mariana-chaves-615639183/;;", "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Universit\u00e9 de Nice-Sophia Antipolis;INRIA;Centre National de la Recherche Scientifique;Universit\u00e9 
C\u00f4te d'Azur", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.unice.fr;https://www.inria.fr;https://www.cnrs.fr;https://www.univ-cotedazur.fr", "aff_unique_abbr": "UNICA;INRIA;CNRS;UCA", "aff_campus_unique_index": "0", "aff_campus_unique": "Sophia Antipolis;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "France" }, { "id": "JwbEwhL3VP", "title": "Boosting Inference Efficiency: Unleashing the Power of Parameter-Shared Pre-trained Language Models", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Parameter-shared pre-trained language models (PLMs) have emerged as a successful approach in resource-constrained environments, enabling substantial reductions in model storage and memory costs without significant performance compromise. However, it is important to note that parameter sharing does not alleviate computational burdens associated with inference, thus impeding its practicality in situations characterized by limited stringent latency requirements or computational resources. Building upon neural ordinary differential equations (ODEs), we introduce a straightforward technique to enhance the inference efficiency of parameter-shared PLMs. Additionally, we propose a simple pre-training technique that leads to fully or partially shared models capable of achieving even greater inference acceleration. The experimental results demonstrate the effectiveness of our methods on both autoregressive and autoencoding PLMs, providing novel insights into more efficient utilization of parameter-shared models in resource-constrained settings.", "keywords": "pre-trained language model;parameter sharing;inference acceleration", "primary_area": "", "supplementary_material": "", "author": "Weize Chen;Xiaoyue Xu;Xu Han;Yankai Lin;Ruobing Xie;Zhiyuan Liu;Maosong Sun;Jie Zhou", "authorids": "~Weize_Chen1;~Xiaoyue_Xu1;~Xu_Han2;~Yankai_Lin1;~Ruobing_Xie2;~Zhiyuan_Liu1;~Maosong_Sun1;~Jie_Zhou8", "gender": "M;;;M;M;M;M;M", "homepage": "https://chenweize1998.github.io/;https://xiaoyue2002.github.io;;https://linyankai.github.io/;http://nlp.csai.tsinghua.edu.cn/~xrb/;http://nlp.csai.tsinghua.edu.cn/~lzy;https://www.cs.tsinghua.edu.cn/csen/info/1312/4394.htm;", "dblp": "245/7488;;;161/0001.html;178/8590;53/3245-1;95/3291-1;00/5012-16", "google_scholar": "0CoGHtIAAAAJ;;;https://scholar.google.com.hk/citations?user=j8K1FqEAAAAJ;j3OX8KUAAAAJ;dT0v5u0AAAAJ;https://scholar.google.com.tw/citations?user=zIgT0HMAAAAJ;https://scholar.google.com.hk/citations?user=OijxQCMAAAAJ", "or_profile": "~Weize_Chen1;~Xiaoyue_Xu1;~Xu_Han2;~Yankai_Lin1;~Ruobing_Xie2;~Zhiyuan_Liu1;~Maosong_Sun1;~Jie_Zhou8", "aff": "Tsinghua University;Tsinghua University;;Renmin University of China;Tencent;Tsinghua University;Tsinghua University;WeChat AI, Tencent Inc.", "aff_domain": "tsinghua.edu.cn;tsinghua.edu.cn;;ruc.edu.cn;tencent.com;tsinghua.edu.cn;tsinghua.edu.cn;tencent.com", "position": "PhD student;Undergrad student;;Assistant Professor;Senior researcher;Associate Professor;Full Professor;Principal Researcher", "bibtex": "@inproceedings{\nchen2023boosting,\ntitle={Boosting Inference Efficiency: Unleashing the Power of Parameter-Shared Pre-trained Language Models},\nauthor={Weize Chen and Xiaoyue Xu and Xu Han and Yankai Lin and Ruobing Xie and Zhiyuan Liu and Maosong Sun and Jie Zhou},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=JwbEwhL3VP}\n}", "github": "", "project": "", "reviewers": "y9YL;T3zw;k26S", 
"site": "https://openreview.net/forum?id=JwbEwhL3VP", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;3;3", "excitement": "3;3;4", "reproducibility": "2;3;4", "correctness": "2;3;4", "rating_avg": 3.0, "confidence_avg": 3.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.0, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 8, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0000-0002-9182-8158;0000-0003-3170-5647;0000-0002-7709-2543;;0000-0002-5899-5165", "linkedin": ";;;;;;;", "aff_unique_index": "0;0;1;2;0;0;2", "aff_unique_norm": "Tsinghua University;Renmin University of China;Tencent", "aff_unique_dep": ";;Tencent Holdings Limited", "aff_unique_url": "https://www.tsinghua.edu.cn;http://www.ruc.edu.cn;https://www.tencent.com", "aff_unique_abbr": "THU;RUC;Tencent", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "JyvycLG00G", "title": "EMO-KNOW: A Large Scale Dataset on Emotion-Cause", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "Emotion-Cause analysis has attracted the attention of researchers in recent years. However, most existing datasets are limited in size and number of emotion categories. They often focus on extracting parts of the document that contain the emotion cause and fail to provide more abstractive, generalizable root cause.\nTo bridge this gap, we introduce a large-scale dataset of emotion causes, derived from 9.8 million cleaned tweets over 15 years.\nWe describe our curation process, which includes a comprehensive pipeline for data gathering, cleaning, labeling, and validation, ensuring the dataset's reliability and richness. We extract emotion labels and provide abstractive summarization of the events causing emotions. \nThe final dataset comprises over 700,000 tweets with corresponding emotion-cause pairs spanning 48 emotion classes, validated by human evaluators. 
\nThe novelty of our dataset stems from its broad spectrum of emotion classes and the abstractive emotion cause that facilitates the development of an emotion-cause knowledge graph for nuanced reasoning.\nOur dataset will enable the design of emotion-aware systems that account for the diverse emotional responses of different people for the same event.", "keywords": "Dataset;Emotion Analysis;Emotion-Cause;Large-scale", "primary_area": "", "supplementary_material": "", "author": "Mia Huong Nguyen;Yasith Samaradivakara;Prasanth Sasikumar;Chitralekha Gupta;Suranga Chandima Nanayakkara", "authorids": "~Mia_Huong_Nguyen1;~Yasith_Samaradivakara1;~Prasanth_Sasikumar1;~Chitralekha_Gupta2;~Suranga_Chandima_Nanayakkara1", "gender": "F;M;;F;M", "homepage": "https://comp.nus.edu.sg/~huongntm;;https://www.prasanthsasikumar.com/;https://chitralekha18.github.io/home/;https://suranga.info/", "dblp": ";;;116/5545;01/9308.html", "google_scholar": ";7E6HOxcAAAAJ;85RKyZMAAAAJ;NFi7pkcAAAAJ;G0JugenpCgwC", "or_profile": "~Mia_Huong_Nguyen1;~Yasith_Samaradivakara1;~Prasanth_Sasikumar1;~Chitralekha_Gupta2;~Suranga_Chandima_Nanayakkara1", "aff": "National University of Singapore;University of Colombo;University of Auckland;National University of Singapore;National University of Singapore", "aff_domain": "u.nus.edu;ucsc.ac.lk;auckland.ac.nz;nus.edu.sg;nus.edu.sg", "position": "PhD student;Undergrad student;PhD student;Postdoc;Associate Professor", "bibtex": "@inproceedings{\nnguyen2023emoknow,\ntitle={{EMO}-{KNOW}: A Large Scale Dataset on Emotion-Cause},\nauthor={Mia Huong Nguyen and Yasith Samaradivakara and Prasanth Sasikumar and Chitralekha Gupta and Suranga Chandima Nanayakkara},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=JyvycLG00G}\n}", "github": "", "project": "", "reviewers": "NfoM;mpV3;XtNV", "site": "https://openreview.net/forum?id=JyvycLG00G", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;4", "excitement": "3;3;2", "reproducibility": "3;3;1", "correctness": "3;2;2", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 2.6666666666666665, "reproducibility_avg": 2.3333333333333335, "correctness_avg": 2.3333333333333335, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0009-0007-3256-8118;0009-0006-1142-5030;0000-0002-5844-9164;0000-0003-1350-9095;0000-0001-7441-5493", "linkedin": ";yasith-samaradivakara-a2b72919b/;prasanth-sasikumar/;chitralekha-gupta-04213546/;assoc-prof-suranga-nanayakkara-142b7527/", "aff_unique_index": "0;1;2;0;0", "aff_unique_norm": "National University of Singapore;University of Colombo;University of Auckland", "aff_unique_dep": ";;", "aff_unique_url": "https://www.nus.edu.sg;https://www.colu.ac.lk;https://www.auckland.ac.nz", "aff_unique_abbr": "NUS;UC;UoA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2;0;0", "aff_country_unique": "Singapore;Sri Lanka;New Zealand" }, { "id": "K1ih2El1IO", "title": "A Predictive Factor Analysis of Social Biases and Task-Performance in Pretrained Masked Language Models", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Various types of social biases have been reported with pretrained Masked Language Models (MLMs) in prior work. 
However, multiple underlying factors are associated with an MLM such as its model size, size of the training data, training objectives, the domain from which pretraining data is sampled, tokenization, and languages present in the pretrained corpora, to name a few. It remains unclear as to which of those factors influence social biases that are learned by MLMs. To study the relationship between model factors and the social biases learned by an MLM, as well as the downstream task performance of the model, we conduct a comprehensive study over 39 pretrained MLMs covering different model sizes, training objectives, tokenization methods, training data domains and languages. Our results shed light on important factors often neglected in prior literature, such as tokenization or model objectives.", "keywords": "mask language models;social bias;task performance;model size;model type;training corpora;tokenisation;language", "primary_area": "", "supplementary_material": "", "author": "Yi Zhou;Jose Camacho-Collados;Danushka Bollegala", "authorids": "~Yi_Zhou14;~Jose_Camacho-Collados1;~Danushka_Bollegala1", "gender": "F;M;M", "homepage": "https://aclanthology.org/people/y/yi-zhou/;http://www.josecamachocollados.com;https://danushka.net", "dblp": "01/1901-19;165/0790;https://dblp.uni-trier.de/pers/hd/b/Bollegala:Danushka", "google_scholar": "3BdddIMAAAAJ;NP4KdQQAAAAJ;https://scholar.google.co.uk/citations?user=kLqCYLMAAAAJ", "or_profile": "~Yi_Zhou14;~Jose_Camacho-Collados1;~Danushka_Bollegala1", "aff": "Cardiff University;Cardiff University;University of Liverpool", "aff_domain": "cardiff.ac.uk;cardiff.ac.uk;liverpool.ac.uk", "position": "Postdoc;Full Professor;Professor", "bibtex": "@inproceedings{\nzhou2023a,\ntitle={A Predictive Factor Analysis of Social Biases and Task-Performance in Pretrained Masked Language Models},\nauthor={Yi Zhou and Jose Camacho-Collados and Danushka Bollegala},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=K1ih2El1IO}\n}", "github": "", "project": "", "reviewers": "Swdc;QwCm;8eDx", "site": "https://openreview.net/forum?id=K1ih2El1IO", "pdf_size": 0, "rating": "2;2;2", "confidence": "5;3;2", "excitement": "4;4;3", "reproducibility": "4;2;3", "correctness": "4;3;3", "rating_avg": 2.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0001-7009-8515;;0000-0003-4476-7003", "linkedin": "yi-zhou-867578210/;;danushka-bollegala-6a636516/?originalSubdomain=uk", "aff_unique_index": "0;0;1", "aff_unique_norm": "Cardiff University;University of Liverpool", "aff_unique_dep": ";", "aff_unique_url": "https://www.cardiff.ac.uk;https://www.liverpool.ac.uk", "aff_unique_abbr": "Cardiff;Liv Uni", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United Kingdom" }, { "id": "K2CrJIcFqg", "title": "Grounded and well-rounded: a methodological approach to the study of cross-modal and cross-lingual grounding", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Grounding has been argued to be a crucial component towards the development of more complete and truly semantically competent artificial intelligence systems. 
Literature has divided into two camps: While some argue that grounding allows for qualitatively different generalizations, others believe it can be compensated by mono-modal data quantity. Limited empirical evidence has emerged for or against either position, which we argue is due to the methodological challenges that come with studying grounding and its effects on NLP systems.\n\nIn this paper, we establish a methodological framework for studying what the effects are---if any---of providing models with richer input sources than text-only. The crux of it lies in the construction of comparable samples of populations of models trained on different input modalities, so that we can tease apart the qualitative effects of different input sources from quantifiable model performances. Experiments using this framework reveal qualitative differences in model behavior between cross-modally grounded, cross-lingually grounded, and ungrounded models, which we measure both at a global dataset level as well as for specific word representations, depending on how concrete their semantics is.", "keywords": "grounding;crosslingual grounding;multimodality", "primary_area": "", "supplementary_material": "", "author": "Timothee Mickus;Elaine Zosa;Denis Paperno", "authorids": "~Timothee_Mickus1;~Elaine_Zosa1;~Denis_Paperno1", "gender": "M;;", "homepage": "https://timotheemickus.github.io/;https://ezosa.github.io/;https://sites.google.com/site/denispaperno/", "dblp": "254/1236;256/8750;148/8948", "google_scholar": "https://scholar.google.com/citations?hl=en;WmG3doYAAAAJ;https://scholar.google.com/citations?hl=en", "or_profile": "~Timothee_Mickus1;~Elaine_Zosa1;~Denis_Paperno1", "aff": "University of Helsinki;;CNRS", "aff_domain": "helsinki.fi;;cnrs.fr", "position": "Postdoc;;Researcher", "bibtex": "@inproceedings{\nmickus2023grounded,\ntitle={Grounded and well-rounded: a methodological approach to the study of cross-modal and cross-lingual grounding},\nauthor={Timothee Mickus and Elaine Zosa and Denis Paperno},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=K2CrJIcFqg}\n}", "github": "", "project": "", "reviewers": "yRLh;CJ8b;dJYX", "site": "https://openreview.net/forum?id=K2CrJIcFqg", "pdf_size": 0, "rating": "3;3;3", "confidence": "2;2;1", "excitement": "3;3;3", "reproducibility": "4;3;4", "correctness": "4;3;3", "rating_avg": 3.0, "confidence_avg": 1.6666666666666667, "excitement_avg": 3.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 10, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0001-9538-7209;0000-0003-2482-0663;", "linkedin": "timothee-mickus-0844b113a/;;", "aff_unique_index": "0;1", "aff_unique_norm": "University of Helsinki;Centre National de la Recherche Scientifique", "aff_unique_dep": ";", "aff_unique_url": "https://www.helsinki.fi;https://www.cnrs.fr", "aff_unique_abbr": "UH;CNRS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "Finland;France" }, { "id": "K35sqjeg5J", "title": "Semi-supervised multimodal coreference resolution in image narrations", "track": "main", "status": "Long Main", "tldr": "", "abstract": "In this paper, we study multimodal coreference resolution, specifically where a longer descriptive text, i.e., a narration is paired with an image. 
This poses significant challenges due to fine-grained image-text alignment, inherent ambiguity present in narrative language, and unavailability of large annotated training sets. To tackle these challenges, we present a data efficient semi-supervised approach that utilizes image-narration pairs to resolve coreferences and narrative grounding in a multimodal context. Our approach incorporates losses for both labeled and unlabeled data within a cross-modal framework. Our evaluation shows that the proposed approach outperforms strong baselines both quantitatively and qualitatively, for the tasks of coreference resolution and narrative grounding.", "keywords": "Coreference Resolution;Vision Language Understanding;Narrative Grounding;Semi-Supervised Learning", "primary_area": "", "supplementary_material": "", "author": "Arushi Goel;Basura Fernando;Frank Keller;Hakan Bilen", "authorids": "~Arushi_Goel2;~Basura_Fernando1;~Frank_Keller1;~Hakan_Bilen1", "gender": "F;M;M;M", "homepage": "https://goelarushi.github.io/;https://basurafernando.github.io/;https://homepages.inf.ed.ac.uk/keller/;http://homepages.inf.ed.ac.uk/hbilen/", "dblp": ";01/9558;30/4872;97/2993", "google_scholar": "tj08PZcAAAAJ;https://scholar.google.com.au/citations?user=GyvseMkAAAAJ;https://scholar.google.co.uk/citations?user=-lbtnAgAAAAJ;PtBtfawAAAAJ", "or_profile": "~Arushi_Goel2;~Basura_Fernando1;~Frank_Keller1;~Hakan_Bilen1", "aff": "University of Edinburgh;A*STAR;University of Edinburgh;University of Edinburgh", "aff_domain": "ed.ac.uk;astar.edu.sg;ed.ac.uk;ed.ac.uk", "position": "PhD student;Principal Researcher;Full Professor;Associate Professor", "bibtex": "@inproceedings{\ngoel2023semisupervised,\ntitle={Semi-supervised multimodal coreference resolution in image narrations},\nauthor={Arushi Goel and Basura Fernando and Frank Keller and Hakan Bilen},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=K35sqjeg5J}\n}", "github": "", "project": "", "reviewers": "B6pm;X5uF;xjtT", "site": "https://openreview.net/forum?id=K35sqjeg5J", "pdf_size": 0, "rating": "5;5;5", "confidence": "3;4;3", "excitement": "4;4;4", "reproducibility": "4;5;2", "correctness": "4;4;4", "rating_avg": 5.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 4.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-6920-9916;0000-0002-8242-4362;0000-0002-6947-6918", "linkedin": ";;;", "aff_unique_index": "0;1;0;0", "aff_unique_norm": "University of Edinburgh;Agency for Science, Technology and Research", "aff_unique_dep": ";", "aff_unique_url": "https://www.ed.ac.uk;https://www.a-star.edu.sg", "aff_unique_abbr": "Edinburgh;A*STAR", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "United Kingdom;Singapore" }, { "id": "K5DBkivtyO", "title": "Diffusion Language Model with Query-Document Relevance for Query-Focused Summarization", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Query-Focused Summarization (QFS) aims to generate summaries from source documents that can answer specific queries. \nAlthough the QFS task has gained increasing attention recently, its development is constrained by the fact that mainstream QFS models are BART variants, which are autoregressive and suffer from long-term dependencies and exposure bias. 
\nTo address these problems, we adopt a diffusion language model that performs well in non-autoregressive scenarios to effectively resolve issues related to autoregressive methods.\nHowever, QFS requires guidance from queries to generate adequate summaries, while diffusion language models have limited sensitivity to queries. \nIn this paper, we propose QFS-DLM, a non-autoregressive diffusion language model that incorporates query-document fragment relevance and query-document global relevance to enhance the adaptability of QFS tasks.\nFirstly, we extract key fragments from documents based on queries and assign higher weights to them, thereby emphasizing crucial and continuous information within the document.\nSecondly, we calculate global relevance scores between queries and documents, and then integrate these scores into the model's loss function, enabling the model to prefer high-quality data and distance itself from low-quality data.\nOverall, our method achieves state-of-the-art performance on Debatepedia and PubMedQA datasets in ROUGE scores, GPT-4, and human evaluations.", "keywords": "Query-Focused Summarization;Diffusion Language Model;Query-Document Relevance", "primary_area": "", "supplementary_material": "", "author": "Shaoyao Huang;Luozheng Qin;Ziqiang Cao", "authorids": "~Shaoyao_Huang2;~Luozheng_Qin1;~Ziqiang_Cao2", "gender": "M;M;M", "homepage": "https://github.com/yhyhhhhh;https://github.com/Fr0zenCrane;", "dblp": ";362/8707;148/4447", "google_scholar": "https://scholar.google.cz/citations?hl=zh-CN;;https://scholar.google.com/citations?hl=zh-CN", "or_profile": "~Shaoyao_Huang2;~Luozheng_Qin1;~Ziqiang_Cao2", "aff": "Soochow University;Soochow University;Soochow University, China", "aff_domain": "suda.edu.cn;suda.edu.cn;suda.edu.cn", "position": "MS student;MS student;Associate Professor", "bibtex": "@inproceedings{\nhuang2023diffusion,\ntitle={Diffusion Language Model with Query-Document Relevance for Query-Focused Summarization},\nauthor={Shaoyao Huang and Luozheng Qin and Ziqiang Cao},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=K5DBkivtyO}\n}", "github": "", "project": "", "reviewers": "SFJ4;ds5i;Feux", "site": "https://openreview.net/forum?id=K5DBkivtyO", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;3;4", "excitement": "3;4;3", "reproducibility": "5;4;2", "correctness": "4;4;2", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0009-0002-3067-4316;0000-0002-1077-9033", "linkedin": ";;", "aff_unique_index": "0;0;0", "aff_unique_norm": "Soochow University", "aff_unique_dep": "", "aff_unique_url": "https://www.soochow.edu.cn", "aff_unique_abbr": "Soochow U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "K5o8oDa0Z0", "title": "Chain-of-Thought Reasoning in Tabular Language Models", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Tabular mathematical reasoning task requires models to perform multi-step operations including information look-up and numerical calculation, based on heterogeneous data from tables and questions. 
Existing solutions tend to extend chain-of-thought (CoT) reasoning into powerful large language models (LLMs) to promote multi-hop mathematical reasoning. However, such LLM-based approaches are not a viable solution in the scenario of privatization deployment or limited resources. To address this problem, we revisit small-scale tabular language models (TaLMs) and extend chain-of-thought reasoning into TaLMs for the first time. Specifically, we propose a novel framework, TaCo, which coordinates two TaLMs responsible for CoT generation and answer inference, respectively. Besides, our framework can be combined with an external calculator to enhance accurate numerical calculation. On the TABMWP dataset, TaCo outperforms the state-of-the-art ChatGPT by 9.55\\% (82.60\\%$\\rightarrow$92.15\\% in accuracy) with far fewer parameters (0.8B). The code will be released along with the paper.", "keywords": "Tabular mathematical reasoning;Chain-of-thought reasoning;Tabular language models", "primary_area": "", "supplementary_material": "", "author": "Mingyu Zheng;Hao Yang;Wenbin Jiang;Zheng Lin;Yajuan Lyu;Qiaoqiao She;Weiping Wang", "authorids": "~Mingyu_Zheng3;~Hao_Yang15;~Wenbin_Jiang3;~Zheng_Lin5;~Yajuan_Lyu1;~Qiaoqiao_She1;~Weiping_Wang4", "gender": "M;M;F;F;M;M;F", "homepage": "https://github.com/YoungHector;;https://scholar.google.com/citations?user=vLT3OXgAAAAJ&hl=en;;https://teacher.ucas.ac.cn/~0012246;https://github.com/SpursGoZmy;http://people.ucas.edu.cn/~linzheng", "dblp": "230/9970-4;96/5583-2;190/7920;209/9570;72/4134-5.html;01/8700;51/3740-1.html", "google_scholar": ";;vLT3OXgAAAAJ;;zH_wmdwAAAAJ;;", "or_profile": "~Hao_Yang15;~Wenbin_Jiang3;~Yajuan_Lyu1;~Qiaoqiao_She1;~Weiping_Wang4;~mingyu_Zheng1;~zheng_Lin4", "aff": ";Baidu;;;IIE;University of Chinese Academy of Sciences;Institute of Information Engineering, Chinese Academy of Sciences", "aff_domain": ";baidu.com;;;iie.ac.cn;ucas.edu.cn;iie.ac.cn", "position": ";Researcher;;;Full Professor;China;Full Professor", "bibtex": "@inproceedings{\nzheng2023chainofthought,\ntitle={Chain-of-Thought Reasoning in Tabular Language Models},\nauthor={Mingyu Zheng and Hao Yang and Wenbin Jiang and Zheng Lin and Yajuan Lyu and Qiaoqiao She and Weiping Wang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=K5o8oDa0Z0}\n}", "github": "", "project": "", "reviewers": "Z9oN;xSdW;UoLv", "site": "https://openreview.net/forum?id=K5o8oDa0Z0", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;3", "excitement": "3;4;3", "reproducibility": "4;5;3", "correctness": "3;4;3", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;0000-0002-8618-4992;0000-0002-6150-3790;0000-0002-8432-1658", "linkedin": ";;;;;;", "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Baidu;Institute of Industrial Engineers;University of Chinese Academy of Sciences;Chinese Academy of Sciences", "aff_unique_dep": "Baidu, Inc.;;;Institute of Information Engineering", "aff_unique_url": "https://www.baidu.com;https://www.iie.org;http://www.ucas.ac.cn;http://www.cas.cn", "aff_unique_abbr": "Baidu;IIE;UCAS;CAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "China;United States" }, { "id": "K6KcA4ODql", "title": 
"Improving Bias Mitigation through Bias Experts in Natural Language Understanding", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Biases in the dataset often enable the model to achieve high performance on in-distribution data, while poorly performing on out-of-distribution data. To mitigate the detrimental effect of the bias on the networks, previous works have proposed debiasing methods that down-weight the biased examples identified by an auxiliary model, which is trained with explicit bias labels. However, finding a type of bias in datasets is a costly process. Therefore, recent studies have attempted to make the auxiliary model biased without the guidance (or annotation) of bias labels, by constraining the model's training environment or the capability of the model itself. Despite the promising debiasing results of recent works, the multi-class learning objective, which has been naively used to train the auxiliary model, may harm the bias mitigation effect due to its regularization effect and competitive nature across classes. As an alternative, we propose a new debiasing framework that introduces binary classifiers between the auxiliary model and the main model, coined bias experts. Specifically, each bias expert is trained on a binary classification task derived from the multi-class classification task via the One-vs-Rest approach. Experimental results demonstrate that our proposed strategy improves the bias identification ability of the auxiliary model. Consequently, our debiased model consistently outperforms the state-of-the-art on various challenge datasets.", "keywords": "Debiasing;natural language understanding;spurious correlation", "primary_area": "", "supplementary_material": "", "author": "Eojin Jeon;Mingyu Lee;Juhyeong Park;Yeachan Kim;Wing-Lam Mok;SangKeun Lee", "authorids": "~Eojin_Jeon1;~Mingyu_Lee1;~Juhyeong_Park1;~Yeachan_Kim3;~Wing-Lam_Mok1;~SangKeun_Lee1", "gender": "M;M;;M;;M", "homepage": "http://xai.korea.ac.kr/;https://sites.google.com/view/mingyulee92/;;https://sites.google.com/view/yeachan/;;http://dilab.korea.ac.kr", "dblp": "339/2527;;;224/6085;;73/3458-1", "google_scholar": "https://scholar.google.com/citations?view_op=list_works;d7FBdkQAAAAJ;;zyOyBzwAAAAJ;;BGSUpLgAAAAJ", "or_profile": "~Eojin_Jeon1;~Mingyu_Lee1;~Juhyeong_Park1;~Yeachan_Kim3;~Wing-Lam_Mok1;~SangKeun_Lee1", "aff": "Korea University;Korea University;Korea University;Korea University;;Korea University", "aff_domain": "korea.ac.kr;korea.ac.kr;korea.ac.kr;korea.ac.kr;;korea.ac.kr", "position": "PhD student;PhD student;MS student;PhD student;;Full Professor", "bibtex": "@inproceedings{\njeon2023improving,\ntitle={Improving Bias Mitigation through Bias Experts in Natural Language Understanding},\nauthor={Eojin Jeon and Mingyu Lee and Juhyeong Park and Yeachan Kim and Wing-Lam Mok and SangKeun Lee},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=K6KcA4ODql}\n}", "github": "", "project": "", "reviewers": "R64f;Vam3;JToF", "site": "https://openreview.net/forum?id=K6KcA4ODql", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;2", "excitement": "3;3;4", "reproducibility": "3;4;5", "correctness": "3;4;4", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.0, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": 
";;;;;0000-0002-6249-8217", "linkedin": ";mingyu-lee-329338197/;juhyeong-park-10a18526b;yeachan-kim-8719281aa/;;", "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Korea University", "aff_unique_dep": "", "aff_unique_url": "https://www.korea.ac.kr", "aff_unique_abbr": "KU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "South Korea" }, { "id": "K7p2SnqFoN", "title": "Toward Human Readable Prompt Tuning: Kubrick\u2019s The Shining is a good movie, and a good prompt too?", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Large language models can perform downstream tasks in a zero-shot fashion, given natural language prompts that specify the desired behavior. Such prompts are typically hand engineered, but can also be learned with gradient-based methods from labeled data. However, it is underexplored what factors make the prompts effective, especially when the prompts are in natural language. In this paper, we investigate common attributes shared by effective prompts in classification problems. We first propose a human readable prompt tuning method (FluentPrompt) based on Langevin dynamics that incorporates a fluency constraint to find a distribution of effective and fluent prompts. Our analysis reveals that effective prompts are topically related to the task domain and calibrate the prior probability of output labels. Based on these findings, we also propose a method for generating prompts using only unlabeled data, outperforming strong baselines by an average of 7.0% accuracy across three tasks.", "keywords": "Prompt tuning;Analysis;Interpretability", "primary_area": "", "supplementary_material": "", "author": "Weijia Shi;Xiaochuang Han;Hila Gonen;Ari Holtzman;Yulia Tsvetkov;Luke Zettlemoyer", "authorids": "~Weijia_Shi1;~Xiaochuang_Han1;~Hila_Gonen1;~Ari_Holtzman1;~Yulia_Tsvetkov1;~Luke_Zettlemoyer1", "gender": ";M;;M;F;M", "homepage": "https://weijiashi.notion.site/;https://xhan77.github.io/;https://gonenhila.github.io/;http://ariholtzman.com;https://homes.cs.washington.edu/~yuliats/;https://www.cs.washington.edu/people/faculty/lsz/", "dblp": "132/80601;216/6755;167/5312;https://dblp.uni-trier.de/pers/hd/h/Holtzman:Ari;75/8157;21/6793", "google_scholar": "https://scholar.google.com/citations?hl=en;GamSVF0AAAAJ;URThmtMAAAAJ;https://scholar.google.com/citations?authuser=2;SEDPkrsAAAAJ;https://scholar.google.com.tw/citations?user=UjpbO6IAAAAJ", "or_profile": "~Weijia_Shi1;~Xiaochuang_Han1;~Hila_Gonen1;~Ari_Holtzman1;~Yulia_Tsvetkov1;~Luke_Zettlemoyer1", "aff": "University of Washington, Seattle;Department of Computer Science, University of Washington;Meta Facebook;Department of Computer Science, University of Washington;Department of Computer Science, University of Washington;Meta", "aff_domain": "uw.edu;cs.washington.edu;facebook.com;cs.was;cs.washington.edu;meta.com", "position": "PhD student;PhD student;Postdoc;PhD student;Assistant Professor;Researcher", "bibtex": "@inproceedings{\nshi2023toward,\ntitle={Toward Human Readable Prompt Tuning: Kubrick{\\textquoteright}s The Shining is a good movie, and a good prompt too?},\nauthor={Weijia Shi and Xiaochuang Han and Hila Gonen and Ari Holtzman and Yulia Tsvetkov and Luke Zettlemoyer},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=K7p2SnqFoN}\n}", "github": "", "project": "", "reviewers": "ceBW;mZkP;xFvG;oTX2;Uwpm;RhgP;HPcZ", "site": 
"https://openreview.net/forum?id=K7p2SnqFoN", "pdf_size": 0, "rating": "4;4;4;4;4;4;4", "confidence": "3;3;3;2;3;2;3", "excitement": "4;3;4;4;4;3;3", "reproducibility": "3;4;3;3;4;2;2", "correctness": "4;3;4;5;3;2;3", "rating_avg": 4.0, "confidence_avg": 2.7142857142857144, "excitement_avg": 3.5714285714285716, "reproducibility_avg": 3.0, "correctness_avg": 3.4285714285714284, "replies_avg": 23, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-3200-0000-0011;;;;0000-0002-4634-7128;", "linkedin": "weijia-shi-773768112;;;;;luke-zettlemoyer-a0109b226/", "aff_unique_index": "0;0;1;0;0;1", "aff_unique_norm": "University of Washington;Meta", "aff_unique_dep": ";Meta Platforms, Inc.", "aff_unique_url": "https://www.washington.edu;https://meta.com", "aff_unique_abbr": "UW;Meta", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Seattle;", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "K8ixbJPkMQ", "title": "TaskWeb: Selecting Better Source Tasks for Multi-task NLP", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Recent work in NLP has shown promising results in training models on large amounts of tasks to achieve better generalization. However, it is not well-understood how tasks are related, and how helpful training tasks can be chosen for a new task. In this work, we investigate whether knowing task relationships via pairwise task transfer improves choosing one or more source tasks that help to learn a new target task. We provide TaskWeb, a large-scale benchmark of pairwise task transfers for 22 NLP tasks using three different model types, sizes, and adaptation methods, spanning about 25,000 experiments. Then, we design a new method TaskShop based on our analysis of TaskWeb. TaskShop uses TaskWeb to estimate the benefit of using a source task for learning a new target task, and to choose a subset of helpful training tasks for multi-task training. Our method improves overall rankings and top-k precision of source tasks by 10% and 38%, respectively. We also use TaskShop to build much smaller multi-task training sets that improve zero-shot performances across 11 different target tasks by at least 4.3%.", "keywords": "NLP;machine learning;task transfer;task selection;multi-task training", "primary_area": "", "supplementary_material": "", "author": "Joongwon Kim;Akari Asai;Gabriel Ilharco;Hannaneh Hajishirzi", "authorids": "~Joongwon_Kim1;~Akari_Asai2;~Gabriel_Ilharco1;~Hannaneh_Hajishirzi1", "gender": "M;F;M;F", "homepage": "https://danieljkim0118.github.io/;https://akariasai.github.io/;http://gabrielilharco.com/;https://homes.cs.washington.edu/~hannaneh/", "dblp": "301/9365.html;;249/2616;52/1296", "google_scholar": "BPD-J_MAAAAJ;gqB4u_wAAAAJ;https://scholar.google.com/citations?hl=en;LOV6_WIAAAAJ", "or_profile": "~Joongwon_Kim1;~Akari_Asai2;~Gabriel_Ilharco1;~Hannaneh_Hajishirzi1", "aff": "Paul G. Allen School of Computer Science and Engineering, University of Washington;Paul G. 
Allen School of Computer Science & Engineering, University of Washington;Department of Computer Science, University of Washington;University of Washington", "aff_domain": "cs.washington.edu;cs.washington.edu;cs.washington.edu;uw.edu", "position": "PhD student;PhD student;PhD student;Associate Professor", "bibtex": "@inproceedings{\nkim2023taskweb,\ntitle={TaskWeb: Selecting Better Source Tasks for Multi-task {NLP}},\nauthor={Joongwon Kim and Akari Asai and Gabriel Ilharco and Hannaneh Hajishirzi},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=K8ixbJPkMQ}\n}", "github": "", "project": "", "reviewers": "5RiK;yFyo;jiqJ", "site": "https://openreview.net/forum?id=K8ixbJPkMQ", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;4;4", "excitement": "4;4;3", "reproducibility": "5;4;4", "correctness": "4;4;4", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.333333333333333, "correctness_avg": 4.0, "replies_avg": 10, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-2736-9532;;;", "linkedin": "daniel-joongwon-kim/;;;", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Washington", "aff_unique_dep": "Paul G. Allen School of Computer Science and Engineering", "aff_unique_url": "https://www.washington.edu", "aff_unique_abbr": "UW", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Seattle;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "KCe98ynJl3", "title": "Zero-shot Faithfulness Evaluation for Text Summarization with Foundation Language Model", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Despite tremendous improvements in natural language generation, summarization models still suffer from the unfaithfulness issue. Previous work evaluates faithfulness either using models trained on the other tasks or in-domain synthetic data, or prompting a large model such as ChatGPT. This paper proposes to do zero-shot faithfulness evaluation simply with a moderately-sized foundation language model. We introduce a new metric FFLM, which is a combination of probability changes based on the intuition that prefixing a piece of text that is consistent with the output will increase the probability of predicting the output. Experiments show that FFLM performs competitively with or even outperforms ChatGPT on both inconsistency detection and faithfulness rating with 24x fewer parameters. FFLM also achieves improvements over other strong baselines.", "keywords": "Faithfulness Evaluation;Summarization Evaluation;Hallucination Detection", "primary_area": "", "supplementary_material": "", "author": "Qi Jia;Siyu Ren;Yizhu Liu;Kenny Q. 
Zhu", "authorids": "~Qi_Jia3;~Siyu_Ren1;~Yizhu_Liu2;~Kenny_Q._Zhu1", "gender": "F;M;;M", "homepage": ";https://drsy.github.io/;;http://www.cs.sjtu.edu.cn/~kzhu/", "dblp": "69/1921-3;;https://dblp.uni-trier.de/pid/219/0670.html;z/KennyQiliZhu", "google_scholar": "https://scholar.google.com/citations?hl=en;jkJDyrkAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com.tw/citations?user=ZIRJ6lIAAAAJ", "or_profile": "~Qi_Jia3;~Siyu_Ren1;~Yizhu_Liu2;~Kenny_Q._Zhu1", "aff": "Shanghai Jiaotong University;Shanghai Jiaotong University;Meituan;Shanghai Jiaotong University", "aff_domain": "sjtu.edu.cn;sjtu.edu.cn;meituan.com;cs.sjtu.edu.cn", "position": "PhD student;PhD student;Instructor;Full Professor", "bibtex": "@inproceedings{\njia2023zeroshot,\ntitle={Zero-shot Faithfulness Evaluation for Text Summarization with Foundation Language Model},\nauthor={Qi Jia and Siyu Ren and Yizhu Liu and Kenny Q. Zhu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=KCe98ynJl3}\n}", "github": "", "project": "", "reviewers": "df1Z;cG7n;TWe7", "site": "https://openreview.net/forum?id=KCe98ynJl3", "pdf_size": 0, "rating": "5;5;5", "confidence": "3;4;4", "excitement": "3;3;4", "reproducibility": "4;4;5", "correctness": "4;4;4", "rating_avg": 5.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.333333333333333, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0001-6104-7249;;;", "linkedin": ";;;", "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Shanghai Jiao Tong University;Meituan", "aff_unique_dep": ";", "aff_unique_url": "https://www.sjtu.edu.cn;https://www.meituan.com", "aff_unique_abbr": "SJTU;Meituan", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "KE5QunlXcr", "title": "Incorporating Syntactic Knowledge into Pre-trained Language Model using Optimization for Overcoming Catastrophic Forgetting", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Syntactic knowledge is invaluable information for many tasks which handle complex or long sentences, but typical pre-trained language models do not contain sufficient syntactic knowledge. Thus it results in failures in downstream tasks that require syntactic knowledge.\nIn this paper, we explore additional training to incorporate syntactic knowledge to a language model. We designed four pre-training tasks that learn different syntactic perspectives. \nFor adding new syntactic knowledge and keeping a good balance between the original and additional knowledge, we addressed the problem of catastrophic forgetting that prevents the model from keeping semantic information when the model learns additional syntactic knowledge. 
We demonstrated that additional syntactic training produced consistent performance gains while clearly avoiding catastrophic forgetting.", "keywords": "syntax;BERT;language model;optimization;catastrophic forgetting", "primary_area": "", "supplementary_material": "", "author": "Ran Iwamoto;Issei Yoshida;Hiroshi Kanayama;Takuya Ohko;Masayasu Muraoka", "authorids": "~Ran_Iwamoto1;~Issei_Yoshida1;~Hiroshi_Kanayama1;~Takuya_Ohko1;~Masayasu_Muraoka1", "gender": "F;M;M;M;M", "homepage": "https://raniwamoto.github.io/;;https://research.ibm.com/people/hiroshi-kanayama;;https://researcher.watson.ibm.com/researcher/view.php?person=jp-MMURAOKA", "dblp": "253/6233;05/5670;49/4896;99/4788;174/7321", "google_scholar": ";;V6LNLfsAAAAJ;;https://scholar.google.co.jp/citations?user=ta8v4nAAAAAJ", "or_profile": "~Ran_Iwamoto1;~Issei_Yoshida1;~Hiroshi_Kanayama1;~Takuya_Ohko1;~Masayasu_Muraoka1", "aff": "Keio University;International Business Machines;International Business Machines;IBM Research - Tokyo, International Business Machines;IBM Research - Tokyo, International Business Machines", "aff_domain": "keio.jp;jp.ibm.com;ibm.com;jp.ibm.com;jp.ibm.com", "position": "PhD student;Researcher;Researcher;Researcher;Researcher", "bibtex": "@inproceedings{\niwamoto2023incorporating,\ntitle={Incorporating Syntactic Knowledge into Pre-trained Language Model using Optimization for Overcoming Catastrophic Forgetting},\nauthor={Ran Iwamoto and Issei Yoshida and Hiroshi Kanayama and Takuya Ohko and Masayasu Muraoka},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=KE5QunlXcr}\n}", "github": "", "project": "", "reviewers": "ahUV;9i8A;TqHP", "site": "https://openreview.net/forum?id=KE5QunlXcr", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;4;4", "excitement": "3;4;2", "reproducibility": "3;3;3", "correctness": "3;3;3", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.0, "reproducibility_avg": 3.0, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;", "linkedin": ";;hiroshi-kanayama-b856132a/;;", "aff_unique_index": "0;1;1;2;2", "aff_unique_norm": "Keio University;International Business Machines Corporation;International Business Machines", "aff_unique_dep": ";;IBM Research", "aff_unique_url": "https://www.keio.ac.jp;https://www.ibm.com;https://www.ibm.com/research", "aff_unique_abbr": "Keio;IBM;IBM", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Tokyo", "aff_country_unique_index": "0;1;1;0;0", "aff_country_unique": "Japan;United States" }, { "id": "KE9MKZOOca", "title": "ConPrompt: Pre-training a Language Model with Machine-Generated Data for Implicit Hate Speech Detection", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Implicit hate speech detection is a challenging task in text classification since no explicit cues (e.g., swear words) exist in the text. While some pre-trained language models have been developed for hate speech detection, they are not specialized in implicit hate speech. Recently, an implicit hate speech dataset with a massive number of samples has been proposed by controlling machine generation. We propose a pre-training approach, ConPrompt, to fully leverage such machine-generated data. Specifically, given a machine-generated statement, we use example statements of its origin prompt as positive samples for contrastive learning. 
Through pre-training with ConPrompt, we present ToxiGen-ConPrompt, a pre-trained language model for implicit hate speech detection. We conduct extensive experiments on several implicit hate speech datasets and show the superior generalization ability of ToxiGen-ConPrompt compared to other pre-trained models. Additionally, we empirically show that ConPrompt is effective in mitigating identity term bias, demonstrating that it not only makes a model more generalizable but also reduces unintended bias. We analyze the representation quality of ToxiGen-ConPrompt and show its ability to consider target group and toxicity, which are desirable features for implicit hate speech detection.", "keywords": "implicit hate speech detection;pre-training;pre-trained language model;machine-generated data;contrastive learning;prompt", "primary_area": "", "supplementary_material": "", "author": "Youngwook Kim;Shinwoo Park;Youngsoo Namgoong;Yo-Sub Han", "authorids": "~Youngwook_Kim1;~Shinwoo_Park1;~Youngsoo_Namgoong2;~Yo-Sub_Han1", "gender": "M;M;M;", "homepage": ";;https://github.com/ngys321;http://toc.yonsei.ac.kr/~emmous/", "dblp": "22/3081;331/1122;;h/YoSubHan", "google_scholar": "9Qwtf9QAAAAJ;b0o8hOYAAAAJ;;yDOh26sAAAAJ", "or_profile": "~Youngwook_Kim1;~Shinwoo_Park1;~Youngsoo_Namgoong2;~Yo-Sub_Han1", "aff": "Yonsei University;Yonsei University;Yonsei University;Yonsei University", "aff_domain": "yonsei.ac.kr;yonsei.ac.kr;yonsei.ac.kr;yonsei.ac.kr", "position": "MS student;PhD student;MS student;Full Professor", "bibtex": "@inproceedings{\nkim2023conprompt,\ntitle={ConPrompt: Pre-training a Language Model with Machine-Generated Data for Implicit Hate Speech Detection},\nauthor={Youngwook Kim and Shinwoo Park and Youngsoo Namgoong and Yo-Sub Han},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=KE9MKZOOca}\n}", "github": "", "project": "", "reviewers": "poeE;tecQ;jhCf;hY1k", "site": "https://openreview.net/forum?id=KE9MKZOOca", "pdf_size": 0, "rating": "2;2;2;2", "confidence": "5;2;3;4", "excitement": "3;4;4;4", "reproducibility": "3;5;4;5", "correctness": "3;3;3;4", "rating_avg": 2.0, "confidence_avg": 3.5, "excitement_avg": 3.75, "reproducibility_avg": 4.25, "correctness_avg": 3.25, "replies_avg": 14, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-6660-2755;;;", "linkedin": ";;;", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Yonsei University", "aff_unique_dep": "", "aff_unique_url": "https://www.yonsei.ac.kr", "aff_unique_abbr": "Yonsei", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "South Korea" }, { "id": "KEH6Cqjdw2", "title": "Legally Enforceable Hate Speech Detection for Public Forums", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Hate speech causes widespread and deep-seated societal issues. Proper enforcement of hate speech laws is key for protecting groups of people against harmful and discriminatory language. However, determining what constitutes hate speech is a complex task that is highly open to subjective interpretations. Existing works do not align their systems with enforceable definitions of hate speech, which can make their outputs inconsistent with the goals of regulators. 
This research introduces a new perspective and task for enforceable hate speech detection centred around legal definitions, and a dataset annotated on violations of eleven possible definitions by legal experts. Given the challenge of identifying clear, legally enforceable instances of hate speech, we augment the dataset with expert-generated samples and an automatically mined challenge set. We experiment with grounding the model decision in these definitions using zero-shot and few-shot prompting. We then report results on several large language models (LLMs). With this task definition, automatic hate speech detection can be more closely aligned to enforceable laws, and hence assist in more rigorous enforcement of legal protections against harmful speech in public forums.", "keywords": "hate speech;legal AI;human in the loop;large language models;prompt tuning", "primary_area": "", "supplementary_material": "", "author": "Chu Fei Luo;Rohan V Bhambhoria;Samuel Dahan;Xiaodan Zhu", "authorids": "~Chu_Fei_Luo1;~Rohan_V_Bhambhoria1;~Samuel_Dahan1;~Xiaodan_Zhu1", "gender": "Non-Binary;M;M;M", "homepage": "https://chufeiluo.github.io/;https://rohanvb.com;;http://www.xiaodanzhu.com", "dblp": ";281/1442;;93/310.html", "google_scholar": "VX7OFqoAAAAJ;eQ4QORcAAAAJ;;https://scholar.google.ca/citations?user=a6MYnuUAAAAJ", "or_profile": "~Chu_Fei_Luo1;~Rohan_V_Bhambhoria1;~Samuel_Dahan1;~Xiaodan_Zhu1", "aff": "Queen's University;Queens University;Queen's University;Queen's University", "aff_domain": "queensu.ca;queensu.ca;queensu.ca;queensu.ca", "position": "PhD student;PhD student;Associate Professor;Associate Professor", "bibtex": "@inproceedings{\nluo2023legally,\ntitle={Legally Enforceable Hate Speech Detection for Public Forums},\nauthor={Chu Fei Luo and Rohan V Bhambhoria and Samuel Dahan and Xiaodan Zhu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=KEH6Cqjdw2}\n}", "github": "", "project": "", "reviewers": "G4KL;Ukeu;9nW2", "site": "https://openreview.net/forum?id=KEH6Cqjdw2", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;3;5", "excitement": "4;3;3", "reproducibility": "4;3;4", "correctness": "4;3;3", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-2597-670X;;0000-0003-3856-3696", "linkedin": ";rohanvb/;samuel-dahan/?lipi=urn%3Ali%3Apage%3Ad_flagship3_feed%3BTFAP6b6eTs2oPztOXD5Hig%3D%3D;xiaodan-zhu-066833101/?originalSubdomain=ca", "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Queen's University;Queens University", "aff_unique_dep": ";", "aff_unique_url": "https://www.queensu.ca;https://www.queensu.ca", "aff_unique_abbr": "Queen's;Queen's U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Canada" }, { "id": "KFieG8rclT", "title": "Contrastive Learning-based Sentence Encoders Implicitly Weight Informative Words", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "The performance of sentence encoders can be significantly improved through the simple practice of fine-tuning using contrastive loss. A natural question arises: what characteristics do models acquire during contrastive learning? 
This paper theoretically and experimentally shows that contrastive-based sentence encoders implicitly weight words based on information-theoretic quantities; that is, more informative words receive greater weight, while others receive less. The theory states that, in the lower bound of the optimal value of the contrastive learning objective, the norm of word embedding reflects the information gain associated with the distribution of surrounding words. We also conduct comprehensive experiments using various models, multiple datasets, two methods to measure the implicit weighting of models (Integrated Gradients and SHAP), and two information-theoretic quantities (information gain and self-information). The results provide empirical evidence that contrastive fine-tuning emphasizes informative words.", "keywords": "sentence embedding;contrastive learning;information gain;integrated gradients", "primary_area": "", "supplementary_material": "", "author": "Hiroto Kurita;Goro Kobayashi;Sho Yokoi;Kentaro Inui", "authorids": "~Hiroto_Kurita1;~Goro_Kobayashi1;~Sho_Yokoi1;~Kentaro_Inui1", "gender": ";M;;M", "homepage": "https://kurita.dev;https://sites.google.com/view/goro-kobayashi;http://www.cl.ecei.tohoku.ac.jp/~yokoi/;http://www.cl.ecei.tohoku.ac.jp/~inui/", "dblp": ";;184/8316;90/3315", "google_scholar": "72QbISAAAAAJ;AT-ybe0AAAAJ;https://scholar.google.co.jp/citations?user=EW2QPKoAAAAJ;https://scholar.google.co.jp/citations?user=38_o3-kAAAAJ", "or_profile": "~Hiroto_Kurita1;~Goro_Kobayashi1;~Sho_Yokoi1;~Kentaro_Inui1", "aff": "Tohoku University;Tohoku University;Tohoku University;Tohoku University", "aff_domain": "tohoku.ac.jp;tohoku.ac.jp;tohoku.ac.jp;tohoku.ac.jp", "position": "MS student;PhD student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nkurita2023contrastive,\ntitle={Contrastive Learning-based Sentence Encoders Implicitly Weight Informative Words},\nauthor={Hiroto Kurita and Goro Kobayashi and Sho Yokoi and Kentaro Inui},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=KFieG8rclT}\n}", "github": "", "project": "", "reviewers": "3ibg;aaka;4dqe;un9Z", "site": "https://openreview.net/forum?id=KFieG8rclT", "pdf_size": 0, "rating": "4;4;4;4", "confidence": "3;4;2;2", "excitement": "4;3;3;3", "reproducibility": "3;4;3;3", "correctness": "4;2;2;3", "rating_avg": 4.0, "confidence_avg": 2.75, "excitement_avg": 3.25, "reproducibility_avg": 3.25, "correctness_avg": 2.75, "replies_avg": 14, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0009-0002-4437-5245;0000-0001-6510-604X", "linkedin": "hiroto-kurita;;shoyokoi/;kentaro-inui-52401a31/", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Tohoku University", "aff_unique_dep": "", "aff_unique_url": "https://www.tohoku.ac.jp", "aff_unique_abbr": "Tohoku U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Japan" }, { "id": "KHfQKygNSc", "title": "Robustness of Named-Entity Replacements for In-Context Learning", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "A key feature of modern large language models (LLMs) is their ability to perform in-context learning, a prompting technique where query- answer demonstrations are shown before the final query. This allows for generalization to novel distributions at inference time where the LLM can learn new rules without parameter updates. 
However, the choice of demonstrations and their relationship to a particular query can have a profound impact on model accuracy, raising concerns about the true in-context generalization capabilities (Zhao et al., 2021). In this work, we explore the robustness of the in-context learning paradigm by focusing on entities. In particular, we seek to understand the robustness of LLM in-context learning with respect to named entity replacements. We discover a significant variance in downstream performance based on the choice of the named entities, across three popular reasoning tasks and two popular LLMs. Specifically, model accuracy on the test sets can fluctuate between -2.7 and +8.0 points depending on the choice of named entity replacements. Our analysis exposes the sensitivity of LLM in-context learning with respect to named entities, and offers a simple recipe to improve test performance by hyper-parameter tuning the named entities for a given dataset. Code and datasets for reproducing the results are publicly available.", "keywords": "large language models;in-context learning;named entities;robustness;natural language understanding", "primary_area": "", "supplementary_material": "", "author": "Saeed Goodarzi;Nikhil Kagita;Dennis Minn;Shufan Wang;Roberto Dessi;Shubham Toshniwal;Adina Williams;Jack Lanchantin;Koustuv Sinha", "authorids": "~Saeed_Goodarzi1;~Nikhil_Kagita1;~Dennis_Minn1;~Shufan_Wang1;~Roberto_Dessi1;~Shubham_Toshniwal1;~Adina_Williams1;~Jack_Lanchantin1;~Koustuv_Sinha1", "gender": ";M;M;M;M;;F;;M", "homepage": ";;https://github.com/dennisminn;https://people.cs.umass.edu/~shufanwang/;https://robertodessi.github.io/;;http://www.adinawilliams.com;https://www.jacklanchantin.com/;https://koustuvsinha.com/", "dblp": ";;;192/1552;228/9267.html;;199/2104;178/8538.html;210/0890", "google_scholar": ";;;;LElX2I4AAAAJ;;MUtbKt0AAAAJ;35PmAZwAAAAJ;9P9QcckAAAAJ", "or_profile": "~Saeed_Goodarzi1;~Nikhil_Kagita1;~Dennis_Minn1;~Shufan_Wang1;~Roberto_Dessi1;~Shubham_Toshniwal1;~Adina_Williams1;~Jack_Lanchantin1;~Koustuv_Sinha1", "aff": ";University of Massachusetts at Amherst;University of Massachusetts at Amherst;University of Massachusetts, Amherst;Meta;;FAIR (Meta Platforms Inc.);Meta;Meta (FAIR)", "aff_domain": ";umass.edu;umass.edu;umass.edu;fb.com;;facebook.com;facebook.com;meta.com", "position": ";MS student;MS student;PhD student;PhD student;;Research Scientist;Postdoc;Researcher", "bibtex": "@inproceedings{\ngoodarzi2023robustness,\ntitle={Robustness of Named-Entity Replacements for In-Context Learning},\nauthor={Saeed Goodarzi and Nikhil Kagita and Dennis Minn and Shufan Wang and Roberto Dessi and Shubham Toshniwal and Adina Williams and Jack Lanchantin and Koustuv Sinha},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=KHfQKygNSc}\n}", "github": "", "project": "", "reviewers": "a4iF;iCLV;4TcR", "site": "https://openreview.net/forum?id=KHfQKygNSc", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;3;3", "excitement": "3;2;4", "reproducibility": "4;3;5", "correctness": "3;4;4", "rating_avg": 3.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.0, "reproducibility_avg": 4.0, "correctness_avg": 3.6666666666666665, "replies_avg": 5, "authors#_avg": 9, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;;0000-0001-5281-3343;;", "linkedin": ";https://linkedin.com/in/nikhil-kagita;;;;;;;", "aff_unique_index": "0;0;0;1;1;1;1", "aff_unique_norm": "University of 
Massachusetts Amherst;Meta", "aff_unique_dep": ";Meta Platforms, Inc.", "aff_unique_url": "https://www.umass.edu;https://meta.com", "aff_unique_abbr": "UMass Amherst;Meta", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Amherst;", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "KIysY1fMCJ", "title": "Aspect-to-Scope Oriented Multi-view Contrastive Learning for Aspect-based Sentiment Analysis", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Aspect-based sentiment analysis (ABSA) aims to align aspects and corresponding sentiment expressions, so as to identify the sentiment polarities of specific aspects. Most existing ABSA methods focus on mining syntactic or semantic information, which still suffers from noisy interference introduced by the attention mechanism and dependency tree when multiple aspects exist in a sentence. To address these issues, in this paper, we revisit ABSA from a novel perspective by proposing a novel scope-assisted multi-view graph contrastive learning framework. It not only mitigates noisy interference for better locating aspect and its corresponding sentiment opinion with aspect-specific scope, but also captures the correlation and difference between sentiment polarities and syntactic/semantic information. Extensive experiments on five benchmark datasets show that our proposed approach substantially outperforms state-of-the-art methods and verifies the effectiveness and robustness of our model.", "keywords": "Graph contrastive learning;aspect-based sentiment analysis", "primary_area": "", "supplementary_material": "", "author": "Heyan Chai;Ziyi Yao;Siyu Tang;Ye Wang;Liqiang Nie;Binxing Fang;Qing Liao", "authorids": "~Heyan_Chai1;~Ziyi_Yao1;~Siyu_Tang3;~Ye_Wang4;~Liqiang_Nie2;~Binxing_Fang1;~Qing_Liao1", "gender": "M;;;F;M;M;F", "homepage": "https://csse.szu.edu.cn/en/pages/user/index?id=1340;https://mail.hit.edu.cn/;https://github.com/Lluvia-Tang;;https://liqiangnie.github.io/index.html;;", "dblp": "252/8379;;;44/6292-15;92/8277;87/254;09/8600-1", "google_scholar": "5EsW7mUAAAAJ;;;;yywVMhUAAAAJ;;umEIUwwAAAAJ", "or_profile": "~Heyan_Chai1;~Ziyi_Yao1;~Siyu_Tang3;~Ye_Wang4;~Liqiang_Nie2;~Binxing_Fang1;~Qing_Liao1", "aff": "Harbin Institute of Technology;Harbin Institute of Technology;Harbin Institute of Technology;National University of Defense Technology;Shandong University;Harbin Institute of Technology;Harbin Institute of Technology", "aff_domain": "hit.edu;hit.edu.cn;hit.edu.cn;nudt.edu.cn;sdu.edu.cn;hit.edu.cn;hit.edu.cn", "position": "PhD student;Undergrad student;MS student;Lecturer;Full Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nchai2023aspecttoscope,\ntitle={Aspect-to-Scope Oriented Multi-view Contrastive Learning for Aspect-based Sentiment Analysis},\nauthor={Heyan Chai and Ziyi Yao and Siyu Tang and Ye Wang and Liqiang Nie and Binxing Fang and Qing Liao},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=KIysY1fMCJ}\n}", "github": "", "project": "", "reviewers": "viZs;PEgC;etwT;WYcF", "site": "https://openreview.net/forum?id=KIysY1fMCJ", "pdf_size": 0, "rating": "3;3;3;3", "confidence": "4;3;4;4", "excitement": "2;3;3;3", "reproducibility": "3;3;4;3", "correctness": "3;3;3;3", "rating_avg": 3.0, "confidence_avg": 3.75, "excitement_avg": 2.75, "reproducibility_avg": 3.25, "correctness_avg": 3.0, "replies_avg": 14, "authors#_avg": 7, "corr_rating_confidence": 0.0, 
"corr_rating_correctness": 0.0, "orcid": "0000-0003-4470-9364;;;0000-0002-4752-5280;0000-0003-1476-0273;;", "linkedin": ";;;;;;", "aff_unique_index": "0;0;0;1;2;0;0", "aff_unique_norm": "Harbin Institute of Technology;National University of Defense Technology;Shandong University", "aff_unique_dep": ";;", "aff_unique_url": "http://www.hit.edu.cn/;http://www.nudt.edu.cn/;http://www.sdu.edu.cn", "aff_unique_abbr": "HIT;NUDT;SDU", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Harbin;", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "KNFG5KLXD3", "title": "We Are What We Repeatedly Do: Inducing and Deploying Habitual Schemas in Persona-Based Responses", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Many practical applications of dialogue technology require the generation of responses according to a particular developer-specified persona. While a variety of personas can be elicited from recent large language models, the opaqueness and unpredictability of these models make it desirable to be able to specify personas in an explicit form. In previous work, personas have typically been represented as sets of one-off pieces of self-knowledge that are retrieved by the dialogue system for use in generation. However, in realistic human conversations, personas are often revealed through story-like narratives that involve rich habitual knowledge -- knowledge about kinds of events that an agent often participates in (e.g., work activities, hobbies, sporting activities, favorite entertainments, etc.), including typical goals, sub-events, preconditions, and postconditions of those events. We capture such habitual knowledge using an explicit schema representation, and propose an approach to dialogue generation that retrieves relevant schemas to condition a large language model to generate persona-based responses. Furthermore, we demonstrate a method for bootstrapping the creation of such schemas by first generating generic passages from a set of simple facts, and then inducing schemas from the generated passages.", "keywords": "Dialogue;Response;Generation;Persona;Schema;LLM", "primary_area": "", "supplementary_material": "", "author": "Benjamin Kane;Lenhart K. Schubert", "authorids": "~Benjamin_Kane2;~Lenhart_K._Schubert1", "gender": "M;M", "homepage": "http://cs.rochester.edu/u/bkane2;https://www.cs.rochester.edu/~schubert/", "dblp": ";s/LenhartKSchubert", "google_scholar": ";", "or_profile": "~Benjamin_Kane2;~Lenhart_K._Schubert1", "aff": "Department of Computer Science, University of Rochester;University of Rochester", "aff_domain": "cs.rochester.edu;rochester.edu", "position": "PhD student;Emeritus", "bibtex": "@inproceedings{\nkane2023we,\ntitle={We Are What We Repeatedly Do: Inducing and Deploying Habitual Schemas in Persona-Based Responses},\nauthor={Benjamin Kane and Lenhart K. 
Schubert},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=KNFG5KLXD3}\n}", "github": "", "project": "", "reviewers": "qnGv;gBoD;x1KL", "site": "https://openreview.net/forum?id=KNFG5KLXD3", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;3;4", "excitement": "4;4;4", "reproducibility": "5;3;4", "correctness": "4;4;3", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 4.0, "reproducibility_avg": 4.0, "correctness_avg": 3.6666666666666665, "replies_avg": 10, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";", "linkedin": ";", "aff_unique_index": "0;0", "aff_unique_norm": "University of Rochester", "aff_unique_dep": "Department of Computer Science", "aff_unique_url": "https://www.rochester.edu", "aff_unique_abbr": "U of R", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "KOxEqQzvOZ", "title": "Debias NLU Datasets via Training-free Perturbations", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Several recent studies have shown that advanced models for natural language understanding (NLU) are prone to capture biased features that are independent of the task but spuriously correlated to labels. Such models often perform well on in-distribution (ID) datasets but fail to generalize to out-of-distribution (OOD) datasets. Existing solutions can be separated into two orthogonal approaches: model-centric methods and data-centric methods. Model-centric methods improve OOD performance at the expense of ID performance. Data-centric strategies usually boost both of them via data-level manipulations such as generative data augmentation. However, the high cost of fine-tuning a generator to produce valid samples limits the potential of such approaches. To address this issue, we propose PDD, a framework that conducts training-free Perturbations on samples containing biased features to Debias NLU Datasets. PDD works by iteratively conducting perturbations via pre-trained mask language models (MLM). PDD exhibits the advantage of low cost by adopting a training-free \nperturbation strategy and further improves the label consistency by utilizing label information during perturbations. Extensive experiments demonstrate that PDD shows competitive performance with previous state-of-the-art debiasing strategies. 
When combined with the model-centric debiasing methods, PDD establishes a new state-of-the-art.", "keywords": "natural language understanding; out-of-distribution generalization; data-centric debiasing;", "primary_area": "", "supplementary_material": "", "author": "Qi Guo;yuanhang tang;Yawen Ouyang;Zhen Wu;Xinyu Dai", "authorids": "~Qi_Guo11;~yuanhang_tang1;~Yawen_Ouyang1;~Zhen_Wu2;~Xinyu_Dai1", "gender": "M;;M;M;M", "homepage": ";;https://yawenouyang.github.io/about/;https://wuzhen247.github.io/;http://cs.nju.edu.cn/daixinyu", "dblp": ";;;16/4485-2;39/5815", "google_scholar": ";;;IoGlgtoAAAAJ;https://scholar.google.com/citations?hl=en", "or_profile": "~Qi_Guo11;~yuanhang_tang1;~Yawen_Ouyang1;~Zhen_Wu2;~Xinyu_Dai1", "aff": "Nanjing University;Nanjing University;Nanjing University;Nanjing University;Nanjing University", "aff_domain": "nju.edu.cn;nju.edu.cn;nju.edu.cn;nju.edu.cn;nju.edu.cn", "position": "Undergrad student;MS student;PhD student;Researcher;Full Professor", "bibtex": "@inproceedings{\nguo2023debias,\ntitle={Debias {NLU} Datasets via Training-free Perturbations},\nauthor={Qi Guo and yuanhang tang and Yawen Ouyang and Zhen Wu and Xinyu Dai},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=KOxEqQzvOZ}\n}", "github": "", "project": "", "reviewers": "syBo;gCCW;TmsF", "site": "https://openreview.net/forum?id=KOxEqQzvOZ", "pdf_size": 0, "rating": "4;4;4", "confidence": "5;4;3", "excitement": "3;3;4", "reproducibility": "4;4;4", "correctness": "4;3;3", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0009-0009-4681-6404;0009-0000-6629-816X;;0000-0002-7678-103X;", "linkedin": ";;;;", "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Nanjing University", "aff_unique_dep": "", "aff_unique_url": "https://www.nju.edu.cn", "aff_unique_abbr": "Nanjing U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "KRQADH68fG", "title": "HuatuoGPT, Towards Taming Language Model to Be a Doctor", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "In this paper, we present HuatuoGPT, a Large Language Model (LLM) for medical consultation. The core recipe of HuatuoGPT is to leverage both distilled data from **ChatGPT** and real-world data from **doctors** in the supervised fine-tuning stage. This is not only because purely using **ChatGPT**-distilled data might cause 'model collapse', but also because real-world data from **doctors** would be complementary to **ChatGPT**-distilled data. The responses from ChatGPT are usually detailed, well-presented, fluent, and instruction-followed, but it cannot perform like a doctor in many aspects, e.g. for interactive diagnosis. Therefore, the extra doctors' data could tame a distilled language model to perform like doctors. To synergize the strengths of both data sources, we introduce RLMF (Reinforcement Learning from Mixed Feedback) where a reward model is trained to align the language model with the merits that both sources (ChatGPT and doctors) bring. Experimental results (in GPT-4 evaluation, human evaluation, and medical benchmark datasets) demonstrate that HuatuoGPT achieves state-of-the-art results in performing medical consultation among open-source LLMs. 
It is worth noting that by using additional real-world data and RLMF, the distilled language model (i.e., HuatuoGPT) outperforms its teacher model (i.e., ChatGPT) in most cases.", "keywords": "large language models;medical application;ChatGPT", "primary_area": "", "supplementary_material": "", "author": "Hongbo Zhang;Junying Chen;Feng Jiang;Fei Yu;Zhihong Chen;Guiming Hardy Chen;Jianquan Li;Xiangbo Wu;Zhang Zhiyi;Qingying Xiao;Xiang Wan;Benyou Wang;Haizhou Li", "authorids": "~Hongbo_Zhang5;~Junying_Chen2;~Feng_Jiang4;~Fei_Yu3;~Zhihong_Chen2;~Guiming_Hardy_Chen1;~Jianquan_Li1;~Xiangbo_Wu1;~Zhang_Zhiyi2;~Qingying_Xiao1;~Xiang_Wan1;~Benyou_Wang2;~Haizhou_Li3", "gender": "M;M;M;F;M;;M;;M;F;M;M;M", "homepage": "https://hongbozhang.site/;;;;;;;;https://github.com/zhangzhiyi23;;http://www.sribd.cn/teacher/28;https://wabyking.github.io/old.html;https://colips.org/~eleliha/", "dblp": ";;75/1693-7;;78/3726;;;;;;;169/1793;36/4118", "google_scholar": "mv7nG38AAAAJ;https://scholar.google.com.hk/citations?user=I0raPTYAAAAJ;zrxpiWYAAAAJ;EsCgPkQAAAAJ;y55sF8cAAAAJ;;https://scholar.google.com/citations?hl=en;;;;;Jk4vJU8AAAAJ;https://scholar.google.com.sg/citations?user=z8_x7C8AAAAJ", "or_profile": "~Hongbo_Zhang5;~Junying_Chen2;~Feng_Jiang4;~Fei_Yu3;~Zhihong_Chen2;~Guiming_Hardy_Chen1;~Jianquan_Li1;~Xiangbo_Wu1;~Zhang_Zhiyi2;~Qingying_Xiao1;~Xiang_Wan1;~Benyou_Wang2;~Haizhou_Li3", "aff": "The Chinese University of Hong Kong, Shenzhen;Harbin Institute of Technology, Shenzhen;The Chinese University of Hong Kong, Shenzhen;The Chinese University of Hong Kong, Shenzhen;The Chinese University of Hong Kong, Shenzhen;;;;;National Health Data Institute,Shenzhen;Shenzhen Research Institute of Big Data;The Chinese University of Hong Kong, Shenzhen;National University of Singapore", "aff_domain": "cuhk.edu.cn;hit.edu;cuhk.edu.cn;link.cuhk.edu.cn;cuhk.edu.cn;;;;;nhdisz.cn;sribd.cn;cuhk.edu.cn;nus.edu.sg", "position": "Researcher;MS student;Postdoc;PhD student;PhD student;;;;;Researcher;Principal Researcher;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nzhang2023huatuogpt,\ntitle={Huatuo{GPT}, Towards Taming Language Model to Be a Doctor},\nauthor={Hongbo Zhang and Junying Chen and Feng Jiang and Fei Yu and Zhihong Chen and Guiming Hardy Chen and Jianquan Li and Xiangbo Wu and Zhang Zhiyi and Qingying Xiao and Xiang Wan and Benyou Wang and Haizhou Li},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=KRQADH68fG}\n}", "github": "", "project": "", "reviewers": "WjkR;Dcp8;rXzS", "site": "https://openreview.net/forum?id=KRQADH68fG", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;4", "excitement": "3;3;3", "reproducibility": "4;4;4", "correctness": "4;3;2", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 3.0, "reproducibility_avg": 4.0, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 13, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0003-0425-3673;;0000-0002-3465-311X;;;;;;;0009-0006-9560-4529;;0000-0002-1501-9914;0000-0001-9158-9401", "linkedin": ";;;;;;;;;;;;haizhou-li-4ba74b6/", "aff_unique_index": "0;1;0;0;0;2;3;0;4", "aff_unique_norm": "Chinese University of Hong Kong;Harbin Institute of Technology;National Health Data Institute;Shenzhen Research Institute of Big Data;National University of Singapore", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.cuhk.edu.cn;http://en.hhit.edu.cn/;;http://www.sribd.cn;https://www.nus.edu.sg", "aff_unique_abbr": 
"CUHK;HIT;;;NUS", "aff_campus_unique_index": "0;0;0;0;0;0;0", "aff_campus_unique": "Shenzhen;", "aff_country_unique_index": "0;0;0;0;0;0;0;0;1", "aff_country_unique": "China;Singapore" }, { "id": "KSjnVt9awC", "title": "Revisiting the Knowledge Injection Frameworks", "track": "main", "status": "Long Main", "tldr": "", "abstract": "In recent years, large language models (LLMs), such as GPTs, have attained great impact worldwide.\nHowever, how to adapt these LLMs to better suit the vertical domain-specific tasks by utilizing external knowledge remains not completely solved.\nIndeed, there have emerged a few works on this line where most of them rely on an alignment heuristic that is built to inject the corresponding knowledge tuple into the associated text sample.\n\nHowever, despite the promise, we identify a pivotal problem in this work ubiquitously.\nSimply put, we find that injecting unaligned (i.e., random) knowledge tuple into the LLMs achieves comparable (and sometimes better) results than the aligned knowledge being injected.\nWe therefore take a thorough investigation of this frustrating finding on a variety of related prior work and further provide a chain of potential interpretations for the phenomenon.\nBased on all that, we offer a simple remediated technique.\nBriefly, the core of this technique roots in an ideological emphasis on the pruning and purification of the external knowledge base to be injected into LLMs.\nAt last, we show that by integrating this technique into most (if not all) knowledge injection frameworks and recent LLMs, it manages to overcome the aforementioned sanity problem and further pushes the boundary of the performance of the domain-adaptive LLMs.", "keywords": "language models;knowledge pruning;large language models for downstream task", "primary_area": "", "supplementary_material": "", "author": "Peng Fu;Yiming Zhang;Haobo Wang;Weikang Qiu;Junbo Zhao", "authorids": "~Peng_Fu2;~Yiming_Zhang3;~Haobo_Wang1;~Weikang_Qiu1;~Junbo_Zhao1", "gender": "M;;M;M;M", "homepage": ";;https://hbzju.github.io/;https://www.boltzmachine.com;http://jakezhao.net/", "dblp": ";;;336/1936;191/6665", "google_scholar": "https://scholar.google.com.hk/citations?hl=zh-CN;;DnN-rggAAAAJ;OLRjhHAAAAAJ;8ipao8MAAAAJ", "or_profile": "~Peng_Fu2;~Yiming_Zhang3;~Haobo_Wang1;~Weikang_Qiu1;~Junbo_Zhao1", "aff": "Zhejiang University;;Zhejiang University;Yale University;Zhejiang University", "aff_domain": "zju.edu.cn;;zju.edu.cn;yale.edu;zju.edu.cn", "position": "MS student;;PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nfu2023revisiting,\ntitle={Revisiting the Knowledge Injection Frameworks},\nauthor={Peng Fu and Yiming Zhang and Haobo Wang and Weikang Qiu and Junbo Zhao},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=KSjnVt9awC}\n}", "github": "", "project": "", "reviewers": "ic1H;jdMq;v97Y;Lzwd", "site": "https://openreview.net/forum?id=KSjnVt9awC", "pdf_size": 0, "rating": "4;4;4;4", "confidence": "3;3;3;4", "excitement": "4;3;3;4", "reproducibility": "3;4;4;4", "correctness": "4;4;4;4", "rating_avg": 4.0, "confidence_avg": 3.25, "excitement_avg": 3.5, "reproducibility_avg": 3.75, "correctness_avg": 4.0, "replies_avg": 13, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0001-8586-3048;;", "linkedin": ";;;;", "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Zhejiang University;Yale University", "aff_unique_dep": ";", 
"aff_unique_url": "https://www.zju.edu.cn;https://www.yale.edu", "aff_unique_abbr": "ZJU;Yale", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "China;United States" }, { "id": "KTFxOnrbvu", "title": "Argument mining as a multi-hop generative machine reading comprehension task", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Argument mining (AM) is a natural language processing task that aims to generate an argumentative graph given an unstructured argumentative text. An argumentative graph that consists of argumentative components and argumentative relations contains completed information of an argument and exhibits the logic of an argument. As the argument structure of an argumentative text can be regarded as an answer to a \"why\" question, the whole argument structure is therefore similar to the \"chain of thought\" concept, i.e., the sequence of ideas that lead to a specific conclusion for a given argument (Wei et al., 2022). For argumentative texts in the same specific genre, the \"chain of thought\" of such texts is usually similar, i.e., in a student essay, there is usually a major claim supported by several claims, and then a number of premises which are related to the claims are included (Eger et al., 2017). In this paper, we propose a new perspective which transfers the argument mining task into a multi-hop reading comprehension task, allowing the model to learn the argument structure as a \"chain of thought\". We perform a comprehensive evaluation of our approach on two AM benchmarks and find that we surpass SOTA results. A detailed analysis shows that specifically the \"chain of thought\" information is helpful for the argument mining task.", "keywords": "Argument mining; Machine reading comprehension; Generative model; Chain of thought", "primary_area": "", "supplementary_material": "", "author": "Boyang Liu;Viktor Schlegel;Riza Batista-Navarro;Sophia Ananiadou", "authorids": "~Boyang_Liu2;~Viktor_Schlegel1;~Riza_Batista-Navarro1;~Sophia_Ananiadou1", "gender": "M;;F;F", "homepage": ";https://schlevik.net;https://research.manchester.ac.uk/en/persons/riza.batista;http://www.manchester.ac.uk/research/Sophia.ananiadou/", "dblp": ";236/0362;92/11424;47/4142", "google_scholar": ";m4473TAAAAAJ;fRBJmp9gk_cC;https://scholar.google.com.tw/citations?user=quhi-K0AAAAJ", "or_profile": "~Boyang_Liu2;~Viktor_Schlegel1;~Riza_Batista-Navarro1;~Sophia_Ananiadou1", "aff": "University of Manchester;ASUS Intelligent Cloud Solutions;University of Manchester;University of Manchester", "aff_domain": "cs.manchester.ac.uk;asus.com;manchester.ac.uk;manchester.ac.uk", "position": "PhD student;Researcher;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nliu2023argument,\ntitle={Argument mining as a multi-hop generative machine reading comprehension task},\nauthor={Boyang Liu and Viktor Schlegel and Riza Batista-Navarro and Sophia Ananiadou},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=KTFxOnrbvu}\n}", "github": "", "project": "", "reviewers": "As1i;SLiR;g9cj", "site": "https://openreview.net/forum?id=KTFxOnrbvu", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;2;2", "excitement": "3;4;4", "reproducibility": "2;4;4", "correctness": "4;4;3", "rating_avg": 3.0, "confidence_avg": 2.3333333333333335, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 
3.6666666666666665, "replies_avg": 10, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0000-0002-4097-9191", "linkedin": "https://linkedin.com/in/boyang-liu-a71073238;;;sophia-ananiadou-ba98b63/", "aff_unique_index": "0;1;0;0", "aff_unique_norm": "University of Manchester;ASUS", "aff_unique_dep": ";Intelligent Cloud Solutions", "aff_unique_url": "https://www.manchester.ac.uk;https://www.asus.com", "aff_unique_abbr": "UoM;ASUS", "aff_campus_unique_index": "1", "aff_campus_unique": ";Taiwan", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "United Kingdom;China" }, { "id": "KUSzNKRI2g", "title": "Improving Pacing in Long-Form Story Planning", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "Existing LLM-based systems for writing long-form stories or story outlines frequently suffer from unnatural pacing, whether glossing over important events or over-elaborating on insignificant details, resulting in a jarring experience for the reader.\nWe propose a **CONC**rete **O**utline **C**on**T**rol (CONCOCT) system to improve pacing when automatically generating story outlines. We first train a *concreteness evaluator* to judge which of two events is more concrete (low-level-detailed). \nThis evaluator can then be used to control pacing in hierarchical outline generation; in this work, we explore a *vaguest-first* expansion procedure that aims for uniform pacing. We further use the evaluator to filter new outline items based on predicted concreteness. Compared to a baseline hierarchical outline generator, humans judge CONCOCT's pacing to be more consistent over 57% of the time across multiple outline lengths; the gains also translate to downstream stories. \nAll code, data, and models are open-sourced.", "keywords": "Story Generation;Pacing;Hierarchical Planning", "primary_area": "", "supplementary_material": "", "author": "Yichen Wang;Kevin Yang;Xiaoming Liu;Dan Klein", "authorids": "~Yichen_Wang4;~Kevin_Yang2;~Xiaoming_Liu8;~Dan_Klein1", "gender": "M;M;;M", "homepage": "https://yichenzw.com;https://gr.xjtu.edu.cn/zh/web/xm.liu;http://people.eecs.berkeley.edu/~klein/;https://people.eecs.berkeley.edu/~yangk/", "dblp": ";;;13/10565", "google_scholar": "86XiOcsAAAAJ;FepcM0IAAAAJ;;sRpY9TIAAAAJ", "or_profile": "~Yichen_Wang4;~Xiaoming_Liu8;~Dan_Klein1;~Kevin_Yang1", "aff": "University of California, Berkeley;Xi'an Jiaotong University;University of California, Berkeley;University of California, Berkeley", "aff_domain": "berkeley.edu;xjtu.edu.cn;berkeley.edu;berkeley.edu", "position": "Intern;Associate Professor;Full Professor;PhD student", "bibtex": "@inproceedings{\nwang2023improving,\ntitle={Improving Pacing in Long-Form Story Planning},\nauthor={Yichen Wang and Kevin Yang and Xiaoming Liu and Dan Klein},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=KUSzNKRI2g}\n}", "github": "", "project": "", "reviewers": "cJCu;1ZA2;7mEN", "site": "https://openreview.net/forum?id=KUSzNKRI2g", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;3;3", "excitement": "3;3;4", "reproducibility": "3;4;4", "correctness": "3;3;3", "rating_avg": 4.0, "confidence_avg": 3.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0003-0901-6028;;", "linkedin": ";;dan-klein/;", 
"aff_unique_index": "0;1;0;0", "aff_unique_norm": "University of California, Berkeley;Xi'an Jiao Tong University", "aff_unique_dep": ";", "aff_unique_url": "https://www.berkeley.edu;https://www.xjtu.edu.cn", "aff_unique_abbr": "UC Berkeley;XJTU", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Berkeley;", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "United States;China" }, { "id": "KfJffhdWO1", "title": "Evaluating Evaluation Metrics: A Framework for Analyzing NLG Evaluation Metrics using Measurement Theory", "track": "main", "status": "Long Main", "tldr": "", "abstract": "We address a fundamental challenge in Natural Language Generation (NLG) model evaluation---the design and evaluation of evaluation metrics. Recognizing the limitations of existing automatic metrics and noises from how current human evaluation was conducted, we propose MetricEval, a framework informed by measurement theory, the foundation of educational test design, for conceptualizing and evaluating the reliability and validity of NLG evaluation metrics. The framework formalizes the source of measurement error and offers statistical tools for evaluating evaluation metrics based on empirical data. With our framework, one can quantify the uncertainty of the metrics to better interpret the result. To exemplify the use of our framework in practice, we analyzed a set of evaluation metrics for summarization and identified issues related to conflated validity structure in human-eval and reliability in LLM-based metrics. Through MetricEval, we aim to promote the design, evaluation, and interpretation of valid and reliable metrics to advance robust and effective NLG models.", "keywords": "Evaluation;Evaluation Metrics;Measurement Theory;NLG;Large Language Model", "primary_area": "", "supplementary_material": "", "author": "Ziang Xiao;Susu Zhang;Vivian Lai;Q.Vera Liao", "authorids": "~Ziang_Xiao1;~Susu_Zhang1;~Vivian_Lai1;~Q.Vera_Liao1", "gender": ";F;F;F", "homepage": ";https://psychology.illinois.edu/directory/profile/szhan105;https://vivlai.github.io/;http://www.qveraliao.com", "dblp": "196;;;01/7985.html", "google_scholar": "MjkODLEAAAAJ;;https://scholar.google.com/citations?hl=en;bbe_MZEAAAAJ", "or_profile": "~Ziang_Xiao1;~Susu_Zhang1;~Vivian_Lai1;~Q.Vera_Liao1", "aff": "Department of Computer Science, Whiting School of Engineering;University of Illinois Urbana-Champaign;VISA;Microsoft", "aff_domain": "cs.jhu.edu;illinois.edu;visa.com;microsoft.com", "position": "Assistant Professor;Assistant Professor;Researcher;Principal Researcher", "bibtex": "@inproceedings{\nxiao2023evaluating,\ntitle={Evaluating Evaluation Metrics: A Framework for Analyzing {NLG} Evaluation Metrics using Measurement Theory},\nauthor={Ziang Xiao and Susu Zhang and Vivian Lai and Q.Vera Liao},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=KfJffhdWO1}\n}", "github": "", "project": "", "reviewers": "DnGL;rppc;QqGJ", "site": "https://openreview.net/forum?id=KfJffhdWO1", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;4;3", "excitement": "4;3;4", "reproducibility": "5;4;4", "correctness": "4;4;4", "rating_avg": 5.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.333333333333333, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0000-0003-4543-7196", "linkedin": ";;;", "aff_unique_index": "0;1;2;3", 
"aff_unique_norm": "Johns Hopkins University;University of Illinois Urbana-Champaign;VISA;Microsoft", "aff_unique_dep": "Department of Computer Science;;;Microsoft Corporation", "aff_unique_url": "https://www.jhu.edu;https://illinois.edu;https://www.visa.com;https://www.microsoft.com", "aff_unique_abbr": "JHU;UIUC;VISA;Microsoft", "aff_campus_unique_index": "0;1", "aff_campus_unique": "Baltimore;Urbana-Champaign;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "KgcuY2KIkf", "title": "Systematic word meta-sense extension", "track": "main", "status": "Long Main", "tldr": "", "abstract": "The meaning of polysemous words often varies in a highly productive yet predictable way. Generalizing the regularity between conventional senses to derive novel word meaning is crucial for automated processing of non-literal language uses such as figurative expressions. We introduce a novel task called systematic word meta-sense extension (SWORME) to test and improve language models' ability to extend word meaning to denote new semantic domains (also called meta-senses) that bear regular semantic relations with existing senses. We found that language models prefer incremental lexical semantic change toward conceptually similar meta-senses such as logical metonymy, and are much worse at predicting highly non-literal meaning extensions such as metaphors. We propose a novel analogy-based method of word meaning extension, and show that it effectively improves language model systematicity in making both gradual and radical types of meta-sense extension. We further demonstrate that learning systematic meta-sense extensions benefits language models on multiple benchmarks of figurative language understanding.", "keywords": "lexical creativity;regular polysemy;systematicity;contextualized language model;analogical inference;figurative language processing", "primary_area": "", "supplementary_material": "", "author": "Lei Yu", "authorids": "~Lei_Yu12", "gender": "M", "homepage": "https://jadeleiyu.github.io/", "dblp": "", "google_scholar": "https://scholar.google.com/citations?hl=en", "or_profile": "~Lei_Yu12", "aff": "University of Toronto", "aff_domain": "cs.toronto.edu", "position": "PhD student", "bibtex": "@inproceedings{\nyu2023systematic,\ntitle={Systematic word meta-sense extension},\nauthor={Lei Yu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=KgcuY2KIkf}\n}", "github": "", "project": "", "reviewers": "rNR6;zLV7;91N3", "site": "https://openreview.net/forum?id=KgcuY2KIkf", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;5;5", "excitement": "4;4;4", "reproducibility": "4;5;4", "correctness": "4;4;4", "rating_avg": 5.0, "confidence_avg": 4.666666666666667, "excitement_avg": 4.0, "reproducibility_avg": 4.333333333333333, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 1, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "", "linkedin": "", "aff_unique_index": "0", "aff_unique_norm": "University of Toronto", "aff_unique_dep": "", "aff_unique_url": "https://www.utoronto.ca", "aff_unique_abbr": "U of T", "aff_country_unique_index": "0", "aff_country_unique": "Canada" }, { "id": "KivNpBsfAS", "title": "NLP Evaluation in trouble: On the Need to Measure LLM Data Contamination for each Benchmark", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "In this position paper we argue that the classical evaluation on Natural Language 
Processing\n(NLP) tasks using annotated benchmarks is in\ntrouble. The worst kind of data contamination\nhappens when a Large Language Model (LLM)\nis trained on the test split of a benchmark, and\nthen evaluated in the same benchmark. The extent of the problem is unknown, as it is not\nstraightforward to measure. Contamination\ncauses an overestimation of the performance\nof a contaminated model in a target benchmark\nand associated task with respect to their non-contaminated counterparts. The consequences\ncan be very harmful, with wrong scientific conclusions being published while other correct\nones are discarded. This position paper defines different levels of data contamination and\nargues for a community effort, including the\ndevelopment of automatic and semi-automatic\nmeasures to detect when data from a benchmark was exposed to a model, and suggestions\nfor flagging papers with conclusions that are\ncompromised by data contamination.", "keywords": "evaluation;data contamination;large language models;benchmark", "primary_area": "", "supplementary_material": "", "author": "Oscar Sainz;Jon Ander Campos;Iker Garc\u00eda-Ferrero;Julen Etxaniz;Oier Lopez de Lacalle;Eneko Agirre", "authorids": "~Oscar_Sainz1;~Jon_Ander_Campos1;~Iker_Garc\u00eda-Ferrero1;~Julen_Etxaniz1;~Oier_Lopez_de_Lacalle1;~Eneko_Agirre1", "gender": "M;M;M;M;M;M", "homepage": "https://osainz59.github.io/;;https://ikergarcia1996.github.io/Iker-Garcia-Ferrero/;https://julenetxaniz.eus/en;https://oierldl.github.io/;http://ixa.si.ehu.eus/eneko", "dblp": "266/1113;262/3907.html;305/9880;354/6422;11/4461;a/EnekoAgirre", "google_scholar": "https://scholar.google.es/citations?user=3Z5zok8AAAAJ;wlcPPygAAAAJ;https://scholar.google.es/citations?user=yoOzj1MAAAAJ;BDGXAjgAAAAJ;nieh6tUAAAAJ;https://scholar.google.es/citations?user=kSuqts0AAAAJ", "or_profile": "~Oscar_Sainz1;~Jon_Ander_Campos1;~Iker_Garc\u00eda-Ferrero1;~Julen_Etxaniz1;~Oier_Lopez_de_Lacalle1;~Eneko_Agirre1", "aff": "University of the Basque Country (UPV/EHU);University of the Basque Country;University of Pennsylvania;HiTZ Center, University of the Basque Country (UPV/EHU);Universidad del Pa\u00eds Vasco;University of the Basque Country (UPV/EHU)", "aff_domain": "ehu.eus;ehu.eus;upenn.edu;ehu.eus;ehu.eus;ehu.eus", "position": "PhD student;PhD student;PhD student;PhD student;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nsainz2023nlp,\ntitle={{NLP} Evaluation in trouble: On the Need to Measure {LLM} Data Contamination for each Benchmark},\nauthor={Oscar Sainz and Jon Ander Campos and Iker Garc{\\'\\i}a-Ferrero and Julen Etxaniz and Oier Lopez de Lacalle and Eneko Agirre},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=KivNpBsfAS}\n}", "github": "", "project": "", "reviewers": "z6oP;stGG;mDTS", "site": "https://openreview.net/forum?id=KivNpBsfAS", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;4;3", "excitement": "3;3;3", "reproducibility": "", "correctness": "3;3;3", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.0, "reproducibility_avg": 0, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0003-0890-7670;;0000-0001-9612-7134;0009-0000-2099-7766;0000-0003-4969-2055;", "linkedin": ";;iker-garc%C3%ADa-ferrero-75343b172/;juletxara;oier-lopez-de-lacalle-4044a36/;", "aff_unique_index": "0;0;1;0;2;0", "aff_unique_norm": "University of the 
Basque Country;University of Pennsylvania;Universidad del Pa\u00eds Vasco", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ehu.eus/en;https://www.upenn.edu;https://www.ehu.eus/en", "aff_unique_abbr": "UPV/EHU;UPenn;UPV/EHU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0;0", "aff_country_unique": "Spain;United States" }, { "id": "Kjs0mpGJwb", "title": "A Structure-Aware Generative Adversarial Network for Bilingual Lexicon Induction", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Bilingual lexicon induction (BLI) is the task of inducing word translations with a learned mapping function that aligns monolingual word embedding spaces in two different languages. However, most previous methods treat word embeddings as isolated entities and fail to jointly consider both the intra-space and inter-space topological relations between words. This limitation makes it challenging to align words from embedding spaces with distinct topological structures, especially when the assumption of isomorphism may not hold. To this end, we propose a novel approach called the Structure-Aware Generative Adversarial Network (SA-GAN) model to explicitly capture multiple topological structure information to achieve accurate BLI. Our model first incorporates two lightweight graph convolutional networks (GCNs) to leverage intra-space topological correlations between words for generating source and target embeddings. We then employ a GAN model to explore inter-space topological structures by learning a global mapping function that initially maps the source embeddings to the target embedding space. To further align the coarse-grained structures, we develop a pair-wised local mapping (PLM) strategy that enables word-specific transformations in an unsupervised manner. 
Extensive experiments conducted on public datasets, including languages with both distant and close etymological relationships, demonstrate the effectiveness of our proposed SA-GAN model.", "keywords": "Cross-lingual word embedding;unsupervised;low isomorphic;bilingual lexicon induction", "primary_area": "", "supplementary_material": "", "author": "Bocheng Han;Qian Tao;Lusi Li;Zhihao Xiong", "authorids": "~Bocheng_Han1;~Qian_Tao5;~Lusi_Li1;~Zhihao_Xiong1", "gender": ";F;M;M", "homepage": ";https://sites.google.com/view/lusili/?pli=1;;", "dblp": ";222/9328;;", "google_scholar": ";RxQfQvcAAAAJ;;", "or_profile": "~Qian_Tao5;~Lusi_Li1;~Zhihao_Xiong1;~Han_Bocheng1", "aff": "South China University of Technology;Old Dominion University;;South China University of Technology", "aff_domain": "scut.edu.cn;odu.edu;;scut.edu.cn", "position": "Associate Professor;Assistant Professor;;MS student", "bibtex": "@inproceedings{\nhan2023a,\ntitle={A Structure-Aware Generative Adversarial Network for Bilingual Lexicon Induction},\nauthor={Bocheng Han and Qian Tao and Lusi Li and Zhihao Xiong},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=Kjs0mpGJwb}\n}", "github": "", "project": "", "reviewers": "ggx5;vh2V;qVkS", "site": "https://openreview.net/forum?id=Kjs0mpGJwb", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;4", "excitement": "4;3;3", "reproducibility": "3;4;3", "correctness": "4;4;3", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0001-7313-2109;0000-0002-4323-2632;0000-0002-4572-7751;0000-0002-4595-0419", "linkedin": ";;;", "aff_unique_index": "0;1;0", "aff_unique_norm": "South China University of Technology;Old Dominion University", "aff_unique_dep": ";", "aff_unique_url": "https://www.scut.edu.cn;https://www.odu.edu", "aff_unique_abbr": "SCUT;ODU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "China;United States" }, { "id": "KkHY1WGDII", "title": "Grammar-Constrained Decoding for Structured NLP Tasks without Finetuning", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Despite their impressive performance, large language models (LMs) still\nstruggle with reliably generating complex output structures when not finetuned\nto follow the required output format exactly. To address this issue,\ngrammar-constrained decoding (GCD) can be used to control the generation of\nLMs, guaranteeing that the output follows a given structure. Most existing GCD\nmethods are, however, limited to specific tasks, such as parsing or code\ngeneration. In this work, we demonstrate that formal grammars can describe the\noutput space for a much wider range of tasks and argue that GCD can serve as a\nunified framework for structured NLP tasks in general. For increased\nflexibility, we introduce input-dependent grammars, which allow the grammar to\ndepend on the input and thus enable the generation of different output\nstructures for different inputs. We then empirically demonstrate the power and\nflexibility of GCD-enhanced LMs on (1) information extraction, (2) entity\ndisambiguation, and (3) constituency parsing. 
Our results indicate that\ngrammar-constrained LMs substantially outperform unconstrained LMs or even beat\ntask-specific finetuned models. Grammar constraints thus hold great promise for\nharnessing off-the-shelf LMs for a wide range of structured NLP tasks,\nespecially where training data is scarce or finetuning is expensive. Code and\ndata: https://github.com/epfl-dlab/GCD.", "keywords": "Grammar-Constrained Decoding;Large Language Model;LLM;Structured NLP;Information Extraction;Entity Disambiguation", "primary_area": "", "supplementary_material": "", "author": "Saibo Geng;Martin Josifoski;Maxime Peyrard;Robert West", "authorids": "~Saibo_Geng1;~Martin_Josifoski1;~Maxime_Peyrard2;~Robert_West1", "gender": "M;M;M;M", "homepage": "https://saibo-creator.github.io;;https://peyrardm.github.io;https://dlab.epfl.ch/people/west/", "dblp": "280/3053;234/6886.html;184/3721;20/7441-1", "google_scholar": "Y194NbQAAAAJ;XpzKdlkAAAAJ;RFMdKLMAAAAJ;ZiFn598AAAAJ", "or_profile": "~Saibo_Geng1;~Martin_Josifoski1;~Maxime_Peyrard2;~Robert_West1", "aff": "EPFL - EPF Lausanne;Swiss Federal Institute of Technology Lausanne;Swiss Federal Institute of Technology Lausanne;EPFL - EPF Lausanne", "aff_domain": "epfl.ch;epfl.ch;epfl.ch;epfl.ch", "position": "PhD student;PhD student;Postdoc;Assistant Professor", "bibtex": "@inproceedings{\ngeng2023grammarconstrained,\ntitle={Grammar-Constrained Decoding for Structured {NLP} Tasks without Finetuning},\nauthor={Saibo Geng and Martin Josifoski and Maxime Peyrard and Robert West},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=KkHY1WGDII}\n}", "github": "", "project": "", "reviewers": "N4GZ;NQmR;w4y4;3Wr2", "site": "https://openreview.net/forum?id=KkHY1WGDII", "pdf_size": 0, "rating": "4;4;4;4", "confidence": "2;4;3;4", "excitement": "4;4;3;4", "reproducibility": "4;4;4;4", "correctness": "4;3;3;4", "rating_avg": 4.0, "confidence_avg": 3.25, "excitement_avg": 3.75, "reproducibility_avg": 4.0, "correctness_avg": 3.5, "replies_avg": 14, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;", "linkedin": ";martin-josifoski-56b395104/;;", "aff_unique_index": "0;1;1;0", "aff_unique_norm": "EPFL;Swiss Federal Institute of Technology Lausanne", "aff_unique_dep": ";", "aff_unique_url": "https://www.epfl.ch;https://www.epfl.ch", "aff_unique_abbr": "EPFL;EPFL", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Lausanne", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Switzerland" }, { "id": "KkR8wahYQN", "title": "FaMeSumm: Investigating and Improving Faithfulness of Medical Summarization", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Summaries of medical text shall be faithful by being consistent and factual with source inputs, which is an important but understudied topic for safety and efficiency in healthcare. In this paper, we investigate and improve faithfulness in summarization on a broad range of medical summarization tasks. Our investigation reveals that current summarization models often produce unfaithful outputs for medical input text. We then introduce FaMeSumm, a framework to improve faithfulness by fine-tuning pre-trained language models based on medical knowledge. FaMeSumm performs contrastive learning on designed sets of faithful and unfaithful summaries, and it incorporates medical terms and their contexts to encourage faithful generation of medical terms. 
We conduct comprehensive experiments on three datasets in two languages: health question and radiology report summarization datasets in English, and a patient-doctor dialogue dataset in Chinese. Results demonstrate that FaMeSumm is flexible and effective by delivering consistent improvements over mainstream language models such as BART, T5, mT5, and PEGASUS, yielding state-of-the-art performances on metrics for faithfulness and general quality. Human evaluation by doctors also shows that FaMeSumm generates more faithful outputs. Our code is available at https://github.com/psunlpgroup/FaMeSumm.", "keywords": "Medical Summarization;Factuality;Faithful Summarization;Abstractive Summarization", "primary_area": "", "supplementary_material": "", "author": "Nan Zhang;Yusen Zhang;Wu Guo;Prasenjit Mitra;Rui Zhang", "authorids": "~Nan_Zhang9;~Yusen_Zhang1;~Wu_Guo3;~Prasenjit_Mitra1;~Rui_Zhang7", "gender": "M;M;;M;M", "homepage": "https://zn1010.github.io;https://www.yuszh.com;;http://www.personal.psu.edu/pum10/;https://ryanzhumich.github.io/", "dblp": ";38/10863-1.html;;19/3308;60/2536-37", "google_scholar": "PDuBGKYAAAAJ;FGyMx88AAAAJ;;8PbgiPkAAAAJ;nhuB5CEAAAAJ", "or_profile": "~Nan_Zhang9;~Yusen_Zhang1;~Wu_Guo3;~Prasenjit_Mitra1;~Rui_Zhang7", "aff": "Pennsylvania State University;Pennsylvania State University;;Pennsylvania State University;Pennsylvania State University", "aff_domain": "psu.edu;psu.edu;;psu.edu;psu.edu", "position": "PhD student;PhD student;;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nzhang2023famesumm,\ntitle={FaMeSumm: Investigating and Improving Faithfulness of Medical Summarization},\nauthor={Nan Zhang and Yusen Zhang and Wu Guo and Prasenjit Mitra and Rui Zhang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=KkR8wahYQN}\n}", "github": "", "project": "", "reviewers": "UEQF;fbJn;Ptjw;BQfn", "site": "https://openreview.net/forum?id=KkR8wahYQN", "pdf_size": 0, "rating": "4;4;4;4", "confidence": "3;5;3;4", "excitement": "4;3;4;3", "reproducibility": "3;4;4;5", "correctness": "4;3;4;3", "rating_avg": 4.0, "confidence_avg": 3.75, "excitement_avg": 3.5, "reproducibility_avg": 4.0, "correctness_avg": 3.5, "replies_avg": 14, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;", "linkedin": "nan-zhang-2ba83795/;;;prasenjit-mitra-962471/;", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Pennsylvania State University", "aff_unique_dep": "", "aff_unique_url": "https://www.psu.edu", "aff_unique_abbr": "PSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "KxGI7hLxAo", "title": "Mind the Gap Between Conversations for Improved Long-Term Dialogue Generation", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Knowing how to end and resume conversations over time is a natural part of communication, allowing for discussions to span weeks, months, or years. The duration of gaps between conversations dictates which topics are relevant and which questions to ask, and dialogue systems which do not explicitly model time may generate responses that are unnatural. In this work we explore the idea of making dialogue models aware of time, and present GapChat, a multi-session dialogue dataset in which the time between each session varies. 
While the dataset is constructed in real-time, progress on events in speakers' lives is simulated in order to create realistic dialogues occurring across a long timespan. We expose time information to the model and compare different representations of time and event progress. In human evaluation we show that time-aware models perform better in metrics that judge the relevance of the chosen topics and the information gained from the conversation.", "keywords": "Dialogue system;time-aware dialogue model;long-term conversation generation;multi-session dialogue", "primary_area": "", "supplementary_material": "", "author": "Qiang Zhang;Jason Naradowsky;Yusuke Miyao", "authorids": "~Qiang_Zhang15;~Jason_Naradowsky2;~Yusuke_Miyao2", "gender": "M;M;M", "homepage": "https://akitohisano.wixsite.com/ultraprofile;http://narad.github.io;https://mynlp.is.s.u-tokyo.ac.jp/en/", "dblp": ";47/7442;34/467.html", "google_scholar": "m_Qqt6MAAAAJ;w4d5WRcAAAAJ;", "or_profile": "~Qiang_Zhang15;~Jason_Naradowsky2;~Yusuke_Miyao2", "aff": "Tokyo University;The University of Tokyo;The University of Tokyo", "aff_domain": "u-tokyo.ac.jp;u-tokyo.ac.jp;u-tokyo.ac.jp", "position": "PhD student;Researcher;Full Professor", "bibtex": "@inproceedings{\nzhang2023mind,\ntitle={Mind the Gap Between Conversations for Improved Long-Term Dialogue Generation},\nauthor={Qiang Zhang and Jason Naradowsky and Yusuke Miyao},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=KxGI7hLxAo}\n}", "github": "", "project": "", "reviewers": "hTeo;b1Jg;j5Mt", "site": "https://openreview.net/forum?id=KxGI7hLxAo", "pdf_size": 0, "rating": "3;3;3", "confidence": "2;3;4", "excitement": "4;4;4", "reproducibility": "3;5;5", "correctness": "3;4;4", "rating_avg": 3.0, "confidence_avg": 3.0, "excitement_avg": 4.0, "reproducibility_avg": 4.333333333333333, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0009-0006-0303-534X;;", "linkedin": ";;", "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Tokyo", "aff_unique_dep": "", "aff_unique_url": "https://www.u-tokyo.ac.jp", "aff_unique_abbr": "UTokyo", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Japan" }, { "id": "L0SEfyrLsW", "title": "SELFOOD: Self-Supervised Out-Of-Distribution Detection via Learning to Rank", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Deep neural classifiers trained with cross-entropy loss (CE loss) often suffer from poor calibration, necessitating the task of out-of-distribution (OOD) detection. Traditional supervised OOD detection methods require expensive manual annotation of in-distribution and OOD samples. To address the annotation bottleneck, we introduce SELFOOD, a self-supervised OOD detection method that requires only in-distribution samples as supervision. We cast OOD detection as an inter-document intra-label (IDIL) ranking problem and train the classifier with our pairwise ranking loss, referred to as IDIL loss. Specifically, given a set of in-distribution documents and their labels, for each label, we train the classifier to rank the softmax scores of documents belonging to that label to be higher than the scores of documents that belong to other labels. 
Unlike CE loss, our IDIL loss function reaches zero when the desired confidence ranking is achieved and gradients are backpropagated to decrease probabilities associated with incorrect labels rather than continuously increasing the probability of the correct label. Extensive experiments with several classifiers on multiple classification datasets demonstrate the effectiveness of our method in both coarse- and fine-grained settings.", "keywords": "OOD detection;self-supervised;ranking", "primary_area": "", "supplementary_material": "", "author": "Dheeraj Mekala;Adithya Samavedhi;Chengyu Dong;Jingbo Shang", "authorids": "~Dheeraj_Mekala1;~Adithya_Samavedhi2;~Chengyu_Dong1;~Jingbo_Shang2", "gender": "M;M;;M", "homepage": "https://dheeraj7596.github.io/;;https://www.chengyu-dong.me/;https://shangjingbo1226.github.io/", "dblp": "192/1233;;14/3155;151/3145.html", "google_scholar": "QdE5rgkAAAAJ;HvXfmuUAAAAJ;Ppfi7j0AAAAJ;0SkFI4MAAAAJ", "or_profile": "~Dheeraj_Mekala1;~Adithya_Samavedhi2;~Chengyu_Dong1;~Jingbo_Shang2", "aff": "University of California, San Diego;University of California, San Diego;University of California, San Diego;University of California, San Diego", "aff_domain": "ucsd.edu;ucsd.edu;ucsd.edu;ucsd.edu", "position": "PhD student;MS student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nmekala2023selfood,\ntitle={{SELFOOD}: Self-Supervised Out-Of-Distribution Detection via Learning to Rank},\nauthor={Dheeraj Mekala and Adithya Samavedhi and Chengyu Dong and Jingbo Shang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=L0SEfyrLsW}\n}", "github": "", "project": "", "reviewers": "bvkd;At17;6CvH;MNGK;NgSb", "site": "https://openreview.net/forum?id=L0SEfyrLsW", "pdf_size": 0, "rating": "2;2;2;2;2", "confidence": "3;3;3;4;3", "excitement": "2;3;4;4;3", "reproducibility": "4;3;4;4;4", "correctness": "3;2;3;3;4", "rating_avg": 2.0, "confidence_avg": 3.2, "excitement_avg": 3.2, "reproducibility_avg": 3.8, "correctness_avg": 3.0, "replies_avg": 16, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;", "linkedin": "dheeraj7596/;;;", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of California, San Diego", "aff_unique_dep": "", "aff_unique_url": "https://www.ucsd.edu", "aff_unique_abbr": "UCSD", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "San Diego", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "L0u9Dkito7", "title": "Image and Text: Fighting the same Battle? Super Resolution Learning for Imbalanced Text Classification", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "In this paper, we propose SRL4NLP, a new approach for data augmentation by drawing an analogy between image and text processing: Super-resolution learning. This method is based on using high-resolution images to overcome the problem of low resolution images. While this technique is a common usage in image processing when images have a low resolution or are too noisy, it has never been used in NLP. We therefore propose the first adaptation of this method for text classification and evaluate its effectiveness on urgency detection from tweets posted in crisis situations, a very challenging task where messages are scarce and highly imbalanced. 
We show that this strategy is efficient when compared to competitive state-of-the-art data augmentation techniques on several benchmark datasets in two languages.", "keywords": "Data Augmentation;Social media;Crisis management", "primary_area": "", "supplementary_material": "", "author": "Romain Meunier;Benamara Farah;V\u00e9ronique Moriceau;Patricia Stolf", "authorids": "~Romain_Meunier1;~Benamara_Farah1;~V\u00e9ronique_Moriceau1;~Patricia_Stolf1", "gender": "M;F;F;F", "homepage": ";https://www.irit.fr/~Farah.Benamara/;https://www.irit.fr/~Veronique.Moriceau/;https://www.irit.fr/~Patricia.Stolf/", "dblp": ";63/1299.html;42/6383;", "google_scholar": ";0TeXcVMAAAAJ;https://scholar.google.com/citations?hl=fr;", "or_profile": "~Romain_Meunier1;~Benamara_Farah1;~V\u00e9ronique_Moriceau1;~Patricia_Stolf1", "aff": "IRIT;CNRS@CREATE;IRIT, universit\u00e9 de Toulouse;Universit\u00e9 Toulouse Jean Jaur\u00e8s - IRIT", "aff_domain": "irit.fr;cnrsatcreate.sg;irit.fr;irit.fr", "position": "PhD student;Full Professor;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nmeunier2023image,\ntitle={Image and Text: Fighting the same Battle? Super Resolution Learning for Imbalanced Text Classification},\nauthor={Romain Meunier and Benamara Farah and V{\\'e}ronique Moriceau and Patricia Stolf},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=L0u9Dkito7}\n}", "github": "", "project": "", "reviewers": "SHwF;p7BT;tS2X", "site": "https://openreview.net/forum?id=L0u9Dkito7", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;3", "excitement": "3;3;2", "reproducibility": "4;4;3", "correctness": "3;3;3", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 2.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;", "linkedin": "romain-meunier-b04a041a0/;;;", "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Institut de Recherche en Informatique de Toulouse;CNRS;Universit\u00e9 de Toulouse;Universit\u00e9 Toulouse Jean Jaur\u00e8s", "aff_unique_dep": ";CREATE;Institut de Recherche en Informatique de Toulouse (IRIT);Institut de Recherche en Informatique de Toulouse (IRIT)", "aff_unique_url": "https://www.irit.fr;https://www.cnrs.fr;https://www.univ-toulouse.fr;https://www.univ-tlse3.fr", "aff_unique_abbr": "IRIT;CNRS;UT;UT3", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Toulouse", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "France" }, { "id": "L4yVLb6cLu", "title": "Hi-ToM: A Benchmark for Evaluating Higher-Order Theory of Mind Reasoning in Large Language Models", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Theory of Mind (ToM) is the ability to reason about one's own and others' mental states. ToM plays a critical role in the development of intelligence, language understanding, and cognitive processes. While previous work has primarily focused on first and second-order ToM, we explore higher-order ToM, which involves recursive reasoning on others' beliefs. We introduce Hi-ToM, a Higher Order Theory of Mind benchmark. Our experimental evaluation using various Large Language Models (LLMs) indicates a decline in performance on higher-order ToM tasks, demonstrating the limitations of current LLMs.
We conduct a thorough analysis of different failure cases of LLMs, and share our thoughts on the implications of our findings on the future of NLP.", "keywords": "Higher Order Theory of Mind;Chain of Thought Prompting;Large Language Models;Deception", "primary_area": "", "supplementary_material": "", "author": "Yufan Wu;Yinghui He;Yilin Jia;Rada Mihalcea;Yulong Chen;Naihao Deng", "authorids": "~Yufan_Wu1;~Yinghui_He1;~Yilin_Jia1;~Rada_Mihalcea1;~Yulong_Chen2;~Naihao_Deng1", "gender": "M;F;M;F;M;M", "homepage": ";https://ying-hui-he.github.io/;;https://web.eecs.umich.edu/~mihalcea/;https://cylnlp.github.io/;https://dnaihao.github.io", "dblp": ";;;m/RadaMihalcea;157/4604-1;303/0640", "google_scholar": ";https://scholar.google.com/citations?hl=en;;https://scholar.google.com.tw/citations?user=UetM7FgAAAAJ;8P23zSkAAAAJ;3_qUtH4AAAAJ", "or_profile": "~Yufan_Wu1;~Yinghui_He1;~Yilin_Jia1;~Rada_Mihalcea1;~Yulong_Chen2;~Naihao_Deng1", "aff": "Shanghai Jiaotong University;Shanghai Jiaotong University;;University of Michigan;Westlake University;University of Michigan - Ann Arbor", "aff_domain": "sjtu.edu.cn;sjtu.edu.cn;;umich.edu;westlake.edu.cn;umich.edu", "position": "Undergrad student;Undergrad student;;Full Professor;PhD student;PhD student", "bibtex": "@inproceedings{\nwu2023hitom,\ntitle={Hi-ToM: A Benchmark for Evaluating Higher-Order Theory of Mind Reasoning in Large Language Models},\nauthor={Yufan Wu and Yinghui He and Yilin Jia and Rada Mihalcea and Yulong Chen and Naihao Deng},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=L4yVLb6cLu}\n}", "github": "", "project": "", "reviewers": "ZmFn;Trb7;YSgP", "site": "https://openreview.net/forum?id=L4yVLb6cLu", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;4", "excitement": "3;3;3", "reproducibility": "3;4;5", "correctness": "3;4;4", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 3.0, "reproducibility_avg": 4.0, "correctness_avg": 3.6666666666666665, "replies_avg": 10, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0000-0002-0767-6703;;0000-0003-0294-2897", "linkedin": "yufan-wu-a27b6b24b;yinghui-he-8b147321a/;yilin-jia-1277a1250/;;;naihao-deng/", "aff_unique_index": "0;0;1;2;1", "aff_unique_norm": "Shanghai Jiao Tong University;University of Michigan;Westlake University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.sjtu.edu.cn;https://www.umich.edu;https://www.westlake.edu.cn", "aff_unique_abbr": "SJTU;UM;WU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Ann Arbor", "aff_country_unique_index": "0;0;1;0;1", "aff_country_unique": "China;United States" }, { "id": "L7IW2foTq4", "title": "Attention-Enhancing Backdoor Attacks Against BERT-based Models", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Recent studies have revealed that Backdoor Attacks can threaten the safety of natural language processing (NLP) models. Investigating the strategies of backdoor attacks will help to understand the model's vulnerability. \nMost existing textual backdoor attacks focus on generating stealthy triggers or modifying model weights. In this paper, we directly target the interior structure of neural networks and the backdoor mechanism. We propose a novel Trojan Attention Loss (TAL), which enhances the Trojan behavior by directly manipulating the attention patterns. 
Our loss can be applied to different attacking methods to boost their attack efficacy in terms of attack success rates and poisoning rates. It applies to not only traditional dirty-label attacks, but also the more challenging clean-label attacks. We validate our method on different backbone models (BERT, RoBERTa, and DistilBERT) and various tasks (Sentiment Analysis, Toxic Detection, and Topic Classification).", "keywords": "Backdoor Attack;BERT;Attention Loss;natural language processing", "primary_area": "", "supplementary_material": "", "author": "Weimin Lyu;Songzhu Zheng;Lu Pang;Haibin Ling;Chao Chen", "authorids": "~Weimin_Lyu1;~Songzhu_Zheng1;~Lu_Pang2;~Haibin_Ling1;~Chao_Chen1", "gender": "M;M;;M;M", "homepage": "https://weimin17.github.io/;;;https://www3.cs.stonybrook.edu/~hling/;https://chaochen.github.io/", "dblp": "241/6097;226/4925;191/4669-1;93/3488;66/3019-12", "google_scholar": "IVed47cAAAAJ;vq0hpV4AAAAJ;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com/citations?hl=en;J-iIIFAAAAAJ", "or_profile": "~Weimin_Lyu1;~Songzhu_Zheng1;~Lu_Pang2;~Haibin_Ling1;~Chao_Chen1", "aff": "State University of New York at Stony Brook;Morgan Stanley;State University of New York at Stony Brook;State University of New York, Stony Brook;State University of New York, Stony Brook", "aff_domain": "stonybrook.edu;morganstanley.com;stonybrook.edu;stonybrook.edu;stonybrook.edu", "position": "PhD student;Researcher;PhD student;Professor;Assistant Professor", "bibtex": "@inproceedings{\nlyu2023attentionenhancing,\ntitle={Attention-Enhancing Backdoor Attacks Against {BERT}-based Models},\nauthor={Weimin Lyu and Songzhu Zheng and Lu Pang and Haibin Ling and Chao Chen},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=L7IW2foTq4}\n}", "github": "", "project": "", "reviewers": "7i2G;QosW;LMRb", "site": "https://openreview.net/forum?id=L7IW2foTq4", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;3;3", "excitement": "3;3;3", "reproducibility": "4;4;4", "correctness": "3;4;3", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.0, "reproducibility_avg": 4.0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;0000-0003-1703-6483", "linkedin": ";;;;", "aff_unique_index": "0;1;0;2;2", "aff_unique_norm": "State University of New York at Stony Brook;Morgan Stanley;State University of New York", "aff_unique_dep": ";;", "aff_unique_url": "https://www.stonybrook.edu;https://www.morganstanley.com;https://www.stonybrook.edu", "aff_unique_abbr": "SUNY Stony Brook;Morgan Stanley;SUNY Stony Brook", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Stony Brook;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "L7YoWxQq5t", "title": "Program Translation via Code Distillation", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Software version migration and program translation are an important and costly part of the lifecycle of large codebases. Traditional machine translation relies on parallel corpora for supervised translation, which is not feasible for program translation due to a dearth of aligned data. Recent unsupervised neural machine translation techniques have overcome data limitations by including techniques such as back translation and low level compiler intermediate representations (IR).
These methods face significant challenges due to the noise in code snippet alignment and the diversity of IRs respectively. In this paper we propose a novel model called Code Distillation (CoDist) whereby we capture the semantic and structural equivalence of code in a language agnostic intermediate representation. Distilled code serves as a translation pivot for any programming language, leading by construction to parallel corpora which scale to all available source code by simply applying the distillation compiler. We demonstrate that our approach achieves state-of-the-art performance on CodeXGLUE and TransCoder GeeksForGeeks translation benchmarks, with an average absolute increase of 12.7% on the TransCoder GeeksforGeeks translation benchmark compare to TransCoder-ST.", "keywords": "Program Translation;Intermediate Representations;Neural Machine Translation;Multilingual Code Generation;Pre-training", "primary_area": "", "supplementary_material": "", "author": "Yufan Huang;Mengnan Qi;Yongqiang Yao;Maoquan Wang;Bin Gu;Colin Clement;Neel Sundaresan", "authorids": "~Yufan_Huang3;~Mengnan_Qi2;~Yongqiang_Yao2;~Maoquan_Wang2;~Bin_Gu1;~Colin_Clement1;~Neel_Sundaresan3", "gender": "M;M;M;M;M;;", "homepage": "https://www.microsoft.com/en-us/research/people/yufanhuang/;https://github.com/Mnangua;https://github.com/yongqiang-yao;https://github.com/ms-maoquan;https://mbzuai.ac.ae/study/faculty/bin-gu/;https://cbclement.com;https://www.linkedin.com/in/neel-sundaresan-a964a2/", "dblp": ";305/9760.html;;;29/1758-1;;s/NeelSundaresan.html", "google_scholar": ";;;;Vo8OgCgAAAAJ;J2aZLEYAAAAJ;", "or_profile": "~Yufan_Huang3;~Mengnan_Qi2;~Yongqiang_Yao2;~Maoquan_Wang2;~Bin_Gu1;~Colin_Clement1;~Neel_Sundaresan3", "aff": ";;Microsoft;Microsoft;Mohamed bin Zayed University of Artificial Intelligence;Microsoft;University of California, Santa Cruz", "aff_domain": ";;microsoft.com;microsoft.com;mbzuai.ac.ae;microsoft.com;ucsc.edu", "position": ";;Researcher;Researcher;Assistant Professor;Senior Research Manager;Full Professor (adjunct)", "bibtex": "@inproceedings{\nhuang2023program,\ntitle={Program Translation via Code Distillation},\nauthor={Yufan Huang and Mengnan Qi and Yongqiang Yao and Maoquan Wang and Bin Gu and Colin Clement and Neel Sundaresan},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=L7YoWxQq5t}\n}", "github": "", "project": "", "reviewers": "soRb;KDdW;U8tt", "site": "https://openreview.net/forum?id=L7YoWxQq5t", "pdf_size": 0, "rating": "5;5;5", "confidence": "3;4;2", "excitement": "4;4;4", "reproducibility": "3;4;3", "correctness": "3;4;3", "rating_avg": 5.0, "confidence_avg": 3.0, "excitement_avg": 4.0, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;0000-0001-6049-1815;0000-0002-3727-7308;", "linkedin": ";;;maoquan-wang-0917b520a/;;colin-b-clement/;neel-sundaresan-a964a2/", "aff_unique_index": "0;0;1;0;2", "aff_unique_norm": "Microsoft;Mohamed bin Zayed University of Artificial Intelligence;University of California, Santa Cruz", "aff_unique_dep": "Microsoft Corporation;;", "aff_unique_url": "https://www.microsoft.com;https://mbzuai.ac.ae;https://www.ucsc.edu", "aff_unique_abbr": "Microsoft;MBZUAI;UCSC", "aff_campus_unique_index": "1", "aff_campus_unique": ";Santa Cruz", "aff_country_unique_index": "0;0;1;0;0", "aff_country_unique": "United States;United Arab 
Emirates" }, { "id": "L7ZBpZZ8Va", "title": "Orthogonal Subspace Learning for Language Model Continual Learning", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Benefiting from massive corpora and advanced hardware, large language models (LLMs) exhibit remarkable capabilities in language understanding and generation. However, their performance degrades in scenarios where multiple tasks are encountered sequentially, also known as catastrophic forgetting. In this paper, we propose orthogonal low-rank adaptation (O-LoRA), a simple and efficient approach for continual learning in language models, effectively mitigating catastrophic forgetting while learning new tasks. Specifically, O-LoRA learns tasks in different (low-rank) vector subspaces that are kept orthogonal to each other in order to minimize interference. Our method induces only marginal additional parameter costs and requires no user data storage for replay. Experimental results on continual learning benchmarks show that our method outperforms state-of-the-art methods. Furthermore, compared to previous approaches, our method excels in preserving the generalization ability of LLMs on unseen tasks.", "keywords": "continual learning;orthogonal subspace;paramemter efficient tuning", "primary_area": "", "supplementary_material": "", "author": "Xiao Wang;Tianze Chen;Qiming Ge;Han Xia;Rong Bao;Rui Zheng;Qi Zhang;Tao Gui;Xuanjing Huang", "authorids": "~Xiao_Wang12;~Tianze_Chen1;~Qiming_Ge2;~Han_Xia1;~Rong_Bao1;~Rui_Zheng1;~Qi_Zhang8;~Tao_Gui1;~Xuanjing_Huang1", "gender": "M;M;M;M;M;M;M;M;F", "homepage": "https://xiaowangnlp.github.io/;https://www.pixiv.net/users/55094416;https://icesolitary.github.io/;https://waltersumbon.github.io/;https://github.com/rbao2018;https://github.com/ruizheng20;http://qizhang.info;;https://xuanjing-huang.github.io/", "dblp": ";;;;214/6451;;52/323-1;135/6973;05/6735-1", "google_scholar": "https://scholar.google.com.hk/citations?hl=zh-CN;;;;teGqP3kAAAAJ;https://scholar.google.com.hk/citations?user=7Z0V_SoAAAAJ;XfqR3yYAAAAJ;;RGsMgZA4H78C", "or_profile": "~Xiao_Wang12;~Tianze_Chen1;~Qiming_Ge2;~Han_Xia1;~Rong_Bao1;~Rui_Zheng1;~Qi_Zhang8;~Tao_Gui1;~Xuanjing_Huang1", "aff": "Fudan University;;Fudan University;Fudan University;Fudan University;Fudan University;Fudan University;Fudan University;Fudan University", "aff_domain": "fudan.edu.cn;;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn", "position": "PhD student;;MS student;MS student;PhD student;PhD student;Full Professor;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nwang2023orthogonal,\ntitle={Orthogonal Subspace Learning for Language Model Continual Learning},\nauthor={Xiao Wang and Tianze Chen and Qiming Ge and Han Xia and Rong Bao and Rui Zheng and Qi Zhang and Tao Gui and Xuanjing Huang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=L7ZBpZZ8Va}\n}", "github": "", "project": "", "reviewers": "7Yor;GrPc;jxjZ;PwhU", "site": "https://openreview.net/forum?id=L7ZBpZZ8Va", "pdf_size": 0, "rating": "3;3;3;3", "confidence": "4;4;5;5", "excitement": "4;3;2;4", "reproducibility": "4;3;2;3", "correctness": "4;2;2;4", "rating_avg": 3.0, "confidence_avg": 4.5, "excitement_avg": 3.25, "reproducibility_avg": 3.0, "correctness_avg": 3.0, "replies_avg": 14, "authors#_avg": 9, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;;;;0000-0001-9197-9426", "linkedin": ";;;;;;;;", 
"aff_unique_index": "0;0;0;0;0;0;0;0", "aff_unique_norm": "Fudan University", "aff_unique_dep": "", "aff_unique_url": "https://www.fudan.edu.cn", "aff_unique_abbr": "Fudan", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "L8Cxea5krb", "title": "BERTie Bott's Every Flavor Labels: A Tasty Introduction to Semantic Role Labeling for Galician", "track": "main", "status": "Long Main", "tldr": "", "abstract": "In this paper, we leverage existing corpora, WordNet, and dependency parsing to build the first Galician dataset for training semantic role labeling systems in an effort to expand available NLP resources. Additionally, we introduce verb indexing, a new pre-processing method, which helps increase the performance when semantically parsing highly-complex sentences. We use transfer-learning to test both the resource and the verb indexing method. Our results show that the effects of verb indexing were amplified in scenarios where the model was both pre-trained and fine-tuned on datasets utilizing the method, but improvements are also noticeable when only used during fine-tuning. The best-performing Galician SRL model achieved an f1 score of 0.74, introducing a baseline for future Galician SRL systems. We also tested our method on Spanish where we achieved an f1 score of 0.83, outperforming the baseline set by the 2009 CoNLL Shared Task by 0.025 showing the merits of our verb indexing method for pre-processing.", "keywords": "semantic role labeling;semantic parsing;Galician;Spanish;srl", "primary_area": "", "supplementary_material": "", "author": "Micaella Bruton;Meriem Beloucif", "authorids": "~Micaella_Bruton1;~Meriem_Beloucif1", "gender": "Not Specified;", "homepage": ";https://www.inf.uni-hamburg.de/en/inst/ab/lt/people/meriem-beloucif.html", "dblp": ";136/9157", "google_scholar": "USVol18AAAAJ;https://scholar.google.com.hk/citations?user=yRo5n7cAAAAJ", "or_profile": "~Micaella_Bruton1;~Meriem_Beloucif1", "aff": "Uppsala University;Uppsala University", "aff_domain": "uu.se;uu.se", "position": "MS student;Assistant Professor", "bibtex": "@inproceedings{\nbruton2023bertie,\ntitle={{BERT}ie Bott's Every Flavor Labels: A Tasty Introduction to Semantic Role Labeling for Galician},\nauthor={Micaella Bruton and Meriem Beloucif},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=L8Cxea5krb}\n}", "github": "", "project": "", "reviewers": "Dkyz;Swd1;9UHD;x4WB", "site": "https://openreview.net/forum?id=L8Cxea5krb", "pdf_size": 0, "rating": "2;2;2;2", "confidence": "4;2;4;4", "excitement": "3;4;3;4", "reproducibility": "3;3;3;4", "correctness": "3;3;2;4", "rating_avg": 2.0, "confidence_avg": 3.5, "excitement_avg": 3.5, "reproducibility_avg": 3.25, "correctness_avg": 3.0, "replies_avg": 14, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0009-0006-5621-5440;", "linkedin": "micaellabruton/;", "aff_unique_index": "0;0", "aff_unique_norm": "Uppsala University", "aff_unique_dep": "", "aff_unique_url": "https://www.uu.se", "aff_unique_abbr": "UU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Sweden" }, { "id": "L8W6RyMRmL", "title": "Reduce Human Labor On Evaluating Conversational Information Retrieval System: A Human-Machine Collaboration Approach", "track": "main", "status": "Long Main", "tldr": "", "abstract": 
"Evaluating conversational information retrieval (CIR) systems is a challenging task that requires a significant amount of human labor for annotation. It is imperative to invest significant effort into researching more labor-effective methods for evaluating CIR systems. To touch upon this challenge, we take the first step to involve active testing in CIR evaluation and propose a novel method, called HomCoE. It strategically selects a few data for human annotation, then calibrates the evaluation results to eliminate evaluation biases. As such, it makes an accurate evaluation of the CIR system at low human labor. We experimentally reveal that it consumes less than 1\\% of human labor and achieves a consistency rate of 95\\%-99\\% with human evaluation results. This emphasizes the superiority of our method over other baselines.", "keywords": "Interactive Evaluation;Human-Machine Collaboration;Conversational Information Retrieval", "primary_area": "", "supplementary_material": "", "author": "Chen Huang;Peixin Qin;Wenqiang Lei;Jiancheng Lv", "authorids": "~Chen_Huang7;~Peixin_Qin1;~Wenqiang_Lei1;~Jiancheng_Lv2", "gender": ";M;M;M", "homepage": ";https://github.com/mumen798;https://sites.google.com/view/wenqianghome/home;https://cs.scu.edu.cn/info/1303/13767.htm", "dblp": ";;167/9604;", "google_scholar": ";;https://scholar.google.com.hk/citations?user=qexdxuEAAAAJ;https://scholar.google.com/citations?hl=zh-CN", "or_profile": "~Chen_Huang7;~Peixin_Qin1;~Wenqiang_Lei1;~Jiancheng_Lv2", "aff": ";Sichuan University;Sichuan University;Sichuan University", "aff_domain": ";scu.edu.cn;scu.edu.cn;scu.edu.cn", "position": ";Undergrad student;Full Professor;Full Professor", "bibtex": "@inproceedings{\nhuang2023reduce,\ntitle={Reduce Human Labor On Evaluating Conversational Information Retrieval System: A Human-Machine Collaboration Approach},\nauthor={Chen Huang and Peixin Qin and Wenqiang Lei and Jiancheng Lv},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=L8W6RyMRmL}\n}", "github": "", "project": "", "reviewers": "ReMS;sHgf;sq7x;Hngp", "site": "https://openreview.net/forum?id=L8W6RyMRmL", "pdf_size": 0, "rating": "4;4;4;4", "confidence": "1;2;3;3", "excitement": "4;3;3;4", "reproducibility": "3;3;2;4", "correctness": "4;3;4;4", "rating_avg": 4.0, "confidence_avg": 2.25, "excitement_avg": 3.5, "reproducibility_avg": 3.0, "correctness_avg": 3.75, "replies_avg": 14, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;", "linkedin": ";;;", "aff_unique_index": "0;0;0", "aff_unique_norm": "Sichuan University", "aff_unique_dep": "", "aff_unique_url": "https://www.scu.edu.cn", "aff_unique_abbr": "SCU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "LCEbV5nsb8", "title": "SummIt: Iterative Text Summarization via ChatGPT", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Existing text summarization systems have made significant progress in recent years, but typically generate summaries in a single step. The one-shot summarization setting is sometimes inadequate, however, as the generated summary may contain hallucinations or overlook important details related to the reader's interests. In this paper, we address this limitation by proposing SummIt, an iterative text summarization framework based on large language models like ChatGPT. 
\nOur framework enables the model to refine the generated summary iteratively through self-evaluation and feedback, closely resembling the iterative process humans undertake when drafting and revising summaries. Furthermore, we explore the potential benefits of integrating knowledge and topic extractors into the framework to enhance summary faithfulness and controllability. We evaluate the performance of our framework on three benchmark summarization datasets through empirical and qualitative analyses. We also conduct a human evaluation to validate the effectiveness of the model's refinements and find a potential issue of over-correction.", "keywords": "summarization;large language model;text editing", "primary_area": "", "supplementary_material": "", "author": "Haopeng Zhang;Xiao Liu;Jiawei Zhang", "authorids": "~Haopeng_Zhang3;~Xiao_Liu22;~Jiawei_Zhang3", "gender": "M;M;", "homepage": "https://hpzhang94.github.io/;https://haroldliuj.github.io;http://jiaweizhang.net/", "dblp": "256/5136;82/1364-34;10/239-1", "google_scholar": "https://scholar.google.com/citations?hl=en;E97kG9IAAAAJ;7AkZSJsAAAAJ", "or_profile": "~Haopeng_Zhang3;~Xiao_Liu22;~Jiawei_Zhang3", "aff": "University of California, Davis;University of California, Davis;University of California, Davis", "aff_domain": "ucdavis.edu;ucdavis.edu;ucdavis.edu", "position": "PhD student;PhD student;Associate Professor", "bibtex": "@inproceedings{\nzhang2023summit,\ntitle={SummIt: Iterative Text Summarization via Chat{GPT}},\nauthor={Haopeng Zhang and Xiao Liu and Jiawei Zhang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=LCEbV5nsb8}\n}", "github": "", "project": "", "reviewers": "h2TX;Ceeh;dwUH", "site": "https://openreview.net/forum?id=LCEbV5nsb8", "pdf_size": 0, "rating": "2;2;2", "confidence": "4;2;3", "excitement": "4;4;3", "reproducibility": "4;4;4", "correctness": "3;3;3", "rating_avg": 2.0, "confidence_avg": 3.0, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.0, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0002-2111-7617", "linkedin": ";%E9%AA%81-%E5%88%98-2777101a1/;", "aff_unique_index": "0;0;0", "aff_unique_norm": "University of California, Davis", "aff_unique_dep": "", "aff_unique_url": "https://www.ucdavis.edu", "aff_unique_abbr": "UC Davis", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Davis", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "LDAgFeA55o", "title": "A Benchmark for Semi-Inductive Link Prediction in Knowledge Graphs", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "Semi-inductive link prediction (LP) in knowledge graphs (KG) is the task of predicting facts for new, previously unseen entities based on context information. Although new entities can be integrated by retraining the model from scratch in principle, such an approach is infeasible for large-scale KGs, where retraining is expensive and new entities may arise frequently. In this paper, we propose and describe a large-scale benchmark to evaluate semi-inductive LP models. The benchmark is based on and extends Wikidata5M: It provides transductive, k-shot, and 0-shot LP tasks, each varying the available information from (i) only KG structure, to (ii) including textual mentions, and (iii) detailed descriptions of the entities. 
We report on a small study of recent approaches and found that semi-inductive LP performance is far from transductive performance on long-tail entities throughout all experiments. The benchmark provides a test bed for further research into integrating context and textual information in semi-inductive LP models.", "keywords": "semi-inductive;link prediction;knowledge graph;unseen entity", "primary_area": "", "supplementary_material": "", "author": "Adrian Kochsiek;Rainer Gemulla", "authorids": "~Adrian_Kochsiek1;~Rainer_Gemulla1", "gender": "M;M", "homepage": "https://www.uni-mannheim.de/dws/people/researchers/phd-students/adrian-kochsiek/;https://dws.informatik.uni-mannheim.de/en/people/professors/prof-dr-rainer-gemulla/", "dblp": "279/8734;32/5357", "google_scholar": ";https://scholar.google.de/citations?user=OnKo6KkAAAAJ", "or_profile": "~Adrian_Kochsiek1;~Rainer_Gemulla1", "aff": "Universit\u00e4t Mannheim;Universit\u00e4t Mannheim, Germany", "aff_domain": "uni-mannheim.de;uni-mannheim.de", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\nkochsiek2023a,\ntitle={A Benchmark for Semi-Inductive Link Prediction in Knowledge Graphs},\nauthor={Adrian Kochsiek and Rainer Gemulla},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=LDAgFeA55o}\n}", "github": "", "project": "", "reviewers": "onsE;jB8R;WJjs;NEnw", "site": "https://openreview.net/forum?id=LDAgFeA55o", "pdf_size": 0, "rating": "3;3;3;3", "confidence": "4;3;3;3", "excitement": "4;3;3;3", "reproducibility": "4;2;3;5", "correctness": "4;3;4;3", "rating_avg": 3.0, "confidence_avg": 3.25, "excitement_avg": 3.25, "reproducibility_avg": 3.5, "correctness_avg": 3.5, "replies_avg": 14, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0003-2762-0050", "linkedin": ";", "aff_unique_index": "0;1", "aff_unique_norm": "University of Mannheim;Universit\u00e4t Mannheim", "aff_unique_dep": ";", "aff_unique_url": "https://www.uni-mannheim.de;https://www.uni-mannheim.de", "aff_unique_abbr": "UM;UM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Germany" }, { "id": "LGX5hFWPK2", "title": "CoMPosT: Characterizing and Evaluating Caricature in LLM Simulations", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Recent work has aimed to capture nuances of human behavior by using LLMs to simulate responses from particular demographics in settings like social science experiments and public opinion surveys. However, there are currently no established ways to discuss or evaluate the quality of such LLM simulations. Moreover, there is growing concern that these LLM simulations are flattened caricatures of the personas that they aim to simulate, failing to capture the multidimensionality of people and perpetuating stereotypes. To bridge these gaps, we present CoMPosT, a framework to characterize LLM simulations using four dimensions: Context, Model, Persona, and Topic. We use this framework to measure open-ended LLM simulations\u2019 susceptibility to caricature, defined via two criteria: individuation and exaggeration. We evaluate the level of caricature in scenarios from existing work on LLM simulations. 
We find that for GPT-4, simulations of certain demographics (political and marginalized groups) and topics (general, uncontroversial) are highly susceptible to caricature.", "keywords": "large language models;caricatures;language model simulations;survey replication", "primary_area": "", "supplementary_material": "", "author": "Myra Cheng;Tiziano Piccardi;Diyi Yang", "authorids": "~Myra_Cheng1;~Tiziano_Piccardi1;~Diyi_Yang2", "gender": ";;F", "homepage": "http://myracheng.github.io;https://piccardi.me/;https://cs.stanford.edu/~diyiy/", "dblp": "226/7067;144/5357;70/11145", "google_scholar": "gaslQl8AAAAJ;hj-gvXIAAAAJ;j9jhYqQAAAAJ", "or_profile": "~Myra_Cheng1;~Tiziano_Piccardi1;~Diyi_Yang2", "aff": "Stanford University;Stanford University;Stanford University", "aff_domain": "stanford.edu;stanford.edu;stanford.edu", "position": "PhD student;Postdoc;Assistant Professor", "bibtex": "@inproceedings{\ncheng2023compost,\ntitle={Co{MP}osT: Characterizing and Evaluating Caricature in {LLM} Simulations},\nauthor={Myra Cheng and Tiziano Piccardi and Diyi Yang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=LGX5hFWPK2}\n}", "github": "", "project": "", "reviewers": "2j88;M2EF;dXuy;ETgT", "site": "https://openreview.net/forum?id=LGX5hFWPK2", "pdf_size": 0, "rating": "4;4;4;4", "confidence": "4;4;2;4", "excitement": "3;4;3;4", "reproducibility": "5;4;4;4", "correctness": "4;4;4;4", "rating_avg": 4.0, "confidence_avg": 3.5, "excitement_avg": 3.5, "reproducibility_avg": 4.25, "correctness_avg": 4.0, "replies_avg": 14, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": ";;", "aff_unique_index": "0;0;0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "", "aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Stanford", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "LNKGWaRtlE", "title": "Ecologically Valid Explanations for Label Variation in NLI", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "Human label variation, or annotation disagreement, exists in many natural language processing (NLP) tasks, including natural language inference (NLI). To gain direct evidence of how NLI label variation arises, we build LiveNLI, an English dataset of 1,415 ecologically valid explanations (annotators explain the NLI labels they chose) for 122 MNLI items (at least 10 explanations per item). The LiveNLI explanations confirm that people can systematically vary on their interpretation and highlight within-label variation: annotators sometimes choose the same label for different reasons. This suggests that explanations are crucial for navigating label interpretations in general. 
We few-shot prompt large language models to generate explanations, but the results are inconsistent: they sometimes produce valid and informative explanations, but also generate implausible ones that do not support the label, highlighting directions for improvement.", "keywords": "annotation disagreement;explanation;interpretability;natural language inference;human label variation;textual inferences", "primary_area": "", "supplementary_material": "", "author": "Nan-Jiang Jiang;Chenhao Tan;Marie-Catherine de Marneffe", "authorids": "~Nan-Jiang_Jiang1;~Chenhao_Tan1;~Marie-Catherine_de_Marneffe1", "gender": "M;F;", "homepage": "https://chenhaot.com/;https://cental.uclouvain.be/team/mcdm/;https://njjiang.github.io", "dblp": "95/8314;36/6578;245/8631", "google_scholar": "https://scholar.google.com.tw/citations?user=KGMaP18AAAAJ;qBdkqYcAAAAJ;NLIHDucAAAAJ", "or_profile": "~Chenhao_Tan1;~Marie-Catherine_de_Marneffe1;~Nanjiang_Jiang1", "aff": "University of Chicago;The Ohio State University;Ohio State University", "aff_domain": "uchicago.edu;osu.edu;osu.edu", "position": "Assistant Professor;Associate Professor;PhD student", "bibtex": "@inproceedings{\njiang2023ecologically,\ntitle={Ecologically Valid Explanations for Label Variation in {NLI}},\nauthor={Nan-Jiang Jiang and Chenhao Tan and Marie-Catherine de Marneffe},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=LNKGWaRtlE}\n}", "github": "", "project": "", "reviewers": "SDvm;9znK;sZVS", "site": "https://openreview.net/forum?id=LNKGWaRtlE", "pdf_size": 0, "rating": "5;5;5", "confidence": "3;4;4", "excitement": "2;4;3", "reproducibility": "4;4;4", "correctness": "2;4;3", "rating_avg": 5.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.0, "reproducibility_avg": 4.0, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": ";;", "aff_unique_index": "0;1;1", "aff_unique_norm": "University of Chicago;Ohio State University", "aff_unique_dep": ";", "aff_unique_url": "https://www.uchicago.edu;https://www.osu.edu", "aff_unique_abbr": "UChicago;OSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "LPtO1evrGa", "title": "Prompting and Evaluating Large Language Models for Proactive Dialogues: Clarification, Target-guided, and Non-collaboration", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Conversational systems based on Large Language Models (LLMs), such as ChatGPT, show exceptional proficiency in context understanding and response generation. However, they still possess limitations, such as failing to ask clarifying questions for ambiguous queries or to refuse users' unreasonable requests, both of which are considered key aspects of a conversational agent's proactivity. This raises the question of whether LLM-based conversational systems are equipped to handle proactive dialogue problems. In this work, we conduct a comprehensive analysis of LLM-based conversational systems, specifically focusing on three key aspects of proactive dialogues: clarification, target-guided, and non-collaborative dialogues. To trigger the proactivity of LLMs, we propose the Proactive Chain-of-Thought prompting scheme, which augments LLMs with the goal planning capability over descriptive reasoning chains. 
Empirical findings are discussed to promote future studies on LLM-based proactive dialogue systems.", "keywords": "Proactive Dialogue;Asking Clarification Question;Target-guided Conversation", "primary_area": "", "supplementary_material": "", "author": "Yang Deng;Lizi Liao;Liang CHEN;Hongru WANG;Wenqiang Lei;Tat-Seng Chua", "authorids": "~Yang_Deng4;~Lizi_Liao1;~Liang_CHEN15;~Hongru_WANG1;~Wenqiang_Lei1;~Tat-Seng_Chua2", "gender": "M;F;M;M;M;M", "homepage": "https://dengyang17.github.io/;https://liziliao.github.io/;https://chanliang.github.io/;https://rulegreen.github.io/;https://sites.google.com/view/wenqianghome/home;http://www.comp.nus.edu.sg/~chuats/", "dblp": "115/6282-2;149/1249;;72/1462-3;167/9604;", "google_scholar": "https://scholar.google.com.hk/citations?user=OshWT3UAAAAJ;https://scholar.google.com.sg/citations?user=W2b08EUAAAAJ;0iatxnIAAAAJ;s6UtVYUAAAAJ;https://scholar.google.com.hk/citations?user=qexdxuEAAAAJ;https://scholar.google.com.tw/citations?user=Z9DWCBEAAAAJ", "or_profile": "~Yang_Deng4;~Lizi_Liao1;~Liang_CHEN15;~Hongru_WANG1;~Wenqiang_Lei1;~Tat-seng_Chua1", "aff": "The Chinese University of Hong Kong;Singapore Management University;Chinese University of Hong Kong, The Chinese University of Hong Kong;University of Edinburgh;Sichuan University;National University of Singapore", "aff_domain": "cuhk.edu.hk;smu.edu.sg;se.cuhk.edu.hk;ed.ac.uk;scu.edu.cn;nus.edu.sg", "position": "PhD student;Assistant Professor;PhD student;Visiting Student;Full Professor;Full Professor", "bibtex": "@inproceedings{\ndeng2023prompting,\ntitle={Prompting and Evaluating Large Language Models for Proactive Dialogues: Clarification, Target-guided, and Non-collaboration},\nauthor={Yang Deng and Lizi Liao and Liang CHEN and Hongru WANG and Wenqiang Lei and Tat-Seng Chua},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=LPtO1evrGa}\n}", "github": "", "project": "", "reviewers": "eUzu;e85V;NW9t", "site": "https://openreview.net/forum?id=LPtO1evrGa", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;4;4", "excitement": "4;3;3", "reproducibility": "3;2;4", "correctness": "3;2;4", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.0, "correctness_avg": 3.0, "replies_avg": 9, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0000-0001-5027-0138;;0000-0001-6097-7807", "linkedin": ";;;;;", "aff_unique_index": "0;1;0;2;3;4", "aff_unique_norm": "Chinese University of Hong Kong;Singapore Management University;University of Edinburgh;Sichuan University;National University of Singapore", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.cuhk.edu.hk;https://www.smu.edu.sg;https://www.ed.ac.uk;https://www.scu.edu.cn;https://www.nus.edu.sg", "aff_unique_abbr": "CUHK;SMU;Edinburgh;SCU;NUS", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;1;0;2;0;1", "aff_country_unique": "China;Singapore;United Kingdom" }, { "id": "LQqlapYGeR", "title": "An Iteratively Parallel Generation Method with the Pre-Filling Strategy for Document-level Event Extraction", "track": "main", "status": "Long Main", "tldr": "", "abstract": "In document-level event extraction (DEE) tasks, a document typically contains many event records with multiple event roles. Therefore, accurately extracting all event records is a big challenge since the number of event records is not given. 
Previous works present entity-based directed acyclic graph (EDAG) generation methods that generate event roles autoregressively, which requires a given generation order. Meanwhile, parallel methods have been proposed to generate all event roles simultaneously, but they suffer from inadequate training, which manifests as zero accuracy on some event roles. In this paper, we propose an Iteratively Parallel Generation method with the Pre-Filling strategy (IPGPF). Event roles in an event record are generated in parallel to avoid order selection, and the event records are iteratively generated to utilize historical results. Experiments on two public datasets show that our IPGPF improves F1 by $11.7$ over previous parallel models and by up to $5.1$ over auto-regressive models under controlled-variable settings. Moreover, our enhanced IPGPF outperforms other entity-enhanced models and achieves new state-of-the-art performance.", "keywords": "natural language processing;information extraction;document-level event extraction;parallel generation", "primary_area": "", "supplementary_material": "", "author": "Guanhua Huang;Runxin Xu;ying zeng;Jiaze Chen;Zhouwang Yang;Weinan E", "authorids": "~Guanhua_Huang1;~Runxin_Xu2;~ying_zeng4;~Jiaze_Chen1;~Zhouwang_Yang1;~Weinan_E2", "gender": "M;M;F;M;M;", "homepage": ";;;;;https://web.math.princeton.edu/~weinan/", "dblp": "88/9542;267/5291.html;;182/4496;;06/9390.html", "google_scholar": "SEgFVw0AAAAJ;dRp21l4AAAAJ;https://scholar.google.com/citations?hl=en;Vt1j3kEAAAAJ;;", "or_profile": "~Guanhua_Huang1;~Runxin_Xu2;~ying_zeng4;~Jiaze_Chen1;~Zhouwang_Yang1;~Weinan_E2", "aff": "University of Science and Technology of China;Peking University;;;University of Science and Technology of China;Peking University", "aff_domain": "mail.ustc.edu.cn;pku.edu.cn;;;ustc.edu.cn;pku.edu.cn", "position": "PhD student;MS student;;;Full Professor;Full Professor", "bibtex": "@inproceedings{\nhuang2023an,\ntitle={An Iteratively Parallel Generation Method with the Pre-Filling Strategy for Document-level Event Extraction},\nauthor={Guanhua Huang and Runxin Xu and ying zeng and Jiaze Chen and Zhouwang Yang and Weinan E},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=LQqlapYGeR}\n}", "github": "", "project": "", "reviewers": "7svK;wn1f;wZ9b", "site": "https://openreview.net/forum?id=LQqlapYGeR", "pdf_size": 0, "rating": "4;4;4", "confidence": "5;3;1", "excitement": "4;3;4", "reproducibility": "4;3;3", "correctness": "4;4;4", "rating_avg": 4.0, "confidence_avg": 3.0, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 4.0, "replies_avg": 9, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;0000-0002-9454-9146;", "linkedin": ";;;;;", "aff_unique_index": "0;1;0;1", "aff_unique_norm": "University of Science and Technology of China;Peking University", "aff_unique_dep": ";", "aff_unique_url": "http://www.ustc.edu.cn;http://www.pku.edu.cn", "aff_unique_abbr": "USTC;Peking U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "LRRThBBiov", "title": "PRESTO: A Multilingual Dataset for Parsing Realistic Task-Oriented Dialogs", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Research interest in task-oriented dialogs has increased as systems such as Google Assistant, Alexa and Siri have become ubiquitous in everyday life. 
However, the impact of academic research in this area has been limited by the lack of datasets that realistically capture the wide array of user pain points. To enable research on some of the more challenging aspects of parsing realistic conversations, we introduce PRESTO, a public dataset of over 550K contextual multilingual conversations between humans and virtual assistants. PRESTO contains a diverse array of challenges that occur in real-world NLU tasks such as disfluencies, code-switching, and revisions. It is the only large scale human generated conversational parsing dataset that provides structured context such as a user's contacts and lists for each example. Our mT5 model based baselines demonstrate that the conversational phenomenon present in PRESTO are challenging to model, which is further pronounced in a low-resource setup.", "keywords": "task oriented dialogs;semantic parsing;nlp", "primary_area": "", "supplementary_material": "", "author": "Rahul Goel;Waleed Ammar;Aditya Gupta;Siddharth Vashishtha;Motoki Sano;Faiz Surani;Max Chang;HyunJeong Choe;David Greene;Chuan He;Rattima Nitisaroj;Anna Trukhina;Shachi Paul;Pararth Shah;Rushin Shah;Zhou Yu", "authorids": "~Rahul_Goel1;~Waleed_Ammar1;~Aditya_Gupta2;~Siddharth_Vashishtha1;~Motoki_Sano1;~Faiz_Surani1;~Max_Chang1;~HyunJeong_Choe1;~David_Greene1;~Chuan_He4;~Rattima_Nitisaroj1;~Anna_Trukhina1;~Shachi_Paul1;~Pararth_Shah1;~Rushin_Shah2;~Zhou_Yu1", "gender": "M;;M;M;M;M;M;F;;M;;;F;M;M;F", "homepage": ";;https://research.google/people/AdityaGupta/;https://sidsvash26.github.io/;;https://faizsurani.com;;;;;;;;;;http://www.cs.columbia.edu/~zhouyu/", "dblp": "164/1120;38/601.html;;236/4588;136/8709;;;;;;;;230/8553;151/3076;;83/3205", "google_scholar": ";4NZ58cQAAAAJ;HW7IZ6sAAAAJ;4Q4zhC0AAAAJ;;;;;;;;;a_95_VkAAAAJ;F3kVP28AAAAJ;vY6iDeQAAAAJ;https://scholar.google.com.tw/citations?user=jee2Dy0AAAAJ", "or_profile": "~Rahul_Goel1;~Waleed_Ammar1;~Aditya_Gupta2;~Siddharth_Vashishtha1;~Motoki_Sano1;~Faiz_Surani1;~Max_Chang1;~HyunJeong_Choe1;~David_Greene1;~Chuan_He4;~Rattima_Nitisaroj1;~Anna_Trukhina1;~Shachi_Paul1;~Pararth_Shah1;~Rushin_Shah2;~Zhou_Yu1", "aff": ";Holistic Intelligence for Global Good;Google;University of Rochester;Google;University of California, Santa Barbara;;korea university;;;Google;Google;Google;;Google;Columbia University", "aff_domain": ";holistic-intelligence.net;google.com;rochester.edu;google.com;ucsb.edu;;cs.korea;;;google.com;google.com;google.com;;google.com;columbia.edu", "position": ";CEO;Researcher;PhD student;Researcher;Undergrad student;;Researcher;;;Linguist;Analytical Linguist ;Software Engineer;;Researcher;Assistant Professor", "bibtex": "@inproceedings{\ngoel2023presto,\ntitle={{PRESTO}: A Multilingual Dataset for Parsing Realistic Task-Oriented Dialogs},\nauthor={Rahul Goel and Waleed Ammar and Aditya Gupta and Siddharth Vashishtha and Motoki Sano and Faiz Surani and Max Chang and HyunJeong Choe and David Greene and Chuan He and Rattima Nitisaroj and Anna Trukhina and Shachi Paul and Pararth Shah and Rushin Shah and Zhou Yu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=LRRThBBiov}\n}", "github": "", "project": "", "reviewers": "8Nj3;ePrg;WWoB", "site": "https://openreview.net/forum?id=LRRThBBiov", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;5;4", "excitement": "4;4;3", "reproducibility": "4;3;4", "correctness": "4;4;4", "rating_avg": 5.0, "confidence_avg": 4.333333333333333, "excitement_avg": 
3.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 4.0, "replies_avg": 10, "authors#_avg": 16, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0003-3541-6981;;;;;;;;;;;;;;", "linkedin": ";waleedammar/;;;;;mchangstanford/;hyunjeong-choe-25b51054/;david-greene-5740a954/;che0519/;rattima-nitisaroj-a186531b/;anna-trukhina-b03522133;shachipaul/;pararth/;rushinnshah/;", "aff_unique_index": "1;2;1;3;4;1;1;1;1;5", "aff_unique_norm": ";Google;University of Rochester;University of California, Santa Barbara;Korea University;Columbia University", "aff_unique_dep": ";Google;;;;", "aff_unique_url": ";https://www.google.com;https://www.rochester.edu;https://www.ucsb.edu;https://www.korea.ac.kr;https://www.columbia.edu", "aff_unique_abbr": ";Google;U of R;UCSB;KU;Columbia", "aff_campus_unique_index": "1;1;2;1;1;1;1", "aff_campus_unique": ";Mountain View;Santa Barbara", "aff_country_unique_index": "1;1;1;1;2;1;1;1;1;1", "aff_country_unique": ";United States;South Korea" }, { "id": "LUDljw5VVD", "title": "Large Language Model Is Not a Good Few-shot Information Extractor, but a Good Reranker for Hard Samples!", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Large Language Models (LLMs) have made remarkable strides in various tasks. Whether LLMs are competitive few-shot solvers for information extraction (IE) tasks, however, remains an open problem. In this work, we aim to provide a thorough answer to this question. Through extensive experiments on nine datasets across four IE tasks, we demonstrate that current advanced LLMs consistently exhibit inferior performance, higher latency, and increased budget requirements compared to fine-tuned SLMs under most settings. Therefore, we conclude that LLMs are not effective few-shot information extractors in general. Nonetheless, we illustrate that with appropriate prompting strategies, LLMs can effectively complement SLMs and tackle challenging samples that SLMs struggle with. And moreover, we propose an adaptive filter-then-rerank paradigm to combine the strengths of LLMs and SLMs. In this paradigm, SLMs serve as filters and LLMs serve as rerankers. 
By prompting LLMs to rerank a small portion of difficult samples identified by SLMs, our preliminary system consistently achieves promising improvements ($2.4\\%$ F1-gain on average) on various IE tasks, with an acceptable time and cost investment.", "keywords": "Large Language Models;Information Extraction", "primary_area": "", "supplementary_material": "", "author": "Yubo Ma;Yixin Cao;Yong Ching Hong;Aixin Sun", "authorids": "~Yubo_Ma1;~Yixin_Cao2;~Yong_Ching_Hong1;~Aixin_Sun1", "gender": "M;M;M;M", "homepage": "https://sites.google.com/view/yixin-homepage;;https://personal.ntu.edu.sg/axsun/;https://mayubo2333.github.io/", "dblp": "20/8038-2;;78/5155;229/7323", "google_scholar": "https://scholar.google.co.uk/citations?user=CnhTvdoAAAAJ;;https://scholar.google.com.sg/citations?user=wyKGVKUAAAAJ;https://scholar.google.com/citations?hl=en", "or_profile": "~Yixin_Cao2;~Yong_Ching_Hong1;~Aixin_Sun1;~Ma_Yubo1", "aff": "Singapore Management University;Nanyang Technological University;Nanyang Technological University;School of Computer Science and Engineering, Nanyang Technological University", "aff_domain": "smu.edu.sg;ntu.edu.sg;ntu.edu.sg;e.ntu.edu.sg", "position": "Assistant Professor;Researcher;Associate Professor;PhD student", "bibtex": "@inproceedings{\nma2023large,\ntitle={Large Language Model Is Not a Good Few-shot Information Extractor, but a Good Reranker for Hard Samples!},\nauthor={Yubo Ma and Yixin Cao and Yong Ching Hong and Aixin Sun},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=LUDljw5VVD}\n}", "github": "", "project": "", "reviewers": "CUTz;ifw3;A8RC", "site": "https://openreview.net/forum?id=LUDljw5VVD", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;3", "excitement": "3;3;4", "reproducibility": "5;4;4", "correctness": "3;4;4", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.333333333333333, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0003-0764-4258;", "linkedin": ";yong-ching-hong-9927a2114/;aixin-sun-%E5%AD%99%E7%88%B1%E6%AC%A3-43056622/;yubo-ma-17054b168/", "aff_unique_index": "0;1;1;1", "aff_unique_norm": "Singapore Management University;Nanyang Technological University", "aff_unique_dep": ";", "aff_unique_url": "https://www.smu.edu.sg;https://www.ntu.edu.sg", "aff_unique_abbr": "SMU;NTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Singapore" }, { "id": "LZq3crn3Bv", "title": "Cross-Lingual Cross-Target Stance Detection with Dual Knowledge Distillation Framework", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Stance detection aims to identify the user\u2019s attitude toward specific \\textit{targets} from text, which is an important research area in text mining and benefits a variety of application domains. Existing studies on stance detection were conducted mainly in English. Due to the low-resource problem in most non-English languages, cross-lingual stance detection was proposed to transfer knowledge from high-resource (source) language to low-resource (target) language. However, previous research has ignored the practical issue of no labeled training data available in target language. 
Moreover, target inconsistency in cross-lingual stance detection brings about the additional issue of unseen targets in target language, which in essence requires the transfer of both language and target-oriented knowledge from source to target language. To tackle these challenging issues, in this paper, we propose the new task of cross-lingual cross-target stance detection and develop the first computational work with dual knowledge distillation. Our proposed framework designs a cross-lingual teacher and a cross-target teacher using the source language data and a dual distillation process that transfers the two types of knowledge to target language. To bridge the target discrepancy between languages, cross-target teacher mines target category information and generalizes it to the unseen targets in target language via category-oriented learning. Experimental results on multilingual stance datasets demonstrate the effectiveness of our method compared to the competitive baselines.", "keywords": "cross-lingual cross-target stance detection;dual knowledge distillation;category-oriented contrastive learning", "primary_area": "", "supplementary_material": "", "author": "Ruike Zhang;Hanxuan Yang;Wenji Mao", "authorids": "~Ruike_Zhang1;~Hanxuan_Yang1;~Wenji_Mao1", "gender": "F;M;F", "homepage": ";;", "dblp": "277/5913;;16/2159.html", "google_scholar": "https://scholar.google.com.hk/citations?user=0TBXnVQAAAAJ;HJz2cw8AAAAJ;h6m4X_AAAAAJ", "or_profile": "~Ruike_Zhang1;~Hanxuan_Yang1;~Wenji_Mao1", "aff": "Institute of Automation, Chinese Academy of Sciences;Institute of Automation, Chinese Academy of Sciences;Institute of Automation, Chinese Academy of Sciences", "aff_domain": "ia.ac.cn;ia.ac.cn;ia.ac.cn", "position": "PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nzhang2023crosslingual,\ntitle={Cross-Lingual Cross-Target Stance Detection with Dual Knowledge Distillation Framework},\nauthor={Ruike Zhang and Hanxuan Yang and Wenji Mao},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=LZq3crn3Bv}\n}", "github": "", "project": "", "reviewers": "UMp6;DhrL;W3Pa", "site": "https://openreview.net/forum?id=LZq3crn3Bv", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;4", "excitement": "3;4;3", "reproducibility": "5;4;4", "correctness": "3;4;3", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.333333333333333, "correctness_avg": 3.3333333333333335, "replies_avg": 9, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-9391-5060;0000-0002-4473-2356;", "linkedin": ";%E7%80%9A%E8%BD%A9-%E6%9D%A8-b0b010182/;", "aff_unique_index": "0;0;0", "aff_unique_norm": "Chinese Academy of Sciences", "aff_unique_dep": "Institute of Automation", "aff_unique_url": "http://www.ia.cas.cn", "aff_unique_abbr": "CAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "LawAC9vh8q", "title": "Enhancing the Ranking Context of Dense Retrieval through Reciprocal Nearest Neighbors", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Sparse annotation poses persistent challenges to training dense retrieval models; for example, it distorts the training signal when unlabeled relevant documents are used spuriously as negatives in contrastive learning. 
To alleviate this problem, we introduce evidence-based label smoothing, a novel, computationally efficient method that prevents penalizing the model for assigning high relevance to false negatives. To compute the target relevance distribution over candidate documents within the ranking context of a given query, we assign a non-zero relevance probability to those candidates most similar to the ground truth based on the degree of their similarity to the ground-truth document(s).\n\nTo estimate relevance we leverage an improved similarity metric based on reciprocal nearest neighbors, which can also be used independently to rerank candidates in post-processing. Through extensive experiments on two large-scale ad hoc text retrieval datasets, we demonstrate that reciprocal nearest neighbors can improve the ranking effectiveness of dense retrieval models, both when used for label smoothing, as well as for reranking. This indicates that by considering relationships between documents and queries beyond simple geometric distance we can effectively enhance the ranking context.", "keywords": "dense retrieval;reciprocal nearest neighbors;ranking context;contrastive learning;list-wise loss;false negatives;label smoothing;transformers;Large Language Models;information retrieval;natural language processing;deep learning", "primary_area": "", "supplementary_material": "", "author": "George Zerveas;Navid Rekabsaz;Carsten Eickhoff", "authorids": "~George_Zerveas1;~Navid_Rekabsaz2;~Carsten_Eickhoff1", "gender": ";M;M", "homepage": ";https://navid-rekabsaz.github.io;https://health-nlp.org", "dblp": "232/1820;150/5089;42/8700", "google_scholar": "CSL_JUIAAAAJ;lZjyLyEAAAAJ;QQi1_rAAAAAJ", "or_profile": "~George_Zerveas1;~Navid_Rekabsaz2;~Carsten_Eickhoff1", "aff": "Brown University;Johannes Kepler University Linz;Eberhard-Karls-Universit\u00e4t T\u00fcbingen", "aff_domain": "brown.edu;jku.at;uni-tuebingen.de", "position": "PhD student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nzerveas2023enhancing,\ntitle={Enhancing the Ranking Context of Dense Retrieval through Reciprocal Nearest Neighbors},\nauthor={George Zerveas and Navid Rekabsaz and Carsten Eickhoff},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=LawAC9vh8q}\n}", "github": "", "project": "", "reviewers": "GibY;BAUY;LNyP", "site": "https://openreview.net/forum?id=LawAC9vh8q", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;4;4", "excitement": "4;2;4", "reproducibility": "3;4;3", "correctness": "3;4;4", "rating_avg": 5.0, "confidence_avg": 4.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.6666666666666665, "replies_avg": 10, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-1227-5349;0000-0001-5764-8738;0000-0001-9895-4061", "linkedin": ";;", "aff_unique_index": "0;1;2", "aff_unique_norm": "Brown University;Johannes Kepler University;Eberhard Karls University of T\u00fcbingen", "aff_unique_dep": ";;", "aff_unique_url": "https://www.brown.edu;https://www.jku.at;https://www.uni-tuebingen.de/", "aff_unique_abbr": "Brown;JKU;Uni T\u00fcbingen", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Linz;T\u00fcbingen", "aff_country_unique_index": "0;1;2", "aff_country_unique": "United States;Austria;Germany" }, { "id": "LepuyCeWcw", "title": "Causal Inference from Text: Unveiling Interactions between Variables", "track": "main", "status": 
"Long Findings", "tldr": "", "abstract": "Adjusting for latent covariates is crucial for estimating causal effects from observational textual data. Most existing methods only account for confounding covariates that affect both treatment and outcome, potentially leading to biased causal effects. This bias arises from insufficient consideration of non-confounding covariates, which are relevant only to either the treatment or the outcome. In this work, we aim to mitigate the bias by unveiling interactions between different variables to disentangle the non-confounding covariates when estimating causal effects from text. The disentangling process ensures covariates only contribute to their respective objectives, enabling independence between variables. Additionally, we impose a constraint to balance representations from the treated group and control group to alleviate selection bias. We conduct experiments on two different treatment factors under various scenarios, and the proposed model significantly outperforms recent strong baselines. Furthermore, our thorough analysis on earnings call transcripts demonstrates that our model can effectively disentangle the variables, and further investigations into real-world scenarios provide guidance for investors to make informed decisions.", "keywords": "Causal Inference;Natural Language Processing;Non-confounding Covariates;Disentanglement", "primary_area": "", "supplementary_material": "", "author": "Yuxiang Zhou;Yulan He", "authorids": "~Yuxiang_Zhou3;~Yulan_He1", "gender": "M;F", "homepage": "https://zyxnlp.github.io/;https://www.kcl.ac.uk/people/yulan-he", "dblp": "203/4838.html;75/5430", "google_scholar": "https://scholar.google.com/citations?hl=en;https://scholar.google.co.uk/citations?user=SP9r32UAAAAJ", "or_profile": "~Yuxiang_Zhou3;~Yulan_He1", "aff": "King's College London;King's College London, University of London", "aff_domain": "kcl.ac.uk;kcl.ac.uk", "position": "Postdoc;Full Professor", "bibtex": "@inproceedings{\nzhou2023causal,\ntitle={Causal Inference from Text: Unveiling Interactions between Variables},\nauthor={Yuxiang Zhou and Yulan He},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=LepuyCeWcw}\n}", "github": "", "project": "", "reviewers": "nCyD;5SCp;EVAH", "site": "https://openreview.net/forum?id=LepuyCeWcw", "pdf_size": 0, "rating": "3;3;3", "confidence": "5;4;3", "excitement": "2;3;3", "reproducibility": "4;4;4", "correctness": "3;3;4", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 2.6666666666666665, "reproducibility_avg": 4.0, "correctness_avg": 3.3333333333333335, "replies_avg": 10, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0009-0004-3720-9083;0000-0003-3948-5845", "linkedin": ";yulan-he-277234a/?originalSubdomain=uk", "aff_unique_index": "0;0", "aff_unique_norm": "King's College London", "aff_unique_dep": "", "aff_unique_url": "https://www.kcl.ac.uk", "aff_unique_abbr": "KCL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United Kingdom" }, { "id": "Lk1KaQcjaM", "title": "AD-NLP: A Benchmark for Anomaly Detection in Natural Language Processing", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Deep learning models have reignited the interest in Anomaly Detection research in recent years. 
Methods for Anomaly Detection in text have shown strong empirical results on ad-hoc anomaly setups that are usually made by downsampling some classes of a labeled dataset. This can lead to reproducibility issues and models that are biased toward detecting particular anomalies while failing to recognize them in more sophisticated scenarios. In the present work, we provide a unified benchmark for detecting various types of anomalies, focusing on problems that can be naturally formulated as Anomaly Detection in text, ranging from syntax to stylistics. In this way, we are hoping to facilitate research in Text Anomaly Detection. We also evaluate and analyze two strong shallow baselines, as well as two of the current state-of-the-art neural approaches, providing insights into the knowledge the neural models are learning when performing the anomaly detection task. We provide the code for evaluation, downloading, and preprocessing the dataset at https://github.com/mateibejan1/ad-nlp/.", "keywords": "NLP;anomaly detection;deep learning;machine learning;dataset;benchmark", "primary_area": "", "supplementary_material": "", "author": "Matei Bejan;Andrei Manolache;Marius Popescu", "authorids": "~Matei_Bejan1;~Andrei_Manolache1;~Marius_Popescu1", "gender": "M;M;M", "homepage": ";https://andreimano.github.io;", "dblp": ";290/2275;77/3007", "google_scholar": ";0H7Htc4AAAAJ;https://scholar.google.ro/citations?user=UPWSjkAAAAAJ", "or_profile": "~Matei_Bejan1;~Andrei_Manolache1;~Marius_Popescu1", "aff": "University of Bucharest;Universit\u00e4t Stuttgart;University of Bucharest", "aff_domain": "unibuc.ro;uni-stuttgart.de;unibuc.ro", "position": "PhD student;PhD student;Associate Professor", "bibtex": "@inproceedings{\nbejan2023adnlp,\ntitle={{AD}-{NLP}: A Benchmark for Anomaly Detection in Natural Language Processing},\nauthor={Matei Bejan and Andrei Manolache and Marius Popescu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=Lk1KaQcjaM}\n}", "github": "", "project": "", "reviewers": "UgrG;s43E;vnEh;SUCF", "site": "https://openreview.net/forum?id=Lk1KaQcjaM", "pdf_size": 0, "rating": "5;5;5;5", "confidence": "3;3;3;4", "excitement": "3;3;2;3", "reproducibility": "4;4;3;4", "correctness": "3;2;3;4", "rating_avg": 5.0, "confidence_avg": 3.25, "excitement_avg": 2.75, "reproducibility_avg": 3.75, "correctness_avg": 3.0, "replies_avg": 13, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": "matei-b-786a17176/;andreimano/;", "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Bucharest;University of Stuttgart", "aff_unique_dep": ";", "aff_unique_url": "https://www.unibuc.ro;https://www.uni-stuttgart.de", "aff_unique_abbr": "Unibuc;Uni Stuttgart", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "Romania;Germany" }, { "id": "LkV7Xx06yq", "title": "MEGClass: Extremely Weakly Supervised Text Classification via Mutually-Enhancing Text Granularities", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Text classification is essential for organizing unstructured text. Traditional methods rely on human annotations or, more recently, a set of class seed words for supervision, which can be costly, particularly for specialized or emerging domains. To address this, using class surface names alone as extremely weak supervision has been proposed. 
However, existing approaches treat different levels of text granularity (documents, sentences, or words) independently, disregarding inter-granularity class disagreements and the context identifiable exclusively through joint extraction. In order to tackle these issues, we introduce MEGClass, an extremely weakly-supervised text classification method that leverages Mutually-Enhancing Text Granularities. MEGClass utilizes coarse- and fine-grained context signals obtained by jointly considering a document's most class-indicative words and sentences. This approach enables the learning of a contextualized document representation that captures the most discriminative class indicators. By preserving the heterogeneity of potential classes, MEGClass can select the most informative class-indicative documents as iterative feedback to enhance the initial word-based class representations and ultimately fine-tune a pre-trained text classifier. Extensive experiments on seven benchmark datasets demonstrate that MEGClass outperforms other weakly and extremely weakly supervised methods.", "keywords": "text classification;extremely weak supervision;weakly-supervised learning;document representations;representation learning;pseudo-document generation", "primary_area": "", "supplementary_material": "", "author": "Priyanka Kargupta;Tanay Komarlu;Susik Yoon;Xuan Wang;Jiawei Han", "authorids": "~Priyanka_Kargupta1;~Tanay_Komarlu1;~Susik_Yoon1;~Xuan_Wang3;~Jiawei_Han1", "gender": "F;M;;F;M", "homepage": "http://pkargupta.github.io/;http://tanaykomarlu.com/;http://www.susikyoon.com;https://xuanwang91.github.io/;http://hanj.cs.illinois.edu/", "dblp": "257/8673;;179/5307;34/4799-8;h/JiaweiHan.html", "google_scholar": "Iu6nUEkAAAAJ;Vw2Du3IAAAAJ;tCJs1zEAAAAJ;_IVJi6UAAAAJ;https://scholar.google.com.tw/citations?user=Kv9AbjMAAAAJ", "or_profile": "~Priyanka_Kargupta1;~Tanay_Komarlu1;~Susik_Yoon1;~Xuan_Wang3;~Jiawei_Han1", "aff": "Department of Computer Science;Department of Computer Science;University of Illinois, Urbana Champaign;Virginia Polytechnic Institute and State University;University of Illinois at Urbana-Champaign (UIUC)", "aff_domain": "cs.illinois.edu;cs.illinois.edu;illinois.edu;vt.edu;illinois.edu", "position": "PhD student;MS student;Postdoc;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nkargupta2023megclass,\ntitle={{MEGC}lass: Extremely Weakly Supervised Text Classification via Mutually-Enhancing Text Granularities},\nauthor={Priyanka Kargupta and Tanay Komarlu and Susik Yoon and Xuan Wang and Jiawei Han},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=LkV7Xx06yq}\n}", "github": "", "project": "", "reviewers": "7Hfk;YqtT;PUtu;pjys", "site": "https://openreview.net/forum?id=LkV7Xx06yq", "pdf_size": 0, "rating": "3;3;3;3", "confidence": "4;4;4;3", "excitement": "3;4;3;4", "reproducibility": "4;5;4;5", "correctness": "3;4;3;3", "rating_avg": 3.0, "confidence_avg": 3.75, "excitement_avg": 3.5, "reproducibility_avg": 4.5, "correctness_avg": 3.25, "replies_avg": 14, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0009-0004-3206-5033;;0000-0001-5596-4972;0000-0002-1381-8958;0000-0002-3629-2696", "linkedin": "pkargupta/;tkomarlu/;;xwang2/;", "aff_unique_index": "0;0;1;2;1", "aff_unique_norm": "Unknown Institution;University of Illinois Urbana-Champaign;Virginia Tech", "aff_unique_dep": "Department of Computer Science;;", "aff_unique_url": 
";https://illinois.edu;https://www.vt.edu", "aff_unique_abbr": ";UIUC;VT", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Urbana-Champaign", "aff_country_unique_index": "1;1;1", "aff_country_unique": ";United States" }, { "id": "Lp4CMWnSyb", "title": "Always the Best Fit: Adaptive Domain Gap Filling from Causal Perspective for Few-Shot Relation Extraction", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "Cross-domain Relation Extraction aims to transfer knowledge from a source domain to a different target domain to address low-resource challenges. However, the semantic gap caused by data bias between domains is a major challenge, especially in few-shot scenarios.\nPrevious work has mainly focused on transferring knowledge between domains through shared feature representations without analyzing the impact of each factor that may produce data bias based on the characteristics of each domain.\nThis work takes a causal perspective and proposes a new framework CausalGF.\nBy constructing a unified structural causal model, we estimating the causal effects of factors such as syntactic structure, label distribution,and entities on the outcome.\nCausalGF calculates the causal effects among the factors and adjusts them dynamically based on domain characteristics, enabling adaptive gap filling.\nOur experiments show that our approach better fills the domain gap, yielding significantly better results on the cross-domain few-shot relation extraction task.", "keywords": "Cross domain;Causal Inference;Few-Shot;Relation Extraction", "primary_area": "", "supplementary_material": "", "author": "Ge Bai;Chenji Lu;Jiaxiang Geng;Shilong Li;Yidong Shi;Xiyan Liu;Ying Liu;Zhang Zhang;Ruifang Liu", "authorids": "~Ge_Bai2;~Chenji_Lu1;~Jiaxiang_Geng1;~Shilong_Li3;~Yidong_Shi1;~Xiyan_Liu2;~Ying_Liu19;~Zhang_Zhang6;~Ruifang_Liu1", "gender": ";M;M;M;M;F;M;;M", "homepage": ";https://github.com/luchenji;;;https://github.com/Ribb0n;https://blog.csdn.net/qq_43355120?spm=1000.2115.3001.5343;https://2hang2hang.top/;;https://lishilong.site/about/", "dblp": ";;;;;;;;", "google_scholar": ";;;;;;;;9VvLSQ4AAAAJ", "or_profile": "~Ge_Bai2;~Chenji_Lu1;~Jiaxiang_Geng1;~Yidong_Shi1;~Xiyan_Liu2;~Ying_Liu19;~Zhang_Zhang6;~Ruifang_Liu1;~Li_Shilong2", "aff": "Beijing University of Posts and Telecommunications;Beijing University of Posts and Telecommunications;Beijing University of Posts and Telecommunications;Beijing University of Posts and Telecommunications;Beijing University of Posts and Telecommunications;Beijing University of Posts and Telecommunications;Beijing University of Posts and Telecommunications;Beijing University of Posts and Telecommunications;Beijing University of Posts and Telecommunications", "aff_domain": "bupt.edu.cn;bupt.edu.cn;bupt.edu.cn;bupt.edu.cn;bupt.edu.cn;bupt.edu.cn;bupt.edu.cn;bupt.edu.cn;bupt.edu.cn", "position": "MS student;MS student;MS student;MS student;MS student;MS student;Undergrad student;Associate Professor;Undergrad student", "bibtex": "@inproceedings{\nbai2023always,\ntitle={Always the Best Fit: Adaptive Domain Gap Filling from Causal Perspective for Few-Shot Relation Extraction},\nauthor={Ge Bai and Chenji Lu and Jiaxiang Geng and Shilong Li and Yidong Shi and Xiyan Liu and Ying Liu and Zhang Zhang and Ruifang Liu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=Lp4CMWnSyb}\n}", "github": "", "project": "", "reviewers": "8uUD;Tq2e;igC2", "site": 
"https://openreview.net/forum?id=Lp4CMWnSyb", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;2;4", "excitement": "4;4;3", "reproducibility": "3;5;3", "correctness": "4;4;3", "rating_avg": 4.0, "confidence_avg": 3.0, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.6666666666666665, "replies_avg": 9, "authors#_avg": 9, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0009-0005-1953-3207;;;0000-0002-8954-2660;;;;;", "linkedin": ";;;;;;;;", "aff_unique_index": "0;0;0;0;0;0;0;0;0", "aff_unique_norm": "Beijing University of Posts and Telecommunications", "aff_unique_dep": "", "aff_unique_url": "http://www.bupt.edu.cn/", "aff_unique_abbr": "BUPT", "aff_campus_unique_index": "0;0;0;0;0;0;0;0;0", "aff_campus_unique": "Beijing", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "M1GRz46Ahz", "title": "SHARCS: Efficient Transformers Through Routing with Dynamic Width Sub-networks", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "We introduce SHARCS for adaptive inference that takes into account the hardness of input samples. SHARCS can train a router on any transformer network, enabling the model to direct different samples to sub-networks with varying widths. Our experiments demonstrate that: (1) SHARCS outperforms or complements existing per-sample adaptive inference methods across various classification tasks in terms of accuracy vs. FLOPs; (2) SHARCS generalizes across different architectures and can be even applied to compressed and efficient transformer encoders to further improve their efficiency; (3) SHARCS can provide a 2 times inference speed up at an insignificant drop in accuracy.", "keywords": "Efficiency;Routing;hardness", "primary_area": "", "supplementary_material": "", "author": "Mohammadreza Salehi;Sachin Mehta;Aditya Kusupati;Ali Farhadi;Hannaneh Hajishirzi", "authorids": "~Mohammadreza_Salehi3;~Sachin_Mehta1;~Aditya_Kusupati1;~Ali_Farhadi3;~Hannaneh_Hajishirzi1", "gender": "M;M;M;M;F", "homepage": "https://homes.cs.washington.edu/~mrsalehi/;https://sacmehta.github.io/;http://www.adityakusupati.com/;https://homes.cs.washington.edu/~ali/;https://homes.cs.washington.edu/~hannaneh/", "dblp": ";34/11140;231/7662;37/5826;52/1296", "google_scholar": "NFddT_4AAAAJ;https://scholar.google.co.in/citations?user=cnRJ0GUAAAAJ;https://scholar.google.co.in/citations?user=qULx8g8AAAAJ;jeOFRDsAAAAJ;LOV6_WIAAAAJ", "or_profile": "~Mohammadreza_Salehi3;~Sachin_Mehta1;~Aditya_Kusupati1;~Ali_Farhadi3;~Hannaneh_Hajishirzi1", "aff": "Apple;Apple;Department of Computer Science, University of Washington;University of Washington;University of Washington", "aff_domain": "apple.com;apple.com;cs.washington.edu;cs.uw.edu;uw.edu", "position": "Intern;Researcher;PhD student;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nsalehi2023sharcs,\ntitle={{SHARCS}: Efficient Transformers Through Routing with Dynamic Width Sub-networks},\nauthor={Mohammadreza Salehi and Sachin Mehta and Aditya Kusupati and Ali Farhadi and Hannaneh Hajishirzi},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=M1GRz46Ahz}\n}", "github": "", "project": "", "reviewers": "cRFX;gQht;sLft", "site": "https://openreview.net/forum?id=M1GRz46Ahz", "pdf_size": 0, "rating": "2;2;2", "confidence": "3;4;4", "excitement": "3;3;2", "reproducibility": "4;3;3", "correctness": "3;3;2", "rating_avg": 2.0, 
"confidence_avg": 3.6666666666666665, "excitement_avg": 2.6666666666666665, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 2.6666666666666665, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0001-8455-1851;;", "linkedin": "mrezasalehi/;;adityakusupati/;;", "aff_unique_index": "0;0;1;1;1", "aff_unique_norm": "Apple;University of Washington", "aff_unique_dep": "Apple Inc.;Department of Computer Science", "aff_unique_url": "https://www.apple.com;https://www.washington.edu", "aff_unique_abbr": "Apple;UW", "aff_campus_unique_index": "1", "aff_campus_unique": ";Seattle", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "M1Nogs3zR5", "title": "DemoNSF: A Multi-task Demonstration-based Generative Framework for Noisy Slot Filling Task", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "Recently, prompt-based generative frameworks have shown impressive capabilities in sequence labeling tasks. However, in practical dialogue scenarios, relying solely on simplistic templates and traditional corpora presents a challenge for these methods in generalizing to unknown input perturbations. To address this gap, we propose a multi-task demonstration-based generative framework for noisy slot filling, named DemoNSF. Specifically, we introduce three noisy auxiliary tasks, namely noisy recovery (NR), random mask (RM), and hybrid discrimination (HD), to implicitly capture semantic structural information of input perturbations at different granularities. In the downstream main task, we design a noisy demonstration construction strategy for the generative framework, which explicitly incorporates task-specific information and perturbed distribution during training and inference. Experiments on two benchmarks demonstrate that DemoNSF outperforms all baseline methods and achieves strong generalization. Further analysis provides empirical guidance for the practical application of generative frameworks. 
Our code is released at https://github.com/dongguanting/Demo-NSF.", "keywords": "Noisy Slot Filling;Input Perturbations;Multi-task Learning;Generative Framework;Large Language Model;Demonstration Learning", "primary_area": "", "supplementary_material": "", "author": "Guanting Dong;Tingfeng Hui;Zhuoma GongQue;Jinxu Zhao;Daichi Guo;Gang Zhao;Keqing He;Weiran Xu", "authorids": "~Guanting_Dong1;~Tingfeng_Hui1;~Zhuoma_GongQue1;~Jinxu_Zhao1;~Daichi_Guo1;~Gang_Zhao2;~Keqing_He1;~Weiran_Xu1", "gender": "M;M;F;M;M;M;;M", "homepage": "https://dongguanting.github.io/;https://github.com/HypherX/;https://www.zhihu.com/people/x-xuan-6;https://pris-nlp.github.io/author/%E8%B5%B5%E9%87%91%E6%97%AD/;https://pris-nlp.github.io/author/%E9%83%AD%E5%B2%B1%E9%A9%B0/;;https://helicqin.github.io/about/index.html;", "dblp": ";341/1331;;;;;79/2314;41/5448", "google_scholar": "amozZDkAAAAJ;rOFW8kYAAAAJ;;;;;811USNoAAAAJ;https://scholar.google.com/citations?view_op=list_works", "or_profile": "~Guanting_Dong1;~Tingfeng_Hui1;~Zhuoma_GongQue1;~Jinxu_Zhao1;~Daichi_Guo1;~Gang_Zhao2;~Keqing_He1;~Weiran_Xu1", "aff": "Beijing University of Posts and Telecommunications;Beijing University of Posts and Telecommunications;Beijing University of Posts and Telecommunications;Beijing University of Posts and Telecommunications;Beijing University of Posts and Telecommunications;Beijing University of Posts and Telecommunications;Meituan Group;Beijing University of Post and Telecommunication", "aff_domain": "bupt.edu.cn;bupt.edu.cn;bupt.edu.cn;bupt.edu;bupt.edu.cn;bupt.edu.cn;meituan.com;bupt.edu.cn", "position": "MS student;MS student;Undergrad student;MS student;MS student;MS student;Researcher;Associate Professor", "bibtex": "@inproceedings{\ndong2023demonsf,\ntitle={Demo{NSF}: A Multi-task Demonstration-based Generative Framework for Noisy Slot Filling Task},\nauthor={Guanting Dong and Tingfeng Hui and Zhuoma GongQue and Jinxu Zhao and Daichi Guo and Gang Zhao and Keqing He and Weiran Xu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=M1Nogs3zR5}\n}", "github": "", "project": "", "reviewers": "zUxk;vQ9E;8JBt", "site": "https://openreview.net/forum?id=M1Nogs3zR5", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;3;3", "excitement": "3;3;3", "reproducibility": "4;3;4", "correctness": "3;3;4", "rating_avg": 3.0, "confidence_avg": 3.0, "excitement_avg": 3.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 10, "authors#_avg": 8, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;0000-0001-7965-8531;;0000-0002-9416-7666", "linkedin": ";;;;;;;", "aff_unique_index": "0;0;0;0;0;0;1;0", "aff_unique_norm": "Beijing University of Posts and Telecommunications;Meituan Group", "aff_unique_dep": ";", "aff_unique_url": "http://www.bupt.edu.cn/;https://www.meituan.com", "aff_unique_abbr": "BUPT;Meituan", "aff_campus_unique_index": "0;0;0;0;0;0;0", "aff_campus_unique": "Beijing;", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "M3uTqtEgNo", "title": "Rethinking and Improving Multi-task Learning for End-to-end Speech Translation", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Significant improvements in end-to-end speech translation (ST) have been achieved through the application of multi-task learning. 
However, the extent to which auxiliary tasks are highly consistent with the ST task, and how much this approach truly helps, have not been thoroughly studied. In this paper, we investigate the consistency between different tasks, considering different times and modules. We find that the textual encoder primarily facilitates cross-modal conversion, but the presence of noise in speech impedes the consistency between text and speech representations. Furthermore, we propose an improved multi-task learning (IMTL) approach for the ST task, which bridges the modal gap by mitigating the difference in length and representation. We conduct experiments on the MuST-C dataset. The results demonstrate that our method attains state-of-the-art results. Moreover, when additional data is used, we achieve the new SOTA result on MuST-C English to Spanish task with 20.8\\% of the training time required by the current SOTA method.", "keywords": "Speech translation;multi-modal translation;machine translation", "primary_area": "", "supplementary_material": "", "author": "Yuhao Zhang;Chen Xu;Bei Li;Hao Chen;Tong Xiao;Chunliang Zhang;JingBo Zhu", "authorids": "~Yuhao_Zhang4;~Chen_Xu9;~Bei_Li1;~Hao_Chen49;~Tong_Xiao4;~Chunliang_Zhang1;~JingBo_Zhu2", "gender": "M;M;M;M;M;M;", "homepage": "https://xiaozhang521.github.io/;;https://libeineu.github.io/;;https://www.nlplab.com/members/xiaotong.html;;https://dblp.org/pid/73/2129.html", "dblp": ";54/1474-8;;;05/5091;54/8637;", "google_scholar": "p3Om2OcAAAAJ;DmYTrQYAAAAJ;wzbJ5EIAAAAJ;z9V74HQAAAAJ;-fov7zkAAAAJ;;", "or_profile": "~Yuhao_Zhang4;~Chen_Xu9;~Bei_Li1;~Hao_Chen49;~Tong_Xiao4;~Chunliang_Zhang1;~JingBo_Zhu2", "aff": "Northeastern University (China);Northeastern University;Northeastern University;Northeastern University;Northeastern University;Northeastern University;Northeastern University", "aff_domain": "neu.edu.cn;neu.edu.cn;neu.edu.cn;neu.edu.cn;mail.neu.edu.cn;neu.edu.cn;mail.neu.edu.cn", "position": "PhD student;PhD student;PhD student;MS student;Full Professor;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nzhang2023rethinking,\ntitle={Rethinking and Improving Multi-task Learning for End-to-end Speech Translation},\nauthor={Yuhao Zhang and Chen Xu and Bei Li and Hao Chen and Tong Xiao and Chunliang Zhang and JingBo Zhu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=M3uTqtEgNo}\n}", "github": "", "project": "", "reviewers": "idg1;uUBZ;SFpv;XztX", "site": "https://openreview.net/forum?id=M3uTqtEgNo", "pdf_size": 0, "rating": "4;4;4;4", "confidence": "3;4;4;3", "excitement": "3;4;3;3", "reproducibility": "3;3;3;4", "correctness": "3;4;4;4", "rating_avg": 4.0, "confidence_avg": 3.5, "excitement_avg": 3.25, "reproducibility_avg": 3.25, "correctness_avg": 3.75, "replies_avg": 14, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;;", "linkedin": ";;;;tong-xiao-168bb081/;;", "aff_unique_index": "0;0;0;0;0;0;0", "aff_unique_norm": "Northeastern University", "aff_unique_dep": "", "aff_unique_url": "http://www.neu.edu.cn/", "aff_unique_abbr": "NEU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1;1;1;1", "aff_country_unique": "China;United States" }, { "id": "M51c00VxiJ", "title": "AMR Parsing is Far from Solved: GrAPES, the Granular AMR Parsing Evaluation Suite", "track": "main", "status": "Long Main", "tldr": "", "abstract": "We present the Granular AMR Parsing 
Evaluation Suite (GrAPES), a challenge set for Abstract Meaning Representation (AMR) parsing with accompanying evaluation metrics. AMR parsers now obtain high scores on the standard AMR evaluation metric Smatch, close to or even above reported inter-annotator agreement. But that does not mean that AMR parsing is solved; in fact, human evaluation in previous work indicates that current parsers still quite frequently make errors on node labels or graph structure that substantially distort sentence meaning. Here, we provide an evaluation suite that tests AMR parsers on a range of phenomena of practical, technical, and linguistic interest. Our 36 categories range from seen and unseen labels, to structural generalization, to coreference. GrAPES reveals in depth the abilities and shortcomings of current AMR parsers.", "keywords": "evaluation;dataset;corpus;semantic parsing;AMR;sentence-level semantics;semantic graphs", "primary_area": "", "supplementary_material": "", "author": "Jonas Groschwitz;Shay B Cohen;Lucia Donatelli;Meaghan Fowlie", "authorids": "~Jonas_Groschwitz1;~Shay_B_Cohen1;~Lucia_Donatelli1;~Meaghan_Fowlie1", "gender": "M;M;;F", "homepage": "https://jgroschwitz.github.io;http://homepages.inf.ed.ac.uk/scohen;;https://meaghanfowlie.eu.pythonanywhere.com/", "dblp": "166/1754;04/5629;;146/3144", "google_scholar": "T6vXIdwAAAAJ;;;SHB2cpIAAAAJ", "or_profile": "~Jonas_Groschwitz1;~Shay_B_Cohen1;~Lucia_Donatelli1;~Meaghan_Fowlie1", "aff": "University of Edinburgh, University of Edinburgh;University of Edinburgh;;Utrecht University", "aff_domain": "ed.ac.uk;ed.ac.uk;;uu.nl", "position": "Postdoc;Reader;;Assistant Professor", "bibtex": "@inproceedings{\ngroschwitz2023amr,\ntitle={{AMR} Parsing is Far from Solved: Gr{APES}, the Granular {AMR} Parsing Evaluation Suite},\nauthor={Jonas Groschwitz and Shay B Cohen and Lucia Donatelli and Meaghan Fowlie},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=M51c00VxiJ}\n}", "github": "", "project": "", "reviewers": "KxKy;Pruy;qBgp", "site": "https://openreview.net/forum?id=M51c00VxiJ", "pdf_size": 0, "rating": "5;5;5", "confidence": "3;5;3", "excitement": "4;2;4", "reproducibility": "5;2;3", "correctness": "3;3;4", "rating_avg": 5.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0003-4753-8353;;", "linkedin": ";;;", "aff_unique_index": "0;0;1", "aff_unique_norm": "University of Edinburgh;Utrecht University", "aff_unique_dep": ";", "aff_unique_url": "https://www.ed.ac.uk;https://www.uu.nl", "aff_unique_abbr": "Edinburgh;UU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1", "aff_country_unique": "United Kingdom;Netherlands" }, { "id": "M5knJ7ovgz", "title": "MRRL: Modifying the Reference via Reinforcement Learning for Non-Autoregressive Joint Multiple Intent Detection and Slot Filling", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "With the rise of non-autoregressive approach, some non-autoregressive models for joint multiple intent detection and slot filling have obtained the promising inference speed. 
However, most existing SLU models (1) suffer from the multi-modality problem, which means that the reference intents and slots may not be suitable for training; and (2) lack alignment between the correct predictions of the two tasks, which severely limits the overall accuracy. Therefore, in this paper, we propose $\\textbf{M}$odifying the $\\textbf{R}$eference via $\\textbf{R}$einforcement $\\textbf{L}$earning (MRRL), a novel method for multiple intent detection and slot filling, which introduces a modifier module and employs reinforcement learning. Specifically, we try to provide a better training target for the non-autoregressive SLU model via modifying the reference based on the output of the non-autoregressive SLU model, and propose a suitability reward to ensure that the output of the modifier module could fit well with the output of the non-autoregressive SLU model and does not deviate too far from the reference. In addition, we also propose a compromise reward to realize a flexible trade-off between the two subtasks. Experiments on two multi-intent datasets and non-autoregressive baselines demonstrate that our MRRL could consistently improve the performance of baselines. More encouragingly, our best variant achieves new state-of-the-art results, outperforming the previous best approach by 3.6 overall accuracy on the MixATIS dataset.", "keywords": "Non-Autoregressive;Intent Detection and Slot Filling;Reinforcement Learning", "primary_area": "", "supplementary_material": "", "author": "Xuxin Cheng;Zhihong Zhu;Bowen Cao;Qichen Ye;Yuexian Zou", "authorids": "~Xuxin_Cheng3;~Zhihong_Zhu1;~Bowen_Cao1;~Qichen_Ye1;~Yuexian_Zou2", "gender": ";;M;M;", "homepage": ";;;https://github.com/yeeeqichen;", "dblp": ";;265/5536;333/0906;", "google_scholar": ";;jaI8ym8AAAAJ;6MFWQPsAAAAJ;", "or_profile": "~Xuxin_Cheng3;~Zhihong_Zhu1;~Bowen_Cao1;~Qichen_Ye1;~Yuexian_Zou2", "aff": ";;Peking University;Peking University;", "aff_domain": ";;stu.pku.edu.cn;pku.edu.cn;", "position": ";;MS student;MS student;", "bibtex": "@inproceedings{\ncheng2023mrrl,\ntitle={{MRRL}: Modifying the Reference via Reinforcement Learning for Non-Autoregressive Joint Multiple Intent Detection and Slot Filling},\nauthor={Xuxin Cheng and Zhihong Zhu and Bowen Cao and Qichen Ye and Yuexian Zou},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=M5knJ7ovgz}\n}", "github": "", "project": "", "reviewers": "ejsG;cooG;6FMz", "site": "https://openreview.net/forum?id=M5knJ7ovgz", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;4", "excitement": "4;3;3", "reproducibility": "3;4;3", "correctness": "3;4;4", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;", "linkedin": ";;bowen-cao-0ba2a61a3/;;", "aff_unique_index": "0;0", "aff_unique_norm": "Peking University", "aff_unique_dep": "", "aff_unique_url": "http://www.pku.edu.cn", "aff_unique_abbr": "Peking U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "id": "M6BJfQ9oup", "title": "Conceptor-Aided Debiasing of Large Language Models", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Pre-trained large language models (LLMs) reflect the inherent social biases of their training corpus. 
Many methods have been proposed to mitigate this issue, but they often fail to debias or they sacrifice model accuracy. We use *conceptors*--a soft projection method--to identify and remove the bias subspace in LLMs such as BERT and GPT. We propose two methods of applying conceptors (1) bias subspace projection by post-processing by the conceptor NOT operation; and (2) a new architecture, conceptor-intervened BERT (CI-BERT), which explicitly incorporates the conceptor projection into all layers during training. We find that conceptor post-processing achieves state-of-the-art (SoTA) debiasing results while maintaining LLMs' performance on the GLUE benchmark. Further, it is robust in various scenarios and can mitigate intersectional bias efficiently by its AND operation on the existing bias subspaces. Although CI-BERT's training takes all layers' bias into account and can beat its post-processing counterpart in bias mitigation, CI-BERT reduces the language model accuracy. We also show the importance of carefully constructing the bias subspace. The best results are obtained by removing outliers from the list of biased words, combining them (via the OR operation), and computing their embeddings using the sentences from a cleaner corpus.", "keywords": "Natural Language Processing;Large Language Model;Fairness", "primary_area": "", "supplementary_material": "", "author": "Li S. Yifei;Lyle Ungar;Jo\u00e3o Sedoc", "authorids": "~Li_S._Yifei1;~Lyle_Ungar1;~Jo\u00e3o_Sedoc1", "gender": "M;M;M", "homepage": "http://www.cis.upenn.edu/~ungar/;;https://realliyifei.github.io/", "dblp": "u/LyleHUngar;;", "google_scholar": "https://scholar.google.com.tw/citations?user=KCiDjbkAAAAJ;vv355NgAAAAJ;https://scholar.google.com/citations?hl=en", "or_profile": "~Lyle_Ungar1;~Jo\u00e3o_Sedoc1;~Yifei_Li11", "aff": "University of Pennsylvania;New York University;University of Pennsylvania", "aff_domain": "upenn.edu;nyu.edu;upenn.edu", "position": "Full Professor;Assistant Professor;PhD student", "bibtex": "@inproceedings{\nyifei2023conceptoraided,\ntitle={Conceptor-Aided Debiasing of Large Language Models},\nauthor={Li S. 
Yifei and Lyle Ungar and Jo{\\~a}o Sedoc},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=M6BJfQ9oup}\n}", "github": "", "project": "", "reviewers": "cvA6;ogLa;ZbQ9", "site": "https://openreview.net/forum?id=M6BJfQ9oup", "pdf_size": 0, "rating": "5;5;5", "confidence": "3;1;3", "excitement": "4;3;3", "reproducibility": "3;3;4", "correctness": "4;3;4", "rating_avg": 5.0, "confidence_avg": 2.3333333333333335, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": ";joao-sedoc-9085714/;realliyifei/", "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Pennsylvania;New York University", "aff_unique_dep": ";", "aff_unique_url": "https://www.upenn.edu;https://www.nyu.edu", "aff_unique_abbr": "UPenn;NYU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "M9NdVElcbs", "title": "Bridging the Digital Divide: Performance Variation across Socio-Economic Factors in Vision-Language Models", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Despite the impressive performance of current AI models reported across various tasks, performance reports often do not include evaluations of how these models perform on the specific groups that will be impacted by these technologies. Among the minority groups under-represented in AI, data from low-income households are often overlooked in data collection and model evaluation. We evaluate the performance of a state-of-the-art vision-language model (CLIP) on a geo-diverse dataset containing household images associated with different income values (DollarStreet) and show that performance inequality exists among households of different income levels. Our results indicate that performance for the poorer groups is consistently lower than the wealthier groups across various topics and countries. 
We highlight insights that can help mitigate these issues and propose actionable steps for economic-level inclusive AI development.", "keywords": "Multimodal;Income;Geodiversity;Evaluation;Analysis", "primary_area": "", "supplementary_material": "", "author": "Joan Nwatu;Oana Ignat;Rada Mihalcea", "authorids": "~Joan_Nwatu1;~Oana_Ignat1;~Rada_Mihalcea1", "gender": "F;F;F", "homepage": "https://anniejoan.github.io/;https://oanaignat.github.io/;https://web.eecs.umich.edu/~mihalcea/", "dblp": "348/0259.html;219/1948;m/RadaMihalcea", "google_scholar": "GGw1q64AAAAJ;RzK4fWkAAAAJ;https://scholar.google.com.tw/citations?user=UetM7FgAAAAJ", "or_profile": "~Joan_Nwatu1;~Oana_Ignat1;~Rada_Mihalcea1", "aff": "University of Michigan - Ann Arbor;University of Michigan - Ann Arbor;University of Michigan", "aff_domain": "umich.edu;umich.edu;umich.edu", "position": "PhD student;Postdoc;Full Professor", "bibtex": "@inproceedings{\nnwatu2023bridging,\ntitle={Bridging the Digital Divide: Performance Variation across Socio-Economic Factors in Vision-Language Models},\nauthor={Joan Nwatu and Oana Ignat and Rada Mihalcea},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=M9NdVElcbs}\n}", "github": "", "project": "", "reviewers": "bJzE;hF5y;Vvi2", "site": "https://openreview.net/forum?id=M9NdVElcbs", "pdf_size": 0, "rating": "5;5;5", "confidence": "3;4;3", "excitement": "4;5;3", "reproducibility": "4;5;4", "correctness": "4;4;3", "rating_avg": 5.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 4.0, "reproducibility_avg": 4.333333333333333, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0009-0006-0380-7370;0000-0003-0272-5147;0000-0002-0767-6703", "linkedin": "joan-nwatu-927050115/;oana-ignat-ro;", "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Michigan", "aff_unique_dep": "", "aff_unique_url": "https://www.umich.edu", "aff_unique_abbr": "UM", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Ann Arbor;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "MEByW1upLk", "title": "Learning from Mistakes via Cooperative Study Assistant for Large Language Models", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Large language models (LLMs) have demonstrated their potential to refine their generation based on their own feedback. However, the feedback from LLM itself is often inaccurate, thereby limiting its benefits. In this paper, we propose Study Assistant for Large LAnguage Model (SALAM), a novel framework with an auxiliary agent to assist the main LLM in learning from mistakes through interactive cooperation. In the gathering phase, the student assistant agent probes the main LLM, analyzes its errors, and collects the interaction in a mistake memory. During the examination phase, the study assistant provides guidelines by retrieving relevant cases to help the main LLM anticipate and avoid similar errors. We first investigate the effectiveness of a general study assistant and then customize it to provide LLM-specific guidance through imitation learning from successful guidance experiences. 
Our experiments on three LLMs using two challenging frameworks demonstrate that SALAM can significantly boost LLMs by an accuracy margin of up to 6.6 on BBH and 12.6 on BBQ.", "keywords": "Large Language Models;Reflection and Feedback", "primary_area": "", "supplementary_material": "", "author": "Danqing Wang;Lei Li", "authorids": "~Danqing_Wang1;~Lei_Li11", "gender": "F;M", "homepage": ";https://www.cs.cmu.edu/~leili", "dblp": "226/6524.html;13/7007-5.html", "google_scholar": "https://scholar.google.com/citations?hl=en-US;BYXqAlwAAAAJ", "or_profile": "~Danqing_Wang1;~Lei_Li11", "aff": "University of California, Santa Barbara;Computer Science Department, UC Santa Barbara", "aff_domain": "ucsb.edu;cs.ucsb.edu", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\nwang2023learning,\ntitle={Learning from Mistakes via Cooperative Study Assistant for Large Language Models},\nauthor={Danqing Wang and Lei Li},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=MEByW1upLk}\n}", "github": "", "project": "", "reviewers": "Qwpe;5R6j;hBhU;2bb5", "site": "https://openreview.net/forum?id=MEByW1upLk", "pdf_size": 0, "rating": "4;4;4;4", "confidence": "4;3;4;2", "excitement": "4;4;4;4", "reproducibility": "4;5;5;3", "correctness": "3;3;4;3", "rating_avg": 4.0, "confidence_avg": 3.25, "excitement_avg": 4.0, "reproducibility_avg": 4.25, "correctness_avg": 3.25, "replies_avg": 13, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0003-3095-9776", "linkedin": ";", "aff_unique_index": "0;0", "aff_unique_norm": "University of California, Santa Barbara", "aff_unique_dep": "", "aff_unique_url": "https://www.ucsb.edu", "aff_unique_abbr": "UCSB", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Santa Barbara", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "MFimS05rLW", "title": "Investigating the Effect of Pre-finetuning BERT Models on NLI Involving Presuppositions", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "We explore the connection between presupposition, discourse and sarcasm and propose to leverage that connection in a transfer learning scenario with the goal of improving the performance of NLI models on cases involving presupposition. We exploit advances in training transformer-based models that show that pre-finetuning\u2014--i.e., finetuning the model on an additional task or dataset before the actual finetuning phase\u2014--can help these models, in some cases, achieve a higher performance on a given downstream task. Building on those advances and that aforementioned connection, we propose pre-finetuning NLI models on carefully chosen tasks in an attempt to improve their performance on NLI cases involving presupposition. We notice that, indeed, pre-finetuning on those tasks leads to performance improvements. Furthermore, we run several diagnostic tests to understand whether these gains are merely a byproduct of additional training data. 
The results show that, while additional training data seems to be helping on its own in some cases, the choice of the tasks plays a role in the performance improvements.", "keywords": "Presupposition;natural language inference;discourse;pragmatics", "primary_area": "", "supplementary_material": "", "author": "Jad Kabbara;Jackie CK Cheung", "authorids": "~Jad_Kabbara1;~Jackie_CK_Cheung1", "gender": "M;M", "homepage": "http://www.mit.edu/~jkabbara/;http://cs.mcgill.ca/~jcheung/", "dblp": "148/9943;00/9012", "google_scholar": ";https://scholar.google.com.tw/citations?user=Um-wmYQAAAAJ", "or_profile": "~Jad_Kabbara1;~Jackie_CK_Cheung1", "aff": "Massachusetts Institute of Technology;Microsoft", "aff_domain": "mit.edu;microsoft.com", "position": "Postdoc;Consulting Researcher", "bibtex": "@inproceedings{\nkabbara2023investigating,\ntitle={Investigating the Effect of Pre-finetuning {BERT} Models on {NLI} Involving Presuppositions},\nauthor={Jad Kabbara and Jackie CK Cheung},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=MFimS05rLW}\n}", "github": "", "project": "", "reviewers": "joCE;Gb86;PE3F", "site": "https://openreview.net/forum?id=MFimS05rLW", "pdf_size": 0, "rating": "5;5;5", "confidence": "3;2;4", "excitement": "4;4;2", "reproducibility": "4;4;4", "correctness": "4;3;4", "rating_avg": 5.0, "confidence_avg": 3.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.0, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";", "linkedin": ";", "aff_unique_index": "0;1", "aff_unique_norm": "Massachusetts Institute of Technology;Microsoft", "aff_unique_dep": ";Microsoft Corporation", "aff_unique_url": "https://web.mit.edu;https://www.microsoft.com", "aff_unique_abbr": "MIT;Microsoft", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "MKjGklW9TP", "title": "ClusterPrompt: Cluster Semantic Enhanced Prompt Learning for New Intent Discovery", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "The discovery of new intent categories from user utterances is a crucial task in expanding agent skills. The key lies in how to efficiently solicit semantic evidence from utterances and properly transfer knowledge from existing intents to new intents. However, previous methods laid too much emphasis on relations among utterances or clusters for transfer learning, while paying less attention to the usage of semantics. As a result, these methods suffer from in-domain over-fitting and often generate meaningless new intent clusters due to data distortion. In this paper, we present a novel approach called Cluster Semantic Enhanced Prompt Learning (CsePL) for discovering new intents. Our method leverages two-level contrastive learning with label semantic alignment to learn meaningful representations of intent clusters. These learned intent representations are then utilized as soft prompt initializations for discriminating new intents, reducing the dominance of existing intents. Extensive experiments conducted on three public datasets demonstrate the superiority of our proposed method. 
It not only outperforms existing methods but also suggests meaningful intent labels and enables early detection of new intents.", "keywords": "dialogue systems;intent discovery;prompt learning;contrastive learning", "primary_area": "", "supplementary_material": "", "author": "Jinggui Liang;Lizi Liao", "authorids": "~Jinggui_Liang1;~Lizi_Liao1", "gender": ";F", "homepage": ";https://liziliao.github.io/", "dblp": ";149/1249", "google_scholar": ";https://scholar.google.com.sg/citations?user=W2b08EUAAAAJ", "or_profile": "~Jinggui_Liang1;~Lizi_Liao1", "aff": ";Singapore Management University", "aff_domain": ";smu.edu.sg", "position": ";Assistant Professor", "bibtex": "@inproceedings{\nliang2023clusterprompt,\ntitle={ClusterPrompt: Cluster Semantic Enhanced Prompt Learning for New Intent Discovery},\nauthor={Jinggui Liang and Lizi Liao},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=MKjGklW9TP}\n}", "github": "", "project": "", "reviewers": "ASNm;wx3r;QMDg", "site": "https://openreview.net/forum?id=MKjGklW9TP", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;4;4", "excitement": "3;4;4", "reproducibility": "3;4;4", "correctness": "3;3;4", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";", "linkedin": ";", "aff_unique_index": "0", "aff_unique_norm": "Singapore Management University", "aff_unique_dep": "", "aff_unique_url": "https://www.smu.edu.sg", "aff_unique_abbr": "SMU", "aff_country_unique_index": "0", "aff_country_unique": "Singapore" }, { "id": "MLKLYoXypN", "title": "Cross-Lingual Consistency of Factual Knowledge in Multilingual Language Models", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Multilingual large-scale Pretrained Language Models (PLMs) have been shown to store considerable amounts of factual knowledge, but large variations are observed across languages. With the ultimate goal of ensuring that users with different language backgrounds obtain consistent feedback from the same model, we study the cross-lingual consistency (CLC) of factual knowledge in various multilingual PLMs.\nTo this end, we propose a Ranking-based Consistency (RankC) metric to evaluate knowledge consistency across languages independently from accuracy. Using this metric, we conduct an in-depth analysis of the determining factors for CLC, both at model level and at language-pair level. Among other results, we find that increasing model size leads to higher factual probing accuracy in most languages, but does not improve cross-lingual consistency. Finally, we conduct a case study on CLC when new factual associations are inserted in the PLMs via model editing. Results on a small sample of facts inserted in English reveal a clear pattern whereby the new piece of knowledge transfers only to languages with which English has a high RankC score. 
All code and data are released at https://github.com/Betswish/Cross-Lingual-Consistency.", "keywords": "Model Consistency;Multilinguality;Knowledge Incorporation;Large-scale Pre-trained Language Model;Model Evaluation;Knowledge Probing", "primary_area": "", "supplementary_material": "", "author": "Jirui Qi;Raquel Fern\u00e1ndez;Arianna Bisazza", "authorids": "~Jirui_Qi1;~Raquel_Fern\u00e1ndez1;~Arianna_Bisazza1", "gender": "M;F;F", "homepage": ";http://www.illc.uva.nl/~raquel;https://www.cs.rug.nl/~bisazza/", "dblp": "313/9843;02/5384;32/10934", "google_scholar": "bN9bPVUAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.it/citations?user=biQvUhcAAAAJ", "or_profile": "~Jirui_Qi1;~Raquel_Fern\u00e1ndez1;~Arianna_Bisazza1", "aff": "University of Groningen;University of Amsterdam;University of Groningen", "aff_domain": "rug.nl;uva.nl;rug.nl", "position": "PhD student;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\nqi2023crosslingual,\ntitle={Cross-Lingual Consistency of Factual Knowledge in Multilingual Language Models},\nauthor={Jirui Qi and Raquel Fern{\\'a}ndez and Arianna Bisazza},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=MLKLYoXypN}\n}", "github": "", "project": "", "reviewers": "tJxb;xdFB;Ukqz", "site": "https://openreview.net/forum?id=MLKLYoXypN", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;4;3", "excitement": "4;3;4", "reproducibility": "4;4;4", "correctness": "5;4;5", "rating_avg": 5.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.0, "correctness_avg": 4.666666666666667, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0001-5540-5943;", "linkedin": ";raquel-fernandez-13578148/;arianna-bisazza-92754329/", "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Groningen;University of Amsterdam", "aff_unique_dep": ";", "aff_unique_url": "https://www.rug.nl;https://www.uva.nl", "aff_unique_abbr": "RUG;UvA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Netherlands" }, { "id": "MLzoMwlxTh", "title": "Goal-Driven Explainable Clustering via Language Descriptions", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Unsupervised clustering is widely used to explore large corpora, but existing formulations neither consider the users' goals nor explain clusters' meanings. We propose a new task formulation, \"Goal-Driven Clustering with Explanations (GoalEx), which represents both the goal and the explanations as free-form language descriptions. For example, to categorize the errors made by a summarization system, the input to GoalEx is a corpus of annotator-written comments for system-generated summaries and a goal description \"cluster the comments based on why the annotators think the summary is imperfect.\"; the outputs are text clusters each with an explanation (\"this cluster mentions that the summary misses important context information.\"), which relates to the goal and accurately explains which comments should (not) belong to a cluster. 
To tackle GoalEx, we prompt a language model with \"[corpus subset] + [goal] + Brainstorm a list of explanations each representing a cluster.\"; then we classify whether each sample belongs to a cluster based on its explanation; finally, we use integer linear programming to select a subset of candidate clusters to cover most samples while minimizing overlaps. Under both automatic and human evaluation on corpora with or without labels, our method produces more accurate and goal-related explanations than prior methods.", "keywords": "clustering;explainability;large language models", "primary_area": "", "supplementary_material": "", "author": "Zihan Wang;Jingbo Shang;Ruiqi Zhong", "authorids": "~Zihan_Wang1;~Jingbo_Shang2;~Ruiqi_Zhong1", "gender": "M;M;M", "homepage": "https://zihanwangki.github.io/;https://shangjingbo1226.github.io/;https://ruiqi-zhong.github.io", "dblp": "152/5077-1;151/3145.html;222/3024", "google_scholar": "6UWtYZQAAAAJ;0SkFI4MAAAAJ;GskOShAAAAAJ", "or_profile": "~Zihan_Wang1;~Jingbo_Shang2;~Ruiqi_Zhong1", "aff": "University of California, San Diego;University of California, San Diego;University of California, Berkeley", "aff_domain": "ucsd.edu;ucsd.edu;berkeley.edu", "position": "PhD student;Assistant Professor;PhD student", "bibtex": "@inproceedings{\nwang2023goaldriven,\ntitle={Goal-Driven Explainable Clustering via Language Descriptions},\nauthor={Zihan Wang and Jingbo Shang and Ruiqi Zhong},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=MLzoMwlxTh}\n}", "github": "", "project": "", "reviewers": "wCff;v3tK;8dya;qK88", "site": "https://openreview.net/forum?id=MLzoMwlxTh", "pdf_size": 0, "rating": "5;5;5;5", "confidence": "3;2;3;3", "excitement": "4;4;3;4", "reproducibility": "3;5;4;4", "correctness": "4;3;3;4", "rating_avg": 5.0, "confidence_avg": 2.75, "excitement_avg": 3.75, "reproducibility_avg": 4.0, "correctness_avg": 3.5, "replies_avg": 14, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": ";;", "aff_unique_index": "0;0;1", "aff_unique_norm": "University of California, San Diego;University of California, Berkeley", "aff_unique_dep": ";", "aff_unique_url": "https://www.ucsd.edu;https://www.berkeley.edu", "aff_unique_abbr": "UCSD;UC Berkeley", "aff_campus_unique_index": "0;0;1", "aff_campus_unique": "San Diego;Berkeley", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "MMrqu8SD6y", "title": "\"A Tale of Two Movements\": Identifying and Comparing Perspectives in \\#BlackLivesMatter and \\#BlueLivesMatter Movements-related Tweets using Weakly Supervised Graph-based Structured Prediction", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Social media has become a major driver of social change, by facilitating the formation of online social movements. Automatically understanding the perspectives driving the movement and the voices opposing it is a challenging task, as annotated data is difficult to obtain. We propose a weakly supervised graph-based approach that explicitly models perspectives in \\#BlackLivesMatter-related tweets. Our proposed approach utilizes a social-linguistic representation of the data. We convert the text to a graph by breaking it into structured elements and connect it with the social network of authors, then structured prediction is done over the elements for identifying perspectives. Our approach uses a small seed set of labeled examples. 
We experiment with large language models for generating artificial training examples, compare them to manual annotation, and find that it achieves comparable performance. We perform quantitative and qualitative analyses using a human-annotated test set. Our model outperforms multitask baselines by a large margin, successfully characterizing the perspectives supporting and opposing \\#BLM.", "keywords": "characterization of social movements;social media;discourse analysis;perspective identification", "primary_area": "", "supplementary_material": "", "author": "Shamik Roy;Dan Goldwasser", "authorids": "~Shamik_Roy1;~Dan_Goldwasser1", "gender": "M;M", "homepage": "https://www.linkedin.com/in/shamik-roy-97698288/;https://www.cs.purdue.edu/homes/dgoldwas/", "dblp": "274/6982;38/3382", "google_scholar": "qbbGZ8EAAAAJ;https://scholar.google.com.tw/citations?user=u8358QgAAAAJ", "or_profile": "~Shamik_Roy1;~Dan_Goldwasser1", "aff": "Purdue University;Purdue University", "aff_domain": "purdue.edu;purdue.edu", "position": "PhD student;Associate Professor", "bibtex": "@inproceedings{\nroy2023a,\ntitle={''A Tale of Two Movements'': Identifying and Comparing Perspectives in {\\textbackslash}\\#BlackLivesMatter and {\\textbackslash}\\#BlueLivesMatter Movements-related Tweets using Weakly Supervised Graph-based Structured Prediction},\nauthor={Shamik Roy and Dan Goldwasser},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=MMrqu8SD6y}\n}", "github": "", "project": "", "reviewers": "qRYV;GbZ4;Yvsx", "site": "https://openreview.net/forum?id=MMrqu8SD6y", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;4", "excitement": "2;4;2", "reproducibility": "1;5;3", "correctness": "4;5;4", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 2.6666666666666665, "reproducibility_avg": 3.0, "correctness_avg": 4.333333333333333, "replies_avg": 11, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";", "linkedin": "shamik-roy-97698288/;", "aff_unique_index": "0;0", "aff_unique_norm": "Purdue University", "aff_unique_dep": "", "aff_unique_url": "https://www.purdue.edu", "aff_unique_abbr": "Purdue", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "MNTCi0i3cU", "title": "InstructSafety: A Unified Framework for Building Multidimensional and Explainable Safety Detector through Instruction Tuning", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Safety detection has been an increasingly important topic in recent years and it has become even more necessary to develop reliable safety detection systems with the rapid development of large language models. However, currently available safety detection systems have limitations in terms of their versatility and interpretability. In this paper, we first introduce InstructSafety, a safety detection framework that unifies 7 common sub-tasks for safety detection. These tasks are unified into a similar form through different instructions. We then conduct a comprehensive survey of existing safety detection datasets and process 39 human-annotated datasets for instruction tuning. We also construct adversarial samples to enhance the model's robustness. After fine-tuning Flan-T5 on the collected data, we have developed Safety-Flan-T5, a multidimensional and explainable safety detector. 
We conduct comprehensive experiments on a variety of datasets and tasks, and demonstrate the strong performance of Safety-Flan-T5 in comparison to supervised baselines and served APIs (Perspective API, ChatGPT and InstructGPT). We will release the processed data, fine-tuned Safety-Flan-T5 and related code for public use.", "keywords": "safety detection;unified framework;instruction tuning", "primary_area": "", "supplementary_material": "", "author": "Zhexin Zhang;Jiale Cheng;Hao Sun;Jiawen Deng;Minlie Huang", "authorids": "~Zhexin_Zhang2;~Jiale_Cheng1;~Hao_Sun7;~Jiawen_Deng1;~Minlie_Huang1", "gender": "M;M;M;F;M", "homepage": "https://github.com/nonstopfor;;;;http://coai.cs.tsinghua.edu.cn/hml", "dblp": "225/5264;275/7964;;;", "google_scholar": "I-Cn8gkAAAAJ;WcAly8wAAAAJ;;fseN_08AAAAJ;https://scholar.google.com/citations?hl=zh-CN", "or_profile": "~Zhexin_Zhang2;~Jiale_Cheng1;~Hao_Sun7;~Jiawen_Deng1;~Minlie_Huang1", "aff": "Tsinghua University;Tsinghua University;Tsinghua University;Tsinghua University;Tsinghua University", "aff_domain": "tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn", "position": "PhD student;Undergrad student;MS student;Postdoc;Full Professor", "bibtex": "@inproceedings{\nzhang2023instructsafety,\ntitle={InstructSafety: A Unified Framework for Building Multidimensional and Explainable Safety Detector through Instruction Tuning},\nauthor={Zhexin Zhang and Jiale Cheng and Hao Sun and Jiawen Deng and Minlie Huang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=MNTCi0i3cU}\n}", "github": "", "project": "", "reviewers": "8L5c;XQAP;UwgW", "site": "https://openreview.net/forum?id=MNTCi0i3cU", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;3", "excitement": "4;2;4", "reproducibility": "4;3;3", "correctness": "3;3;3", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0009-0002-9601-3991;0000-0003-2995-7116;;0000-0003-0602-8250;", "linkedin": ";;;;", "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Tsinghua University", "aff_unique_dep": "", "aff_unique_url": "https://www.tsinghua.edu.cn", "aff_unique_abbr": "THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "MQsvD6YOan", "title": "Improving Low-resource Question Answering by Augmenting Question Information", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "In the era of large models, low-resource question-answering tasks lag, emphasizing the importance of data augmentation - a key research avenue in natural language processing. The main challenges include leveraging the large model's internal knowledge for data augmentation, determining which QA data component - the question, passage, or answer - benefits most from augmentation, and retaining consistency in the augmented content without inducing excessive noise. To tackle these, we introduce PQQ, an innovative approach for question data augmentation consisting of Prompt Answer, Question Generation, and Question Filter. Our experiments reveal that ChatGPT underperforms on the experimental data, yet our PQQ method excels beyond existing augmentation strategies. 
Further, its universal applicability is validated through successful tests on high-resource QA tasks like SQUAD1.1 and TriviaQA.", "keywords": "Question Answering;Data augmentation;Low-resource domains", "primary_area": "", "supplementary_material": "", "author": "Andong Chen;Yuan Sun;Xiaobing Zhao;Rosella P. Galindo Esparza;Kehai Chen;Yang Xiang;Tiejun Zhao;Min zhang", "authorids": "~Andong_Chen1;~Yuan_Sun5;~Xiaobing_Zhao1;~Rosella_P._Galindo_Esparza1;~Kehai_Chen2;~Yang_Xiang4;~Tiejun_Zhao1;~Min_zhang14", "gender": "M;;F;F;M;M;M;M", "homepage": "https://andongblue.github.io/chenandong.github.io/;;https://xingong.muc.edu.cn/info/1051/1132.htm;https://mat.qmul.ac.uk/students/rosella-galindo;https://chenkehai.github.io;;http://mitlab.hit.edu.cn/2018/0608/c9183a210162/page.htm;https://zhangmin-nlp-ai.github.io/", "dblp": "191/0289;;;;78/9623;50/2192-3;35/1787;83/5342-?", "google_scholar": "tcb9VT8AAAAJ;;;;_M4Am0AAAAAJ;zDyL-NoAAAAJ;;https://scholar.google.com/citations?", "or_profile": "~Andong_Chen1;~Yuan_Sun5;~Xiaobing_Zhao1;~Rosella_P._Galindo_Esparza1;~Kehai_Chen2;~Yang_Xiang4;~Tiejun_Zhao1;~Min_zhang14", "aff": "Harbin Institute of Technology;;National Language Resource Monitoring & Research Center of Minority Languages;Newcastle University, UK;Harbin Institute of Technology (Shenzhen);Peng Cheng Laboratory;Harbin Institute of Technology;Harbin Institute of Technology", "aff_domain": "hit.edu.cn;;nmlr.muc.edu.cn;newcastle.ac.uk;hit.edu.cn;pcl.ac;hit.edu.cn;hit.edu.cn", "position": "PhD student;;Full Professor;Postdoc;Assistant Professor;Researcher;Full Professor;Full Professor", "bibtex": "@inproceedings{\nchen2023improving,\ntitle={Improving Low-resource Question Answering by Augmenting Question Information},\nauthor={Andong Chen and Yuan Sun and Xiaobing Zhao and Rosella P. 
Galindo Esparza and Kehai Chen and Yang Xiang and Tiejun Zhao and Min zhang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=MQsvD6YOan}\n}", "github": "", "project": "", "reviewers": "KiMC;N3fX;xYud", "site": "https://openreview.net/forum?id=MQsvD6YOan", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;4;2", "excitement": "3;4;3", "reproducibility": "3;4;3", "correctness": "3;4;4", "rating_avg": 3.0, "confidence_avg": 3.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 8, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0000-0003-2552-0224;0000-0002-4346-7618;0000-0003-1395-6805;;0000-0002-3895-5510", "linkedin": ";;;;;yang-xiang-7554b6195/;;", "aff_unique_index": "0;1;2;0;3;0;0", "aff_unique_norm": "Harbin Institute of Technology;National Language Resource Monitoring & Research Center;Newcastle University;Pengcheng Laboratory", "aff_unique_dep": ";Center of Minority Languages;;Peng Cheng Laboratory", "aff_unique_url": "http://www.hit.edu.cn/;;https://www.ncl.ac.uk;http://www.pcl.ac.cn", "aff_unique_abbr": "HIT;;NU;PCL", "aff_campus_unique_index": "0;2;0;0", "aff_campus_unique": "Harbin;;Shenzhen", "aff_country_unique_index": "0;0;1;0;0;0;0", "aff_country_unique": "China;United Kingdom" }, { "id": "MRehcsVc4y", "title": "RSVP: Customer Intent Detection via Agent Response Contrastive and Generative Pre-Training", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "The dialogue systems in customer services have been developed with neural models to provide users with precise answers and round-the-clock support in task-oriented conversations by detecting customer intents based on their utterances. Existing intent detection approaches have highly relied on adaptively pre-training language models with large-scale datasets, yet the predominant cost of data collection may hinder their superiority. In addition, they neglect the information within the conversational responses of the agents, which have a lower collection cost, but are significant to customer intent as agents must tailor their replies based on the customers' intent. In this paper, we propose RSVP, a self-supervised framework dedicated to task-oriented dialogues, which utilizes agent responses for pre-training in a two-stage manner. Specifically, we introduce two pre-training tasks to incorporate the relations of utterance-response pairs: 1) Response Retrieval by selecting a correct response from a batch of candidates, and 2) Response Generation by mimicking agents to generate the response to a given utterance. Our benchmark results for two real-world customer service datasets show that RSVP significantly outperforms the state-of-the-art baselines by 4.95% for accuracy, 3.4% for MRR@3, and 2.75% for MRR@5 on average. 
Extensive case studies are investigated to show the validity of incorporating agent responses into the pre-training stage.", "keywords": "Intent Detection;Task Adaptive Fine-Tuning;Contrastive Learning;Question Answering", "primary_area": "", "supplementary_material": "", "author": "Yu-Chien Tang;Wei-Yao Wang;An-Zi Yen;Wen-Chih Peng", "authorids": "~Yu-Chien_Tang1;~Wei-Yao_Wang1;~An-Zi_Yen1;~Wen-Chih_Peng1", "gender": "M;M;F;M", "homepage": ";https://wywywang.github.io/;https://azyen0522.github.io/;https://sites.google.com/site/wcpeng/wcpeng", "dblp": "46/11210;269/9571.html;204/3583;92/1623", "google_scholar": "yZAfJKIAAAAJ;https://scholar.google.com.tw/citations?user=HMKbOJAAAAAJ;https://scholar.google.com/citations?hl=zh-TW;", "or_profile": "~Yu-Chien_Tang1;~Wei-Yao_Wang1;~An-Zi_Yen1;~Wen-Chih_Peng1", "aff": "National Yang Ming Chiao Tung University;National Yang Ming Chiao Tung University;Department of Computer Science, National Yang Ming Chiao Tung University;National Yang Ming Chiao Tung University", "aff_domain": "nycu.edu.tw;nycu.edu.tw;nycu.edu.tw;nycu.edu.tw", "position": "MS student;PhD student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\ntang2023rsvp,\ntitle={{RSVP}: Customer Intent Detection via Agent Response Contrastive and Generative Pre-Training},\nauthor={Yu-Chien Tang and Wei-Yao Wang and An-Zi Yen and Wen-Chih Peng},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=MRehcsVc4y}\n}", "github": "", "project": "", "reviewers": "GcM6;KLoz;ChMc;tLuo;v7zY", "site": "https://openreview.net/forum?id=MRehcsVc4y", "pdf_size": 0, "rating": "3;3;3;3;3", "confidence": "4;4;4;4;3", "excitement": "3;3;3;4;3", "reproducibility": "4;3;3;4;2", "correctness": "3;3;4;3;3", "rating_avg": 3.0, "confidence_avg": 3.8, "excitement_avg": 3.2, "reproducibility_avg": 3.2, "correctness_avg": 3.2, "replies_avg": 17, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0009-0009-5935-4966;;;0000-0002-0172-7311", "linkedin": "tommytyc/;wei-yao-wang/;;", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "National Yang Ming Chiao Tung University", "aff_unique_dep": "", "aff_unique_url": "https://www.nycu.edu.tw", "aff_unique_abbr": "NYCU", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Taiwan", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "MSQrAoa7iy", "title": "3DRP-Net: 3D Relative Position-aware Network for 3D Visual Grounding", "track": "main", "status": "Long Main", "tldr": "", "abstract": "3D visual grounding aims to localize the target object in a 3D point cloud by a free-form language description. Typically, the sentences describing the target object tend to provide information about its relative relation between other objects and its position within the whole scene. In this work, we propose a relation-aware one-stage framework, named 3D Relative Position-aware Network (3DRP-Net), which can effectively capture the relative spatial relationships between objects and enhance object attributes. Specifically, 1) we propose a 3D Relative Position Multi-head Attention (3DRP-MA) module to analyze relative relations from different directions in the context of object pairs, which helps the model to focus on the specific object relations mentioned in the sentence. 
2) We designed a soft-labeling strategy to alleviate the spatial ambiguity caused by redundant points, which further stabilizes and enhances the learning process through a constant and discriminative distribution. Extensive experiments conducted on three benchmarks (i.e., ScanRefer and Nr3D/Sr3D) demonstrate that our method outperforms all the state-of-the-art methods in general.", "keywords": "3D visual grounding", "primary_area": "", "supplementary_material": "", "author": "Zehan Wang;Haifeng Huang;Yang Zhao;Linjun Li;Xize Cheng;Yichen Zhu;Aoxiong Yin;Zhou Zhao", "authorids": "~Zehan_Wang2;~Haifeng_Huang3;~Yang_Zhao14;~Linjun_Li2;~Xize_Cheng1;~Yichen_Zhu2;~Aoxiong_Yin1;~Zhou_Zhao3", "gender": "M;M;M;;M;M;;", "homepage": "https://github.com/12zehan17;https://zzzzchs.github.io/;;;https://exgc.github.io/;https://github.com/Echen-Zhu;;", "dblp": "126/7826-1;;50/2082-22;;334/2167;;;", "google_scholar": "euXK0lkAAAAJ;oUm2gZUAAAAJ;;;https://scholar.google.com/citations?hl=zh-CN;9K3a7T8AAAAJ;;", "or_profile": "~Zehan_Wang2;~Haifeng_Huang3;~Yang_Zhao14;~Linjun_Li2;~Xize_Cheng1;~Yichen_Zhu2;~Aoxiong_Yin1;~Zhou_Zhao3", "aff": "Zhejiang University;Zhejiang University;Zhejiang University;;Zhejiang University;Zhejiang University;;", "aff_domain": "zju.edu.cn;zju.edu.cn;zju.edu.cn;;zju.edu.cn;zju.edu.cn;;", "position": "PhD student;MS student;MS student;;PhD student;Undergrad student;;", "bibtex": "@inproceedings{\nwang2023drpnet,\ntitle={3{DRP}-Net: 3D Relative Position-aware Network for 3D Visual Grounding},\nauthor={Zehan Wang and Haifeng Huang and Yang Zhao and Linjun Li and Xize Cheng and Yichen Zhu and Aoxiong Yin and Zhou Zhao},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=MSQrAoa7iy}\n}", "github": "", "project": "", "reviewers": "bfbG;Qbww;fZVS", "site": "https://openreview.net/forum?id=MSQrAoa7iy", "pdf_size": 0, "rating": "5;5;5", "confidence": "3;5;4", "excitement": "4;3;4", "reproducibility": "4;4;4", "correctness": "3;3;3", "rating_avg": 5.0, "confidence_avg": 4.0, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.0, "correctness_avg": 3.0, "replies_avg": 10, "authors#_avg": 8, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0009-0007-7509-7563;;;;0000-0001-9708-3225;;;", "linkedin": ";haifeng-huang-784b2b249/;;;;;;", "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Zhejiang University", "aff_unique_dep": "", "aff_unique_url": "https://www.zju.edu.cn", "aff_unique_abbr": "ZJU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "MWhwZjFCcq", "title": "StyleBART: Decorate Pretrained Model with Style Adapters for Unsupervised Stylistic Headline Generation", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Stylistic headline generation is the task to generate a headline that not only summarizes the content of an article, but also reflects a desired style that attracts users. As style-specific article-headline pairs are scarce, previous researches focus on unsupervised approaches with a standard headline generation dataset and mono-style corpora. In this work, we follow this line and propose StyleBART, an unsupervised approach for stylistic headline generation. Our method decorates the pretrained BART model with adapters that are responsible for different styles and allows the generation of headlines with diverse styles by simply switching the adapters. 
Different from previous works, StyleBART separates the task of style learning and headline generation, making it possible to freely combine the base model and the style adapters during inference. We further propose an inverse paraphrasing task to enhance the style adapters. Extensive automatic and human evaluations show that StyleBART achieves new state-of-the-art performance in the unsupervised stylistic headline generation task, producing high-quality headlines with the desired style.", "keywords": "Headline generation; Style transfer; Efficient NLP; Unsupervised learning", "primary_area": "", "supplementary_material": "", "author": "Hanqing Wang;Yajing Luo;Boya Xiong;Guanhua Chen;Yun Chen", "authorids": "~Hanqing_Wang2;~Yajing_Luo1;~Boya_Xiong1;~Guanhua_Chen1;~Yun_Chen1", "gender": "M;F;M;M;F", "homepage": ";;;https://ghchen.me;https://yunc.me/", "dblp": "35/10182-3;;359/6250;85/3682-1;10/5680-7", "google_scholar": "1sFj7RcAAAAJ;;;https://scholar.google.com/citations?hl=zh-CN;vXd0JQMAAAAJ", "or_profile": "~Hanqing_Wang2;~Yajing_Luo1;~Boya_Xiong1;~Guanhua_Chen1;~Yun_Chen1", "aff": "Shanghai University of Finance and Economics;Shanghai University of Finance and Economics;Shanghai University of Finance and Economics;Southern University of Science and Technology;Shanghai University of Finance and Economics", "aff_domain": "sufe.edu;sufe.edu;sufe.edu;sustech.edu.cn;sufe.edu.cn", "position": "PhD student;MS student;Undergrad student;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nwang2023stylebart,\ntitle={Style{BART}: Decorate Pretrained Model with Style Adapters for Unsupervised Stylistic Headline Generation},\nauthor={Hanqing Wang and Yajing Luo and Boya Xiong and Guanhua Chen and Yun Chen},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=MWhwZjFCcq}\n}", "github": "", "project": "", "reviewers": "vmEE;B3qY;ZP6W", "site": "https://openreview.net/forum?id=MWhwZjFCcq", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;5", "excitement": "3;3;3", "reproducibility": "4;4;4", "correctness": "4;3;3", "rating_avg": 3.0, "confidence_avg": 4.333333333333333, "excitement_avg": 3.0, "reproducibility_avg": 4.0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0000-0002-5353-9734;0000-0002-3563-7592", "linkedin": ";yajingluo61/;%E5%8D%9A%E9%9B%85-%E7%86%8A-b6343927b?lipi=urn%3Ali%3Apage%3Ad_flagship3_profile_view_base_contact_details%3B7VunpjPCQFmV9nVAxWR2SA%3D%3D;;", "aff_unique_index": "0;0;0;1;0", "aff_unique_norm": "Shanghai University of Finance and Economics;Southern University of Science and Technology", "aff_unique_dep": ";", "aff_unique_url": "http://www.sufe.edu.cn;https://www.sustech.edu.cn", "aff_unique_abbr": "SUFE;SUSTech", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "MWisc5Amup", "title": "ALDi: Quantifying the Arabic Level of Dialectness of Text", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Transcribed speech and user-generated text in Arabic typically contain a mixture of Modern Standard Arabic (MSA), the standardized language taught in schools, and Dialectal Arabic (DA), used in daily communications. To handle this variation, previous work in Arabic NLP has focused on Dialect Identification (DI) on the sentence or the token level. 
However, DI treats the task as binary, whereas we argue that Arabic speakers perceive a spectrum of dialectness, which we operationalize at the sentence level as the Arabic Level of Dialectness (ALDi), a continuous linguistic variable. \nWe introduce the AOC-ALDi dataset (derived from the AOC dataset), containing 127,835 sentences (17\\% from news articles and 83\\% from user comments on those articles) which are manually labeled with their level of dialectness. We provide a detailed analysis of AOC-ALDi and show that a model trained on it can effectively identify levels of dialectness on a range of other corpora (including dialects and genres not included in AOC-ALDi), providing a more nuanced picture than traditional DI systems. Through case studies, we illustrate how ALDi can reveal Arabic speakers' stylistic choices in different situations, a useful property for sociolinguistic analyses.", "keywords": "Arabic Dialects;Arabic Dialect Identification;Dialectal Variation;Code-switching;Level of Dialectness", "primary_area": "", "supplementary_material": "", "author": "Amr Keleg;Sharon Goldwater;Walid Magdy", "authorids": "~Amr_Keleg1;~Sharon_Goldwater1;~Walid_Magdy1", "gender": ";;", "homepage": "https://amr-keleg.github.io/;;https://homepages.inf.ed.ac.uk/wmagdy/", "dblp": "266/1042;;56/1909", "google_scholar": "4YkMPp0AAAAJ;;ACQD8jMAAAAJ", "or_profile": "~Amr_Keleg1;~Sharon_Goldwater1;~Walid_Magdy1", "aff": "University of Edinburgh, University of Edinburgh;;University of Edinburgh", "aff_domain": "sms.ed.ac.uk;;ed.ac.uk", "position": "PhD student;;Associate Professor", "bibtex": "@inproceedings{\nkeleg2023aldi,\ntitle={{ALD}i: Quantifying the Arabic Level of Dialectness of Text},\nauthor={Amr Keleg and Sharon Goldwater and Walid Magdy},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=MWisc5Amup}\n}", "github": "", "project": "", "reviewers": "t5z9;RifN;sVaX", "site": "https://openreview.net/forum?id=MWisc5Amup", "pdf_size": 0, "rating": "4;4;4", "confidence": "5;4;5", "excitement": "4;4;3", "reproducibility": "5;4;3", "correctness": "4;4;3", "rating_avg": 4.0, "confidence_avg": 4.666666666666667, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.0, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0001-9676-1338", "linkedin": ";;", "aff_unique_index": "0;0", "aff_unique_norm": "University of Edinburgh", "aff_unique_dep": "", "aff_unique_url": "https://www.ed.ac.uk", "aff_unique_abbr": "Edinburgh", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United Kingdom" }, { "id": "MXMA6vQtSZ", "title": "Entity-Based Evaluation of Political Bias in Automatic Summarization", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "Growing literature has shown that NLP systems may encode social biases; however, the *political* bias of summarization models remains relatively unknown. In this work, we use an entity replacement method to investigate the portrayal of politicians in automatically generated summaries of news articles. We develop an entity-based computational framework to assess the sensitivities of several extractive and abstractive summarizers to the politicians Donald Trump and Joe Biden. 
We find consistent differences in these summaries upon entity replacement, such as reduced emphasis of Trump's presence in the context of the same article and a more individualistic representation of Trump with respect to the collective US government (i.e., administration). These summary dissimilarities are most prominent when the entity is heavily featured in the source article. Our characterization provides a foundation for future studies of bias in summarization and for normative discussions on the ideal qualities of automatic summaries.", "keywords": "summarization;political bias", "primary_area": "", "supplementary_material": "", "author": "Karen Zhou;Chenhao Tan", "authorids": "~Karen_Zhou1;~Chenhao_Tan1", "gender": ";M", "homepage": "https://karen-zhou.com;https://chenhaot.com/", "dblp": "208/5904;95/8314", "google_scholar": ";https://scholar.google.com.tw/citations?user=KGMaP18AAAAJ", "or_profile": "~Karen_Zhou1;~Chenhao_Tan1", "aff": "University of Chicago;University of Chicago", "aff_domain": "uchicago.edu;uchicago.edu", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\nzhou2023entitybased,\ntitle={Entity-Based Evaluation of Political Bias in Automatic Summarization},\nauthor={Karen Zhou and Chenhao Tan},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=MXMA6vQtSZ}\n}", "github": "", "project": "", "reviewers": "f6t5;utt1;7A5T", "site": "https://openreview.net/forum?id=MXMA6vQtSZ", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;4;4", "excitement": "4;3;3", "reproducibility": "3;4;3", "correctness": "4;3;3", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";", "linkedin": ";", "aff_unique_index": "0;0", "aff_unique_norm": "University of Chicago", "aff_unique_dep": "", "aff_unique_url": "https://www.uchicago.edu", "aff_unique_abbr": "UChicago", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "MYdmanqfvm", "title": "What do Deck Chairs and Sun Hats Have in Common? Uncovering Shared Properties in Large Concept Vocabularies", "track": "main", "status": "Short Main", "tldr": "", "abstract": "Concepts play a central role in many applications. This includes settings where concepts have to be modelled in the absence of sentence context. Previous work has therefore focused on distilling decontextualised concept embeddings from language models. But concepts can be modelled from different perspectives, whereas concept embeddings typically mostly capture taxonomic structure. To address this issue, we propose a strategy for identifying what different concepts, from a potentially large concept vocabulary, have in common with others. We then represent concepts in terms of the properties they share with the other concepts. To demonstrate the practical usefulness of this way of modelling concepts, we consider the task of ultra-fine entity typing, which is a challenging multi-label classification problem. 
We show that by augmenting the label set with shared properties, we can improve the performance of the state-of-the-art models for this task.", "keywords": "lexical semantics;commonality detection;ultra-fine entity typing;ontology learning", "primary_area": "", "supplementary_material": "", "author": "Amit Gajbhiye;Zied Bouraoui;Na Li;Usashi Chatterjee;Luis Espinosa-Anke;Steven Schockaert", "authorids": "~Amit_Gajbhiye2;~Zied_Bouraoui1;~Na_Li10;~Usashi_Chatterjee1;~Luis_Espinosa-Anke1;~Steven_Schockaert2", "gender": ";M;F;;M;", "homepage": ";;;;http://www.luisespinosa.net;", "dblp": ";134/4606;18/3173-18;;140/3490.html;", "google_scholar": ";f_6RpYEAAAAJ;JZUxMuwAAAAJ;;;", "or_profile": "~Amit_Gajbhiye2;~Zied_Bouraoui1;~Na_Li10;~Usashi_Chatterjee1;~Luis_Espinosa-Anke1;~Steven_Schockaert2", "aff": ";;School of Optical-Electrical and Computer Engineering, University of Shanghai for Science and Technology;;AMPLYFI;", "aff_domain": ";;usst.edu.cn;;amplyfi.com;", "position": ";;Assistant Professor;;Principal Researcher;", "bibtex": "@inproceedings{\ngajbhiye2023what,\ntitle={What do Deck Chairs and Sun Hats Have in Common? Uncovering Shared Properties in Large Concept Vocabularies},\nauthor={Amit Gajbhiye and Zied Bouraoui and Na Li and Usashi Chatterjee and Luis Espinosa-Anke and Steven Schockaert},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=MYdmanqfvm}\n}", "github": "", "project": "", "reviewers": "zfkC;k24T;vHKK", "site": "https://openreview.net/forum?id=MYdmanqfvm", "pdf_size": 0, "rating": "5;5;5", "confidence": "3;4;3", "excitement": "3;4;4", "reproducibility": "2;3;3", "correctness": "3;3;3", "rating_avg": 5.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 2.6666666666666665, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-1662-4163;0009-0000-6776-3908;;;", "linkedin": ";;;;;", "aff_unique_index": "0;1", "aff_unique_norm": "University of Shanghai for Science and Technology;AMPLYFI", "aff_unique_dep": "School of Optical-Electrical and Computer Engineering;", "aff_unique_url": "https://www.usst.edu.cn;", "aff_unique_abbr": "USST;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0", "aff_country_unique": "China;" }, { "id": "MZwFbA3DSF", "title": "Pit One Against Many: Leveraging Attention-head Embeddings for Parameter-efficient Multi-head Attention", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Scaling pre-trained language models has resulted in large performance gains in various natural language processing tasks but comes with a large cost in memory requirements. Inspired by the position embeddings in transformers, we aim to simplify and reduce the memory footprint of the multi-head attention (MHA) mechanism. We propose an alternative module that uses only a single shared projection matrix and multiple head embeddings (MHE), i.e. one per head. We empirically demonstrate that our MHE attention is substantially more memory efficient compared to alternative attention mechanisms while achieving high predictive performance retention ratio to vanilla MHA on several downstream tasks. 
MHE attention only requires a negligible fraction of additional parameters ($3nd$, where $n$ is the number of attention heads and $d$ the size of the head embeddings) compared to a single-head attention, while MHA requires $(3n^2-3n)d^2-3nd$ additional parameters.", "keywords": "multi-head attention; memory efficiency", "primary_area": "", "supplementary_material": "", "author": "Huiyin Xue;Nikolaos Aletras", "authorids": "~Huiyin_Xue1;~Nikolaos_Aletras1", "gender": "F;", "homepage": "https://huiyinxue.github.io/;", "dblp": "331/2682;118/9116", "google_scholar": "65wII5AAAAAJ;https://scholar.google.co.uk/citations?user=uxRWFhoAAAAJ", "or_profile": "~Huiyin_Xue1;~Nikolaos_Aletras1", "aff": "University of Sheffield;Amazon", "aff_domain": "shef.ac.uk;amazon.com", "position": "PhD student;Researcher", "bibtex": "@inproceedings{\nxue2023pit,\ntitle={Pit One Against Many: Leveraging Attention-head Embeddings for Parameter-efficient Multi-head Attention},\nauthor={Huiyin Xue and Nikolaos Aletras},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=MZwFbA3DSF}\n}", "github": "", "project": "", "reviewers": "M1Z9;ymVs;oMfC", "site": "https://openreview.net/forum?id=MZwFbA3DSF", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;4", "excitement": "4;2;3", "reproducibility": "4;4;4", "correctness": "3;3;3", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 3.0, "reproducibility_avg": 4.0, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-8705-6431;", "linkedin": ";", "aff_unique_index": "0;1", "aff_unique_norm": "University of Sheffield;Amazon", "aff_unique_dep": ";Amazon.com, Inc.", "aff_unique_url": "https://www.sheffield.ac.uk;https://www.amazon.com", "aff_unique_abbr": "Sheffield;Amazon", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "United Kingdom;United States" }, { "id": "MbKRJUowYX", "title": "E-CORE: Emotion Correlation Enhanced Empathetic Dialogue Generation", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Achieving empathy is a crucial step toward humanized dialogue systems. Current approaches for empathetic dialogue generation mainly perceive an emotional label to generate an empathetic response conditioned on it, which simply treat emotions independently, but ignore the intrinsic emotion correlation in dialogues, resulting in inaccurate emotion perception and unsuitable response generation. In this paper, we propose a novel emotion correlation enhanced empathetic dialogue generation framework, which comprehensively realizes emotion correlation learning, utilization, and supervising. Specifically, a multi-resolution emotion graph is devised to capture context-based emotion interactions from different resolutions, further modeling emotion correlation. Then we propose an emotion correlation enhanced decoder, with a novel correlation-aware aggregation and soft/hard strategy, respectively improving the emotion perception and response generation. 
Experimental results on the benchmark dataset demonstrate the superiority of our model in both empathetic perception and expression.", "keywords": "empathetic dialogue generation;graph network;emotion perception;natural language generation", "primary_area": "", "supplementary_material": "", "author": "Fengyi Fu;Lei Zhang;Quan Wang;Zhendong Mao", "authorids": "~Fengyi_Fu1;~Lei_Zhang54;~Quan_Wang7;~Zhendong_Mao1", "gender": ";;F;", "homepage": ";;;", "dblp": ";;;", "google_scholar": ";;l2yEbhAAAAAJ;", "or_profile": "~Fengyi_Fu1;~Lei_Zhang54;~Quan_Wang7;~Zhendong_Mao1", "aff": ";;Beijing University of Posts and Telecommunications;", "aff_domain": ";;bupt.edu.cn;", "position": ";;Associate Professor;", "bibtex": "@inproceedings{\nfu2023ecore,\ntitle={E-{CORE}: Emotion Correlation Enhanced Empathetic Dialogue Generation},\nauthor={Fengyi Fu and Lei Zhang and Quan Wang and Zhendong Mao},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=MbKRJUowYX}\n}", "github": "", "project": "", "reviewers": "rPyr;9WcU;9bdy", "site": "https://openreview.net/forum?id=MbKRJUowYX", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;4", "excitement": "3;4;3", "reproducibility": "3;4;4", "correctness": "4;4;4", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 4.0, "replies_avg": 10, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;", "linkedin": ";;;", "aff_unique_index": "0", "aff_unique_norm": "Beijing University of Posts and Telecommunications", "aff_unique_dep": "", "aff_unique_url": "http://www.bupt.edu.cn/", "aff_unique_abbr": "BUPT", "aff_campus_unique_index": "0", "aff_campus_unique": "Beijing", "aff_country_unique_index": "0", "aff_country_unique": "China" }, { "id": "Md1YdfqAed", "title": "Balance Act: Mitigating Hubness in Cross-Modal Retrieval with Query and Gallery Banks", "track": "main", "status": "Long Main", "tldr": "", "abstract": "In this work, we present a post-processing solution to address the hubness problem in cross-modal retrieval, a phenomenon where a small number of gallery data points are frequently retrieved, resulting in a decline in retrieval performance. We first theoretically demonstrate the necessity of incorporating both the gallery and query data for addressing hubness as hubs always exhibit high similarity with gallery and query data. Second, building on our theoretical results, we propose a novel framework, Dual Bank Normalization (DBNorm). While previous work has attempted to alleviate hubness by only utilizing the query samples, DBNorm leverages two banks constructed from the query and gallery samples to reduce the occurrence of hubs during inference. Next, to complement DBNorm, we introduce two novel methods, dual inverted softmax and dual dynamic inverted softmax, for normalizing similarity based on the two banks. Specifically, our proposed methods reduce the similarity between hubs and queries while improving the similarity between non-hubs and queries. 
Finally, we present extensive experimental results on diverse language-grounded benchmarks, including text-image, text-video, and text-audio, demonstrating the superior performance of our approaches compared to previous methods in addressing hubness and boosting retrieval performance.", "keywords": "Cross-modal Retrieval;Hubness", "primary_area": "", "supplementary_material": "", "author": "Yimu Wang;Xiangru Jian;Bo Xue", "authorids": "~Yimu_Wang1;~Xiangru_Jian1;~Bo_Xue1", "gender": "M;M;M", "homepage": "https://yimuwangcs.github.io;https://edward-jianqaq.github.io/;https://xueb1996.github.io/", "dblp": "140/7766;326/8022;122/2421-4", "google_scholar": "TV2vnN8AAAAJ;kq17trAAAAAJ;1D4gVmIAAAAJ", "or_profile": "~Yimu_Wang1;~Xiangru_Jian1;~Bo_Xue1", "aff": "University of Waterloo;University of Waterloo;City University of Hong Kong", "aff_domain": "uwaterloo.ca;uwaterloo.ca;cityu.edu.hk", "position": "PhD student;PhD student;PhD student", "bibtex": "@inproceedings{\nwang2023balance,\ntitle={Balance Act: Mitigating Hubness in Cross-Modal Retrieval with Query and Gallery Banks},\nauthor={Yimu Wang and Xiangru Jian and Bo Xue},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=Md1YdfqAed}\n}", "github": "", "project": "", "reviewers": "GfvP;htxD;rioa", "site": "https://openreview.net/forum?id=Md1YdfqAed", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;2;4", "excitement": "4;4;4", "reproducibility": "5;4;3", "correctness": "5;4;4", "rating_avg": 4.0, "confidence_avg": 3.0, "excitement_avg": 4.0, "reproducibility_avg": 4.0, "correctness_avg": 4.333333333333333, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0002-7295-4853", "linkedin": "yimu-wang-854743151/;;", "aff_unique_index": "0;0;1", "aff_unique_norm": "University of Waterloo;City University of Hong Kong", "aff_unique_dep": ";", "aff_unique_url": "https://uwaterloo.ca;https://www.cityu.edu.hk", "aff_unique_abbr": "UW;CityU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;1", "aff_country_unique": "Canada;China" }, { "id": "Mefvmgkb9G", "title": "CAPSTONE: Curriculum Sampling for Dense Retrieval with Document Expansion", "track": "main", "status": "Long Main", "tldr": "", "abstract": "The dual-encoder has become the de facto architecture for dense retrieval. Typically, it computes the latent representations of the query and document independently, thus failing to fully capture the interactions between the query and document. To alleviate this, recent research has focused on obtaining query-informed document representations. During training, it expands the document with a real query, but during inference, it replaces the real query with a generated one. This inconsistency between training and inference causes the dense retrieval model to prioritize query information while disregarding the document when computing the document representation. Consequently, it performs even worse than the vanilla dense retrieval model because its performance heavily relies on the relevance between the generated queries and the real query. In this paper, we propose a curriculum sampling strategy that utilizes pseudo queries during training and progressively enhances the relevance between the generated query and the real query. 
By doing so, the retrieval model learns to extend its attention from the document alone to both the document and query, resulting in high-quality query-informed document representations. Experimental results on both in-domain and out-of-domain datasets demonstrate that our approach outperforms previous dense retrieval models.", "keywords": "Dense Retrieval;Document Expansion", "primary_area": "", "supplementary_material": "", "author": "Xingwei He;Yeyun Gong;A-Long Jin;Hang Zhang;Anlei Dong;Jian Jiao;Siu Ming Yiu;Nan Duan", "authorids": "~Xingwei_He1;~Yeyun_Gong2;~A-Long_Jin1;~Hang_Zhang6;~Anlei_Dong1;~Jian_Jiao2;~Siu_Ming_Yiu1;~Nan_Duan1", "gender": "M;M;M;M;M;M;M;M", "homepage": "https://scholar.google.com/citations?user=p1a5WXIAAAAJ&hl=zh-CN;;;;;;https://www.cs.hku.hk/index.php/people/academic-staff/smyiu;https://nanduan.github.io/", "dblp": "18/8988-3;06/10400.html;;49/6156-29;28/6385;29/265-7.html;y/SiuMingYiu.html;", "google_scholar": "p1a5WXIAAAAJ;piUkwMYAAAAJ;YpMInDMAAAAJ;https://scholar.google.com.hk/citations?hl=zh-CN;;D6KwmF8AAAAJ;QFNVqjEAAAAJ;Qaa6OxIAAAAJ", "or_profile": "~Xingwei_He1;~Yeyun_Gong2;~A-Long_Jin1;~Hang_Zhang6;~Anlei_Dong1;~Jian_Jiao2;~Siu_Ming_Yiu1;~Nan_Duan1", "aff": "The University of Hong Kong;Microsoft;The University of Hong Kong;Sichuan University;Microsoft;Microsoft;University of Hong Kong;Microsoft Research Asia", "aff_domain": "hku.hk;microsoft.com;hku.hk;scu.edu.cn;microsoft.com;microsoft.com;hku.hk;microsoft.com", "position": "Postdoc;Researcher;PhD student;PhD student;Principal Researcher;Principal Researcher;Full Professor;Principal Researcher", "bibtex": "@inproceedings{\nhe2023capstone,\ntitle={{CAPSTONE}: Curriculum Sampling for Dense Retrieval with Document Expansion},\nauthor={Xingwei He and Yeyun Gong and A-Long Jin and Hang Zhang and Anlei Dong and Jian Jiao and Siu Ming Yiu and Nan Duan},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=Mefvmgkb9G}\n}", "github": "", "project": "", "reviewers": "p4HK;4QHG;1aty", "site": "https://openreview.net/forum?id=Mefvmgkb9G", "pdf_size": 0, "rating": "5;5;5", "confidence": "3;4;3", "excitement": "4;3;3", "reproducibility": "4;4;4", "correctness": "4;4;4", "rating_avg": 5.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.0, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 8, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;0000-0003-4779-9588;0000-0002-3975-8500;", "linkedin": ";;;;;jian-jiao-82897810/;;", "aff_unique_index": "0;1;0;2;1;1;0;1", "aff_unique_norm": "University of Hong Kong;Microsoft;Sichuan University", "aff_unique_dep": ";Microsoft Corporation;", "aff_unique_url": "https://www.hku.hk;https://www.microsoft.com;https://www.scu.edu.cn", "aff_unique_abbr": "HKU;Microsoft;SCU", "aff_campus_unique_index": "0;0;0;2", "aff_campus_unique": "Hong Kong SAR;;Asia", "aff_country_unique_index": "0;1;0;0;1;1;0;0", "aff_country_unique": "China;United States" }, { "id": "MhEJqeCzgE", "title": "Unraveling Feature Extraction Mechanisms in Neural Networks", "track": "main", "status": "Long Main", "tldr": "", "abstract": "The underlying mechanism of neural networks in capturing precise knowledge has been the subject of consistent research efforts. In this work, we propose a theoretical approach based on Neural Tangent Kernels (NTKs) to investigate such mechanisms. 
Specifically, considering the infinite network width, we hypothesize the learning dynamics of target models may intuitively unravel the features they acquire from training data, deepening our insights into their internal mechanisms. We apply our approach to several fundamental models and reveal how these models leverage statistical features during gradient descent and how they are integrated into final decisions. We also discovered that the choice of activation function can affect feature extraction. For instance, the use of the ReLU activation function could potentially introduce a bias in features, providing a plausible explanation for its replacement with alternative functions in recent pre-trained language models. Additionally, we find that while self-attention and CNN models may exhibit limitations in learning n-grams, multiplication-based models seem to excel in this area. We verify these theoretical findings through experiments and find that they can be applied to analyze language modeling tasks, which can be regarded as a special variant of classification. Our work may offer insights into the roles and capacities of fundamental modules within deep neural networks including large language models.", "keywords": "Interpretability;Infinite-width;Feature extraction;Learning dynamics;Neural tangent kernel", "primary_area": "", "supplementary_material": "", "author": "Xiaobing Sun;Jiaxi Li;Wei Lu", "authorids": "~Xiaobing_Sun1;~Jiaxi_Li3;~Wei_Lu10", "gender": "M;F;M", "homepage": ";;https://istd.sutd.edu.sg/people/faculty/lu-wei", "dblp": "30/4077-2;;98/6613-11.html", "google_scholar": "https://scholar.google.com/citations?hl=en;;n41KN9AAAAAJ", "or_profile": "~Xiaobing_Sun1;~Jiaxi_Li3;~Wei_Lu9", "aff": "Singapore University of Technology and Design;Singapore University of Technology and Design;Singapore University of Technology and Design", "aff_domain": "sutd.edu.sg;sutd.edu.sg;sutd.edu.sg", "position": "PhD student;PhD student;Associate Professor", "bibtex": "@inproceedings{\nsun2023unraveling,\ntitle={Unraveling Feature Extraction Mechanisms in Neural Networks},\nauthor={Xiaobing Sun and Jiaxi Li and Wei Lu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=MhEJqeCzgE}\n}", "github": "", "project": "", "reviewers": "ak69;tfVt;SPeX", "site": "https://openreview.net/forum?id=MhEJqeCzgE", "pdf_size": 0, "rating": "5;5;5", "confidence": "3;3;2", "excitement": "4;4;4", "reproducibility": "4;4;4", "correctness": "5;4;4", "rating_avg": 5.0, "confidence_avg": 2.6666666666666665, "excitement_avg": 4.0, "reproducibility_avg": 4.0, "correctness_avg": 4.333333333333333, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0003-0827-0382", "linkedin": ";https://linkedin.com/in/jiaxi-li-725759195;wei-lu-59aa9615/", "aff_unique_index": "0;0;0", "aff_unique_norm": "Singapore University of Technology and Design", "aff_unique_dep": "", "aff_unique_url": "https://www.sutd.edu.sg", "aff_unique_abbr": "SUTD", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Singapore" }, { "id": "MhU0zxuZ5K", "title": "On the Dimensionality of Sentence Embeddings", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Learning sentence embeddings is a fundamental problem in natural language processing.\nWhile existing research primarily focuses on enhancing the quality of sentence 
embeddings, the exploration of sentence embedding dimensions is limited.\nHere we present a comprehensive and empirical analysis of the dimensionality of sentence embeddings.\nFirst, we demonstrate that the optimal dimension of sentence embeddings is usually smaller than the default value.\nSubsequently, to compress the dimension of sentence embeddings with minimum performance degradation, we identify two components contributing to the overall performance loss: the encoder's performance loss and the pooler's performance loss.\nTherefore, we propose a two-step training method for sentence representation learning models, wherein the encoder and the pooler are optimized separately to mitigate the overall performance loss in low-dimension scenarios.\nExperimental results on seven STS tasks and seven sentence classification tasks demonstrate that our method significantly improves the performance of low-dimensional sentence embeddings.", "keywords": "Sentence embedding;dimension reduction", "primary_area": "", "supplementary_material": "", "author": "Hongwei Wang;Hongming Zhang;Dong Yu", "authorids": "~Hongwei_Wang1;~Hongming_Zhang2;~Dong_Yu2", "gender": "M;M;M", "homepage": "https://hongweiw.net;http://www.cse.ust.hk/~hzhangal/;https://sites.google.com/view/dongyu888/", "dblp": "https://dblp.org/pers/hd/w/Wang_0004:Hongwei;;71/4598-1", "google_scholar": "3C__4wsAAAAJ;i5ETuuQAAAAJ;tMY31_gAAAAJ", "or_profile": "~Hongwei_Wang1;~Hongming_Zhang2;~Dong_Yu2", "aff": "Tencent AI Lab;Tencent AI Lab Seattle;Tencent AI Lab", "aff_domain": "tencent.com;tencent.com;tencent.com", "position": "Researcher;Researcher;Distinguished Scientist", "bibtex": "@inproceedings{\nwang2023on,\ntitle={On the Dimensionality of Sentence Embeddings},\nauthor={Hongwei Wang and Hongming Zhang and Dong Yu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=MhU0zxuZ5K}\n}", "github": "", "project": "", "reviewers": "5JDc;s3jK;85p1;wHEz", "site": "https://openreview.net/forum?id=MhU0zxuZ5K", "pdf_size": 0, "rating": "3;3;3;3", "confidence": "5;4;4;3", "excitement": "2;3;4;3", "reproducibility": "3;3;4;4", "correctness": "3;4;5;3", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 3.0, "reproducibility_avg": 3.5, "correctness_avg": 3.75, "replies_avg": 14, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0001-7474-8271;;0000-0003-0520-6844", "linkedin": "hongwei-wang-730a7b72/;;dongyu/", "aff_unique_index": "0;0;0", "aff_unique_norm": "Tencent", "aff_unique_dep": "Tencent AI Lab", "aff_unique_url": "https://ai.tencent.com", "aff_unique_abbr": "Tencent AI Lab", "aff_campus_unique_index": "1", "aff_campus_unique": ";Seattle", "aff_country_unique_index": "0;1;0", "aff_country_unique": "China;United States" }, { "id": "MhbiD5FVPF", "title": "People Make Better Edits: Measuring the Efficacy of LLM-Generated Counterfactually Augmented Data for Harmful Language Detection", "track": "main", "status": "Long Main", "tldr": "", "abstract": "NLP models are used in a variety of critical social computing tasks, such as detecting sexist, racist, or otherwise hateful content. Therefore, it is imperative that these models are robust to spurious features. Past work has attempted to tackle such spurious features using training data augmentation, including Counterfactually Augmented Data (CADs). 
CADs introduce minimal changes to existing training data points and flip their labels; training on them may reduce model dependency on spurious features. However, manually generating CADs can be time-consuming and expensive. Hence in this work, we assess if this task can be automated using generative NLP models. We automatically generate CADs using Polyjuice, ChatGPT, and Flan-T5, and evaluate their usefulness in improving model robustness compared to manually-generated CADs. By testing both model performance on multiple out-of-domain test sets and individual data point efficacy, our results show that while manual CADs are still the most effective, CADs generated by ChatGPT come a close second. One key reason for the lower performance of automated methods is that the changes they introduce are often insufficient to flip the original label.", "keywords": "hate speech;sexism;counterfactually augmented data;data augmentation;model robustness", "primary_area": "", "supplementary_material": "", "author": "Indira Sen;Dennis Assenmacher;Mattia Samory;Isabelle Augenstein;Wil Aalst;Claudia Wagner", "authorids": "~Indira_Sen1;~Dennis_Assenmacher1;~Mattia_Samory1;~Isabelle_Augenstein1;~Wil_Aalst1;~Claudia_Wagner1", "gender": "F;M;M;F;M;", "homepage": "https://indiiigo.github.io/;https://www.wi.uni-muenster.de/de/institut/statistik/personen/dennis-assenmacher;https://hide-ous.github.io/;http://isabelleaugenstein.github.io/;http://www.vdaalst.com/;", "dblp": "219/5568;201/1363.html;;93/11424.html;;32/4045", "google_scholar": "qXzr-p8AAAAJ;https://scholar.google.de/citations?user=MUkf4qAAAAAJ;https://scholar.google.it/citations?user=m5hUWjsAAAAJ;https://scholar.google.co.uk/citations?user=DjJp0dcAAAAJ;;R5-PPJQAAAAJ", "or_profile": "~Indira_Sen1;~Dennis_Assenmacher1;~Mattia_Samory1;~Isabelle_Augenstein1;~Wil_Aalst1;~Claudia_Wagner1", "aff": "Rheinisch Westf\u00e4lische Technische Hochschule Aachen;GESIS \u2013 Leibniz Institute for the Social Sciences;;University of Copenhagen;TU Eindhoven;Rheinisch Westf\u00e4lische Technische Hochschule Aachen", "aff_domain": "rwth-aachen.de;gesis.org;;ku.dk;;rwth-aachen.de", "position": "PhD student;Postdoc;;Full Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nsen2023people,\ntitle={People Make Better Edits: Measuring the Efficacy of {LLM}-Generated Counterfactually Augmented Data for Harmful Language Detection},\nauthor={Indira Sen and Dennis Assenmacher and Mattia Samory and Isabelle Augenstein and Wil Aalst and Claudia Wagner},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=MhbiD5FVPF}\n}", "github": "", "project": "", "reviewers": "Hib4;PwG4;z9F6", "site": "https://openreview.net/forum?id=MhbiD5FVPF", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;4;3", "excitement": "3;3;3", "reproducibility": "4;3;3", "correctness": "4;3;4", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.0, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0001-9219-1956;;0000-0003-1562-7909;;", "linkedin": ";;;isabelle-augenstein-82436b7a/;;", "aff_unique_index": "0;1;2;3;0", "aff_unique_norm": "RWTH Aachen University;Leibniz Institute for the Social Sciences;University of Copenhagen;Eindhoven University of Technology", "aff_unique_dep": ";Social Sciences;;", "aff_unique_url": 
"https://www.rwth-aachen.de;https://www.gesis.org;https://www.ku.dk;https://www.tue.nl", "aff_unique_abbr": "RWTH;GESIS;UCPH;TU/e", "aff_campus_unique_index": "0;2;0", "aff_campus_unique": "Aachen;;Eindhoven", "aff_country_unique_index": "0;0;1;2;0", "aff_country_unique": "Germany;Denmark;Netherlands" }, { "id": "MkD0VGShAq", "title": "GazeVQA: A Video Question Answering Dataset for Multiview Eye-Gaze Task-Oriented Collaborations", "track": "main", "status": "Long Main", "tldr": "", "abstract": "The usage of exocentric and egocentric videos in Video Question Answering (VQA) is a new endeavor in human-robot interaction and collaboration studies. Particularly for egocentric videos, one may leverage eye-gaze information to understand human intentions during the task. In this paper, we build a novel task-oriented VQA dataset, called GazeVQA, for collaborative tasks where gaze information is captured during the task process. GazeVQA is designed with a novel QA format that covers thirteen different reasoning types to capture multiple aspects of task information and user intent. For each participant, GazeVQA consists of more than 1,100 textual questions and more than 500 labeled images that were annotated with the assistance of the Segment Anything Model. In total, 2,967 video clips, 12,491 labeled images, and 25,040 questions from 22 participants were included in the dataset. Additionally, inspired by the assisting models and common ground theory for industrial task collaboration, we propose a new AI model called AssistGaze that is designed to answer the questions with three different answer types, namely textual, image, and video. AssistGaze can effectively ground the perceptual input into semantic information while reducing ambiguities. We conduct comprehensive experiments to demonstrate the challenges of GazeVQA and the effectiveness of AssistGaze.", "keywords": "video question answering;human-robot collaboration", "primary_area": "", "supplementary_material": "", "author": "Muhammet Furkan ILASLAN;Chenan Song;Joya Chen;Difei Gao;Weixian Lei;Qianli Xu;Joo Hwee Lim;Mike Zheng Shou", "authorids": "~Muhammet_Furkan_ILASLAN1;~Chenan_Song1;~Joya_Chen1;~Difei_Gao1;~Weixian_Lei2;~Qianli_Xu1;~Joo_Hwee_Lim1;~Mike_Zheng_Shou1", "gender": "M;;M;;M;M;M;", "homepage": "https://openreview.net/profile?id=~Muhammet_Furkan_ILASLAN1;;https://chenjoya.github.io/;;https://github.com/StanLei52;;;", "dblp": "362/7603.html;;247/9518;;307/5295;30/3276;236/4727;", "google_scholar": "https://scholar.google.com/citations?hl=tr;;https://scholar.google.com.ph/citations?user=IIx9dc8AAAAJ;;https://scholar.google.com/citations?view_op=list_works;https://scholar.google.com.sg/citations?user=JLpYAlQAAAAJ;;", "or_profile": "~Muhammet_Furkan_ILASLAN1;~Chenan_Song1;~Joya_Chen1;~Difei_Gao1;~Weixian_Lei2;~Qianli_Xu1;~Joo_Hwee_Lim1;~Mike_Zheng_Shou1", "aff": "National University of Singapore;National University of Singapore;National University of Singapore;;National University of Singapore;Institute for Infocomm Research, A*STAR;I2R, ASTAR;", "aff_domain": "u.nus.edu;u.nus.edu;u.nus.edu;;u.nus.edu;i2r.a-star.edu.sg;i2r.a-star.edu.sg;", "position": "PhD student;Undergrad student;PhD student;;PhD student;Researcher;Principal Researcher;", "bibtex": "@inproceedings{\nilaslan2023gazevqa,\ntitle={Gaze{VQA}: A Video Question Answering Dataset for Multiview Eye-Gaze Task-Oriented Collaborations},\nauthor={Muhammet Furkan ILASLAN and Chenan Song and Joya Chen and Difei Gao and Weixian Lei and Qianli Xu and Joo Hwee Lim and Mike Zheng 
Shou},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=MkD0VGShAq}\n}", "github": "", "project": "", "reviewers": "doaa;3C3r;Ab7b", "site": "https://openreview.net/forum?id=MkD0VGShAq", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;3;5", "excitement": "3;4;4", "reproducibility": "3;2;4", "correctness": "3;3;4", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 8, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0003-3896-3091;;;;;0000-0003-0105-5903;;", "linkedin": "muhammet-furkan-ilaslan-1b698b93/;chenan-song-319aa81a4/;;;;;;", "aff_unique_index": "0;0;0;0;1;2", "aff_unique_norm": "National University of Singapore;Institute for Infocomm Research;Agency for Science, Technology and Research", "aff_unique_dep": ";;Institute for Infocomm Research", "aff_unique_url": "https://www.nus.edu.sg;https://www.i2r.a-star.edu.sg;https://www.a-star.edu.sg", "aff_unique_abbr": "NUS;I2R;A*STAR", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "Singapore" }, { "id": "Mm5GXKvpXm", "title": "CReTIHC: Designing Causal Reasoning Tasks about Temporal Interventions and Hallucinated Confoundings", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "Large language models (LLMs) have demonstrated impressive capabilities in natural language processing. However, their ability to establish causal relationships, particularly in the context of temporal interventions and language hallucinations, remains challenging. This paper presents \\textbf{CReTIHC}, a novel dataset designed to test and enhance the causal reasoning abilities of LLMs. The dataset is constructed using a unique approach that incorporates elements of verbal hallucinations and temporal interventions through the reengineering of existing causal inference datasets. This transformation creates complex scenarios that push LLMs to critically evaluate the information presented and identify cause-and-effect relationships. The CReTIHC dataset serves as a pioneering tool for improving LLM's causal inference capabilities, paving the way for a more nuanced understanding of causal relationships in natural language processing (NLP) tasks. 
The whole dataset is publicly accessible at: (https://github.com/ChangwooChun/CReTIHC)", "keywords": "Causality;Commonsense reasoning;Large Language Models;Temporal Interventions;Hallucinated Confoundings", "primary_area": "", "supplementary_material": "", "author": "Changwoo Chun;SongEun Lee;Jaehyung Seo;Heuiseok Lim", "authorids": "~Changwoo_Chun1;~SongEun_Lee1;~Jaehyung_Seo1;~Heuiseok_Lim1", "gender": "M;F;M;M", "homepage": "https://github.com/ChangwooChun;https://www.linkedin.com/in/songeun-lee-1332b5213/;https://j-seo.github.io/;http://nlp.korea.ac.kr", "dblp": "331/3048;;298/7721;127/4881", "google_scholar": "https://scholar.google.co.kr/citations?user=uIWB0vkAAAAJ;;V8bFAUIAAAAJ;HMTkz7oAAAAJ", "or_profile": "~Changwoo_Chun1;~SongEun_Lee1;~Jaehyung_Seo1;~Heuiseok_Lim1", "aff": "Hyundai Motor Company;Hyundai Motor Company;Korea University;Korea University", "aff_domain": "hyundai.com;hyundai.com;korea.ac.kr;korea.ac.kr", "position": "Researcher;Researcher;PhD student;Full Professor", "bibtex": "@inproceedings{\nchun2023cretihc,\ntitle={{CR}e{TIHC}: Designing Causal Reasoning Tasks about Temporal Interventions and Hallucinated Confoundings},\nauthor={Changwoo Chun and SongEun Lee and Jaehyung Seo and Heuiseok Lim},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=Mm5GXKvpXm}\n}", "github": "", "project": "", "reviewers": "eRnj;NqUz;EU9a", "site": "https://openreview.net/forum?id=Mm5GXKvpXm", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;3;3", "excitement": "4;3;3", "reproducibility": "2;3;4", "correctness": "3;3;2", "rating_avg": 3.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.0, "correctness_avg": 2.6666666666666665, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0002-4761-9818;", "linkedin": "changwoo-chun/;;jaehyungseo-datascientist/?originalSubdomain=kr;", "aff_unique_index": "0;0;1;1", "aff_unique_norm": "Hyundai Motor Company;Korea University", "aff_unique_dep": ";", "aff_unique_url": "https://www.hyundai.com;https://www.korea.ac.kr", "aff_unique_abbr": "HMC;KU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "South Korea" }, { "id": "MmBjKmHIND", "title": "Synthetic Data Generation with Large Language Models for Text Classification: Potential and Limitations", "track": "main", "status": "Long Main", "tldr": "", "abstract": "The collection and curation of high-quality training data is crucial for developing text classification models with superior performance, but it is often associated with significant costs and time investment. Researchers have recently explored using large language models (LLMs) to generate synthetic datasets as an alternative approach. However, the effectiveness of the LLM-generated synthetic data in supporting model training is inconsistent across different classification tasks. To better understand factors that moderate the effectiveness of the LLM-generated synthetic data, in this study, we look into how the performance of models trained on these synthetic data may vary with the $\\textit{subjectivity}$ of classification. Our results indicate that subjectivity, at both the task level and instance level, is negatively associated with the performance of the model trained on synthetic data. 
We conclude by discussing the implications of our work on the potential and limitations of leveraging LLM for synthetic data generation.", "keywords": "Synthetic Data Generation;Data Augmentation;Large Language Models", "primary_area": "", "supplementary_material": "", "author": "Zhuoyan Li;Hangxiao Zhu;Zhuoran Lu;Ming Yin", "authorids": "~Zhuoyan_Li2;~Hangxiao_Zhu1;~Zhuoran_Lu1;~Ming_Yin2", "gender": ";M;M;", "homepage": "https://xfleezy.github.io/zhuoyanli/;;https://zhuoranlu.github.io;http://mingyin.org/", "dblp": ";358/9642.html;;", "google_scholar": ";;;J8ei9I0AAAAJ", "or_profile": "~Zhuoyan_Li2;~Hangxiao_Zhu1;~Zhuoran_Lu1;~Ming_Yin2", "aff": "Purdue University;Washington University, Saint Louis;, Purdue University;Purdue University", "aff_domain": "purdue.edu;wustl.edu;cs.purdue.edu;purdue.edu", "position": "PhD student;MS student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nli2023synthetic,\ntitle={Synthetic Data Generation with Large Language Models for Text Classification: Potential and Limitations},\nauthor={Zhuoyan Li and Hangxiao Zhu and Zhuoran Lu and Ming Yin},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=MmBjKmHIND}\n}", "github": "", "project": "", "reviewers": "uLMj;PuaF;wx6Z;M41S", "site": "https://openreview.net/forum?id=MmBjKmHIND", "pdf_size": 0, "rating": "5;5;5;5", "confidence": "4;4;4;3", "excitement": "4;3;4;3", "reproducibility": "4;5;4;4", "correctness": "2;4;4;4", "rating_avg": 5.0, "confidence_avg": 3.75, "excitement_avg": 3.5, "reproducibility_avg": 4.25, "correctness_avg": 3.5, "replies_avg": 13, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0000-0002-7364-139X", "linkedin": ";hangxiao-zhu-7364711a9/;;", "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Purdue University;Washington University in St. Louis", "aff_unique_dep": ";", "aff_unique_url": "https://www.purdue.edu;https://wustl.edu", "aff_unique_abbr": "Purdue;WUSTL", "aff_campus_unique_index": "1", "aff_campus_unique": ";Saint Louis", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "MnPnE4xV0H", "title": "Pretraining Language Models with Text-Attributed Heterogeneous Graphs", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "In many real-world scenarios (e.g., academic networks, social platforms), different types of entities are not only associated with texts but also connected by various relationships, which can be abstracted as Text-Attributed Heterogeneous Graphs (TAHGs). Current pretraining tasks for Language Models (LMs) primarily focus on separately learning the textual information of each entity and overlook the crucial aspect of capturing topological connections among entities in TAHGs. In this paper, we present a new pretraining framework for LMs that explicitly considers the topological and heterogeneous information in TAHGs. Firstly, we define a context graph as neighborhoods of a target node within specific orders and propose a topology-aware pretraining task to predict nodes involved in the context graph by jointly optimizing an LM and an auxiliary heterogeneous graph neural network. Secondly, based on the observation that some nodes are text-rich while others have little text, we devise a text augmentation strategy to enrich textless nodes with their neighbors' texts for handling the imbalance issue. 
We conduct link prediction and node classification tasks on three datasets from various domains. Experimental results demonstrate the superiority of our approach over existing methods and the rationality of each design. Our code is available at https://github.com/Hope-Rita/THLM.", "keywords": "language model pretraining;text-attributed heterogeneous graphs;graph neural networks", "primary_area": "", "supplementary_material": "", "author": "Tao Zou;Le Yu;Yifei Huang;Leilei Sun;Bowen Du", "authorids": "~Tao_Zou1;~Le_Yu2;~Yifei_Huang5;~Leilei_Sun2;~Bowen_Du2", "gender": ";M;F;M;M", "homepage": ";https://yule-buaa.github.io/;https://github.com/yifeiHuang623;https://scse.buaa.edu.cn/info/1079/9207.htm;http://scse.buaa.edu.cn/info/1387/8141.htm", "dblp": "61/6876-3.html;23/7122-4;71/8763-3.html;152/1810.html;", "google_scholar": "U-yUt08AAAAJ;-h_ehVsAAAAJ;;QVHvhM4AAAAJ;oEt7RiIAAAAJ", "or_profile": "~Tao_Zou1;~Le_Yu2;~Yifei_Huang5;~Leilei_Sun2;~Bowen_Du2", "aff": "Beihang University;Beihang University;Beihang University;Beihang University;Beihang University", "aff_domain": "buaa.edu.cn;buaa.edu.cn;buaa.edu.cn;buaa.edu.cn;buaa.edu.cn", "position": "PhD student;PhD student;Undergrad student;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nzou2023pretraining,\ntitle={Pretraining Language Models with Text-Attributed Heterogeneous Graphs},\nauthor={Tao Zou and Le Yu and Yifei Huang and Leilei Sun and Bowen Du},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=MnPnE4xV0H}\n}", "github": "", "project": "", "reviewers": "v8Uk;xFg8;m9df", "site": "https://openreview.net/forum?id=MnPnE4xV0H", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;4", "excitement": "3;3;3", "reproducibility": "4;4;4", "correctness": "3;4;3", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.0, "reproducibility_avg": 4.0, "correctness_avg": 3.3333333333333335, "replies_avg": 10, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-4964-0274;0000-0002-4908-3199;0000-0002-8645-2811;0000-0002-0157-1716;0000-0003-0975-2367", "linkedin": ";;;;", "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Beihang University", "aff_unique_dep": "", "aff_unique_url": "http://www.buaa.edu.cn/", "aff_unique_abbr": "BUAA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "MoEfm3iPMy", "title": "Self-Knowledge Guided Retrieval Augmentation for Large Language Models", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Large language models (LLMs) have shown superior performance without task-specific fine-tuning. Despite the success, the knowledge stored in the parameters of LLMs could still be incomplete and difficult to update due to the computational costs. As complementary, retrieval-based methods can offer non-parametric world knowledge and improve the performance on tasks such as question answering. However, we find that the retrieved knowledge does not always help and even has a negative impact on original responses occasionally. 
To better make use of both internal knowledge and external world knowledge, we investigate eliciting the model's ability to recognize what it knows and does not know (which is also called \"self-knowledge\") and propose Self-Knowledge guided Retrieval augmentation (SKR), a simple yet effective method which can let LLMs refer to the questions they have previously encountered and adaptively call for external resources when dealing with new questions. We evaluate SKR on multiple datasets and demonstrate that it outperforms chain-of-thought based and fully retrieval-based methods by using either InstructGPT or ChatGPT.", "keywords": "self-knowledge;retrieval augmentation;large language models", "primary_area": "", "supplementary_material": "", "author": "Yile Wang;Peng Li;Maosong Sun;Yang Liu", "authorids": "~Yile_Wang1;~Peng_Li2;~Maosong_Sun1;~Yang_Liu19", "gender": "M;M;M;M", "homepage": "https://ylwangy.github.io/;http://www.lpeng.net/;https://www.cs.tsinghua.edu.cn/csen/info/1312/4394.htm;http://nlp.csai.tsinghua.edu.cn/~ly/", "dblp": "32/1915-1.html;83/6353-30;95/3291-1;51/3710-5", "google_scholar": "v1YnW6gAAAAJ;hgYzkOQAAAAJ;https://scholar.google.com.tw/citations?user=zIgT0HMAAAAJ;https://scholar.google.com.hk/citations?user=lVhoKNcAAAAJ", "or_profile": "~Yile_Wang1;~Peng_Li2;~Maosong_Sun1;~Yang_Liu19", "aff": "Tsinghua University;Tsinghua University;Tsinghua University;Tsinghua University", "aff_domain": "tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn", "position": "Postdoc;Associate Professor;Full Professor;Professor", "bibtex": "@inproceedings{\nwang2023selfknowledge,\ntitle={Self-Knowledge Guided Retrieval Augmentation for Large Language Models},\nauthor={Yile Wang and Peng Li and Maosong Sun and Yang Liu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=MoEfm3iPMy}\n}", "github": "", "project": "", "reviewers": "spcX;w5Ym;KHre;jnMH", "site": "https://openreview.net/forum?id=MoEfm3iPMy", "pdf_size": 0, "rating": "3;3;3;3", "confidence": "4;3;4;4", "excitement": "3;3;4;4", "reproducibility": "4;4;3;3", "correctness": "3;3;3;3", "rating_avg": 3.0, "confidence_avg": 3.75, "excitement_avg": 3.5, "reproducibility_avg": 3.5, "correctness_avg": 3.0, "replies_avg": 14, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0003-1374-5979;;0000-0002-3087-242X", "linkedin": ";;;", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Tsinghua University", "aff_unique_dep": "", "aff_unique_url": "https://www.tsinghua.edu.cn", "aff_unique_abbr": "THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "Mq5cyRMGlD", "title": "VERVE: Template-based ReflectiVE Rewriting for MotiVational IntErviewing", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Reflective listening is a fundamental skill that counselors must acquire to achieve proficiency in motivational interviewing (MI). It involves responding in a manner that acknowledges and explores the meaning of what the client has expressed in the conversation. In this work, we introduce the task of counseling response rewriting, which transforms non-reflective statements into reflective responses. We introduce VERVE, a template-based rewriting system with paraphrase-augmented training and adaptive template updating. 
VERVE first creates a template by identifying and filtering out tokens that are not relevant to reflections and constructs a reflective response using the template. Paraphrase-augmented training allows the model to learn less-strict fillings of masked spans, and adaptive template updating helps discover effective templates for rewriting without significantly removing the original content. Using both automatic and human evaluations, we compare our method against text rewriting baselines and show that our framework is effective in turning non-reflective statements into more reflective responses while achieving a good content preservation-reflection style trade-off.", "keywords": "motivational interviewing;rewriting;counseling", "primary_area": "", "supplementary_material": "", "author": "Do June Min;Veronica Perez-Rosas;Ken Resnicow;Rada Mihalcea", "authorids": "~Do_June_Min1;~Veronica_Perez-Rosas1;~Ken_Resnicow1;~Rada_Mihalcea1", "gender": "M;F;M;F", "homepage": "https://mindojune.github.io/;;https://sph.umich.edu;https://web.eecs.umich.edu/~mihalcea/", "dblp": "279/5395;53/9684.html;;m/RadaMihalcea", "google_scholar": ";https://scholar.google.com/?authuser=1;;https://scholar.google.com.tw/citations?user=UetM7FgAAAAJ", "or_profile": "~Do_June_Min1;~Veronica_Perez-Rosas1;~Ken_Resnicow1;~Rada_Mihalcea1", "aff": "University of Michigan - Ann Arbor;University of Michigan - Ann Arbor;University of Michigan - Ann Arbor;University of Michigan", "aff_domain": "umich.edu;umich.edu;umich.edu;umich.edu", "position": "PhD student;Researcher;Full Professor;Full Professor", "bibtex": "@inproceedings{\nmin2023verve,\ntitle={{VERVE}: Template-based Reflecti{VE} Rewriting for MotiVational IntErviewing},\nauthor={Do June Min and Veronica Perez-Rosas and Ken Resnicow and Rada Mihalcea},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=Mq5cyRMGlD}\n}", "github": "", "project": "", "reviewers": "AcJk;DEMb;LWEi", "site": "https://openreview.net/forum?id=Mq5cyRMGlD", "pdf_size": 0, "rating": "3;3;3", "confidence": "2;4;5", "excitement": "4;3;2", "reproducibility": "4;3;0", "correctness": "4;2;2", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.0, "reproducibility_avg": 2.3333333333333335, "correctness_avg": 2.6666666666666665, "replies_avg": 10, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0000-0002-0767-6703", "linkedin": ";;;", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Michigan", "aff_unique_dep": "", "aff_unique_url": "https://www.umich.edu", "aff_unique_abbr": "UM", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Ann Arbor;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "Mte6BK69zv", "title": "Unraveling Downstream Gender Bias from Large Language Models: A Study on AI Educational Writing Assistance", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Large Language Models (LLMs) are increasingly utilized in educational tasks such as providing writing suggestions to students. Despite their potential, LLMs are known to harbor inherent biases which may negatively impact learners. Previous studies have investigated bias in models and data representations separately, neglecting the potential impact of LLM bias on human writing. In this paper, we investigate how bias transfers through an AI writing support pipeline. 
We conduct a large-scale user study with 231 students writing business case peer reviews in German. Students are divided into five groups with different levels of writing support: one in-classroom group with recommender system feature-based suggestions and four groups recruited from Prolific -- a control group with no assistance, two groups with suggestions from fine-tuned GPT-2 and GPT-3 models, and one group with suggestions from pre-trained GPT-3.5. Using GenBit gender bias analysis and Word Embedding Association Tests (WEAT), we evaluate the gender bias at various stages of the pipeline: in reviews written by students, in suggestions generated by the models, and in model embeddings directly. Our results demonstrate that there is no significant difference in gender bias between the resulting peer reviews of groups with and without LLM suggestions. Our research is therefore optimistic about the use of AI writing support in the classroom, showcasing a context where bias in LLMs does not transfer to students\u2019 responses.", "keywords": "Large Language Models;Gender Bias;Writing Support;Human-AI Collaboration", "primary_area": "", "supplementary_material": "", "author": "Thiemo Wambsganss;Xiaotian Su;Vinitra Swamy;Seyed Parsa Neshaei;Roman Rietsche;Tanja K\u00e4ser", "authorids": "~Thiemo_Wambsganss1;~Xiaotian_Su1;~Vinitra_Swamy1;~Seyed_Parsa_Neshaei1;~Roman_Rietsche1;~Tanja_K\u00e4ser1", "gender": "M;;F;;;F", "homepage": "https://thiemowa.github.io/;https://www.linkedin.com/in/xiaotiansu/;http://vinitra.github.io;http://spneshaei.com;https://ai-for-education.com/our-team/;https://www.epfl.ch/labs/ml4ed/", "dblp": ";;221/3628;319/4241;;95/11458.html", "google_scholar": "https://scholar.google.de/citations?user=4fsjAjoAAAAJ;CbftQaoAAAAJ;SX9GAqwAAAAJ;;https://scholar.google.ch/citations?user=0FOGvPwAAAAJ;Uexe7SkAAAAJ", "or_profile": "~Thiemo_Wambsganss1;~Xiaotian_Su1;~Vinitra_Swamy1;~Seyed_Parsa_Neshaei1;~Roman_Rietsche1;~Tanja_K\u00e4ser1", "aff": "EPFL - EPF Lausanne;EPFL - EPF Lausanne;Swiss Federal Institute of Technology Lausanne;Sharif University of Technology;Universit\u00e4t St. Gallen;EPFL", "aff_domain": "epfl.ch;epfl.ch;epfl.ch;sharif.edu;unisg.ch;ic.epfl.ch", "position": "Postdoc;MS student;PhD student;Undergrad student;Postdoc;Assistant Professor", "bibtex": "@inproceedings{\nwambsganss2023unraveling,\ntitle={Unraveling Downstream Gender Bias from Large Language Models: A Study on {AI} Educational Writing Assistance},\nauthor={Thiemo Wambsganss and Xiaotian Su and Vinitra Swamy and Seyed Parsa Neshaei and Roman Rietsche and Tanja K{\\\"a}ser},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=Mte6BK69zv}\n}", "github": "", "project": "", "reviewers": "YUR3;TWkC;bbvs", "site": "https://openreview.net/forum?id=Mte6BK69zv", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;3;4", "excitement": "3;3;3", "reproducibility": "2;3;3", "correctness": "3;3;3", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.0, "reproducibility_avg": 2.6666666666666665, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-7440-9357;;;;0000-0002-6112-1709;0000-0003-0672-0415", "linkedin": ";xiaotiansu/;vinitra;;;", "aff_unique_index": "0;0;1;2;3;0", "aff_unique_norm": "EPFL;Swiss Federal Institute of Technology Lausanne;Sharif University of Technology;University of St. 
Gallen", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.epfl.ch;https://www.epfl.ch;https://www.sharif.edu;https://www.unisg.ch", "aff_unique_abbr": "EPFL;EPFL;SUT;HSG", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Lausanne;", "aff_country_unique_index": "0;0;0;1;0;0", "aff_country_unique": "Switzerland;Iran" }, { "id": "Mtgbc9XFPU", "title": "Pre-training Intent-Aware Encoders for Zero- and Few-Shot Intent Classification", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Intent classification (IC) plays an important role in task-oriented dialogue systems. However, IC models often generalize poorly when training without sufficient annotated examples for each user intent. We propose a novel pre-training method for text encoders that uses contrastive learning with intent psuedo-labels to produce embeddings that are well-suited for IC tasks, reducing the need for manual annotations. By applying this pre-training strategy, we also introduce Pre-trained Intent-aware Encoder (PIE), which is designed to align encodings of utterances with their intent names. Specifically, we first train a tagger to identify key phrases within utterances that are crucial for interpreting intents. We then use these extracted phrases to create examples for pre-training a text encoder in a contrastive manner. As a result, our PIE model achieves up to 5.4% and 4.0% higher accuracy than the previous state-of-the-art pre-trained text encoder for the N-way zero- and one-shot settings on four IC datasets.", "keywords": "intent classification;task-oriented dialogue system", "primary_area": "", "supplementary_material": "", "author": "Mujeen Sung;James Gung;Elman Mansimov;Nikolaos Pappas;Raphael Shu;Salvatore Romeo;Yi Zhang;Vittorio Castelli", "authorids": "~Mujeen_Sung1;~James_Gung1;~Elman_Mansimov2;~Nikolaos_Pappas1;~Raphael_Shu2;~Salvatore_Romeo2;~Yi_Zhang13;~Vittorio_Castelli1", "gender": "M;M;M;M;M;M;M;M", "homepage": ";https://jgung.github.io/about/;http://mansimov.io/;http://nik0spapp.github.io/;https://nlper.com/raphael_shu;;;", "dblp": "243/4157;116/0530;159/1853;36/8968-2.html;;135/3229;64/6544-3;c/VittorioCastelli", "google_scholar": "https://scholar.google.com/citations?view_op=list_works;Xqc8wk0AAAAJ;znVElZIAAAAJ;https://scholar.google.ch/citations?user=daiFj_cAAAAJ;https://scholar.google.co.jp/citations?user=qT2aZtsAAAAJ;https://scholar.google.com/citations?hl=en;sxs6h_wAAAAJ;d-lg1lEAAAAJ", "or_profile": "~Mujeen_Sung1;~James_Gung1;~Elman_Mansimov2;~Nikolaos_Pappas1;~Raphael_Shu2;~Salvatore_Romeo2;~Yi_Zhang13;~Vittorio_Castelli1", "aff": "Korea University;AWS AI Labs;Amazon;AWS AI Labs;Amazon;Amazon;Amazon;Amazon", "aff_domain": "korea.ac.kr;amazon.com;amazon.com;amazon.com;amazon.com;amazon.com;amazon.com;amazon.com", "position": "PhD student;Researcher;Researcher;Researcher;Researcher;Senior Applied Scientist;Principal Researcher;Senior Science Manager", "bibtex": "@inproceedings{\nsung2023pretraining,\ntitle={Pre-training Intent-Aware Encoders for Zero- and Few-Shot Intent Classification},\nauthor={Mujeen Sung and James Gung and Elman Mansimov and Nikolaos Pappas and Raphael Shu and Salvatore Romeo and Yi Zhang and Vittorio Castelli},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=Mtgbc9XFPU}\n}", "github": "", "project": "", "reviewers": "QwHo;Tq8c;qFW1;wRCq", "site": "https://openreview.net/forum?id=Mtgbc9XFPU", "pdf_size": 0, "rating": "4;4;4;4", "confidence": "4;4;4;3", 
"excitement": "3;4;4;4", "reproducibility": "4;4;4;4", "correctness": "4;3;3;4", "rating_avg": 4.0, "confidence_avg": 3.75, "excitement_avg": 3.75, "reproducibility_avg": 4.0, "correctness_avg": 3.5, "replies_avg": 14, "authors#_avg": 8, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0000-0002-2004-8111;;;;", "linkedin": ";;;nik0spapp/;;;;vittorio-castelli-3449604/", "aff_unique_index": "0;1;1;1;1;1;1;1", "aff_unique_norm": "Korea University;Amazon", "aff_unique_dep": ";AWS AI Labs", "aff_unique_url": "https://www.korea.ac.kr;https://aws.amazon.com", "aff_unique_abbr": "KU;AWS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1;1;1;1;1", "aff_country_unique": "South Korea;United States" }, { "id": "MxhTQC9AYV", "title": "RealBehavior: A Framework for Faithfully Characterizing Foundation Models\u2019 Human-like Behavior Mechanisms", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Reports of human-like behaviors in foundation models are growing, with psychological theories providing enduring tools to investigate these behaviors. However, current research tends to directly apply these human-oriented tools without verifying the faithfulness of their outcomes. In this paper, we introduce a framework, RealBehavior, which is designed to characterize the humanoid behaviors of models faithfully. Beyond simply measuring behaviors, our framework assesses the faithfulness of results based on reproducibility, internal and external consistency, and generalizability. Our findings suggest that a simple application of psychological tools cannot faithfully characterize all human-like behaviors. Moreover, we discuss the impacts of aligning models with human and social values, arguing for the necessity of diversifying alignment objectives to prevent the creation of models with restricted characteristics.", "keywords": "Foundation Model;Human-like Behavior;Faithfulness", "primary_area": "", "supplementary_material": "", "author": "Enyu Zhou;Rui Zheng;Zhiheng Xi;Songyang Gao;Xiaoran Fan;Zichu Fei;Jingting Ye;Tao Gui;Qi Zhang;Xuanjing Huang", "authorids": "~Enyu_Zhou1;~Rui_Zheng1;~Zhiheng_Xi1;~Songyang_Gao1;~Xiaoran_Fan2;~Zichu_Fei1;~Jingting_Ye1;~Tao_Gui1;~Qi_Zhang8;~Xuanjing_Huang1", "gender": "F;M;;M;;;M;M;F;M", "homepage": "https://zhou-zoey.github.io;https://github.com/ruizheng20;https://woooodyy.github.io/;;;https://www.eva.mpg.de/linguistic-and-cultural-evolution/staff/jingting-ye/;;http://qizhang.info;https://xuanjing-huang.github.io/;", "dblp": ";;333/4268;314/6067;254/1528;336/7632;135/6973;52/323-1;05/6735-1;197/0141", "google_scholar": "gWs_6egAAAAJ;https://scholar.google.com.hk/citations?user=7Z0V_SoAAAAJ;https://scholar.google.com.hk/citations?user=zSVLkqAAAAAJ;O42mLrsAAAAJ;https://scholar.google.com.hk/citations?hl=zh-CN;NO0tprQAAAAJ;;XfqR3yYAAAAJ;RGsMgZA4H78C;https://scholar.google.com/citations?hl=zh-CN", "or_profile": "~Enyu_Zhou1;~Rui_Zheng1;~Zhiheng_Xi1;~Songyang_Gao1;~Zichu_Fei1;~Jingting_Ye1;~Tao_Gui1;~Qi_Zhang8;~Xuanjing_Huang1;~Xiaoran_Fan3", "aff": "Fudan University;Fudan University;Fudan University;Fudan University;Fudan University;Fudan University;Fudan University;Fudan University;Fudan University;Chinese Academy of Sciences", "aff_domain": "fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;ict.ac.cn", "position": "Undergrad student;PhD student;PhD student;MS student;PhD student;Associate Professor;Assistant Professor;Full 
Professor;Full Professor;MS student", "bibtex": "@inproceedings{\nzhou2023realbehavior,\ntitle={RealBehavior: A Framework for Faithfully Characterizing Foundation Models{\\textquoteright} Human-like Behavior Mechanisms},\nauthor={Enyu Zhou and Rui Zheng and Zhiheng Xi and Songyang Gao and Xiaoran Fan and Zichu Fei and Jingting Ye and Tao Gui and Qi Zhang and Xuanjing Huang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=MxhTQC9AYV}\n}", "github": "", "project": "", "reviewers": "3sUb;8Ed2;WqcD", "site": "https://openreview.net/forum?id=MxhTQC9AYV", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;5;3", "excitement": "4;3;4", "reproducibility": "4;4;5", "correctness": "3;2;4", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.333333333333333, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 10, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;;;;0000-0001-9197-9426;", "linkedin": ";;;;;;;;;", "aff_unique_index": "0;0;0;0;0;0;0;0;0;1", "aff_unique_norm": "Fudan University;Chinese Academy of Sciences", "aff_unique_dep": ";", "aff_unique_url": "https://www.fudan.edu.cn;https://www.cas.cn", "aff_unique_abbr": "Fudan;CAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "My6Rgv7xXV", "title": "Contextual Interaction for Argument Post Quality Assessment", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Recently, there has been an increased emphasis on assessing the quality of natural language arguments. Existing approaches primarily focus on evaluating the quality of individual argument posts. However, they often fall short when it comes to effectively distinguishing arguments that possess a narrow quality margin. To address this limitation, this paper delves into two alternative methods for modeling the relative quality of different arguments. These approaches include: 1) Supervised contrastive learning that captures the intricate interactions between arguments. By incorporating this approach, we aim to enhance the assessment of argument quality by effectively distinguishing between arguments with subtle differences in quality. 2) Large language models (LLMs) with in-context examples that harness the power of LLMs and enrich them with in-context examples. Through extensive evaluation and analysis on the publicly available IBM-Rank-30k dataset, we demonstrate the superiority of our contrastive argument quality assessment approach over state-of-the-art baselines. 
On the other hand, while LLMs with in-context examples showcase a commendable ability to identify high-quality argument posts, they exhibit relatively limited efficacy in discerning between argument posts with a narrow quality gap.", "keywords": "argument;argument quality;contrastive learning;large language models", "primary_area": "", "supplementary_material": "", "author": "Yiran Wang;Xuanang Chen;Ben He;Le Sun", "authorids": "~Yiran_Wang5;~Xuanang_Chen1;~Ben_He1;~Le_Sun1", "gender": ";M;M;M", "homepage": "http://www.icip.org.cn/team/wangyiran/;;http://people.ucas.ac.cn/~benhe;http://www.icip.org.cn/team/sunle/", "dblp": ";260/2156;;78/5897-1", "google_scholar": ";kFEZBOkAAAAJ;https://scholar.google.com/citations?view_op=list_works;6bFNhtwAAAAJ", "or_profile": "~Yiran_Wang5;~Xuanang_Chen1;~Ben_He1;~Le_Sun1", "aff": "University of Chinese Academy of Sciences;University of Chinese Academy of Sciences;University of Chinese Academy of Sciences;Institute of Software, Chinese Academy of Sciences", "aff_domain": "ucas.ac.cn;ucas.ac.cn;ucas.ac.cn;iscas.ac.cn", "position": "PhD student;PhD student;Full Professor;Full Professor", "bibtex": "@inproceedings{\nwang2023contextual,\ntitle={Contextual Interaction for Argument Post Quality Assessment},\nauthor={Yiran Wang and Xuanang Chen and Ben He and Le Sun},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=My6Rgv7xXV}\n}", "github": "", "project": "", "reviewers": "MHeS;eHo2;gESR", "site": "https://openreview.net/forum?id=My6Rgv7xXV", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;3;3", "excitement": "4;4;4", "reproducibility": "5;4;4", "correctness": "4;4;4", "rating_avg": 5.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 4.0, "reproducibility_avg": 4.333333333333333, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;", "linkedin": ";;;", "aff_unique_index": "0;0;0;1", "aff_unique_norm": "University of Chinese Academy of Sciences;Chinese Academy of Sciences", "aff_unique_dep": ";Institute of Software", "aff_unique_url": "http://www.ucas.ac.cn;http://www.ios.ac.cn", "aff_unique_abbr": "UCAS;CAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "MyTyc69kKK", "title": "TSTR: Target Similarity Tuning Meets the Real World", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "Target similarity tuning (TST) is a method of selecting relevant examples in natural language (NL) to code generation through large language models (LLMs) to improve performance. Its goal is to adapt a sentence embedding model to have the similarity between two NL inputs match the similarity between their associated code outputs. In this paper, we propose different methods to apply and improve TST in the real world. First, we replace the sentence transformer with embeddings from a larger model, which reduces sensitivity to the language distribution and thus provides more flexibility in synthetic generation of examples, and we train a tiny model that transforms these embeddings to a space where embedding similarity matches code similarity, which allows the model to remain a black box and only requires a few matrix multiplications at inference time. Second, we show how to efficiently select a smaller number of training examples to train the TST model. 
Third, we introduce a ranking-based evaluation for TST that does not require end-to-end code generation experiments, which can be expensive to perform.", "keywords": "prompt engineering;code generation;target similarity tuning;example selection", "primary_area": "", "supplementary_material": "", "author": "Anirudh Khatry;Sumit Gulwani;Priyanshu Gupta;Vu Le;Mukul Singh;Ananya Singha;Gust Verbruggen", "authorids": "~Anirudh_Khatry1;~Sumit_Gulwani1;~Priyanshu_Gupta1;~Vu_Le2;~Mukul_Singh1;~Ananya_Singha1;~Gust_Verbruggen1", "gender": "M;M;M;M;F;M;M", "homepage": "https://www.microsoft.com/en-us/research/people/sumitg/;https://www.microsoft.com/en-us/research/people/priyansgupta/;https://www.vuminhle.com/;https://www.microsoft.com/research/people/singhmukul;https://www.linkedin.com/in/ananya-singha-1608;;https://anirudhkhatry.com", "dblp": "g/SumitGulwani;155/3225;00/2651-2.html;291/1609;;;318/1060", "google_scholar": "fZinJ_AAAAAJ;D7fTw_YAAAAJ;mijlpU4AAAAJ;3O7KjiIAAAAJ;;TmU3sKMAAAAJ;vE2VuVwAAAAJ", "or_profile": "~Sumit_Gulwani1;~Priyanshu_Gupta1;~Vu_Le2;~Mukul_Singh1;~Ananya_Singha1;~Gust_Verbruggen1;~Anirudh_Vishal_Khatry1", "aff": "Microsoft Research;Microsoft;Microsoft;Microsoft;Microsoft Research;KU Leuven;Microsoft", "aff_domain": "research.microsoft.com;microsoft.com;microsoft.com;microsoft.com;research.microsoft.com;kuleuven.be;microsoft.com", "position": "Researcher;Researcher;Researcher;Researcher;Intern;PhD student;Research Fellow", "bibtex": "@inproceedings{\nkhatry2023tstr,\ntitle={{TSTR}: Target Similarity Tuning Meets the Real World},\nauthor={Anirudh Khatry and Sumit Gulwani and Priyanshu Gupta and Vu Le and Mukul Singh and Ananya Singha and Gust Verbruggen},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=MyTyc69kKK}\n}", "github": "", "project": "", "reviewers": "pWu8;TieY;cbui", "site": "https://openreview.net/forum?id=MyTyc69kKK", "pdf_size": 0, "rating": "2;2;2", "confidence": "2;3;4", "excitement": "4;3;3", "reproducibility": "2;4;3", "correctness": "3;4;3", "rating_avg": 2.0, "confidence_avg": 3.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-9226-9634;0000-0002-5599-5004;0000-0003-3727-3291;0000-0001-9510-4512;;;0009-0004-7773-4405", "linkedin": "sumit-gulwani/;priyanshu-gupta-42000;;mukulsingh105/;;;anirudh-khatry/", "aff_unique_index": "0;0;0;0;0;1;0", "aff_unique_norm": "Microsoft;Katholieke Universiteit Leuven", "aff_unique_dep": "Microsoft Research;", "aff_unique_url": "https://www.microsoft.com/en-us/research;https://www.kuleuven.be", "aff_unique_abbr": "MSR;KU Leuven", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;1;0", "aff_country_unique": "United States;Belgium" }, { "id": "MzDakXdBbM", "title": "Can Large Language Models Fix Data Annotation Errors? An Empirical Study Using Debatepedia for Query-Focused Text Summarization", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "Debatepedia is a publicly available dataset consisting of arguments and counter-arguments on controversial topics that has been widely used for the single-document query-focused abstractive summarization task in recent years. 
However, it has been recently found that this dataset is limited by noise and even most queries in this dataset do not have any relevance to the respective document. In this paper, we study whether large language models (LLMs) can be utilized to clean the Debatepedia dataset to make it suitable for query-focused abstractive summarization. More specifically, we harness the language generation capabilities of two LLMs, namely, ChatGPT and PaLM to regenerate its queries. Based on our experiments, we find that solely depending on large language models for query correction may not be very useful for data cleaning. However, we observe that leveraging a rule-based approach for\ndata sampling followed by query regeneration using LLMs (especially ChatGPT) for the sampled instances may ensure a higher quality version of this dataset suitable for the development of more generalized query-focused text summarization models.", "keywords": "ChatGPT;PaLM;Large Language Models;Query Focused Abstractive Text Summarization", "primary_area": "", "supplementary_material": "", "author": "Md Tahmid Rahman Laskar;Mizanur Rahman;Israt Jahan;Enamul Hoque;Jimmy Huang", "authorids": "~Md_Tahmid_Rahman_Laskar2;~Mizanur_Rahman1;~Israt_Jahan1;~Enamul_Hoque2;~Jimmy_Huang1", "gender": "M;M;F;;M", "homepage": "https://sites.google.com/view/tahmedge/home;;https://www.researchgate.net/profile/Israt-Jahan-12;https://www.yorku.ca/enamulh/;https://www.yorku.ca/jhuang", "dblp": "250/6292;;;71/4476.html;h/XiangjiHuang.html", "google_scholar": "qpnsWPoAAAAJ;SzJtFg8AAAAJ;;https://scholar.google.ca/citations?user=NySeLFcAAAAJ;https://scholar.google.ca/citations?user=EBeIYOwAAAAJ", "or_profile": "~Md_Tahmid_Rahman_Laskar2;~Mizanur_Rahman1;~Israt_Jahan1;~Enamul_Hoque2;~Jimmy_Huang1", "aff": "Dialpad Inc. ;Royal Bank of Canada;York University;York University;York University", "aff_domain": "dialpad.com;rbc.com;yorku.ca;yorku.ca;yorku.ca", "position": "Applied Scientist;Researcher;MS student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nlaskar2023can,\ntitle={Can Large Language Models Fix Data Annotation Errors? 
An Empirical Study Using Debatepedia for Query-Focused Text Summarization},\nauthor={Md Tahmid Rahman Laskar and Mizanur Rahman and Israt Jahan and Enamul Hoque and Jimmy Huang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=MzDakXdBbM}\n}", "github": "", "project": "", "reviewers": "UJ5T;2X9Y;8uYK", "site": "https://openreview.net/forum?id=MzDakXdBbM", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;4", "excitement": "3;3;3", "reproducibility": "4;3;5", "correctness": "4;3;2", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 3.0, "reproducibility_avg": 4.0, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;0000-0003-1292-1491", "linkedin": "tahmedge/;m-mizanur/;;;", "aff_unique_index": "0;1;2;2;2", "aff_unique_norm": "Dialpad Inc.;Royal Bank of Canada;York University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.dialpad.com;https://www.rbc.com;https://www.yorku.ca", "aff_unique_abbr": "Dialpad;RBC;York U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1;1", "aff_country_unique": "United States;Canada" }, { "id": "N3a2vVk8vu", "title": "Hierarchical Prompting Assists Large Language Model on Web Navigation", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "Large language models (LLMs) struggle on processing complicated observations in interactive decision making. To alleviate this issue, we propose a simple hierarchical prompting approach. Diverging from previous prompting approaches that always put the full observation (a web page) to the prompt, we propose to first construct an action-aware observation which is more condensed and relevant with a dedicated Summarizer prompt. The Actor prompt then predicts the next action based on the summarized history. While our method has broad applicability, we particularly demonstrate its efficacy in the complex domain of web navigation where a full observation often contains redundant and irrelevant information. Our approach outperforms the previous state-of-the-art prompting mechanism with the same LLM by 6.2% on task success rate, demonstrating its potential on interactive decision making tasks with long observation traces.", "keywords": "LLM Prompting;Web Navigation", "primary_area": "", "supplementary_material": "", "author": "Abishek Sridhar;Robert Lo;Frank F. Xu;Hao Zhu;Shuyan Zhou", "authorids": "~Abishek_Sridhar1;~Robert_Lo1;~Frank_F._Xu1;~Hao_Zhu1;~Shuyan_Zhou1", "gender": "M;;M;M;Non-Binary", "homepage": ";https://robertlo.tech;https://frankxfz.me/;http://www.zhuhao.me;https://shuyanzhou.github.io/", "dblp": ";;190/4519;10/3520-6;", "google_scholar": "https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=en;1hXyfIkAAAAJ;-3yFcsMAAAAJ;t6YzEpgAAAAJ", "or_profile": "~Abishek_Sridhar1;~Robert_Lo1;~Frank_F._Xu1;~Hao_Zhu1;~Shuyan_Zhou1", "aff": "School of Computer Science, Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University", "aff_domain": "cs.cmu.edu;cmu.edu;cmu.edu;cmu.edu;cs.cmu.edu", "position": "MS student;MS student;PhD student;PhD student;PhD student", "bibtex": "@inproceedings{\nsridhar2023hierarchical,\ntitle={Hierarchical Prompting Assists Large Language Model on Web Navigation},\nauthor={Abishek Sridhar and Robert Lo and Frank F. 
Xu and Hao Zhu and Shuyan Zhou},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=N3a2vVk8vu}\n}", "github": "", "project": "", "reviewers": "KCDF;g69r;NBxP", "site": "https://openreview.net/forum?id=N3a2vVk8vu", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;3;3", "excitement": "2;3;3", "reproducibility": "3;4;3", "correctness": "2;3;3", "rating_avg": 3.0, "confidence_avg": 3.0, "excitement_avg": 2.6666666666666665, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 2.6666666666666665, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;", "linkedin": "abishek-sridhar5/;robert1003/;;;", "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Carnegie Mellon University", "aff_unique_dep": "School of Computer Science", "aff_unique_url": "https://www.cmu.edu", "aff_unique_abbr": "CMU", "aff_campus_unique_index": "0", "aff_campus_unique": "Pittsburgh;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "N4VUOeVOfS", "title": "Hidding the Ghostwriters: An Adversarial Evaluation of AI-Generated Student Essay Detection", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Large language models (LLMs) have exhibited remarkable capabilities in text generation tasks. However, the utilization of these models carries inherent risks, including but not limited to plagiarism, the dissemination of fake news, and issues in educational exercises. Although several detectors have been proposed to address these concerns, their effectiveness against adversarial perturbations, specifically in the context of student essay writing, remains largely unexplored. This paper aims to bridge this gap by constructing AIG-ASAP, an AI-generated student essay dataset, employing a range of text perturbation methods that are expected to generate high-quality essays while evading detection. Through empirical experiments, we assess the performance of current AIGC detectors on the AIG-ASAP dataset. The results reveal that the existing detectors can be easily circumvented using straightforward automatic adversarial attacks. Specifically, we explore word substitution and sentence substitution perturbation methods that effectively evade detection while maintaining the quality of the generated essays. This highlights the urgent need for more accurate and robust methods to detect AI-generated student essays in the education domain. 
Code and data are released for public use.", "keywords": "AIGC detection;AI-generated student essay;education", "primary_area": "", "supplementary_material": "", "author": "Xinlin Peng;Ying Zhou;Ben He;Le Sun;Yingfei Sun", "authorids": "~Xinlin_Peng1;~Ying_Zhou5;~Ben_He1;~Le_Sun1;~Yingfei_Sun1", "gender": "F;M;M;M;M", "homepage": "https://xinlinpeng.github.io/;;http://people.ucas.ac.cn/~benhe;http://www.icip.org.cn/team/sunle/;http://people.ucas.ac.cn/~yfsun", "dblp": ";;;78/5897-1;08/6724", "google_scholar": ";gN73NTMAAAAJ;https://scholar.google.com/citations?view_op=list_works;6bFNhtwAAAAJ;", "or_profile": "~Xinlin_Peng1;~Ying_Zhou5;~Ben_He1;~Le_Sun1;~Yingfei_Sun1", "aff": "University of Chinese Academy of Sciences;Institution of Software, Chinese Academy of Sciences;University of Chinese Academy of Sciences;Institute of Software, Chinese Academy of Sciences;University of Chinese Academy of Sciences", "aff_domain": "ucas.ac.cn;iscas.ac.cn;ucas.ac.cn;iscas.ac.cn;ucas.ac.cn", "position": "MS student;PhD student;Full Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\npeng2023hidding,\ntitle={Hidding the Ghostwriters: An Adversarial Evaluation of {AI}-Generated Student Essay Detection},\nauthor={Xinlin Peng and Ying Zhou and Ben He and Le Sun and Yingfei Sun},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=N4VUOeVOfS}\n}", "github": "", "project": "", "reviewers": "WQAB;rqmL;4N4g;9VU8", "site": "https://openreview.net/forum?id=N4VUOeVOfS", "pdf_size": 0, "rating": "4;4;4;4", "confidence": "4;3;4;4", "excitement": "4;4;4;4", "reproducibility": "5;4;5;3", "correctness": "4;2;4;4", "rating_avg": 4.0, "confidence_avg": 3.75, "excitement_avg": 4.0, "reproducibility_avg": 4.25, "correctness_avg": 3.5, "replies_avg": 14, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;", "linkedin": ";;;;", "aff_unique_index": "0;1;0;1;0", "aff_unique_norm": "University of Chinese Academy of Sciences;Chinese Academy of Sciences", "aff_unique_dep": ";Institution of Software", "aff_unique_url": "http://www.ucas.ac.cn;http://www.ios.ac.cn", "aff_unique_abbr": "UCAS;CAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "N58BZj5JB7", "title": "Improving Diversity of Demographic Representation in Large Language Models via Collective-Critiques and Self-Voting", "track": "main", "status": "Long Main", "tldr": "", "abstract": "A crucial challenge for generative large language models (LLMs) is diversity: when a user's prompt is under-specified, models may follow implicit assumptions while generating a response, which may result in homogenization of the responses, as well as certain demographic groups being under-represented or even erased from the generated responses. In this paper, we formalize the problem diversity of representation in LLM generations. We present evaluation datasets and propose metrics to measure diversity in generated responses along people and culture axes. We find that LLMs understand the notion of diversity, and that they can reason and critique their own responses for that goal. This finding motivated a new prompting technique called collective-critique and self-voting (CCSV) to self-improve people diversity of LLMs by tapping into its diversity reasoning capabilities, without relying on handcrafted examples or prompt tuning. 
Extensive empirical experiments with both human and automated evaluations show that our proposed approach is effective at improving people and culture diversity, and outperforms all baseline methods by a large margin.", "keywords": "Large language models;Fairness;Diversity;Language model reasoning", "primary_area": "", "supplementary_material": "", "author": "Preethi Lahoti;Nicholas Blumm;Xiao Ma;Raghavendra Kotikalapudi;Sahitya Potluri;Qijun Tan;Hansa Srinivasan;Ben Packer;Ahmad Beirami;Alex Beutel;Jilin Chen", "authorids": "~Preethi_Lahoti1;~Nicholas_Blumm1;~Xiao_Ma10;~Raghavendra_Kotikalapudi1;~Sahitya_Potluri1;~Qijun_Tan1;~Hansa_Srinivasan1;~Ben_Packer1;~Ahmad_Beirami1;~Alex_Beutel1;~Jilin_Chen1", "gender": ";;;M;F;;Non-Binary;M;M;;", "homepage": ";;https://maxiao.info/;;;;;;https://beirami.github.io/;;", "dblp": ";;35/573-10.html;52/10459;;;;https://dblp.uni-trier.de/pers/hd/p/Packer:Benjamin;41/9367;;50/6953", "google_scholar": ";;xLPxJsYAAAAJ;FW9JRloAAAAJ;https://scholar.google.com/citations?hl=en;Uax0srUAAAAJ;;jzsx52EAAAAJ;VuKWbMMAAAAJ;;", "or_profile": "~Preethi_Lahoti1;~Nicholas_Blumm1;~Xiao_Ma10;~Raghavendra_Kotikalapudi1;~Sahitya_Potluri1;~Qijun_Tan1;~Hansa_Srinivasan1;~Ben_Packer1;~Ahmad_Beirami1;~Alex_Beutel1;~Jilin_Chen1", "aff": ";;Google Research;Google;Google;Google;Google;;Massachusetts Institute of Technology;;Google", "aff_domain": ";;google.com;google.com;google.com;google.com;google.com;;mit.edu;;google.com", "position": ";;Researcher;Researcher;Software Engineer;software engineer;Researcher;;Research Affiliate;;Researcher", "bibtex": "@inproceedings{\nlahoti2023improving,\ntitle={Improving Diversity of Demographic Representation in Large Language Models via Collective-Critiques and Self-Voting},\nauthor={Preethi Lahoti and Nicholas Blumm and Xiao Ma and Raghavendra Kotikalapudi and Sahitya Potluri and Qijun Tan and Hansa Srinivasan and Ben Packer and Ahmad Beirami and Alex Beutel and Jilin Chen},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=N58BZj5JB7}\n}", "github": "", "project": "", "reviewers": "3GqZ;wBnm;18RZ", "site": "https://openreview.net/forum?id=N58BZj5JB7", "pdf_size": 0, "rating": "5;5;5", "confidence": "3;4;3", "excitement": "3;4;4", "reproducibility": "2;3;3", "correctness": "3;4;3", "rating_avg": 5.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 2.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 11, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;;;;;;", "linkedin": ";;;;sahitya-potluri/;;hansa-srinivasan-ba5a9ab2/;ben-packer-aa6a613/;ahmad-beirami-97001962;;", "aff_unique_index": "0;0;0;0;0;1;0", "aff_unique_norm": "Google;Massachusetts Institute of Technology", "aff_unique_dep": "Google Research;", "aff_unique_url": "https://research.google;https://web.mit.edu", "aff_unique_abbr": "Google Research;MIT", "aff_campus_unique_index": "0;0;0;0;0;0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "N6f1iHjWvB", "title": "Automatic Analysis of Substantiation in Scientific Peer Reviews", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "With the increasing amount of problematic peer reviews in top AI conferences, the community is urgently in need of automatic quality control measures. 
In this paper, we restrict our attention to substantiation --- one popular quality aspect indicating whether the claims in a review are sufficiently supported by evidence --- and provide a solution automatizing this evaluation process. To achieve this goal, we first formulate the problem as claim-evidence pair extraction in scientific peer reviews, and collect SubstanReview, the first annotated dataset for this task. SubstanReview consists of 550 reviews from NLP conferences annotated by domain experts. On the basis of this dataset, we train an argument mining system to automatically analyze the level of substantiation in peer reviews. We also perform data analysis on the SubstanReview dataset to obtain meaningful insights on peer reviewing quality in NLP conferences over recent years. The dataset is available at https://github.com/YanzhuGuo/SubstanReview.", "keywords": "Peer review;Substantiation;Argument mining;Dataset", "primary_area": "", "supplementary_material": "", "author": "Yanzhu Guo;Guokan Shang;Virgile Rennard;Michalis Vazirgiannis;Chlo\u00e9 Clavel", "authorids": "~Yanzhu_Guo1;~Guokan_Shang1;~Virgile_Rennard1;~Michalis_Vazirgiannis1;~Chlo\u00e9_Clavel2", "gender": "F;M;M;M;F", "homepage": ";;;;https://clavel.wp.imt.fr/", "dblp": ";220/3989;;v/MVazirgiannis;50/2768", "google_scholar": "v_fvWzQAAAAJ;EcBibPkAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.gr/citations?user=aWGJYcMAAAAJ;https://scholar.google.fr/citations?user=TAZbfksAAAAJ", "or_profile": "~Yanzhu_Guo1;~Guokan_Shang1;~Virgile_Rennard1;~Michalis_Vazirgiannis1;~Chlo\u00e9_Clavel1", "aff": "\u00c9cole Polytechnique;LINAGORA;\u00c9cole Polytechnique;Ecole Polytechnique, France;T\u00e9l\u00e9com ParisTech", "aff_domain": "polytechnique.fr;linagora.com;polytechnique.fr;polytechnique.fr;telecom-paristech.fr", "position": "PhD student;Researcher;PhD student;Full Professor;Full Professor", "bibtex": "@inproceedings{\nguo2023automatic,\ntitle={Automatic Analysis of Substantiation in Scientific Peer Reviews},\nauthor={Yanzhu Guo and Guokan Shang and Virgile Rennard and Michalis Vazirgiannis and Chlo{\\'e} Clavel},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=N6f1iHjWvB}\n}", "github": "", "project": "", "reviewers": "3y4q;7zci;M4ST;JsBA;td1b", "site": "https://openreview.net/forum?id=N6f1iHjWvB", "pdf_size": 0, "rating": "4;4;4;4;4", "confidence": "3;4;2;3;4", "excitement": "2;4;4;4;3", "reproducibility": "5;5;4;4;5", "correctness": "2;4;4;4;4", "rating_avg": 4.0, "confidence_avg": 3.2, "excitement_avg": 3.4, "reproducibility_avg": 4.6, "correctness_avg": 3.6, "replies_avg": 16, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;", "linkedin": ";guokan-shang;;;", "aff_unique_index": "0;1;0;0;2", "aff_unique_norm": "Ecole Polytechnique;LINAGORA;T\u00e9l\u00e9com ParisTech", "aff_unique_dep": ";;", "aff_unique_url": "https://www.polytechnique.edu;https://www.linagora.com;https://www.telecom-paristech.fr", "aff_unique_abbr": "X;LINAGORA;TP", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "France" }, { "id": "N6sXsHuWDE", "title": "ROME: Evaluating Pre-trained Vision-Language Models on Reasoning beyond Visual Common Sense", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Humans possess a strong capability for reasoning beyond common sense. 
For example, given an unconventional image of a goldfish laying on the table next to an empty fishbowl, a human would effortlessly determine that the fish is not inside the fishbowl. The case, however, may be different for a vision-language model, whose reasoning could gravitate towards the common scenario that the fish is inside the bowl, despite the visual input. In this paper, we introduce a novel probing dataset named ROME (reasoning beyond commonsense knowledge) to evaluate whether the state-of-the-art pre-trained vision-language models have the reasoning capability to correctly interpret counter-intuitive content. ROME contains images that defy commonsense knowledge with regards to color, shape, material, size and positional relation. Experiments on the state-of-the-art pre-trained vision-language models reveal that most of these models are still largely incapable of interpreting counter-intuitive scenarios. We hope that ROME will spur further investigations on reasoning beyond commonsense knowledge in vision-language research.", "keywords": "commonsense reasoning;multimodality;pre-trained vision-language models", "primary_area": "", "supplementary_material": "", "author": "Kankan Zhou;Eason Lai;Wei Bin Au Yeong;Kyriakos Mouratidis;Jing Jiang", "authorids": "~Kankan_Zhou1;~Eason_Lai1;~Wei_Bin_Au_Yeong1;~Kyriakos_Mouratidis1;~Jing_Jiang1", "gender": "M;;M;M;F", "homepage": "https://scholar.google.com/citations?user=dT0UZhIAAAAJ&hl=en&oi=ao;;;http://www.mysmu.edu/faculty/kyriakos/;http://www.mysmu.edu/faculty/jingjiang/", "dblp": ";;;m/KyriakosMouratidis;68/1974-1", "google_scholar": ";;BSK7NF0AAAAJ;https://scholar.google.com.sg/citations?user=vq5oTnEAAAAJ;https://scholar.google.com.sg/citations?user=hVTK2YwAAAAJ", "or_profile": "~Kankan_Zhou1;~Eason_Lai1;~Wei_Bin_Au_Yeong1;~Kyriakos_Mouratidis1;~Jing_Jiang1", "aff": "Singapore Management University;Singapore Management University;Singapore Management University;Singapore Management University;Singapore Management University", "aff_domain": "smu.edu.sg;smu.edu.sg;smu.edu.sg;smu.edu.sg;smu.edu.sg", "position": "PhD student;Researcher;Undergrad student;Full Professor;Full Professor", "bibtex": "@inproceedings{\nzhou2023rome,\ntitle={{ROME}: Evaluating Pre-trained Vision-Language Models on Reasoning beyond Visual Common Sense},\nauthor={Kankan Zhou and Eason Lai and Wei Bin Au Yeong and Kyriakos Mouratidis and Jing Jiang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=N6sXsHuWDE}\n}", "github": "", "project": "", "reviewers": "8GUZ;2QPL;EaB5", "site": "https://openreview.net/forum?id=N6sXsHuWDE", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;3", "excitement": "4;3;4", "reproducibility": "3;3;5", "correctness": "3;3;4", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 10, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0009-0000-9536-682X;;0000-0002-3035-0074", "linkedin": ";;weibinauyeong/;;", "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Singapore Management University", "aff_unique_dep": "", "aff_unique_url": "https://www.smu.edu.sg", "aff_unique_abbr": "SMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "Singapore" }, { "id": "N7R2emgl67", "title": "Learning to Rank Context 
for Named Entity Recognition Using a Synthetic Dataset", "track": "main", "status": "Long Main", "tldr": "", "abstract": "While recent pre-trained transformer-based models can perform named entity recognition (NER) with great accuracy, their limited range remains an issue when applied to long documents such as whole novels. To alleviate this issue, a solution is to retrieve relevant context at the document level. Unfortunately, the lack of supervision for such a task means one has to settle for unsupervised approaches. Instead, we propose to generate a synthetic context retrieval training dataset using Alpaca, an instruction-tuned large language model (LLM). Using this dataset, we train a neural context retriever based on a BERT model that is able to find relevant context for NER. We show that our method outperforms several retrieval baselines for the NER task on an English literary dataset composed of the first chapter of 40 books.", "keywords": "ner;transformers;context retrieval", "primary_area": "", "supplementary_material": "", "author": "Arthur Amalvy;Vincent Labatut;Richard Dufour", "authorids": "~Arthur_Amalvy1;~Vincent_Labatut1;~Richard_Dufour1", "gender": "M;M;M", "homepage": "https://cv.hal.science/aamalvy;https://cv.archives-ouvertes.fr/vlabatut;https://cv.hal.science/richard-dufour/", "dblp": ";10/6591;83/7858", "google_scholar": "yHOkn78AAAAJ;I8GnHekAAAAJ;_DdcidkAAAAJ", "or_profile": "~Arthur_Amalvy1;~Vincent_Labatut1;~Richard_Dufour1", "aff": "Universit\u00e9 d'Avignon;Universit\u00e9 d'Avignon;Nantes University", "aff_domain": "univ-avignon.fr;univ-avignon.fr;univ-nantes.fr", "position": "PhD student;Associate Professor;Full Professor", "bibtex": "@inproceedings{\namalvy2023learning,\ntitle={Learning to Rank Context for Named Entity Recognition Using a Synthetic Dataset},\nauthor={Arthur Amalvy and Vincent Labatut and Richard Dufour},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=N7R2emgl67}\n}", "github": "", "project": "", "reviewers": "NDUN;SivA;1ozW;YYw4", "site": "https://openreview.net/forum?id=N7R2emgl67", "pdf_size": 0, "rating": "3;3;3;3", "confidence": "4;4;4;3", "excitement": "3;3;3;3", "reproducibility": "4;4;4;2", "correctness": "4;3;4;2", "rating_avg": 3.0, "confidence_avg": 3.75, "excitement_avg": 3.0, "reproducibility_avg": 3.5, "correctness_avg": 3.25, "replies_avg": 14, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0003-4629-0923;0000-0002-2619-2835;0000-0003-1203-9108", "linkedin": ";vincent-labatut/;richarddufour/", "aff_unique_index": "0;0;1", "aff_unique_norm": "Universit\u00e9 d'Avignon;Nantes University", "aff_unique_dep": ";", "aff_unique_url": "https://www.univ-avignon.fr;https://www.univ-nantes.fr", "aff_unique_abbr": "UdA;UN", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "France" }, { "id": "N8TTwaIBId", "title": "CCEval: A Representative Evaluation Benchmark for the Chinese-centric Multilingual Machine Translation", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "The Chinese-centric Multilingual Machine Translation (MMT) has gained more importance recently due to increasing demands from international business development and cross-cultural exchanges. \nHowever, an important factor that limits the progress of this area is the lack of highly representative and high-quality evaluation benchmarks. 
\nTo fill this gap, we propose CCEval, an impartial and representative Chinese-centric MMT evaluation dataset. \nThis benchmark dataset consists of 2500 Chinese sentences we meticulously selected and processed, and covers more diverse linguistic features as compared to other MMT evaluation benchmarks.\nThese sentences have been translated into 11 languages of various resource levels by professional translators via a rigorously controlled process pipeline to ensure their high quality.\nWe conduct experiments to demonstrate our sampling methodology's effectiveness in constructing evaluation datasets strongly correlated with human evaluations.\nThe resulting dataset enables better assessments of the Chinese-centric MMT quality.\nOur CCEval benchmark dataset is available at https://bright.pcl.ac.cn/en/offlineTasks.", "keywords": "Multilingual Machine Translation;Low-resource Languages;Evaluation Benchmark;Evaluation Dataset;Chinese-centric;Translation Evaluation;Test Set", "primary_area": "", "supplementary_material": "", "author": "Lianzhang Lou;Xi Yin;Yutao Xie;Yang Xiang", "authorids": "~Lianzhang_Lou1;~Xi_Yin5;~Yutao_Xie1;~Yang_Xiang4", "gender": ";M;M;M", "homepage": ";https://www.linkedin.com/in/yutao-xie-414680/;;https://github.com/loulianzhang", "dblp": ";;50/2192-3;", "google_scholar": ";;zDyL-NoAAAAJ;", "or_profile": "~Xi_Yin5;~Yutao_Xie1;~Yang_Xiang4;~Lou_Lianzhang1", "aff": "International Digital Economy Academy;International Digital Economy Academy;Peng Cheng Laboratory;Pengcheng Laboratory", "aff_domain": "idea.edu.cn;idea.edu.cn;pcl.ac;pcl.ac.cn", "position": "Principal Researcher;Researcher;Researcher;Researcher", "bibtex": "@inproceedings{\nlou2023cceval,\ntitle={{CCE}val: A Representative Evaluation Benchmark for the Chinese-centric Multilingual Machine Translation},\nauthor={Lianzhang Lou and Xi Yin and Yutao Xie and Yang Xiang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=N8TTwaIBId}\n}", "github": "", "project": "", "reviewers": "Ucq3;fSg8;DEN2", "site": "https://openreview.net/forum?id=N8TTwaIBId", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;3;3", "excitement": "3;2;3", "reproducibility": "3;0;0", "correctness": "3;3;3", "rating_avg": 3.0, "confidence_avg": 3.0, "excitement_avg": 2.6666666666666665, "reproducibility_avg": 1.0, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0003-1395-6805;", "linkedin": "xiyin/;;yang-xiang-7554b6195/;", "aff_unique_index": "0;0;1;1", "aff_unique_norm": "International Digital Economy Academy;Pengcheng Laboratory", "aff_unique_dep": ";Peng Cheng Laboratory", "aff_unique_url": ";http://www.pcl.ac.cn", "aff_unique_abbr": ";PCL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "1;1", "aff_country_unique": ";China" }, { "id": "N8nQjYuyhO", "title": "Using Artificial French Data to Understand the Emergence of Gender Bias in Transformer Language Models", "track": "main", "status": "Short Main", "tldr": "", "abstract": "Numerous studies have demonstrated the ability of neural language models to learn various linguistic properties without direct supervision. This work takes an initial step towards exploring the less researched topic of how neural models discover linguistic properties of words, such as gender, as well as the rules governing their usage. 
We propose to use an artificial corpus generated by a PCFG based on French to precisely control the gender distribution in the training data and determine under which conditions a model correctly captures gender information or, on the contrary, appears gender-biased.", "keywords": "Language Model;Transformer;LM Analysis;Gender Bias", "primary_area": "", "supplementary_material": "", "author": "Lina Conti;Guillaume Wisniewski", "authorids": "~Lina_Conti1;~Guillaume_Wisniewski2", "gender": ";Not Specified", "homepage": ";https://pages.llf-paris.fr/~gwisniewski/", "dblp": ";53/336.html", "google_scholar": ";knVG9GIAAAAJ", "or_profile": "~Lina_Conti1;~Guillaume_Wisniewski2", "aff": ";LLF / Universit\u00e9 Paris Cit\u00e9", "aff_domain": ";u-paris.fr", "position": ";Assistant Professor", "bibtex": "@inproceedings{\nconti2023using,\ntitle={Using Artificial French Data to Understand the Emergence of Gender Bias in Transformer Language Models},\nauthor={Lina Conti and Guillaume Wisniewski},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=N8nQjYuyhO}\n}", "github": "", "project": "", "reviewers": "B4CF;WdXg;bPtj", "site": "https://openreview.net/forum?id=N8nQjYuyhO", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;4;4", "excitement": "4;2;4", "reproducibility": "4;3;3", "correctness": "4;3;4", "rating_avg": 5.0, "confidence_avg": 4.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-4445-080X", "linkedin": ";", "aff_unique_index": "0", "aff_unique_norm": "Universit\u00e9 Paris Cit\u00e9", "aff_unique_dep": "LLF", "aff_unique_url": "https://www.univ-paris.fr", "aff_unique_abbr": "UPC", "aff_country_unique_index": "0", "aff_country_unique": "France" }, { "id": "N924k3YM8V", "title": "The ACL OCL Corpus: Advancing Open Science in Computational Linguistics", "track": "main", "status": "Long Main", "tldr": "", "abstract": "We present ACL OCL, a scholarly corpus derived from the ACL Anthology to assist Open scientific research in the Computational Linguistics domain. Integrating and enhancing the previous versions of the ACL Anthology, the ACL OCL contributes metadata, PDF files, citation graphs and additional structured full texts with sections, figures, and links to a large knowledge resource (Semantic Scholar). The ACL OCL spans seven decades, containing 73K papers, alongside 210K figures. \n\nWe spotlight how ACL OCL applies to observe trends in computational linguistics. By detecting paper topics with a supervised neural model, we note that interest in \u201cSyntax: Tagging, Chunking and Parsing\u201d is waning and \u201cNatural Language Generation\u201d is resurging. 
Our dataset is available from HuggingFace (https://huggingface.co/datasets/WINGNUS/ACL-OCL).", "keywords": "scholarly corpus;computational linguistics;topic trend analysis;acl anthology", "primary_area": "", "supplementary_material": "", "author": "Shaurya Rohatgi;Yanxia Qin;Benjamin Aw;Niranjana Anand Unnithan;Min-Yen Kan", "authorids": "~Shaurya_Rohatgi1;~Yanxia_Qin1;~Benjamin_Aw1;~Niranjana_Anand_Unnithan1;~Min-Yen_Kan1", "gender": "M;F;M;;M", "homepage": ";https://sites.google.com/site/qolina/;;;https://www.comp.nus.edu.sg/~kanmy/", "dblp": ";40/10134.html;;;k/MinYenKan", "google_scholar": "UpHQFasAAAAJ;;;;https://scholar.google.com.tw/citations?user=aNVcd3EAAAAJ", "or_profile": "~Shaurya_Rohatgi1;~Yanxia_Qin1;~Benjamin_Aw1;~Niranjana_Anand_Unnithan1;~Min-Yen_Kan1", "aff": ";National University of Singapore;National University of Singapore;Kansas State University;National University of Singapore", "aff_domain": ";nus.edu.sg;u.nus.edu;ksu.edu;nus.edu.sg", "position": ";Postdoc;MS student;PhD student;Associate Professor", "bibtex": "@inproceedings{\nrohatgi2023the,\ntitle={The {ACL} {OCL} Corpus: Advancing Open Science in Computational Linguistics},\nauthor={Shaurya Rohatgi and Yanxia Qin and Benjamin Aw and Niranjana Anand Unnithan and Min-Yen Kan},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=N924k3YM8V}\n}", "github": "", "project": "", "reviewers": "XBuc;v8uW;s1m2;q3Sr", "site": "https://openreview.net/forum?id=N924k3YM8V", "pdf_size": 0, "rating": "5;5;5;5", "confidence": "4;4;3;4", "excitement": "3;4;3;4", "reproducibility": "3;5;2;4", "correctness": "3;4;3;4", "rating_avg": 5.0, "confidence_avg": 3.75, "excitement_avg": 3.5, "reproducibility_avg": 3.5, "correctness_avg": 3.5, "replies_avg": 13, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-6186-1651;;;", "linkedin": ";;benjamin-aw-25406b12b/;niranjanaunnithan/;", "aff_unique_index": "0;0;1;0", "aff_unique_norm": "National University of Singapore;Kansas State University", "aff_unique_dep": ";", "aff_unique_url": "https://www.nus.edu.sg;https://www.k-state.edu", "aff_unique_abbr": "NUS;K-State", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "Singapore;United States" }, { "id": "NAmRjAIMkz", "title": "Pointwise Mutual Information Based Metric and Decoding Strategy for Faithful Generation in Document Grounded Dialogs", "track": "main", "status": "Long Main", "tldr": "", "abstract": "A major concern in using deep learning based generative models for document-grounded dialogs is the potential generation of responses that are not faithful to the underlying document. Existing automated metrics used for evaluating the faithfulness of response with respect to the grounding document measure the degree of similarity between the generated response and the document's content. However, these automated metrics are far from being well aligned with human judgments. Therefore, to improve the measurement of faithfulness, we propose a new metric that utilizes (Conditional) Point-wise Mutual Information (PMI) between the generated response and the source document, conditioned on the dialogue. PMI quantifies the extent to which the document influences the generated response -- with a higher PMI indicating a more faithful response. 
We build upon this idea to create a new decoding technique that incorporates PMI into the response generation process to predict more faithful responses. Our experiments on the BEGIN benchmark demonstrate an improved correlation of our metric with human evaluation. We also show that our decoding technique is effective in generating more faithful responses when compared to standard decoding techniques on a set of publicly available document-grounded dialog datasets.", "keywords": "document grounded dialogs;dialog response generation;faithful response generation", "primary_area": "", "supplementary_material": "", "author": "Yatin Nandwani;Vineet Kumar;Dinesh Raghu;Sachindra Joshi;Luis A. Lastras", "authorids": "~Yatin_Nandwani1;~Vineet_Kumar3;~Dinesh_Raghu1;~Sachindra_Joshi1;~Luis_A._Lastras1", "gender": "M;M;;M;M", "homepage": "http://www.cse.iitd.ac.in/~yatin;;https://dineshraghu.github.io/;https://researcher.watson.ibm.com/researcher/view.php?person=in-jsachind;", "dblp": "255/7046;;72/11205;96/2418;160/9934", "google_scholar": "https://scholar.google.com/citations?hl=en;hR82_KAAAAAJ;https://scholar.google.co.in/citations?user=kphcPUkAAAAJ;https://scholar.google.co.in/citations?user=aRo6uNEAAAAJ;IxBeLg8AAAAJ", "or_profile": "~Yatin_Nandwani1;~Vineet_Kumar3;~Dinesh_Raghu1;~Sachindra_Joshi1;~Luis_A._Lastras1", "aff": "Indian Institute of Technology Delhi;International Business Machines;Indian Institute of Technology Delhi;;International Business Machines", "aff_domain": "iitd.ac.in;ibm.com;iitd.ac.in;;ibm.com", "position": "PhD student;Senior Research Software Engineer;PhD student;;Director", "bibtex": "@inproceedings{\nnandwani2023pointwise,\ntitle={Pointwise Mutual Information Based Metric and Decoding Strategy for Faithful Generation in Document Grounded Dialogs},\nauthor={Yatin Nandwani and Vineet Kumar and Dinesh Raghu and Sachindra Joshi and Luis A. Lastras},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=NAmRjAIMkz}\n}", "github": "", "project": "", "reviewers": "FY11;RmAJ;AFXM", "site": "https://openreview.net/forum?id=NAmRjAIMkz", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;4;3", "excitement": "4;3;4", "reproducibility": "4;4;2", "correctness": "4;3;4", "rating_avg": 5.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;", "linkedin": "yatin-nandwani-0804ba9/;;;;", "aff_unique_index": "0;1;0;1", "aff_unique_norm": "Indian Institute of Technology Delhi;International Business Machines Corporation", "aff_unique_dep": ";", "aff_unique_url": "https://www.iitd.ac.in;https://www.ibm.com", "aff_unique_abbr": "IIT Delhi;IBM", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Delhi;", "aff_country_unique_index": "0;1;0;1", "aff_country_unique": "India;United States" }, { "id": "NBH3x0u5oQ", "title": "MixEdit: Revisiting Data Augmentation and Beyond for Grammatical Error Correction", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Data Augmentation through generating pseudo data has been proven effective in mitigating the challenge of data scarcity in the field of Grammatical Error Correction (GEC). 
Various augmentation strategies have been widely explored, most of which are motivated by two heuristics, i.e., increasing the distribution similarity and diversity of pseudo data. However, the underlying mechanism responsible for the effectiveness of these strategies remains poorly understood. In this paper, we aim to clarify how data augmentation improves GEC models.\nTo this end, we introduce two interpretable and computationally efficient measures: Affinity and Diversity. Our findings indicate that an excellent GEC data augmentation strategy characterized by high Affinity and appropriate Diversity can better improve the performance of GEC models. Based on this observation, we propose MixEdit, a data augmentation approach that strategically and dynamically augments realistic data, without requiring extra monolingual corpora. To verify the correctness of our findings and the effectiveness of the proposed MixEdit, we conduct experiments on mainstream English and Chinese GEC datasets. The results show that MixEdit substantially improves GEC models and is complementary to traditional data augmentation methods. All the source codes of MixEdit are released at https://github.com/THUKElab/MixEdit.", "keywords": "natural language processing;grammatical error correction;data augmentation", "primary_area": "", "supplementary_material": "", "author": "Jingheng Ye;Yinghui Li;Yangning Li;Hai-Tao Zheng", "authorids": "~Jingheng_Ye1;~Yinghui_Li1;~Yangning_Li1;~Hai-Tao_Zheng2", "gender": "M;M;M;M", "homepage": "https://github.com/yejh123;https://github.com/geekjuruo;https://github.com/HUSTLyn;https://www.sigs.tsinghua.edu.cn/fg3/105069.jhtml", "dblp": "331/8438.html;243/8822.html;315/0403;20/134-2", "google_scholar": "Zm_L_EUAAAAJ;xTM9pKsAAAAJ;https://scholar.google.com.hk/citations?user=BmX7lQkAAAAJ;https://scholar.google.com.hk/citations?user=7VPeORoAAAAJ", "or_profile": "~Jingheng_Ye1;~Yinghui_Li1;~Yangning_Li1;~Hai-Tao_Zheng2", "aff": "Tsinghua University;Tsinghua University;Tsinghua University;Tsinghua University", "aff_domain": "mail.tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn", "position": "MS student;PhD student;PhD student;Associate Professor", "bibtex": "@inproceedings{\nye2023mixedit,\ntitle={MixEdit: Revisiting Data Augmentation and Beyond for Grammatical Error Correction},\nauthor={Jingheng Ye and Yinghui Li and Yangning Li and Hai-Tao Zheng},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=NBH3x0u5oQ}\n}", "github": "", "project": "", "reviewers": "RBnA;g98u;ytRu", "site": "https://openreview.net/forum?id=NBH3x0u5oQ", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;4;3", "excitement": "3;3;4", "reproducibility": "2;3;3", "correctness": "3;3;4", "rating_avg": 3.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 2.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0009-0005-9366-4985;;;0000-0001-5128-5649", "linkedin": ";;;", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Tsinghua University", "aff_unique_dep": "", "aff_unique_url": "https://www.tsinghua.edu.cn", "aff_unique_abbr": "THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "NHeAUKlTO8", "title": "PartialFormer: Modeling Part Instead of Whole for Machine 
Translation", "track": "main", "status": "Reject", "tldr": "", "abstract": "The parameter redundancy problem in Transformer models has been widely acknowledged in the literature. To address this weakness, we introduce PartialFormer, a parameter-efficient Transformer architecture for machine translation. Compared to previous parameter-efficient Transformer architecture, PartialFormer modifies the modeling strategy of the feed-forward network to allow it to spare tremendous parameters while maintaining large hidden dimension. Additionally, PartialFormer applies two efficient scaling strategies, namely depth scaling and width scaling, to improve performance within a given parameter budget. To efficiently benefit from these scaling strategies, PartialFormer is further enhanced by two cost-effective modifications: 1) a head scaling strategy for efficient width scaling and 2) a residual-like attention calculation for better depth scaling. Extensive experiments on 9 translation tasks validate the effectiveness of our PartialFormer approach.", "keywords": "Lightweight Transformer;", "primary_area": "", "supplementary_material": "", "author": "Tong Zheng;Huiwen Bao;Bei Li;Weiqiao Shan;Tong Xiao;JingBo Zhu", "authorids": "~Tong_Zheng1;~Huiwen_Bao1;~Bei_Li1;~Weiqiao_Shan1;~Tong_Xiao4;~JingBo_Zhu2", "gender": "M;M;M;M;;F", "homepage": "https://kidzheng.github.io/;https://libeineu.github.io/;;https://www.nlplab.com/members/xiaotong.html;https://dblp.org/pid/73/2129.html;", "dblp": ";;;05/5091;;", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;wzbJ5EIAAAAJ;https://scholar.google.co.in/citations?user=zsXzXD4AAAAJ;-fov7zkAAAAJ;;3PnRUyQAAAAJ", "or_profile": "~Tong_Zheng1;~Bei_Li1;~Weiqiao_Shan1;~Tong_Xiao4;~JingBo_Zhu2;~bao_huiwen1", "aff": ";Northeastern University;Northeastern University;Northeastern University;Northeastern University;Northeastern University", "aff_domain": ";neu.edu.cn;neu.edu.cn;mail.neu.edu.cn;mail.neu.edu.cn;neu.edu.cn", "position": ";PhD student;PhD student;Full Professor;Full Professor;Intern", "bibtex": "@misc{\nzheng2023partialformer,\ntitle={PartialFormer: Modeling Part Instead of Whole for Machine Translation},\nauthor={Tong Zheng and Huiwen Bao and Bei Li and Weiqiao Shan and Tong Xiao and JingBo Zhu},\nyear={2023},\nurl={https://openreview.net/forum?id=NHeAUKlTO8}\n}", "github": "", "project": "", "reviewers": "kL8C;BqHF;1vsa", "site": "https://openreview.net/forum?id=NHeAUKlTO8", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;3", "excitement": "3;3;3", "reproducibility": "4;3;3", "correctness": "3;4;3", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.0, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.3333333333333335, "replies_avg": 10, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-3472-4387;;;;;", "linkedin": ";;;tong-xiao-168bb081/;;", "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Northeastern University", "aff_unique_dep": "", "aff_unique_url": "https://www.northeastern.edu", "aff_unique_abbr": "NEU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "NMMRH80gha", "title": "Simple and Effective Input Reformulations for Translation", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Foundation language models learn from their finetuning input context in different ways. 
In this paper, we reformulate inputs during finetuning for challenging translation tasks, leveraging model strengths from pretraining in novel ways to improve downstream performance. These reformulations are simple data level modifications, require no additional collection of training data or modification of data at inference time. They can be applied either on single language pair translation tasks or massively multilingual translation tasks. Experiments with these techniques demonstrate significant performance improvements up to \\textbf{3.5 chrF++ on the Flores200 translation benchmark}. We hope our research accessibly improves finetuning data efficiency, enabling more effective training to scalably improve state-of-the-art performance. Our code is released \\href{https://github.com/bri25yu/LanguageModelExperimentation}{here}.", "keywords": "natural language processing;data efficiency;input reformulations;multilingual;translation;foundation language models;finetuning;machine learning", "primary_area": "", "supplementary_material": "", "author": "Brian Yu;Hansen Lillemark;Kurt Keutzer", "authorids": "~Brian_Yu1;~Hansen_Lillemark1;~Kurt_Keutzer1", "gender": "M;M;M", "homepage": "http://bri25yu.github.io/;https://hlillemark.github.io;https://people.eecs.berkeley.edu/~keutzer/", "dblp": ";;k/KurtKeutzer.html", "google_scholar": ";X_mcf_EAAAAJ;ID9QePIAAAAJ", "or_profile": "~Brian_Yu1;~Hansen_Lillemark1;~Kurt_Keutzer1", "aff": "University of California, Berkeley;;University of California, Berkeley", "aff_domain": "berkeley.edu;;berkeley.edu", "position": "MS student;;Full Professor", "bibtex": "@inproceedings{\nyu2023simple,\ntitle={Simple and Effective Input Reformulations for Translation},\nauthor={Brian Yu and Hansen Lillemark and Kurt Keutzer},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=NMMRH80gha}\n}", "github": "", "project": "", "reviewers": "Mb7f;V5ez;wsZB", "site": "https://openreview.net/forum?id=NMMRH80gha", "pdf_size": 0, "rating": "5;5;5", "confidence": "3;3;4", "excitement": "2;3;4", "reproducibility": "3;3;4", "correctness": "2;3;3", "rating_avg": 5.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.0, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 2.6666666666666665, "replies_avg": 10, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0003-3868-8501", "linkedin": "bri25yu/;hansenlillemark/;kurtkeutzer/", "aff_unique_index": "0;0", "aff_unique_norm": "University of California, Berkeley", "aff_unique_dep": "", "aff_unique_url": "https://www.berkeley.edu", "aff_unique_abbr": "UC Berkeley", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Berkeley", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "NMMnxhQm01", "title": "The Distributional Hypothesis Does Not Fully Explain the Benefits of Masked Language Model Pretraining", "track": "main", "status": "Long Main", "tldr": "", "abstract": "We analyze the masked language modeling pretraining objective function from the perspective of the Distributional Hypothesis.\nWe investigate whether the better sample efficiency and\nthe better generalization capability of models\npretrained with masked language modeling can\nbe attributed to the semantic similarity encoded in the pretraining data's distributional property.\nVia a synthetic dataset, our analysis suggests that distributional property indeed leads to the better sample 
efficiency of pretrained masked language models, but \ndoes not fully explain the generalization capability.\nWe also conduct an analysis over two real-world datasets \nand demonstrate that the distributional property does not explain the generalization ability\nof pretrained natural language models either.\nOur results illustrate our limited understanding of model pretraining and provide future research directions.", "keywords": "Distributional Hypothesis;MLM;Pretraining", "primary_area": "", "supplementary_material": "", "author": "Ting-Rui Chiang;Dani Yogatama", "authorids": "~Ting-Rui_Chiang1;~Dani_Yogatama2", "gender": "Not Specified;", "homepage": "https://ctinray.github.io/;", "dblp": "230/3609;08/8178", "google_scholar": "aIgoIxwAAAAJ;", "or_profile": "~Ting-Rui_Chiang1;~Dani_Yogatama1", "aff": "University of Southern California;Google DeepMind", "aff_domain": "usc.edu;google.com", "position": "PhD student;Research Scientist", "bibtex": "@inproceedings{\nchiang2023the,\ntitle={The Distributional Hypothesis Does Not Fully Explain the Benefits of Masked Language Model Pretraining},\nauthor={Ting-Rui Chiang and Dani Yogatama},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=NMMnxhQm01}\n}", "github": "", "project": "", "reviewers": "GJHm;ggfY;wjUa;GgVT", "site": "https://openreview.net/forum?id=NMMnxhQm01", "pdf_size": 0, "rating": "4;4;4;4", "confidence": "2;4;4;3", "excitement": "3;4;3;4", "reproducibility": "3;4;3;4", "correctness": "2;4;3;4", "rating_avg": 4.0, "confidence_avg": 3.25, "excitement_avg": 3.5, "reproducibility_avg": 3.5, "correctness_avg": 3.25, "replies_avg": 14, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";", "linkedin": ";", "aff_unique_index": "0;1", "aff_unique_norm": "University of Southern California;Google", "aff_unique_dep": ";Google DeepMind", "aff_unique_url": "https://www.usc.edu;https://deepmind.com", "aff_unique_abbr": "USC;DeepMind", "aff_campus_unique_index": "0", "aff_campus_unique": "Los Angeles;", "aff_country_unique_index": "0;1", "aff_country_unique": "United States;United Kingdom" }, { "id": "NO5dc8Ljvj", "title": "C2D2 Dataset: A Resource for the Cognitive Distortion Analysis and Its Impact on Mental Health", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Cognitive distortions refer to patterns of irrational thinking that can lead to distorted perceptions of reality and mental health problems in individuals. Despite previous attempts to detect cognitive distortion through language, progress has been slow due to the lack of appropriate data. In this paper, we present the C2D2 dataset, the first expert-supervised \\textbf{C}hinese \\textbf{C}ognitive \\textbf{D}istortion \\textbf{D}ataset, which contains 7,500 cognitive distortion thoughts in everyday life scenes. Additionally, we examine the presence of cognitive distortions in social media texts shared by individuals diagnosed with mental disorders, providing insights into the association between cognitive distortions and mental health conditions. We propose that incorporating information about users' cognitive distortions can enhance the performance of existing models mental disorder detection. 
We contribute to a better understanding of how cognitive distortions appear in individuals' language and their impact on mental health.", "keywords": "cognitive distortion;mental health;text analysis", "primary_area": "", "supplementary_material": "", "author": "BiChen Wang;PengFei Deng;Yanyan Zhao;Bing Qin", "authorids": "~BiChen_Wang1;~PengFei_Deng2;~Yanyan_Zhao1;~Bing_Qin2", "gender": "Non-Binary;M;F;", "homepage": "http://ir.hit.edu.cn/~wangbichen/;https://github.com/wojiaodpf;http://ir.hit.edu.cn/~yanyan/;http://ir.hit.edu.cn/~qinb", "dblp": "358/3330;;;86/5934.html", "google_scholar": ";;mEdfAYoAAAAJ;LKnCub0AAAAJ", "or_profile": "~BiChen_Wang1;~PengFei_Deng2;~Yanyan_Zhao1;~Bing_Qin2", "aff": "Harbin Institute of Technology;Harbin Institute of Technology;Harbin Institute of Technology;Harbin Institute of Technology", "aff_domain": "hit.edu;hit.edu.cn;hit.edu.cn;hit.edu.cn", "position": "PhD student;MS student;Full Professor;Full Professor", "bibtex": "@inproceedings{\nwang2023cd,\ntitle={C2D2 Dataset: A Resource for the Cognitive Distortion Analysis and Its Impact on Mental Health},\nauthor={BiChen Wang and PengFei Deng and Yanyan Zhao and Bing Qin},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=NO5dc8Ljvj}\n}", "github": "", "project": "", "reviewers": "JxAP;jS9Q;DcU8;8gtz", "site": "https://openreview.net/forum?id=NO5dc8Ljvj", "pdf_size": 0, "rating": "3;3;3;3", "confidence": "4;4;4;3", "excitement": "3;4;3;4", "reproducibility": "4;4;4;3", "correctness": "3;4;4;4", "rating_avg": 3.0, "confidence_avg": 3.75, "excitement_avg": 3.5, "reproducibility_avg": 3.75, "correctness_avg": 3.75, "replies_avg": 14, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0001-6011-2125;;;0000-0002-2543-5604", "linkedin": ";;;", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Harbin Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "http://www.hit.edu.cn/", "aff_unique_abbr": "HIT", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Harbin", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "NPJznfA7ZC", "title": "Demystifying Prompts in Language Models via Perplexity Estimation", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Language models can be prompted to perform a wide variety of tasks with zero- and few-shot in-context learning.\nHowever, performance varies significantly with the choice of prompt, and we do not yet understand why this happens.\nIn this paper, we analyze the factors that contribute to this variance and establish a new empirical hypothesis: the performance of a prompt is predicted by the extent to which the model is familiar with the language it contains. Over a wide range of tasks, we show that the lower the perplexity of the prompt, the better it is able to perform the task, when considering reasonable prompts that are related to it. As part of our analysis, we also devise a method to automatically extend a small seed set of manually written prompts by paraphrasing with GPT3 and backtranslation. This larger set allows us to verify that perplexity is a strong predictor of the success of a prompt and we show that the lowest perplexity prompts are consistently effective.", "keywords": "LLM;perplexity;prompts", "primary_area": "", "supplementary_material": "", "author": "Hila Gonen;Srini Iyer;Terra Blevins;Noah A. 
Smith;Luke Zettlemoyer", "authorids": "~Hila_Gonen1;~Srini_Iyer1;~Terra_Blevins1;~Noah_A._Smith2;~Luke_Zettlemoyer1", "gender": ";M;F;M;M", "homepage": "https://gonenhila.github.io/;http://sriniiyer.github.io;https://blvns.github.io;https://www.cs.washington.edu/people/faculty/lsz/;https://homes.cs.washington.edu/~nasmith/", "dblp": "167/5312;78/4928.html;184/3734;21/6793;90/5204.html", "google_scholar": "URThmtMAAAAJ;jNjde2wAAAAJ;;https://scholar.google.com.tw/citations?user=UjpbO6IAAAAJ;https://scholar.google.com/citations?hl=en", "or_profile": "~Hila_Gonen1;~Srini_Iyer1;~Terra_Blevins1;~Luke_Zettlemoyer1;~Noah_Smith1", "aff": "Meta Facebook;Meta Facebook;University of Washington;Meta;Allen Institute for Artificial Intelligence", "aff_domain": "facebook.com;meta.com;washington.edu;meta.com;allenai.org", "position": "Postdoc;Principal Researcher;PhD student;Researcher;Senior Director of NLP Research", "bibtex": "@inproceedings{\ngonen2023demystifying,\ntitle={Demystifying Prompts in Language Models via Perplexity Estimation},\nauthor={Hila Gonen and Srini Iyer and Terra Blevins and Noah A. Smith and Luke Zettlemoyer},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=NPJznfA7ZC}\n}", "github": "", "project": "", "reviewers": "nmvT;feBz;ZmV4", "site": "https://openreview.net/forum?id=NPJznfA7ZC", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;3;3", "excitement": "4;3;4", "reproducibility": "4;4;5", "correctness": "4;4;3", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.333333333333333, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;0000-0002-2310-6380", "linkedin": ";;;luke-zettlemoyer-a0109b226/;", "aff_unique_index": "0;0;1;0;2", "aff_unique_norm": "Meta;University of Washington;Allen Institute for Artificial Intelligence", "aff_unique_dep": "Meta Platforms, Inc.;;", "aff_unique_url": "https://meta.com;https://www.washington.edu;https://allenai.org", "aff_unique_abbr": "Meta;UW;AI2", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "NPkkvrv2Vp", "title": "Who is Speaking? Speaker-Aware Multiparty Dialogue Act Classification", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Utterances do not occur in isolation in dialogues; it is essential to have the information of who the speaker of an utterance is to be able to recover the speaker\u2019s intention with respect to the surrounding context. Beyond simply capturing speaker switches, identifying how speakers interact with each other in a dialogue is crucial to understanding conversational flow. This becomes increasingly important and simultaneously difficult to model when more than two interlocutors take part in a conversation. To overcome this challenge, we propose to explicitly add speaker awareness to each utterance representation. To that end, we use a graph neural network to model how each speaker is behaving within the local context of a conversation. The speaker representations learned this way are then used to update their respective utterance representations. 
We experiment with both multiparticipant and dyadic conversations on the MRDA and SwDA datasets and show the effectiveness of our approach.", "keywords": "Dialogue acts;multiparty dialogues;speaker modeling", "primary_area": "", "supplementary_material": "", "author": "Ayesha Qamar;Adarsh Pyarelal;Ruihong Huang", "authorids": "~Ayesha_Qamar1;~Adarsh_Pyarelal1;~Ruihong_Huang1", "gender": "F;M;F", "homepage": ";https://adarsh.cc;https://people.engr.tamu.edu/huangrh/index.html", "dblp": "273/6982;242/7424;42/4811.html", "google_scholar": "https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=en;https://scholar.google.com.tw/citations?user=NU2aHWUAAAAJ", "or_profile": "~Ayesha_Qamar1;~Adarsh_Pyarelal1;~Ruihong_Huang1", "aff": "Texas A&M University - College Station;University of Arizona;Texas A&M University", "aff_domain": "tamu.edu;arizona.edu;cse.tamu.edu", "position": "PhD student;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nqamar2023who,\ntitle={Who is Speaking? Speaker-Aware Multiparty Dialogue Act Classification},\nauthor={Ayesha Qamar and Adarsh Pyarelal and Ruihong Huang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=NPkkvrv2Vp}\n}", "github": "", "project": "", "reviewers": "tXBt;pYXY;JZjf;U5G3", "site": "https://openreview.net/forum?id=NPkkvrv2Vp", "pdf_size": 0, "rating": "2;2;2;2", "confidence": "3;4;3;4", "excitement": "3;2;4;2", "reproducibility": "2;3;4;4", "correctness": "3;2;3;2", "rating_avg": 2.0, "confidence_avg": 3.5, "excitement_avg": 2.75, "reproducibility_avg": 3.25, "correctness_avg": 2.5, "replies_avg": 14, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-1602-0386;", "linkedin": "ayesha-qamar-559556197/;adarshpyarelal/;", "aff_unique_index": "0;1;0", "aff_unique_norm": "Texas A&M University;University of Arizona", "aff_unique_dep": ";", "aff_unique_url": "https://www.tamu.edu;https://www.arizona.edu", "aff_unique_abbr": "TAMU;UA", "aff_campus_unique_index": "0", "aff_campus_unique": "College Station;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "NT4ehxCifo", "title": "Large Language Models Meet Open-World Intent Discovery and Recognition: An Evaluation of ChatGPT", "track": "main", "status": "Long Main", "tldr": "", "abstract": "The tasks of out-of-domain (OOD) intent discovery and generalized intent discovery (GID) aim to extend a closed intent classifier to open-world intent sets, which is crucial to task-oriented dialogue (TOD) systems. Previous methods address them by fine-tuning discriminative models. Recently, although some studies have been exploring the application of large language models (LLMs) represented by ChatGPT to various downstream tasks, it is still unclear whether ChatGPT has the ability to discover and incrementally extend OOD intents. In this paper, we comprehensively evaluate ChatGPT on OOD intent discovery and GID, and then outline the strengths and weaknesses of ChatGPT. Overall, ChatGPT exhibits consistent advantages under zero-shot settings, but is still at a disadvantage compared to fine-tuned models. More deeply, through a series of analytical experiments, we summarize and discuss the challenges faced by LLMs including clustering, domain-specific understanding, and cross-domain in-context learning scenarios. 
Finally, we provide empirical guidance for future directions to address these challenges.", "keywords": "Large Language Models;Evaluation;Out of Domain;Intent Recognition", "primary_area": "", "supplementary_material": "", "author": "Xiaoshuai Song;Keqing He;Pei Wang;Guanting Dong;Yutao Mou;Jingang Wang;Yunsen Xian;Xunliang Cai;Weiran Xu", "authorids": "~Xiaoshuai_Song1;~Keqing_He1;~Pei_Wang12;~Guanting_Dong1;~Yutao_Mou1;~Jingang_Wang1;~Yunsen_Xian1;~Xunliang_Cai1;~Weiran_Xu1", "gender": "M;;;M;;M;;M;M", "homepage": ";https://helicqin.github.io/about/index.html;;https://dongguanting.github.io/;;https://sites.google.com/site/bitwjg/;http://faculty.dlut.edu.cn/GuoHe/en/xsxx/791119/content/132173.htm;https://maimai.cn/contact/share/card?u=fudmdwckxlwi;", "dblp": "45/9576;79/2314;;;;59/7807;;;41/5448", "google_scholar": "https://scholar.google.com/citations?view_op=list_works;811USNoAAAAJ;;amozZDkAAAAJ;;janU39IAAAAJ;;;https://scholar.google.com/citations?view_op=list_works", "or_profile": "~Xiaoshuai_Song1;~Keqing_He1;~Pei_Wang12;~Guanting_Dong1;~Yutao_Mou1;~Jingang_Wang1;~Yunsen_Xian1;~Xunliang_Cai1;~Weiran_Xu1", "aff": "Beijing University of Posts and Telecommunications;Meituan Group;Beijing University of Posts and Telecommunications;Beijing University of Posts and Telecommunications;;Meituan;;Meituan;Beijing University of Post and Telecommunication", "aff_domain": "bupt.edu.cn;meituan.com;bupt.edu.cn;bupt.edu.cn;;meituan.com;;meituan.com;bupt.edu.cn", "position": "MS student;Researcher;MS student;MS student;;Researcher;;Principal Researcher;Associate Professor", "bibtex": "@inproceedings{\nsong2023large,\ntitle={Large Language Models Meet Open-World Intent Discovery and Recognition: An Evaluation of Chat{GPT}},\nauthor={Xiaoshuai Song and Keqing He and Pei Wang and Guanting Dong and Yutao Mou and Jingang Wang and Yunsen Xian and Xunliang Cai and Weiran Xu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=NT4ehxCifo}\n}", "github": "", "project": "", "reviewers": "x2rM;TNLF;s6Wk", "site": "https://openreview.net/forum?id=NT4ehxCifo", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;4", "excitement": "3;4;3", "reproducibility": "4;4;2", "correctness": "3;4;4", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 9, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;;;;0000-0002-9416-7666", "linkedin": ";;https://www.linkedin.cn/in/%E9%9C%88-%E7%8E%8B-18a94a174;;;;;;", "aff_unique_index": "0;1;0;0;2;2;0", "aff_unique_norm": "Beijing University of Posts and Telecommunications;Meituan Group;Meituan", "aff_unique_dep": ";;", "aff_unique_url": "http://www.bupt.edu.cn/;https://www.meituan.com;https://www.meituan.com", "aff_unique_abbr": "BUPT;Meituan;Meituan", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Beijing;", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "NW09xt3kvH", "title": "HutCRS: Hierarchical User-Interest Tracking for Conversational Recommender System", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Conversational Recommender System (CRS) aims to explicitly acquire user preferences towards items and attributes through natural language conversations. 
However, existing CRS methods ask users to provide explicit answers (yes/no) for each attribute they require, regardless of users' knowledge or interest, which may significantly reduce the user experience and semantic consistency. Furthermore, these methods assume that users like all attributes of the target item and dislike those unrelated to it, which can introduce bias in attribute-level feedback and impede the system's ability to accurately identify the target item. To address these issues, we propose a more realistic, user-friendly, and explainable CRS framework called Hierarchical User-Interest Tracking for Conversational Recommender System (HutCRS). HutCRS portrays the conversation as a hierarchical interest tree that consists of two stages. In stage I, the system identifies the aspects that the user prefers while the system asks about attributes related to these positive aspects or recommends items in stage II. In addition, we develop a Hierarchical-Interest Policy Learning (HIPL) module to integrate the decision-making process of which aspects to ask and when to ask about attributes or recommend items. Moreover, we classify the attribute-level feedback results to further enhance the system\u2019s ability to capture special information, such as attribute instances that are accepted by users but not presented in their historical interactive data. Extensive experiments on four benchmark datasets demonstrate the superiority of our method. The implementation of HutCRS is publicly available at https://github.com/xinle1129/HutCRS.", "keywords": "Conversational Recommender System;Multi-round Conversations;Hierarchical Interest Tree;Graph Neural Network", "primary_area": "", "supplementary_material": "", "author": "Mingjie Qian;Yongsen Zheng;Jinghui Qin;Liang Lin", "authorids": "~Mingjie_Qian2;~Yongsen_Zheng1;~Jinghui_Qin1;~Liang_Lin1", "gender": ";F;M;M", "homepage": ";;;http://www.linliang.net", "dblp": ";249/6451;228/6607;", "google_scholar": ";;HIQBxXAAAAAJ;https://scholar.google.com.hk/citations?user=Nav8m8gAAAAJ", "or_profile": "~Mingjie_Qian2;~Yongsen_Zheng1;~Jinghui_Qin1;~Liang_Lin1", "aff": "SUN YAT-SEN UNIVERSITY;SUN YAT-SEN UNIVERSITY;Guangdong University of Technology;SUN YAT-SEN UNIVERSITY", "aff_domain": "sysu.edu.cn;sysu.edu.cn;gdut.edu.cn;sysu.edu.cn", "position": "MS student;PhD student;Lecturer;Full Professor", "bibtex": "@inproceedings{\nqian2023hutcrs,\ntitle={Hut{CRS}: Hierarchical User-Interest Tracking for Conversational Recommender System},\nauthor={Mingjie Qian and Yongsen Zheng and Jinghui Qin and Liang Lin},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=NW09xt3kvH}\n}", "github": "", "project": "", "reviewers": "9hoF;DXwT;2e3F", "site": "https://openreview.net/forum?id=NW09xt3kvH", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;3;3", "excitement": "3;3;3", "reproducibility": "3;3;2", "correctness": "3;3;3", "rating_avg": 3.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.0, "reproducibility_avg": 2.6666666666666665, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0009-0002-2604-4514;0000-0003-2692-6429;0000-0003-0663-199X;", "linkedin": ";;;", "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Sun Yat-sen University;Guangdong University of Technology", "aff_unique_dep": ";", "aff_unique_url": "http://www.sysu.edu.cn;http://www.gdut.edu.cn", "aff_unique_abbr": "SYSU;GDUT", 
"aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "NXXrvcilq8", "title": "Are NLP Models Good at Tracing Thoughts: An Overview of Narrative Understanding", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Narrative understanding involves capturing the author's cognitive processes, providing insights into their knowledge, intentions, beliefs, and desires. Although large language models (LLMs) excel in generating grammatically coherent text, their ability to comprehend the author's thoughts remains uncertain. This limitation hinders the practical applications of narrative understanding. In this paper, we conduct a comprehensive survey of narrative understanding tasks, thoroughly examining their key features, definitions, taxonomy, associated datasets, training objectives, evaluation metrics, and limitations. Furthermore, we explore the potential of expanding the capabilities of modularized LLMs to address novel narrative understanding tasks. By framing narrative understanding as the retrieval of the author's imaginative cues that outline the narrative structure, our study introduces a fresh perspective on enhancing narrative comprehension.", "keywords": "Narrative Understanding;Reading Comprehension;Summarization;Question Answering", "primary_area": "", "supplementary_material": "", "author": "Lixing Zhu;Runcong Zhao;Lin Gui;Yulan He", "authorids": "~Lixing_Zhu1;~Runcong_Zhao1;~Lin_Gui3;~Yulan_He1", "gender": ";F;M;F", "homepage": ";https://sites.google.com/view/runcongzhao/home;;https://www.kcl.ac.uk/people/yulan-he", "dblp": ";284/0783;34/8605-3;75/5430", "google_scholar": ";;https://scholar.google.com.ph/citations?user=1b3Eyx4AAAAJ;https://scholar.google.co.uk/citations?user=SP9r32UAAAAJ", "or_profile": "~Lixing_Zhu1;~Runcong_Zhao1;~Lin_Gui3;~Yulan_He1", "aff": ";The university of Warwick;King's College London, University of London;King's College London, University of London", "aff_domain": ";warwick.ac.uk;kcl.ac.uk;kcl.ac.uk", "position": ";PhD student;Lecturer;Full Professor", "bibtex": "@inproceedings{\nzhu2023are,\ntitle={Are {NLP} Models Good at Tracing Thoughts: An Overview of Narrative Understanding},\nauthor={Lixing Zhu and Runcong Zhao and Lin Gui and Yulan He},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=NXXrvcilq8}\n}", "github": "", "project": "", "reviewers": "ZCLn;xuKr;Bkvf", "site": "https://openreview.net/forum?id=NXXrvcilq8", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;5;4", "excitement": "3;3;4", "reproducibility": "", "correctness": "3;3;4", "rating_avg": 4.0, "confidence_avg": 4.333333333333333, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0000-0003-3948-5845", "linkedin": ";;;yulan-he-277234a/?originalSubdomain=uk", "aff_unique_index": "0;1;1", "aff_unique_norm": "University of Warwick;King's College London", "aff_unique_dep": ";", "aff_unique_url": "https://warwick.ac.uk;https://www.kcl.ac.uk", "aff_unique_abbr": "Warwick;KCL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United Kingdom" }, { "id": "NYlL3oACU2", "title": "Comparing Biases and the Impact of Multilingual Training across Multiple Languages", "track": "main", 
"status": "Long Main", "tldr": "", "abstract": "Studies in bias and fairness in natural language processing have primarily examined social biases within a single language and/or across few attributes (e.g. gender, race). However, biases can manifest differently across various languages for individual attributes. As a result, it is critical to examine biases within each language and attribute. Of equal importance is to study how these biases compare across languages and how the biases are affected when training a model on multilingual data versus monolingual data. We present a bias analysis across Italian, Chinese, English, Hebrew, and Spanish on the downstream sentiment analysis task to observe whether specific demographics are viewed more positively. We study bias similarities and differences across these languages and investigate the impact of multilingual vs. monolingual training data. We adapt existing sentiment bias templates in English to Italian, Chinese, Hebrew, and Spanish for four attributes: race, religion, nationality, and gender. Our results reveal similarities in bias expression such as favoritism of groups that are dominant in each language's culture (e.g. majority religions and nationalities). Additionally, we find an increased variation in predictions across protected groups, indicating bias amplification, after multilingual finetuning in comparison to multilingual pretraining.", "keywords": "Fairness;Biases;Multilinguality", "primary_area": "", "supplementary_material": "", "author": "Sharon Levy;Neha Anna John;Ling Liu;Yogarshi Vyas;Jie Ma;Yoshinari Fujinuma;Miguel Ballesteros;Vittorio Castelli;Dan Roth", "authorids": "~Sharon_Levy1;~Neha_Anna_John1;~Ling_Liu6;~Yogarshi_Vyas1;~Jie_Ma3;~Yoshinari_Fujinuma1;~Miguel_Ballesteros1;~Vittorio_Castelli1;~Dan_Roth3", "gender": ";F;;M;M;M;M;M;M", "homepage": "https://sharonlevy.github.io/;;;http://www.cs.umd.edu/~yogarshi/;;;https://miguelballesteros.github.io/;;https://www.cis.upenn.edu/~danroth/", "dblp": "92/7341;331/2445.html;;147/9150;62/5110-5.html;174/7392;38/8065;c/VittorioCastelli;r/DanRoth", "google_scholar": "KdTUNZIAAAAJ;7_JJaE0AAAAJ;-P5PrncAAAAJ;k6k7i1IAAAAJ;0FSlSt4AAAAJ;;lhDwr-AAAAAJ;d-lg1lEAAAAJ;E-bpPWgAAAAJ", "or_profile": "~Sharon_Levy1;~Neha_Anna_John1;~Ling_Liu6;~Yogarshi_Vyas1;~Jie_Ma3;~Yoshinari_Fujinuma1;~Miguel_Ballesteros1;~Vittorio_Castelli1;~Dan_Roth3", "aff": "UC Santa Barbara;Amazon;Amazon;Amazon;Amazon;AWS AI Labs;Amazon;Amazon;Amazon", "aff_domain": "ucsb.edu;amazon.com;amazon.com;amazon.com;amazon.com;amazon.com;amazon.com;amazon.com;amazon.com", "position": "PhD student;Researcher;Researcher;Applied Scientist;Researcher;Applied Scientist;Principal Applied Scientist;Senior Science Manager;VP and Distinguished Scientist", "bibtex": "@inproceedings{\nlevy2023comparing,\ntitle={Comparing Biases and the Impact of Multilingual Training across Multiple Languages},\nauthor={Sharon Levy and Neha Anna John and Ling Liu and Yogarshi Vyas and Jie Ma and Yoshinari Fujinuma and Miguel Ballesteros and Vittorio Castelli and Dan Roth},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=NYlL3oACU2}\n}", "github": "", "project": "", "reviewers": "WYR1;jH5Q;3PjR", "site": "https://openreview.net/forum?id=NYlL3oACU2", "pdf_size": 0, "rating": "4;4;4", "confidence": "5;3;4", "excitement": "4;3;4", "reproducibility": "4;4;4", "correctness": "5;4;5", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.6666666666666665, 
"reproducibility_avg": 4.0, "correctness_avg": 4.666666666666667, "replies_avg": 11, "authors#_avg": 9, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;;;;", "linkedin": ";nehaannajohn/;;;jie-ma-6ab59497/;;;vittorio-castelli-3449604/;dan-roth-8667361/", "aff_unique_index": "0;1;1;1;1;1;1;1;1", "aff_unique_norm": "University of California, Santa Barbara;Amazon", "aff_unique_dep": ";Amazon.com, Inc.", "aff_unique_url": "https://www.ucsb.edu;https://www.amazon.com", "aff_unique_abbr": "UCSB;Amazon", "aff_campus_unique_index": "0", "aff_campus_unique": "Santa Barbara;", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "NYstQhld8J", "title": "MarkQA: A large scale KBQA dataset with numerical reasoning", "track": "main", "status": "Long Main", "tldr": "", "abstract": "While question answering over knowledge bases (KBQA) has shown progress in addressing factoid questions, KBQA with numerical reasoning remains relatively unexplored. In this paper, we focus on the complex numerical reasoning in KBQA, and propose a new task, NR-KBQA, which necessitates the ability to perform both multi-hop reasoning and numerical reasoning. We also design a logic form in Python format called PyQL to represent the reasoning process of numerical reasoning questions. To facilitate the development of NR-KBQA, we present a large NR-KBQA dataset called MarkQA, which is automatically constructed by a small set of seeds. Each question in MarkQA is annotated with its corresponding SPARQL query, alongside the step-by-step reasoning path in the QDMR format and PyQL program. Experimental results of some state-of-the-art QA methods performed on the MarkQA dataset show that complex numerical reasoning in KBQA faces great challenges.", "keywords": "Knowledge base;Question answering;Numerical reasoning", "primary_area": "", "supplementary_material": "", "author": "Xiang Huang;Sitao Cheng;Yuheng Bao;Shanshan Huang;Yuzhong Qu", "authorids": "~Xiang_Huang2;~Sitao_Cheng1;~Yuheng_Bao1;~Shanshan_Huang4;~Yuzhong_Qu1", "gender": ";M;M;;M", "homepage": "https://cdhx.github.io/;https://sitaocheng.github.io/;https://github.com/BaoYuheng;https://github.com/TurquoiseDM;http://ws.nju.edu.cn/~yzqu", "dblp": "16/1064-7.html;349/4470;;;05/1694.html", "google_scholar": "9yvG_0sAAAAJ;;;;", "or_profile": "~Xiang_Huang2;~Sitao_Cheng1;~Yuheng_Bao1;~Shanshan_Huang4;~Yuzhong_Qu1", "aff": "Nanjing University;Nanjing University;Nanjing University;Nanjing University;Nanjing University", "aff_domain": "nju.edu.cn;nju.edu.cn;nju.edu.cn;nju.edu.cn;nju.edu.cn", "position": "PhD student;MS student;PhD student;MS student;Full Professor", "bibtex": "@inproceedings{\nhuang2023markqa,\ntitle={Mark{QA}: A large scale {KBQA} dataset with numerical reasoning},\nauthor={Xiang Huang and Sitao Cheng and Yuheng Bao and Shanshan Huang and Yuzhong Qu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=NYstQhld8J}\n}", "github": "", "project": "", "reviewers": "Y7yn;PuQT;6fJ7", "site": "https://openreview.net/forum?id=NYstQhld8J", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;3;4", "excitement": "3;3;4", "reproducibility": "4;3;3", "correctness": "4;3;4", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, 
"corr_rating_correctness": 0.0, "orcid": "0000-0003-1904-0494;;;;0000-0003-2777-8149", "linkedin": ";;;;", "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Nanjing University", "aff_unique_dep": "", "aff_unique_url": "https://www.nju.edu.cn", "aff_unique_abbr": "Nanjing U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "NZK22y40DS", "title": "Towards Enhancing Relational Rules for Knowledge Graph Link Prediction", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Graph neural networks (GNNs) have shown promising performance for knowledge graph reasoning. A recent variant of GNN called progressive relational graph neural network (PRGNN), utilizes relational rules to infer missing knowledge in relational digraphs and achieves notable results. However, during reasoning with PRGNN, two important properties are often overlooked: (1) the sequentiality of relation composition, where the order of combining different relations affects the semantics of the relational rules, and (2) the lagged entity information propagation, where the transmission speed of required information lags behind the appearance speed of new entities. Ignoring these properties leads to incorrect relational rule learning and decreased reasoning accuracy. To address these issues, we propose a novel knowledge graph reasoning approach, the Relational rUle eNhanced Graph Neural Network (RUN-GNN). Specifically, RUN-GNN employs a query related fusion gate unit to model the sequentiality of relation composition and utilizes a buffering update mechanism to alleviate the negative effect of lagged entity information propagation, resulting in higher-quality relational rule learning. Experimental results on multiple datasets demonstrate the superiority of RUN-GNN is superior on both transductive and inductive link prediction tasks.", "keywords": "Knowledge Graph Reasoning;Graph Neural Network;Inductive Reasoning", "primary_area": "", "supplementary_material": "", "author": "Shuhan Wu;Huaiyu Wan;Wei Chen;Yuting Wu;Junfeng Shen;Youfang Lin", "authorids": "~Shuhan_Wu1;~Huaiyu_Wan1;~Wei_Chen38;~Yuting_Wu1;~Junfeng_Shen1;~Youfang_Lin1", "gender": "M;M;;F;M;M", "homepage": "https://www.researchgate.net/profile/Shuhan-Wu-4;https://faculty.bjtu.edu.cn/8793/;;https://github.com/StephanieWyt;https://blog.csdn.net/itshard?spm=1000.2115.3001.5343;https://faculty.bjtu.edu.cn/7443/", "dblp": ";07/9988;;09/10409;;12/4988", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;T5wVWIUAAAAJ;;https://scholar.google.com/citations?hl=en;;e8xT-e0AAAAJ", "or_profile": "~Shuhan_Wu1;~Huaiyu_Wan1;~Wei_Chen38;~Yuting_Wu1;~Junfeng_Shen1;~Youfang_Lin1", "aff": "Beijing Jiaotong University;Beijing Jiaotong University;;Beijing Jiaotong University;Beijing Jiaotong University;Beijing Jiaotong University", "aff_domain": "bjtu.edu.cn;bjtu.edu.cn;;bjtu.edu.cn;bjtu.edu.cn;bjtu.edu.cn", "position": "MS student;Full Professor;;Lecturer;Undergrad student;Full Professor", "bibtex": "@inproceedings{\nwu2023towards,\ntitle={Towards Enhancing Relational Rules for Knowledge Graph Link Prediction},\nauthor={Shuhan Wu and Huaiyu Wan and Wei Chen and Yuting Wu and Junfeng Shen and Youfang Lin},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=NZK22y40DS}\n}", "github": "", "project": "", "reviewers": "55BH;G1kj;FhXe", "site": "https://openreview.net/forum?id=NZK22y40DS", 
"pdf_size": 0, "rating": "3;3;3", "confidence": "3;5;4", "excitement": "3;3;4", "reproducibility": "2;4;4", "correctness": "3;4;3", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.3333333333333335, "replies_avg": 10, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-0501-9363;;0000-0002-7550-3804;;0000-0002-5143-3645", "linkedin": ";;;yuting-wu-8a4319209/;;youfang-lin-a1625091/", "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Beijing Jiao Tong University", "aff_unique_dep": "", "aff_unique_url": "http://www.njtu.edu.cn/en", "aff_unique_abbr": "BJTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "NZZB3UGcd8", "title": "Editing Large Language Models: Problems, Methods, and Opportunities", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Despite the ability to train capable LLMs, the methodology for maintaining their relevancy and rectifying errors remains elusive. \nTo this end, the past few years have witnessed a surge in techniques for editing LLMs, the objective of which is to alter the behavior of LLMs \\textbf{efficiently} within a specific domain without negatively impacting performance across other inputs. This paper embarks on a deep exploration of the problems, methods, and opportunities related to model editing for LLMs. \nIn particular, we provide an exhaustive overview of the task definition and challenges associated with model editing, along with an in-depth empirical analysis of the most progressive methods currently at our disposal. We also build a new benchmark dataset to facilitate a more robust evaluation and pinpoint enduring issues intrinsic to existing techniques. 
\nOur objective is to provide valuable insights into the effectiveness and feasibility of each editing technique, thereby assisting the community in making informed decisions on the selection of the most appropriate method for a specific task or context\\footnote{Code and datasets are in the supplementary and will be released.}.", "keywords": "Model Editing;Large Language Model;Editing Factual Knowledge", "primary_area": "", "supplementary_material": "", "author": "Yunzhi Yao;Peng Wang;Bozhong Tian;Siyuan Cheng;Zhoubo Li;Shumin Deng;Huajun Chen;Ningyu Zhang", "authorids": "~Yunzhi_Yao1;~Peng_Wang28;~Bozhong_Tian1;~Siyuan_Cheng2;~Zhoubo_Li1;~Shumin_Deng1;~Huajun_Chen1;~Ningyu_Zhang1", "gender": "M;M;M;M;M;F;M;M", "homepage": "http://yyzcowtodd.cn;;https://github.com/tbozhong;https://github.com/cheng-simian;;https://231sm.github.io/;;https://person.zju.edu.cn/en/ningyu", "dblp": "295/9476;;338/5451;;;213/1853;94/5089;139/4181-1.html", "google_scholar": "https://scholar.google.com.hk/citations?user=nAagIwEAAAAJ;vLN6gsMAAAAJ;Sj9kUscAAAAJ;;;3am3hL4AAAAJ;;xQDOPvsAAAAJ", "or_profile": "~Yunzhi_Yao1;~Peng_Wang28;~Bozhong_Tian1;~Siyuan_Cheng2;~Zhoubo_Li1;~Shumin_Deng1;~Huajun_Chen1;~Ningyu_Zhang1", "aff": "Zhejiang University;Zhejiang University;Zhejiang University;Zhejiang University;Zhejiang University;National University of Singapore;Zhejiang University;Zhejiang University", "aff_domain": "zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn;nus.edu.sg;zju.edu.cn;zju.edu.cn", "position": "PhD student;MS student;MS student;MS student;MS student;Postdoc;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nyao2023editing,\ntitle={Editing Large Language Models: Problems, Methods, and Opportunities},\nauthor={Yunzhi Yao and Peng Wang and Bozhong Tian and Siyuan Cheng and Zhoubo Li and Shumin Deng and Huajun Chen and Ningyu Zhang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=NZZB3UGcd8}\n}", "github": "", "project": "", "reviewers": "yYDP;5Cwu;yadK", "site": "https://openreview.net/forum?id=NZZB3UGcd8", "pdf_size": 0, "rating": "4;4;4", "confidence": "2;1;4", "excitement": "3;4;4", "reproducibility": "4;4;4", "correctness": "3;4;4", "rating_avg": 4.0, "confidence_avg": 2.3333333333333335, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.0, "correctness_avg": 3.6666666666666665, "replies_avg": 9, "authors#_avg": 8, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;;;0000-0002-1970-0678", "linkedin": ";;;;https://www.linkedin.cn/incareer/in/ACoAADYbK1YBjJtr7ncGkYGnf8q0xu33a_fJlMo;;;ningyuzhang/", "aff_unique_index": "0;0;0;0;0;1;0;0", "aff_unique_norm": "Zhejiang University;National University of Singapore", "aff_unique_dep": ";", "aff_unique_url": "https://www.zju.edu.cn;https://www.nus.edu.sg", "aff_unique_abbr": "ZJU;NUS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;1;0;0", "aff_country_unique": "China;Singapore" }, { "id": "Na4DonsjLx", "title": "Contrastive Learning for Inference in Dialogue", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Inference, especially those derived from inductive processes, is a crucial component in our conversation to complement the information implicitly or explicitly conveyed by a speaker. 
\nWhile recent large language models show remarkable advances in inference tasks, their performance in inductive reasoning, where not all information is present in the context, is far behind deductive reasoning. \nIn this paper, we analyze the behavior of the models based on the task difficulty defined by the semantic information gap -- which distinguishes inductive and deductive reasoning. \nOur analysis reveals that the information gap between dialogue contexts and desired inferences renders the inductive inference process more challenging. \nTo mitigate this information gap, we investigate a contrastive learning approach by feeding negative samples.\nOur experiments suggest negative samples help models understand what is wrong and improve their inference generations.", "keywords": "inference in dialogue;commonsense reasoning in dialogue;contrastive learning;semantic gap;dialogue comprehension;information gap;inductive reasoning", "primary_area": "", "supplementary_material": "", "author": "Etsuko Ishii;Yan Xu;Bryan Wilie;Ziwei Ji;Holy Lovenia;Willy Chung;Pascale Fung", "authorids": "~Etsuko_Ishii1;~Yan_Xu6;~Bryan_Wilie1;~Ziwei_Ji2;~Holy_Lovenia1;~Willy_Chung1;~Pascale_Fung1", "gender": ";F;;F;F;M;F", "homepage": ";https://yana-xuyan.github.io/;;https://ziweiji.github.io/;https://holylovenia.github.io/;https://github.com/WillyHC22;http://pascale.home.ece.ust.hk/", "dblp": ";03/4702-12;;176/4574;243/6573;;29/4187", "google_scholar": ";j1t9_ScAAAAJ;;oSnZ9mMAAAAJ;bugb-lAAAAAJ;y02ojzsAAAAJ;", "or_profile": "~Etsuko_Ishii1;~Yan_Xu6;~Bryan_Wilie1;~Ziwei_Ji2;~Holy_Lovenia1;~Willy_Chung1;~Pascale_Fung1", "aff": ";Hong Kong University of Science and Technology;;Hong Kong University of Science and Technology;Hong Kong University of Science and Technology;Hong Kong University of Science and Technology;HKUST", "aff_domain": ";ust.hk;;ust.hk;hkust.edu;ust.hk;ece.ust.hk", "position": ";PhD student;;PhD student;MS student;MS student;Full Professor", "bibtex": "@inproceedings{\nishii2023contrastive,\ntitle={Contrastive Learning for Inference in Dialogue},\nauthor={Etsuko Ishii and Yan Xu and Bryan Wilie and Ziwei Ji and Holy Lovenia and Willy Chung and Pascale Fung},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=Na4DonsjLx}\n}", "github": "", "project": "", "reviewers": "LNAb;FcSR;Qjsz;X1eN;eCnU", "site": "https://openreview.net/forum?id=Na4DonsjLx", "pdf_size": 0, "rating": "3;3;3;3;3", "confidence": "4;4;5;3;4", "excitement": "3;3;3;3;3", "reproducibility": "5;4;4;4;3", "correctness": "3;3;3;3;3", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 3.0, "reproducibility_avg": 4.0, "correctness_avg": 3.0, "replies_avg": 17, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0000-0002-0206-7861;0000-0002-8995-5107;0000-0003-4499-525X;", "linkedin": ";yan-xu-4a822a172/;;ziwei-ji-a516b91a7/;holylovenia;willyhcchung;", "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Hong Kong University of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.ust.hk", "aff_unique_abbr": "HKUST", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "NbkVQsbaqJ", "title": "Exploring In-Context Learning for Knowledge Grounded Dialog Generation", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Large neural-based dialog generation 
models have been applied in many real-life scenarios, yet they are prone to hallucination and tend to produce factually inaccurate outputs which raise great concerns. \nTo alleviate this problem, we propose a plug-and-play retrieval-based framework IKA, which leverages in-context learning and retrieval techniques to enhance LLMs on knowledge grounded dialog generation.\nWe design thorough experiments on a large-scale knowledge graph with 1M+ facts to investigate the effectiveness and generalization of our framework. \nExperiments show that our method surpasses previous training-based SOTA by a large margin, specifically 46.67% in BLEU4, 26.01% in ROUGE-L, 122.90% in BARTScore and 30.50% in Entity Coverage F1.\nFurther analysis show promising abilities of LLMs to perform knowledge-intensive tasks, which is previously considered weak and understudied.", "keywords": "dialog;knowledge;large language models;in-context learning;retrieval system", "primary_area": "", "supplementary_material": "", "author": "Qinyu Chen;Wenhao Wu;Sujian Li", "authorids": "~Qinyu_Chen2;~Wenhao_Wu7;~Sujian_Li1", "gender": "M;M;F", "homepage": "https://morganchen.site;;https://pku-tangent.github.io/", "dblp": ";;05/4288", "google_scholar": ";LZFvCrwAAAAJ;https://scholar.google.com.tw/citations?user=RvBDhSwAAAAJ", "or_profile": "~Qinyu_Chen2;~Wenhao_Wu7;~Sujian_Li1", "aff": "Peking University;Peking University;Peking University", "aff_domain": "pku.edu.cn;pku.edu.cn;pku.edu.cn", "position": "MS student;PhD student;Associate Professor", "bibtex": "@inproceedings{\nchen2023exploring,\ntitle={Exploring In-Context Learning for Knowledge Grounded Dialog Generation},\nauthor={Qinyu Chen and Wenhao Wu and Sujian Li},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=NbkVQsbaqJ}\n}", "github": "", "project": "", "reviewers": "fq1q;WFsR;MTiQ", "site": "https://openreview.net/forum?id=NbkVQsbaqJ", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;4", "excitement": "3;4;2", "reproducibility": "3;3;4", "correctness": "2;3;3", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 3.0, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 2.6666666666666665, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": ";;", "aff_unique_index": "0;0;0", "aff_unique_norm": "Peking University", "aff_unique_dep": "", "aff_unique_url": "http://www.pku.edu.cn", "aff_unique_abbr": "Peking U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "Nc6U1Z0DDt", "title": "Balaur: Language Model Pretraining with Lexical Semantic Relations", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Lexical semantic relations (LSRs) characterize meaning relationships between words and play an important role in systematic generalization on lexical inference tasks. Notably, several tasks that require knowledge of hypernymy still pose a challenge for pretrained language models (LMs) such as BERT, underscoring the need to better align their linguistic behavior with our knowledge of LSRs.\nIn this paper, we propose Balaur, a model that addresses this challenge by modeling LSRs directly in the LM's hidden states throughout pretraining. 
Motivating our approach is the hypothesis that the internal representations of LMs can provide an interface to their observable linguistic behavior, and that by controlling one we can influence the other. We validate our hypothesis and demonstrate that Balaur generally improves the performance of large transformer-based LMs on a comprehensive set of hypernymy-informed tasks, as well as on the original LM objective. Code and data are made available at https://github.com/mirandrom/balaur", "keywords": "language model;lm;pretraining;lexical semantics;lexical semantic relations;hypernymy;semantic specialization;wordnet", "primary_area": "", "supplementary_material": "", "author": "Andrei Mircea;Jackie CK Cheung", "authorids": "~Andrei_Mircea1;~Jackie_CK_Cheung1", "gender": "M;Unspecified", "homepage": "http://cs.mcgill.ca/~jcheung/;https://mirandrom.github.io", "dblp": "00/9012;280/9333", "google_scholar": "https://scholar.google.com.tw/citations?user=Um-wmYQAAAAJ;https://scholar.google.ca/citations?user=JhhcaCsAAAAJ", "or_profile": "~Jackie_CK_Cheung1;~Andrei_Mircea_Romascanu1", "aff": "Microsoft;McGill University", "aff_domain": "microsoft.com;mcgill.ca", "position": "Consulting Researcher;MS student", "bibtex": "@inproceedings{\nmircea2023balaur,\ntitle={Balaur: Language Model Pretraining with Lexical Semantic Relations},\nauthor={Andrei Mircea and Jackie CK Cheung},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=Nc6U1Z0DDt}\n}", "github": "", "project": "", "reviewers": "8X5a;WbqA;AUBR", "site": "https://openreview.net/forum?id=Nc6U1Z0DDt", "pdf_size": 0, "rating": "5;5;5", "confidence": "3;3;2", "excitement": "3;2;3", "reproducibility": "4;3;4", "correctness": "3;2;4", "rating_avg": 5.0, "confidence_avg": 2.6666666666666665, "excitement_avg": 2.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0003-0881-428X", "linkedin": ";", "aff_unique_index": "0;1", "aff_unique_norm": "Microsoft;McGill University", "aff_unique_dep": "Microsoft Corporation;", "aff_unique_url": "https://www.microsoft.com;https://www.mcgill.ca", "aff_unique_abbr": "Microsoft;McGill", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "United States;Canada" }, { "id": "NeOsOzNMiS", "title": "LayoutDIT: Layout-Aware End-to-End Document Image Translation with Multi-Step Conductive Decoder", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Document image translation (DIT) aims to translate text embedded in images from one language to another. It is a challenging task that needs to understand visual layout with text semantics simultaneously. However, existing methods struggle to capture the crucial visual layout in real-world complex document images. In this work, we make the first attempt to incorporate layout knowledge into DIT in an end-to-end way. Specifically, we propose a novel Layout-aware end-to-end Document Image Translation (LayoutDIT) with multi-step conductive decoder. A layout-aware encoder is first introduced to model visual layout relations with raw OCR results. Then a novel multi-step conductive decoder is unified with hidden states conduction across three step-decoders to achieve the document translation step by step. 
Benefiting from the layout-aware end-to-end joint training, our LayoutDIT outperforms state-of-the-art methods with better parameter efficiency. Besides, we create a new multi-domain document image translation dataset to validate the model\u2019s generalization. Extensive experiments show that LayoutDIT has a good generalization in diverse and complex layout scenes.", "keywords": "document image;machine translation;layout;multi-step;conductive;end-to-end", "primary_area": "", "supplementary_material": "", "author": "Zhiyang Zhang;Yaping Zhang;Yupu Liang;Lu Xiang;Yang Zhao;Yu Zhou;Chengqing Zong", "authorids": "~Zhiyang_Zhang1;~Yaping_Zhang1;~Yupu_Liang1;~Lu_Xiang1;~Yang_Zhao26;~Yu_Zhou8;~Chengqing_Zong1", "gender": "M;;M;F;F;M;M", "homepage": "https://github.com/zhangzhiyang-2020;https://aprilyapingzhang.github.io;https://liangyupu.github.io/;;;http://www.nlpr.ia.ac.cn/cip/english/zong.htm;https://yzhaoiacas.netlify.app/", "dblp": ";133/5803;89/5863;121/7268;36/2728-1.html;38/6093;", "google_scholar": "oHIDCTsAAAAJ;https://scholar.google.com.hk/citations?user=bAN6Lj0AAAAJ;rC1XVOkAAAAJ;https://scholar.google.com/citations?hl=zh-CN;DDpBW7wAAAAJ;l8lvKOQAAAAJ;https://scholar.google.com.hk/citations?hl=zh-CN", "or_profile": "~Zhiyang_Zhang1;~Yaping_Zhang1;~Yupu_Liang1;~Lu_Xiang1;~Yu_Zhou8;~Chengqing_Zong1;~Zhao_Yang1", "aff": "Institute of automation, Chinese Academy of Sciences;Institute of Automation, Chinese Academy of Sciences;University of Chinese Academy of Sciences;Institute of Automation, Chinese Academy of Sciences;Institute of Automation, Chinese Academy of Sciences;Institute of Automation, Chinese Academy of Sciences;Institute of Automation, Chinese Academy of Sciences", "aff_domain": "ia.ac.cn;ia.ac.cn;ucas.edu.cn;ia.ac.cn;nlpr.ia.ac.cn;ia.ac.cn;ia.ac.cn", "position": "PhD student;Assistant Professor;PhD student;Assistant Professor;Full Professor;Researcher;Associate Professor", "bibtex": "@inproceedings{\nzhang2023layoutdit,\ntitle={Layout{DIT}: Layout-Aware End-to-End Document Image Translation with Multi-Step Conductive Decoder},\nauthor={Zhiyang Zhang and Yaping Zhang and Yupu Liang and Lu Xiang and Yang Zhao and Yu Zhou and Chengqing Zong},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=NeOsOzNMiS}\n}", "github": "", "project": "", "reviewers": "iJGw;ZMNX;sqAx", "site": "https://openreview.net/forum?id=NeOsOzNMiS", "pdf_size": 0, "rating": "3;3;3", "confidence": "2;4;4", "excitement": "2;3;3", "reproducibility": "4;3;3", "correctness": "3;3;2", "rating_avg": 3.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 2.6666666666666665, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 2.6666666666666665, "replies_avg": 11, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0001-6892-905X;;;;;", "linkedin": ";;;;;;", "aff_unique_index": "0;0;1;0;0;0;0", "aff_unique_norm": "Chinese Academy of Sciences;University of Chinese Academy of Sciences", "aff_unique_dep": "Institute of Automation;", "aff_unique_url": "http://www.ia.cas.cn;http://www.ucas.ac.cn", "aff_unique_abbr": "CAS;UCAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "NfN3ZDCcsO", "title": "SAMRank: Unsupervised Keyphrase Extraction using Self-Attention Map in BERT and GPT-2", "track": "main", "status": "Long Main", "tldr": "", "abstract": "We propose a novel unsupervised 
keyphrase extraction approach, called SAMRank, which uses only a self-attention map in a pre-trained language model (PLM) to determine the importance of phrases. Most recent approaches for unsupervised keyphrase extraction mainly utilize contextualized embeddings to capture semantic relevance between words, sentences, and documents. However, due to the anisotropic nature of contextual embeddings, these approaches may not be optimal for semantic similarity measurements. SAMRank as proposed here computes the importance of phrases solely leveraging a self-attention map in a PLM, in this case BERT and GPT-2, eliminating the need to measure embedding similarities. To assess the level of importance, SAMRank combines both global and proportional attention scores through calculations using a self-attention map. We evaluate the SAMRank on three keyphrase extraction datasets: Inspec, SemEval2010, and SemEval2017. The experimental results show that SAMRank outperforms most embedding-based models on both long and short documents and demonstrating that it is possible to use only a self-attention map for keyphrase extraction without relying on embeddings. Source code is available at https://github.com/kangnlp/SAMRank.", "keywords": "Unsupervised Keyphrase Extraction;Pre-trained Language Model;Self-Attention Map;BERT;GPT-2", "primary_area": "", "supplementary_material": "", "author": "Byungha Kang;Youhyun Shin", "authorids": "~Byungha_Kang1;~Youhyun_Shin2", "gender": "M;", "homepage": "https://github.com/kangnlp;https://sites.google.com/view/inudi/members/faculty?authuser=0", "dblp": ";160/5730.html", "google_scholar": ";mv7cedAAAAAJ", "or_profile": "~Byungha_Kang1;~Youhyun_Shin2", "aff": "Incheon National University;Incheon National University", "aff_domain": "inu.ac.kr;inu.ac.kr", "position": "MS student;Associate Professor", "bibtex": "@inproceedings{\nkang2023samrank,\ntitle={{SAMR}ank: Unsupervised Keyphrase Extraction using Self-Attention Map in {BERT} and {GPT}-2},\nauthor={Byungha Kang and Youhyun Shin},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=NfN3ZDCcsO}\n}", "github": "", "project": "", "reviewers": "Tn1j;4f1S;aUyH;G3r8", "site": "https://openreview.net/forum?id=NfN3ZDCcsO", "pdf_size": 0, "rating": "5;5;5;5", "confidence": "5;5;4;3", "excitement": "4;3;3;4", "reproducibility": "5;5;5;5", "correctness": "4;2;3;4", "rating_avg": 5.0, "confidence_avg": 4.25, "excitement_avg": 3.5, "reproducibility_avg": 5.0, "correctness_avg": 3.25, "replies_avg": 14, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";", "linkedin": ";", "aff_unique_index": "0;0", "aff_unique_norm": "Incheon National University", "aff_unique_dep": "", "aff_unique_url": "https://www.inu.ac.kr", "aff_unique_abbr": "INU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "South Korea" }, { "id": "Ni57pgQVqq", "title": "APoLLo : Unified Adapter and Prompt Learning for Vision Language Models", "track": "main", "status": "Long Main", "tldr": "", "abstract": "The choice of input text prompt plays a critical role in the performance of Vision-Language Pretrained (VLP) models such as CLIP. We present APoLLo, a unified multi-modal approach that combines Adapter and Prompt learning for Vision-Language models. 
Our method is designed to substantially improve the generalization capabilities of VLP models when they are fine-tuned in a few-shot setting. We introduce trainable cross-attention-based adapter layers in conjunction with vision and language encoders to strengthen the alignment between the two modalities. We enforce consistency between the respective encoder branches (receiving augmented inputs) to prevent overfitting in downstream tasks. Our method is evaluated on three representative tasks: generalization to novel classes, cross-dataset evaluation, and unseen domain shifts. In practice, APoLLo achieves a relative gain up to 6.03% over MaPLe (SOTA) on novel classes for 10 diverse image recognition datasets.", "keywords": "Vision Language Models;Prompt Tuning;Adapter Tuning;Contrastive Learning", "primary_area": "", "supplementary_material": "", "author": "Sanjoy Chowdhury;Sayan Nag;Dinesh Manocha", "authorids": "~Sanjoy_Chowdhury1;~Sayan_Nag1;~Dinesh_Manocha3", "gender": "M;M;M", "homepage": "https://schowdhury671.github.io/;https://sayannag.github.io/;https://www.cs.umd.edu/people/dmanocha", "dblp": "62/3646;198/1398;m/DineshManocha", "google_scholar": "CEdJKCIAAAAJ;K8w4dj4AAAAJ;X08l_4IAAAAJ", "or_profile": "~Sanjoy_Chowdhury1;~Sayan_Nag1;~Dinesh_Manocha3", "aff": "Adobe Systems;University of Toronto;University of Maryland, College Park", "aff_domain": "adobe.com;utoronto.ca;umd.edu", "position": "Intern;PhD student;Professor", "bibtex": "@inproceedings{\nchowdhury2023apollo,\ntitle={{AP}o{LL}o : Unified Adapter and Prompt Learning for Vision Language Models},\nauthor={Sanjoy Chowdhury and Sayan Nag and Dinesh Manocha},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=Ni57pgQVqq}\n}", "github": "", "project": "", "reviewers": "SvZA;4n34;bnbq;BqGK", "site": "https://openreview.net/forum?id=Ni57pgQVqq", "pdf_size": 0, "rating": "4;4;4;4", "confidence": "4;4;2;3", "excitement": "3;4;4;4", "reproducibility": "2;3;4;4", "correctness": "3;4;4;4", "rating_avg": 4.0, "confidence_avg": 3.25, "excitement_avg": 3.75, "reproducibility_avg": 3.25, "correctness_avg": 3.75, "replies_avg": 14, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0001-7047-9801", "linkedin": "sanjoy2528;sayan-nag-176046124/;dinesh-manocha-2311846", "aff_unique_index": "0;1;2", "aff_unique_norm": "Adobe;University of Toronto;University of Maryland", "aff_unique_dep": "Adobe Systems Incorporated;;", "aff_unique_url": "https://www.adobe.com;https://www.utoronto.ca;https://www/umd.edu", "aff_unique_abbr": "Adobe;U of T;UMD", "aff_campus_unique_index": "1", "aff_campus_unique": ";College Park", "aff_country_unique_index": "0;1;0", "aff_country_unique": "United States;Canada" }, { "id": "NiEYKbNnQO", "title": "Text Rendering Strategies for Pixel Language Models", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Pixel-based language models process text rendered as images, which allows them to handle any script, making them a promising approach to open vocabulary language modelling. 
However, recent approaches use text renderers that produce a large set of almost-equivalent input patches, which may prove sub-optimal for downstream tasks, due to redundancy in the input representations.\nIn this paper, we investigate four approaches to rendering text in the PIXEL model (Rust et al., 2023), and find that simple character bigram rendering brings improved performance on sentence-level tasks without compromising performance on token-level or multilingual tasks.\nThis new rendering strategy also makes it possible to train a more compact model with only 22M parameters that performs on par with the original 86M parameter model.\nOur analyses show that character bigram rendering leads to a consistently better model but with an anisotropic patch embedding space, driven by a patch frequency bias, highlighting the connections between image patch- and tokenization-based language models.", "keywords": "Pixel-based language modelling;isotropy;word frequency bias", "primary_area": "", "supplementary_material": "", "author": "Jonas F. Lotz;Elizabeth Salesky;Phillip Rust;Desmond Elliott", "authorids": "~Jonas_F._Lotz1;~Elizabeth_Salesky1;~Phillip_Rust1;~Desmond_Elliott1", "gender": "M;;;", "homepage": ";https://esalesky.github.io;https://phillip.rs;", "dblp": ";184/8920;263/9843;46/7536", "google_scholar": "rQi0nEcAAAAJ;9I7TjgMAAAAJ;6MxyDqcAAAAJ;", "or_profile": "~Jonas_F._Lotz1;~Elizabeth_Salesky1;~Phillip_Rust1;~Desmond_Elliott1", "aff": "University of Copenhagen;Johns Hopkins University;University of Copenhagen;University of Copenhagen", "aff_domain": "diku.dk;jhu.edu;ku.dk;ku.dk", "position": "PhD student;PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nlotz2023text,\ntitle={Text Rendering Strategies for Pixel Language Models},\nauthor={Jonas F. 
Lotz and Elizabeth Salesky and Phillip Rust and Desmond Elliott},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=NiEYKbNnQO}\n}", "github": "", "project": "", "reviewers": "Tumj;u9Mz;ocCw", "site": "https://openreview.net/forum?id=NiEYKbNnQO", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;2;4", "excitement": "3;2;4", "reproducibility": "3;3;4", "correctness": "3;3;5", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.0, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0001-6405-0590;0000-0001-6765-1447;0000-0001-5123-821X;", "linkedin": "jonas-f-lotz-ab7805113/;elizabeth-salesky;;", "aff_unique_index": "0;1;0;0", "aff_unique_norm": "University of Copenhagen;Johns Hopkins University", "aff_unique_dep": ";", "aff_unique_url": "https://www.ku.dk;https://www.jhu.edu", "aff_unique_abbr": "UCPH;JHU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "Denmark;United States" }, { "id": "Nijnhwu1Uz", "title": "PromptST: Abstract Prompt Learning for End-to-End Speech Translation", "track": "main", "status": "Long Main", "tldr": "", "abstract": "An end-to-end speech-to-text (S2T) translation model is usually initialized from a pre-trained speech recognition encoder and a pre-trained text-to-text (T2T) translation decoder.\nAlthough this straightforward setting has been shown empirically successful, there do not exist clear answers to the research questions: 1) how are speech and text modalities fused in S2T model and 2) how to better fuse the two modalities?\nIn this paper, we take the first step toward understanding the fusion of speech and text features in S2T model.\nWe first design and release a 10GB linguistic probing benchmark, namely Speech-Senteval, to investigate the acoustic and linguistic behaviors of S2T models.\nPreliminary analysis reveals that the uppermost encoder layers of the S2T model can not learn linguistic knowledge efficiently, which is crucial for accurate translation.\nBased on the finding, we further propose a simple plug-in prompt-learning strategy on the uppermost encoder layers to broaden the abstract representation power of the encoder of S2T models.\nWe call such a prompt-enhanced S2T model PromptST.\nExperimental results on four widely-used S2T datasets show that PromptST can deliver significant improvements over a strong baseline by capturing richer linguistic knowledge.\nBenchmarks, code, and scripts are freely available at https://github.com/ytf-philp/PromptST.", "keywords": "Speech-to-Text Translation;Linguistic Probing Benchmark;Prompt Learning", "primary_area": "", "supplementary_material": "", "author": "Tengfei Yu;Liang Ding;Xuebo Liu;Kehai Chen;Meishan Zhang;Dacheng Tao;Min Zhang", "authorids": "~Tengfei_Yu1;~Liang_Ding3;~Xuebo_Liu1;~Kehai_Chen2;~Meishan_Zhang1;~Dacheng_Tao1;~Min_Zhang9", "gender": "M;M;M;M;M;;M", "homepage": ";http://liamding.cc/;https://sunbowliu.github.io/;https://chenkehai.github.io;https://zhangmeishan.github.io/;;https://zhangmin-nlp-ai.github.io/", "dblp": ";88/3340-6.html;166/0029-2;78/9623;127/0273;;83/5342-5", "google_scholar": ";lFCLvOAAAAAJ;XkDl9aoAAAAJ;_M4Am0AAAAAJ;https://scholar.google.com/citations?hl=zh-CN;;https://scholar.google.com/citations?hl=zh-CN", "or_profile": 
"~Tengfei_Yu1;~Liang_Ding3;~Xuebo_Liu1;~Kehai_Chen2;~Meishan_Zhang1;~Dacheng_Tao1;~Min_Zhang9", "aff": "Harbin Institute of Technology;JD Explore Academy, JD.com Inc.;Harbin Institute of Technolgy, Shenzhen;Harbin Institute of Technology (Shenzhen);Tianjin University, China;;Harbin Institute of Technology, Shenzhen", "aff_domain": "hit.edu.cn;jd.com;hit.edu.cn;hit.edu.cn;tju.edu.cn;;hit.edu.cn", "position": "MS student;Research Scientist;Assistant Professor;Assistant Professor;Associate Professor;;Full Professor", "bibtex": "@inproceedings{\nyu2023promptst,\ntitle={Prompt{ST}: Abstract Prompt Learning for End-to-End Speech Translation},\nauthor={Tengfei Yu and Liang Ding and Xuebo Liu and Kehai Chen and Meishan Zhang and Dacheng Tao and Min Zhang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=Nijnhwu1Uz}\n}", "github": "", "project": "", "reviewers": "68rb;LoxQ;A846;gne1;aEu5", "site": "https://openreview.net/forum?id=Nijnhwu1Uz", "pdf_size": 0, "rating": "5;5;5;5;5", "confidence": "3;5;4;3;4", "excitement": "4;3;3;3;4", "reproducibility": "4;4;4;4;3", "correctness": "4;4;2;4;4", "rating_avg": 5.0, "confidence_avg": 3.8, "excitement_avg": 3.4, "reproducibility_avg": 3.8, "correctness_avg": 3.6, "replies_avg": 17, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0000-0002-4346-7618;;;", "linkedin": "%E8%85%BE%E6%96%90-%E4%BA%8E-7b1058220/;;xuebo-liu-47877b195/;;;;", "aff_unique_index": "0;1;0;0;2;0", "aff_unique_norm": "Harbin Institute of Technology;JD.com Inc.;Tianjin University", "aff_unique_dep": ";JD Explore Academy;", "aff_unique_url": "http://www.hit.edu.cn/;https://www.jd.com;http://www.tju.edu.cn", "aff_unique_abbr": "HIT;JD.com;Tianjin U", "aff_campus_unique_index": "0;2;2;2", "aff_campus_unique": "Harbin;;Shenzhen", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "Nk2vfZa4lX", "title": "Appraising the Potential Uses and Harms of LLMs for Medical Systematic Reviews", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Medical systematic reviews play a vital role in healthcare decision making and policy. However, their production is time-consuming, limiting the availability of high-quality and up-to-date evidence summaries. Recent advancements in LLMs offer the potential to automatically generate literature reviews on demand, addressing this issue. However, LLMs sometimes generate inaccurate (and potentially misleading) texts by hallucination or omission. In healthcare, this can make LLMs unusable at best and dangerous at worst. We conducted 16 interviews with international systematic review experts to characterize the perceived utility and risks of LLMs in the specific context of medical evidence reviews. Experts indicated that LLMs can assist in the writing process by drafting summaries, generating templates, distilling information, and crosschecking information. They also raised concerns regarding confidently composed but inaccurate LLM outputs and other potential downstream harms, including decreased accountability and proliferation of low-quality reviews. 
Informed by this qualitative analysis, we identify criteria for rigorous evaluation of biomedical LLMs aligned with domain expert views.", "keywords": "LLMs;Biomedical;Systematic Reviews;Qualitative Study;User Research", "primary_area": "", "supplementary_material": "", "author": "Hye Sun Yun;Iain James Marshall;Thomas Trikalinos;Byron C Wallace", "authorids": "~Hye_Sun_Yun1;~Iain_James_Marshall1;~Thomas_Trikalinos1;~Byron_C_Wallace1", "gender": "F;Not Specified;M;M", "homepage": "https://www.hyesunyun.com/;;;http://www.byronwallace.com/", "dblp": ";117/4523;;00/8247", "google_scholar": "GCUd5rEAAAAJ;4kdySIYAAAAJ;RAr2MWYAAAAJ;KTzRHmwAAAAJ", "or_profile": "~Hye_Sun_Yun1;~Iain_James_Marshall1;~Thomas_Trikalinos1;~Byron_C_Wallace1", "aff": "Northeastern University;King's College London, University of London;Brown University;Northeastern University", "aff_domain": "northeastern.edu;kcl.ac.uk;brown.edu;northeastern.edu", "position": "PhD student;Clinical Senior Lecturer;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nyun2023appraising,\ntitle={Appraising the Potential Uses and Harms of {LLM}s for Medical Systematic Reviews},\nauthor={Hye Sun Yun and Iain James Marshall and Thomas Trikalinos and Byron C Wallace},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=Nk2vfZa4lX}\n}", "github": "", "project": "", "reviewers": "VXn3;UT4H;w8DF", "site": "https://openreview.net/forum?id=Nk2vfZa4lX", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;3", "excitement": "4;4;3", "reproducibility": "", "correctness": "4;3;3", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0001-6405-8082;;;", "linkedin": "hyesunyun;;;", "aff_unique_index": "0;1;2;0", "aff_unique_norm": "Northeastern University;King's College London;Brown University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.northeastern.edu;https://www.kcl.ac.uk;https://www.brown.edu", "aff_unique_abbr": "NEU;KCL;Brown", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "United States;United Kingdom" }, { "id": "NlWH0Kvptf", "title": "FactSpotter: Evaluating the Factual Faithfulness of Graph-to-Text Generation", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Graph-to-text (G2T) generation takes a graph as input and aims to generate a fluent and faithful textual representation of the information in the graph. The task has many applications, such as dialogue generation and question answering. In this work, we investigate to what extent the G2T generation problem is solved for previously studied datasets, and how proposed metrics perform when comparing generated texts. To help address their limitations, we propose a new metric that correctly identifies factual faithfulness, i.e., given a triple (subject, predicate, object), it decides if the triple is present in a generated text. We show that our metric FactSpotter achieves the highest correlation with human annotations on data correctness, data coverage, and relevance. In addition, FactSpotter can be used as a plug-in feature to improve the factual faithfulness of existing models. 
Finally, we investigate if existing G2T datasets are still challenging for state-of-the-art models. Our code is available online: https://github.com/guihuzhang/FactSpotter.", "keywords": "Graph-to-text;Factual Faithfulness;Constrained Text Generation", "primary_area": "", "supplementary_material": "", "author": "Kun Zhang;Oana Balalau;Ioana Manolescu", "authorids": "~Kun_Zhang13;~Oana_Balalau1;~Ioana_Manolescu1", "gender": ";;F", "homepage": ";;https://pages.saclay.inria.fr/ioana.manolescu/", "dblp": ";;m/IoanaManolescu", "google_scholar": ";;q6Ft35wAAAAJ", "or_profile": "~Kun_Zhang13;~Oana_Balalau1;~Ioana_Manolescu1", "aff": ";;Inria", "aff_domain": ";;inria.fr", "position": ";;Principal Researcher", "bibtex": "@inproceedings{\nzhang2023factspotter,\ntitle={FactSpotter: Evaluating the Factual Faithfulness of Graph-to-Text Generation},\nauthor={Kun Zhang and Oana Balalau and Ioana Manolescu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=NlWH0Kvptf}\n}", "github": "", "project": "", "reviewers": "wiwF;NiRu;v9ZD", "site": "https://openreview.net/forum?id=NlWH0Kvptf", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;4;4", "excitement": "4;2;3", "reproducibility": "5;5;5", "correctness": "4;3;3", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.0, "reproducibility_avg": 5.0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0002-0425-2462", "linkedin": ";;ioana-manolescu-0a55b2/?originalSubdomain=fr", "aff_unique_index": "0", "aff_unique_norm": "INRIA", "aff_unique_dep": "", "aff_unique_url": "https://www.inria.fr", "aff_unique_abbr": "Inria", "aff_country_unique_index": "0", "aff_country_unique": "France" }, { "id": "NnVIFpsMAy", "title": "Make Every Example Count: On the Stability and Utility of Self-Influence for Learning from Noisy NLP Datasets", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Increasingly larger datasets have become a standard ingredient to advancing the state-of-the-art in NLP. However, data quality might have already become the bottleneck to unlock further gains. Given the diversity and the sizes of modern datasets, standard data filtering is not straight-forward to apply, because of the multifacetedness of the harmful data and elusiveness of filtering rules that would generalize across multiple tasks. 
We study the fitness of task-agnostic self-influence scores of training examples for data cleaning, analyze their efficacy in capturing naturally occurring outliers, and investigate to what extent self-influence based data cleaning can improve downstream performance in machine translation, question answering and text classification, building on recent approaches to self-influence calculation and automated curriculum learning.", "keywords": "data filtering;influence functions;self-influence;curriculum learning;noisy data;machine translation;question answering", "primary_area": "", "supplementary_material": "", "author": "Irina Bejan;Artem Sokolov;Katja Filippova", "authorids": "~Irina_Bejan1;~Artem_Sokolov1;~Katja_Filippova1", "gender": "F;;F", "homepage": ";https://www.cl.uni-heidelberg.de/~sokolov/;", "dblp": ";79/3174;24/5028", "google_scholar": "nrAPd7wAAAAJ;0jSH2vsAAAAJ;https://scholar.google.ch/citations?user=23xz9QgAAAAJ", "or_profile": "~Irina_Bejan1;~Artem_Sokolov1;~Katja_Filippova1", "aff": "EPFL - EPF Lausanne;Google;Research, Google", "aff_domain": "epfl.ch;google.com;research.google.com", "position": "MS student;Research Scientist;Researcher", "bibtex": "@inproceedings{\nbejan2023make,\ntitle={Make Every Example Count: On the Stability and Utility of Self-Influence for Learning from Noisy {NLP} Datasets},\nauthor={Irina Bejan and Artem Sokolov and Katja Filippova},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=NnVIFpsMAy}\n}", "github": "", "project": "", "reviewers": "2t6t;SJBj;Bjvq", "site": "https://openreview.net/forum?id=NnVIFpsMAy", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;2", "excitement": "4;3;3", "reproducibility": "3;2;3", "correctness": "4;3;3", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 2.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": ";;katja-filippova-93a2144", "aff_unique_index": "0;1;1", "aff_unique_norm": "EPFL;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.epfl.ch;https://www.google.com", "aff_unique_abbr": "EPFL;Google", "aff_campus_unique_index": "0;1;1", "aff_campus_unique": "Lausanne;Mountain View", "aff_country_unique_index": "0;1;1", "aff_country_unique": "Switzerland;United States" }, { "id": "NomitcTG87", "title": "Transformer-based Live Update Generation for Soccer Matches from Microblog Posts", "track": "main", "status": "Short Main", "tldr": "", "abstract": "It has been known to be difficult to generate adequate sports updates from a sequence of vast amounts of diverse live tweets, although the live sports viewing experience with tweets is gaining popularity. In this paper, we focus on soccer matches and work on building a system to generate live updates for soccer matches from tweets so that users can instantly grasp a match\u2019s progress and enjoy the excitement of the match from raw tweets. 
Our proposed system is based on a large pre-trained language model and incorporates a mechanism to control the number of updates and a mechanism to reduce the redundancy of duplicate and similar updates.", "keywords": "Timeline Summarization;Social Media", "primary_area": "", "supplementary_material": "", "author": "Masashi Oshika;Kosuke Yamada;Ryohei Sasano;Koichi Takeda", "authorids": "~Masashi_Oshika1;~Kosuke_Yamada2;~Ryohei_Sasano2;~Koichi_Takeda1", "gender": "M;M;M;", "homepage": "https://sites.google.com/view/masashi-oshika;https://sites.google.com/view/kosuke-yamada/en;http://cr.fvcrc.i.nagoya-u.ac.jp/~sasano/index-e.html;https://researchmap.jp/takedasu?lang=en", "dblp": ";;17/757;24/4299-3.html", "google_scholar": "https://scholar.google.co.jp/citations?user=hNuN0doAAAAJ;https://scholar.google.co.jp/citations?user=x6rA_7wAAAAJ;g9mNQ9MAAAAJ;IaZThNIAAAAJ", "or_profile": "~Masashi_Oshika1;~Kosuke_Yamada2;~Ryohei_Sasano2;~Koichi_Takeda1", "aff": "Nagoya University;Nagoya University;RIKEN;Nagoya University", "aff_domain": "nagoya-u.ac.jp;nagoya-u.ac.jp;riken.jp;ac.jp", "position": "Undergrad student;PhD student;Researcher;Professor", "bibtex": "@inproceedings{\noshika2023transformerbased,\ntitle={Transformer-based Live Update Generation for Soccer Matches from Microblog Posts},\nauthor={Masashi Oshika and Kosuke Yamada and Ryohei Sasano and Koichi Takeda},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=NomitcTG87}\n}", "github": "", "project": "", "reviewers": "23Lj;Yzg3;hkC3", "site": "https://openreview.net/forum?id=NomitcTG87", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;4", "excitement": "3;3;3", "reproducibility": "4;3;4", "correctness": "4;4;4", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 4.0, "replies_avg": 9, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;", "linkedin": ";;;", "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Nagoya University;RIKEN", "aff_unique_dep": ";", "aff_unique_url": "https://www.nagoya-u.ac.jp;https://www.riken.jp", "aff_unique_abbr": "Nagoya U;RIKEN", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Japan" }, { "id": "NphKIYvm9D", "title": "Investigating Multilingual Coreference Resolution by Universal Annotations", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Multilingual coreference resolution (MCR) has been a long-standing and challenging task. With the newly proposed multilingual coreference dataset, CorefUD (Nedoluzhko et al., 2022), we conduct an investigation into the task by using its harmonized universal morphosyntactic and coreference annotations. First, we study coreference by examining the ground truth data at different linguistic levels, namely mention, entity and document levels, and across different genres, to gain insights into the characteristics of coreference across multiple languages. Second, we perform an error analysis of the most challenging cases that the SotA system fails to resolve in the CRAC 2022 shared task using the universal annotations. Last, based on this analysis, we extract features from universal morphosyntactic annotations and integrate these features into a baseline system to assess their potential benefits for the MCR task. 
Our results show that our best configuration of features improves the baseline by 0.9% F1 score.", "keywords": "Multilingual coreference resolution;coreference analysis", "primary_area": "", "supplementary_material": "", "author": "Haixia Chai;Michael Strube", "authorids": "~Haixia_Chai1;~Michael_Strube1", "gender": "F;", "homepage": ";https://www.h-its.org/people/prof-dr-michael-strube/", "dblp": "245/3466;s/MichaelStrube1", "google_scholar": "https://scholar.google.de/citations?user=hVp7LHMAAAAJ;s0_rS0kAAAAJ", "or_profile": "~Haixia_Chai1;~Michael_Strube1", "aff": "Heidelberg Institute for Theoretical Studies;Heidelberg Institute for Theoretical Studies", "aff_domain": "h-its.org;h-its.org", "position": "PhD student;Principal Researcher", "bibtex": "@inproceedings{\nchai2023investigating,\ntitle={Investigating Multilingual Coreference Resolution by Universal Annotations},\nauthor={Haixia Chai and Michael Strube},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=NphKIYvm9D}\n}", "github": "", "project": "", "reviewers": "eyYv;5r1p;qVbW", "site": "https://openreview.net/forum?id=NphKIYvm9D", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;4", "excitement": "3;4;3", "reproducibility": "4;4;4", "correctness": "4;4;3", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.0, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";", "linkedin": ";", "aff_unique_index": "0;0", "aff_unique_norm": "Heidelberg Institute for Theoretical Studies", "aff_unique_dep": "", "aff_unique_url": "https://www.hits.org/", "aff_unique_abbr": "HITS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Germany" }, { "id": "NrCLVmq0KD", "title": "LLM aided semi-supervision for efficient Extractive Dialog Summarization", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "Generating high-quality summaries for chat dialogs often requires large labeled datasets. We propose a method to efficiently use unlabeled data for extractive summarization of customer-agent dialogs. In our method, we frame summarization as a question-answering problem and use state-of-the-art large language models (LLMs) to generate pseudo-labels for a dialog. We then use these pseudo-labels to fine-tune a chat summarization model, effectively transferring knowledge from the large LLM into a smaller specialized model. \nWe demonstrate our method on the TWEETSUMM dataset, and show that using 10\\% of the original labelled data set we can achieve 65.9/57.0/61.0 ROUGE-1/-2/-L, whereas the current state-of-the-art trained on the entire training data set obtains 65.16/55.81/64.37 ROUGE-1/-2/-L. In other words, in the worst case (i.e., ROUGE-L) we still effectively retain 94.7\\% of the performance while using only 10\\% of the data.", "keywords": "Dialog Summarization;Large Language Models;Semi-supervised learning;Transformers;BART;GPT;Rouge", "primary_area": "", "supplementary_material": "", "author": "Nishant Mishra;Gaurav Sahu;Iacer Calixto;Ameen Abu-Hanna;Issam H. 
Laradji", "authorids": "~Nishant_Mishra1;~Gaurav_Sahu2;~Iacer_Calixto2;~Ameen_Abu-Hanna1;~Issam_H._Laradji1", "gender": "M;M;M;M;M", "homepage": "https://mnishant2.github.io/;https://demfier.github.io;https://iacercalixto.github.io;https://sites.google.com/view/homepage-aah/home;https://issamlaradji.github.io/", "dblp": ";227/2467;174/7199;31/59;142/0043", "google_scholar": "https://scholar.google.com/citations?hl=en;nMAt7UMAAAAJ;https://scholar.google.ca/citations?user=W0prRUMAAAAJ;https://scholar.google.nl/citations?user=lymnZacAAAAJ;https://scholar.google.ca/citations?user=8vRS7F0AAAAJ", "or_profile": "~Nishant_Mishra1;~Gaurav_Sahu2;~Iacer_Calixto2;~Ameen_Abu-Hanna1;~Issam_H._Laradji1", "aff": "Amsterdam Universitair Medische Centra;University of Waterloo;Amsterdam UMC, University of Amsterdam;Amsterdam UMC;ServiceNow", "aff_domain": "amc.nl;uwaterloo.ca;amsterdamumc.nl;amsterdamumc.nl;servicenow.com", "position": "PhD student;PhD student;Assistant Professor;Full Professor;Researcher", "bibtex": "@inproceedings{\nmishra2023llm,\ntitle={{LLM} aided semi-supervision for efficient Extractive Dialog Summarization},\nauthor={Nishant Mishra and Gaurav Sahu and Iacer Calixto and Ameen Abu-Hanna and Issam H. Laradji},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=NrCLVmq0KD}\n}", "github": "", "project": "", "reviewers": "P49m;2gVZ;Bx3Q", "site": "https://openreview.net/forum?id=NrCLVmq0KD", "pdf_size": 0, "rating": "2;2;2", "confidence": "5;4;4", "excitement": "4;2;2", "reproducibility": "3;4;4", "correctness": "4;4;4", "rating_avg": 2.0, "confidence_avg": 4.333333333333333, "excitement_avg": 2.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 4.0, "replies_avg": 9, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0003-3725-3987;;0000-0001-6244-7906;0000-0003-4324-7954;", "linkedin": "mnishant2;;iacercalixto/;ameen-abu-hanna-5586171/;issam-laradji-67ba1a99/", "aff_unique_index": "0;1;2;3;4", "aff_unique_norm": "Amsterdam Universitair Medische Centra;University of Waterloo;University of Amsterdam;Amsterdam University Medical Centers;ServiceNow", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.amsterdamumc.nl;https://uwaterloo.ca;https://www.uva.nl;https://www.amsterdamumc.nl/;https://www.servicenow.com", "aff_unique_abbr": "AMC;UW;UvA;AMC;ServiceNow", "aff_campus_unique_index": "1", "aff_campus_unique": ";Amsterdam", "aff_country_unique_index": "0;1;0;0;2", "aff_country_unique": "Netherlands;Canada;United States" }, { "id": "NrmYYAO7N4", "title": "Expand, Highlight, Generate: RL-driven Document Generation for Passage Reranking", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Generating synthetic training data based on large language models (LLMs) for ranking models has gained attention recently. Prior studies use LLMs to build pseudo query-document pairs by generating synthetic queries from documents in a corpus. In this paper, we propose a new perspective of data augmentation: generating synthetic documents from queries. To achieve this, we propose DocGen, that consists of a three-step pipeline that utilizes the few-shot capabilities of LLMs. DocGen pipeline performs synthetic document generation by (i) expanding, (ii) highlighting the original query, and then (iii) generating a synthetic document that is likely to be relevant to the query. 
To further improve the relevance between generated synthetic documents and their corresponding queries, we propose DocGen-RL, which regards the estimated relevance of the document as a reward and leverages reinforcement learning (RL) to optimize DocGen pipeline. Extensive experiments demonstrate that DocGen pipeline and DocGen-RL significantly outperform existing state-of-theart data augmentation methods, such as InPars, indicating that our new perspective of generating documents leverages the capacity of LLMs in generating synthetic data more effectively. We release the code, generated data, and model checkpoints to foster research in this area.", "keywords": "Synthetic document generation;Data augmentation;Information retrieval", "primary_area": "", "supplementary_material": "", "author": "Arian Askari;Mohammad Aliannejadi;Chuan Meng;Evangelos Kanoulas;Suzan Verberne", "authorids": "~Arian_Askari1;~Mohammad_Aliannejadi2;~Chuan_Meng1;~Evangelos_Kanoulas1;~Suzan_Verberne1", "gender": "M;M;;M;F", "homepage": "https://arian-askari.github.io/;https://aliannejadi.com;;https://staff.fnwi.uva.nl/e.kanoulas/;https://liacs.leidenuniv.nl/~verbernes/", "dblp": ";178/6008;;22/3088;86/5095", "google_scholar": "fp9QtoEAAAAJ;yiZk6coAAAAJ;;0HybxV4AAAAJ;https://scholar.google.nl/citations?user=-IHDKA0AAAAJ", "or_profile": "~Arian_Askari1;~Mohammad_Aliannejadi2;~Chuan_Meng1;~Evangelos_Kanoulas1;~Suzan_Verberne1", "aff": "Leiden University, Leiden University;University of Amsterdam;;University of Amsterdam;Universiteit Leiden", "aff_domain": "liacs.leidenuniv.nl;uva.nl;;uva.nl;universiteitleiden.nl", "position": "PhD student;Assistant Professor;;Full Professor;Associate Professor", "bibtex": "@inproceedings{\naskari2023expand,\ntitle={Expand, Highlight, Generate: {RL}-driven Document Generation for Passage Reranking},\nauthor={Arian Askari and Mohammad Aliannejadi and Chuan Meng and Evangelos Kanoulas and Suzan Verberne},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=NrmYYAO7N4}\n}", "github": "", "project": "", "reviewers": "qmXW;y3cg;yxMC;LWa7", "site": "https://openreview.net/forum?id=NrmYYAO7N4", "pdf_size": 0, "rating": "4;4;4;4", "confidence": "4;3;4;3", "excitement": "4;3;3;4", "reproducibility": "4;4;4;4", "correctness": "4;4;4;4", "rating_avg": 4.0, "confidence_avg": 3.5, "excitement_avg": 3.5, "reproducibility_avg": 4.0, "correctness_avg": 4.0, "replies_avg": 14, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-9447-4172;;0000-0002-8312-0694;0000-0002-9609-9505", "linkedin": "arian-askari/;https://linkedin.com/in/maliannejadi/;;ekanou/;suzanverberne/", "aff_unique_index": "0;1;1;0", "aff_unique_norm": "Leiden University;University of Amsterdam", "aff_unique_dep": ";", "aff_unique_url": "https://www.universiteitleiden.nl;https://www.uva.nl", "aff_unique_abbr": "LU;UvA", "aff_campus_unique_index": "0", "aff_campus_unique": "Leiden;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Netherlands" }, { "id": "NtHfJrjkiv", "title": "ReCEval: Evaluating Reasoning Chains via Correctness and Informativeness", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Multi-step reasoning ability is fundamental to many natural language tasks, yet it is unclear what constitutes a good reasoning chain and how to evaluate them. 
Most existing methods focus solely on whether the reasoning chain leads to the correct conclusion, but this answer-oriented view may confound reasoning quality with other spurious shortcuts to predict the answer. To bridge this gap, we evaluate reasoning chains by viewing them as informal proofs that derive the final answer. Specifically, we propose ReCEval (Reasoning Chain Evaluation), a framework that evaluates reasoning chains via two key properties: (1) correctness, i.e., each step makes a valid inference based on information contained within the step, preceding steps, and input context, and (2) informativeness, i.e., each step provides new information that is helpful towards deriving the generated answer. We evaluate these properties by developing metrics using natural language inference models and $\\mathcal{V}$-Information. On multiple datasets, we show that ReCEval effectively identifies various error types and yields notable improvements compared to prior methods. We analyze the impact of step boundaries, and previous steps on evaluating correctness and demonstrate that our informativeness metric captures the expected flow of information in high-quality reasoning chains. Finally, we show that scoring reasoning chains based on ReCEval improves downstream task performance.", "keywords": "reasoning;multi-step reasoning;evaluation;information-gain", "primary_area": "", "supplementary_material": "", "author": "Archiki Prasad;Swarnadeep Saha;Xiang Zhou;Mohit Bansal", "authorids": "~Archiki_Prasad1;~Swarnadeep_Saha2;~Xiang_Zhou3;~Mohit_Bansal2", "gender": "F;;M;M", "homepage": "https://archiki.github.io/;https://owenzx.github.io/;https://www.cs.unc.edu/~mbansal/;https://swarnahub.github.io/", "dblp": "264/2812;65/5138;32/5243.html;203/9296", "google_scholar": "Svcwv-IAAAAJ;Q9gfhNMAAAAJ;DN8QtscAAAAJ;sY5SyBgAAAAJ", "or_profile": "~Archiki_Prasad1;~Xiang_Zhou3;~Mohit_Bansal2;~Swarnadeep_Saha1", "aff": "University of North Carolina, Chapel Hill;University of North Carolina, Chapel Hill;University of North Carolina at Chapel Hill;Department of Computer Science, University of North Carolina, Chapel Hill", "aff_domain": "unc.edu;cs.unc.edu;unc.edu;cs.unc.edu", "position": "PhD student;PhD student;Full Professor;PhD student", "bibtex": "@inproceedings{\nprasad2023receval,\ntitle={Re{CE}val: Evaluating Reasoning Chains via Correctness and Informativeness},\nauthor={Archiki Prasad and Swarnadeep Saha and Xiang Zhou and Mohit Bansal},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=NtHfJrjkiv}\n}", "github": "", "project": "", "reviewers": "qx1C;HVRX;Yqzx", "site": "https://openreview.net/forum?id=NtHfJrjkiv", "pdf_size": 0, "rating": "5;5;5", "confidence": "3;4;4", "excitement": "4;3;4", "reproducibility": "4;3;4", "correctness": "4;4;5", "rating_avg": 5.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 4.333333333333333, "replies_avg": 9, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;", "linkedin": "archiki-prasad;;;", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of North Carolina", "aff_unique_dep": "", "aff_unique_url": "https://www.unc.edu", "aff_unique_abbr": "UNC", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Chapel Hill", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "NuMemgzPYT", "title": 
"LLM-in-the-loop: Leveraging Large Language Model for Thematic Analysis", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "Thematic analysis (TA) has been widely used for analyzing qualitative data in many disciplines and fields. To ensure reliable analysis, the same piece of data is typically assigned to at least two human coders. Moreover, to produce meaningful and useful analysis, human coders develop and deepen their data interpretation and coding over multiple iterations, making TA labor-intensive and time-consuming. Recently, the emerging field of large language models (LLMs) research has shown that LLMs have the potential to replicate human-like behavior in various tasks: in particular, LLMs outperform crowd workers on text-annotation tasks, suggesting an opportunity to leverage LLMs on TA. We propose a human\u2013LLM collaboration framework (i.e., LLM-in-the-loop) to conduct TA with in-context learning (ICL). This framework provides the prompt to frame discussions with an LLM (e.g., GPT-3.5) to generate the final codebook for TA. We demonstrate the utility of this framework using survey datasets on the aspects of the music listening experience and the usage of a password manager. Results of the two case studies show that the proposed framework yields similar coding quality to that of human coders but reduces TA\u2019s labor and time demands.", "keywords": "thematic analysis;NLP applications;qualitative research", "primary_area": "", "supplementary_material": "", "author": "Shih-Chieh Dai;Aiping Xiong;Lun-Wei Ku", "authorids": "~Shih-Chieh_Dai2;~Aiping_Xiong1;~Lun-Wei_Ku1", "gender": "M;;F", "homepage": "https://sjdai.github.io;;http://www.lunweiku.com/", "dblp": "179/8789;;82/2054", "google_scholar": "4ze3U6AAAAAJ;;SzcLXlkAAAAJ", "or_profile": "~Shih-Chieh_Dai2;~Aiping_Xiong1;~Lun-Wei_Ku1", "aff": "University of Texas at Austin;;Academia Sinica", "aff_domain": "utexas.edu;;sinica.edu.tw", "position": "MS student;;Researcher", "bibtex": "@inproceedings{\ndai2023llmintheloop,\ntitle={{LLM}-in-the-loop: Leveraging Large Language Model for Thematic Analysis},\nauthor={Shih-Chieh Dai and Aiping Xiong and Lun-Wei Ku},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=NuMemgzPYT}\n}", "github": "", "project": "", "reviewers": "WzFN;Mnmq;rocS;Gc6Y", "site": "https://openreview.net/forum?id=NuMemgzPYT", "pdf_size": 0, "rating": "3;3;3;3", "confidence": "4;3;4;5", "excitement": "2;3;4;3", "reproducibility": "3;3;5;3", "correctness": "3;3;4;3", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 3.0, "reproducibility_avg": 3.5, "correctness_avg": 3.25, "replies_avg": 14, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-5439-3917;;0000-0003-2691-5404", "linkedin": "scdai;;lun-wei-ku/", "aff_unique_index": "0;1", "aff_unique_norm": "University of Texas at Austin;Academia Sinica", "aff_unique_dep": ";", "aff_unique_url": "https://www.utexas.edu;https://www.sinica.edu.tw", "aff_unique_abbr": "UT Austin;Academia Sinica", "aff_campus_unique_index": "0;1", "aff_campus_unique": "Austin;Taiwan", "aff_country_unique_index": "0;1", "aff_country_unique": "United States;China" }, { "id": "NwJVbDxfTd", "title": "Semantic Similarity Covariance Matrix Shrinkage", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "An accurate estimation of the covariance matrix is a critical component of many applications
in finance, including portfolio optimization. The sample covariance suffers from the curse of dimensionality when the number of observations is in the same order or lower than the number of variables. This tends to be the case in portfolio optimization, where a portfolio manager can choose between thousands of stocks using historical daily returns to guide their investment decisions. To address this issue, past works proposed linear covariance shrinkage to regularize the estimated matrix. While effective, the proposed methods relied solely on historical price data and thus ignored company fundamental data. In this work, we propose to utilise semantic similarity derived from textual descriptions or knowledge graphs to improve the covariance estimation. Rather than using the semantic similarity directly as a biased estimator to the covariance, we employ it as a shrinkage target. The resulting covariance estimators leverage both semantic similarity and recent price history, and can be readily adapted to a broad range of financial securities. The effectiveness of the approach is demonstrated for a period including diverse market conditions and compared with the covariance shrinkage prior art.", "keywords": "semantics;embeddings;knowledge graphs;covariance;finance;portfolio optimization", "primary_area": "", "supplementary_material": "", "author": "Guillaume Becquin;Saher Esmeir", "authorids": "~Guillaume_Becquin1;~Saher_Esmeir1", "gender": ";", "homepage": ";", "dblp": ";64/4011", "google_scholar": ";pR7dZ1oAAAAJ", "or_profile": "~Guillaume_Becquin1;~Saher_Esmeir1", "aff": ";Bloomberg", "aff_domain": ";bloomberg.com", "position": ";Researcher", "bibtex": "@inproceedings{\nbecquin2023semantic,\ntitle={Semantic Similarity Covariance Matrix Shrinkage},\nauthor={Guillaume Becquin and Saher Esmeir},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=NwJVbDxfTd}\n}", "github": "", "project": "", "reviewers": "yKen;NdPC;EMEZ", "site": "https://openreview.net/forum?id=NwJVbDxfTd", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;2", "excitement": "4;3;3", "reproducibility": "3;4;3", "correctness": "3;3;3", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";", "linkedin": ";", "aff_unique_index": "0", "aff_unique_norm": "Bloomberg", "aff_unique_dep": "", "aff_unique_url": "https://www.bloomberg.com", "aff_unique_abbr": "Bloomberg", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "id": "Nx9D21g1lW", "title": "PivotFEC: Enhancing Few-shot Factual Error Correction with a Pivot Task Approach using Large Language Models", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Factual Error Correction (FEC) aims to rectify false claims by making minimal revisions to align them more accurately with supporting evidence. However, the lack of datasets containing false claims and their corresponding corrections has impeded progress in this field. Existing distantly supervised models typically employ the mask-then-correct paradigm, where a masker identifies problematic spans in false claims, followed by a corrector to predict the masked portions. 
Unfortunately, accurately identifying errors in claims is challenging, leading to issues like over-erasure and incorrect masking. To overcome these challenges, we present PivotFEC, a method that enhances few-shot FEC with a pivot task approach using large language models (LLMs). Specifically, we introduce a pivot task called factual error injection, which leverages LLMs (e.g., ChatGPT) to intentionally generate text containing factual errors under few-shot settings; then, the generated text with factual errors can be used to train the FEC corrector. Our experiments on a public dataset demonstrate the effectiveness of PivotFEC in two significant ways: Firstly, it improves the widely-adopted SARI metrics by 11.3 compared to the best-performing distantly supervised methods. Secondly, it outperforms its few-shot counterpart (i.e., LLMs are directly used to solve FEC) by 7.9 points in SARI, validating the efficacy of our proposed pivot task.", "keywords": "Factual Error Correction;Large Language Models;Few-shot", "primary_area": "", "supplementary_material": "", "author": "Xingwei He;A-Long Jin;Jun Ma;Yuan Yuan;Siu Ming Yiu", "authorids": "~Xingwei_He1;~A-Long_Jin1;~Jun_Ma11;~Yuan_Yuan19;~Siu_Ming_Yiu1", "gender": "M;M;M;M;M", "homepage": "https://scholar.google.com/citations?user=p1a5WXIAAAAJ&hl=zh-CN;;https://www.arch.hku.hk/staff/upad/ma-jun/;https://yyxhdy.github.io;https://www.cs.hku.hk/index.php/people/academic-staff/smyiu", "dblp": "18/8988-3;;;;y/SiuMingYiu.html", "google_scholar": "p1a5WXIAAAAJ;YpMInDMAAAAJ;;ygShHQUAAAAJ;QFNVqjEAAAAJ", "or_profile": "~Xingwei_He1;~A-Long_Jin1;~Jun_Ma11;~Yuan_Yuan19;~Siu_Ming_Yiu1", "aff": "The University of Hong Kong;The University of Hong Kong;University of Hong Kong;Beihang University;University of Hong Kong", "aff_domain": "hku.hk;hku.hk;hku.hk;buaa.edu.cn;hku.hk", "position": "Postdoc;PhD student;Assistant Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nhe2023pivotfec,\ntitle={Pivot{FEC}: Enhancing Few-shot Factual Error Correction with a Pivot Task Approach using Large Language Models},\nauthor={Xingwei He and A-Long Jin and Jun Ma and Yuan Yuan and Siu Ming Yiu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=Nx9D21g1lW}\n}", "github": "", "project": "", "reviewers": "8rV1;PYxa;Ejt3", "site": "https://openreview.net/forum?id=Nx9D21g1lW", "pdf_size": 0, "rating": "2;2;2", "confidence": "4;3;3", "excitement": "2;3;4", "reproducibility": "4;4;4", "correctness": "3;4;3", "rating_avg": 2.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.0, "reproducibility_avg": 4.0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;0000-0002-3975-8500", "linkedin": ";;;;", "aff_unique_index": "0;0;0;1;0", "aff_unique_norm": "University of Hong Kong;Beihang University", "aff_unique_dep": ";", "aff_unique_url": "https://www.hku.hk;http://www.buaa.edu.cn/", "aff_unique_abbr": "HKU;BUAA", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "NxOeOxe6qs", "title": "Variator: Accelerating Pre-trained Models with Plug-and-Play Compression Modules", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Large language models (LLMs) have achieved remarkable results on NLP tasks but at the expense of huge parameter sizes and the 
consequent computational costs. In this paper, we propose Variator, a parameter-efficient acceleration method that enhances computational efficiency through plug-and-play compression plugins. Compression plugins are designed to reduce the sequence length via compressing multiple hidden vectors into one and trained with original LLMs frozen. Different from traditional model acceleration methods, which compress LLMs to smaller sizes, Variator offers two distinct advantages: (1) In real-world applications, the plug-and-play nature of our compression plugins enables dynamic selection of different compression plugins with varying acceleration ratios based on the current workload. (2) The compression plugin comprises a few compact neural network layers with minimal parameters, significantly saving storage and memory overhead, particularly in scenarios with a growing number of tasks. We validate the effectiveness of Variator on seven datasets. Experimental results show that Variator can save 53\\% computational costs using only 0.9\\% additional parameters with a performance drop of less than 2\\%. Moreover, when the model scales to billions of parameters, Variator matches the strong performance of uncompressed LLMs. Our code and checkpoints will be released to facilitate future work.", "keywords": "Large language model;model compression;plugins", "primary_area": "", "supplementary_material": "", "author": "Chaojun Xiao;Yuqi Luo;Wenbin Zhang;Pengle Zhang;Xu Han;Yankai Lin;Zhengyan Zhang;Ruobing Xie;Zhiyuan Liu;Maosong Sun;Jie Zhou", "authorids": "~Chaojun_Xiao1;~Yuqi_Luo1;~Wenbin_Zhang3;~Pengle_Zhang1;~Xu_Han2;~Yankai_Lin1;~Zhengyan_Zhang1;~Ruobing_Xie2;~Zhiyuan_Liu1;~Maosong_Sun1;~Jie_Zhou8", "gender": "M;M;;;;M;M;M;M;M;M", "homepage": "https://xcjthu.github.io/;https://github.com/demerzel-iv/;;;;https://linyankai.github.io/;;http://nlp.csai.tsinghua.edu.cn/~xrb/;http://nlp.csai.tsinghua.edu.cn/~lzy;https://www.cs.tsinghua.edu.cn/csen/info/1312/4394.htm;", "dblp": "223/4856;;;;;161/0001.html;;178/8590;53/3245-1;95/3291-1;00/5012-16", "google_scholar": "xoC8smYAAAAJ;;;;;https://scholar.google.com.hk/citations?user=j8K1FqEAAAAJ;;j3OX8KUAAAAJ;dT0v5u0AAAAJ;https://scholar.google.com.tw/citations?user=zIgT0HMAAAAJ;https://scholar.google.com.hk/citations?user=OijxQCMAAAAJ", "or_profile": "~Chaojun_Xiao1;~Yuqi_Luo1;~Wenbin_Zhang3;~Pengle_Zhang1;~Xu_Han2;~Yankai_Lin1;~Zhengyan_Zhang1;~Ruobing_Xie2;~Zhiyuan_Liu1;~Maosong_Sun1;~Jie_Zhou8", "aff": "Tsinghua University;Tsinghua University;Tsinghua University;;;Renmin University of China;Tsinghua University;Tencent;Tsinghua University;Tsinghua University;WeChat AI, Tencent Inc.", "aff_domain": "tsinghua.edu.cn;tsinghua.edu.cn;mail.tsinghua.edu.cn;;;ruc.edu.cn;tsinghua.edu.cn;tencent.com;tsinghua.edu.cn;tsinghua.edu.cn;tencent.com", "position": "PhD student;Undergrad student;Research Assistant;;;Assistant Professor;PhD student;Senior researcher;Associate Professor;Full Professor;Principal Researcher", "bibtex": "@inproceedings{\nxiao2023variator,\ntitle={Variator: Accelerating Pre-trained Models with Plug-and-Play Compression Modules},\nauthor={Chaojun Xiao and Yuqi Luo and Wenbin Zhang and Pengle Zhang and Xu Han and Yankai Lin and Zhengyan Zhang and Ruobing Xie and Zhiyuan Liu and Maosong Sun and Jie Zhou},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=NxOeOxe6qs}\n}", "github": "", "project": "", "reviewers": "3yEr;prtK;rvsY", "site": 
"https://openreview.net/forum?id=NxOeOxe6qs", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;3;4", "excitement": "2;3;3", "reproducibility": "3;4;4", "correctness": "3;3;4", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 2.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 10, "authors#_avg": 11, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;0000-0002-9182-8158;;0000-0003-3170-5647;0000-0002-7709-2543;;0000-0002-5899-5165", "linkedin": ";;https://www.linkedin.cn/incareer/in/ACoAADOMbHEBBHe39PK2eK0gTam8cPUxm6TyYXk;;;;;;;;", "aff_unique_index": "0;0;0;1;0;2;0;0;2", "aff_unique_norm": "Tsinghua University;Renmin University of China;Tencent", "aff_unique_dep": ";;Tencent Holdings Limited", "aff_unique_url": "https://www.tsinghua.edu.cn;http://www.ruc.edu.cn;https://www.tencent.com", "aff_unique_abbr": "THU;RUC;Tencent", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "O1IEUXd4SI", "title": "Do Stochastic Parrots have Feelings Too? Improving Neural Detection of Synthetic Text via Emotion Recognition", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Recent developments in generative AI have shone a spotlight on high-performance synthetic text generation technologies. The now wide availability and ease of use of such models highlights the urgent need to provide equally powerful technologies capable of identifying synthetic text. With this in mind, we draw inspiration from psychological studies which suggest that people can be driven by emotion and encode emotion in the text they compose. We hypothesize that pretrained language models (PLMs) have an affective deficit because they lack such an emotional driver when generating text and consequently may generate synthetic text which has affective incoherence i.e. lacking the kind of emotional coherence present in human-authored text. We subsequently develop an emotionally aware detector by fine-tuning a PLM on emotion. Experiment results indicate that our emotionally-aware detector achieves improvements across a range of synthetic text generators, various sized models, datasets, and domains. Finally, we compare our emotionally-aware synthetic text detector to ChatGPT in the task of identification of its own output and show substantial gains, reinforcing the potential of emotion as a signal to identify synthetic text. Code, models, and datasets are available at https: //github.com/alanagiasi/emoPLMsynth", "keywords": "synthetic text detection;neural text detection;emotion;affective deficit", "primary_area": "", "supplementary_material": "", "author": "Alan Cowap;Yvette Graham;Jennifer Foster", "authorids": "~Alan_Cowap1;~Yvette_Graham1;~Jennifer_Foster2", "gender": ";F;F", "homepage": ";;https://www.computing.dcu.ie/~jfoster", "dblp": "298/1488;05/8150;14/3001", "google_scholar": "LSAuD90AAAAJ;;SC2xBNwAAAAJ", "or_profile": "~Alan_Cowap1;~Yvette_Graham1;~Jennifer_Foster2", "aff": "Dublin City University;;Dublin City University", "aff_domain": "dcu.ie;;dcu.ie", "position": "PhD student;;Lecturer", "bibtex": "@inproceedings{\ncowap2023do,\ntitle={Do Stochastic Parrots have Feelings Too? 
Improving Neural Detection of Synthetic Text via Emotion Recognition},\nauthor={Alan Cowap and Yvette Graham and Jennifer Foster},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=O1IEUXd4SI}\n}", "github": "", "project": "", "reviewers": "DyWC;TNw9;8S6u", "site": "https://openreview.net/forum?id=O1IEUXd4SI", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;3;4", "excitement": "3;3;3", "reproducibility": "4;3;4", "correctness": "3;4;3", "rating_avg": 3.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-6300-6034;;", "linkedin": ";;", "aff_unique_index": "0;0", "aff_unique_norm": "Dublin City University", "aff_unique_dep": "", "aff_unique_url": "https://www.dcu.ie", "aff_unique_abbr": "DCU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Ireland" }, { "id": "O36QcmUEDM", "title": "JWSign: A Highly Multilingual Corpus of Bible Translations for more Diversity in Sign Language Processing", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Advancements in sign language processing have been hindered by a lack of sufficient data, impeding progress in recognition, translation, and production tasks. The absence of comprehensive sign language datasets across the world's sign languages has widened the gap in this field, resulting in a few sign languages being studied more than others, making this research area extremely skewed mostly towards sign languages from high-income countries. In this work we introduce a new large and highly multilingual dataset for sign language translation: JWSign. The dataset consists of 2,530 hours of Bible translations in 98 sign languages, featuring more than 1,500 individual signers. On this dataset, we report neural machine translation experiments. Apart from bilingual baseline systems, we also train multilingual systems, including some that take into account the typological relatedness of signed or spoken languages. 
Our experiments highlight that multilingual systems are superior to bilingual baselines, and that in higher-resource scenarios, clustering language pairs that are related improves translation quality.", "keywords": "Machine Translation;Sign Languages;Dataset;Multilinguality", "primary_area": "", "supplementary_material": "", "author": "Shester Gueuwou;Sophie Siake;Colin Leong;Mathias M\u00fcller", "authorids": "~Shester_Gueuwou2;~Sophie_Siake1;~Colin_Leong1;~Mathias_M\u00fcller1", "gender": ";;;M", "homepage": ";;https://cdleong.github.io/;https://www.cl.uzh.ch/de/people/team/compling/mmueller.html", "dblp": ";;;07/9808-2", "google_scholar": ";;32mhzoMAAAAJ;kcpNn2EAAAAJ", "or_profile": "~Shester_Gueuwou2;~Sophie_Siake1;~Colin_Leong1;~Mathias_M\u00fcller1", "aff": ";;University of Dayton;University of Zurich", "aff_domain": ";;udayton.edu;cl.uzh.ch", "position": ";;PhD student;Postdoc", "bibtex": "@inproceedings{\ngueuwou2023jwsign,\ntitle={{JWS}ign: A Highly Multilingual Corpus of Bible Translations for more Diversity in Sign Language Processing},\nauthor={Shester Gueuwou and Sophie Siake and Colin Leong and Mathias M{\\\"u}ller},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=O36QcmUEDM}\n}", "github": "", "project": "", "reviewers": "Hyq8;Ty6U;x6To", "site": "https://openreview.net/forum?id=O36QcmUEDM", "pdf_size": 0, "rating": "4;4;4", "confidence": "2;3;2", "excitement": "3;3;3", "reproducibility": "3;3;3", "correctness": "4;3;3", "rating_avg": 4.0, "confidence_avg": 2.3333333333333335, "excitement_avg": 3.0, "reproducibility_avg": 3.0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0009-0004-8164-4402;0000-0002-8248-199X", "linkedin": ";;colin-l-52b4536a;", "aff_unique_index": "0;1", "aff_unique_norm": "University of Dayton;University of Zurich", "aff_unique_dep": ";", "aff_unique_url": "https://www.udayton.edu;https://www.unizh.ch", "aff_unique_abbr": "UD;UZH", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "United States;Switzerland" }, { "id": "O4gELC78Bq", "title": "Towards Detecting Contextual Real-Time Toxicity for In-Game Chat", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Real-time toxicity detection in online environments poses a significant challenge, due to the increasing prevalence of social media and gaming platforms. We introduce ToxBuster, a simple and scalable model that reliably detects toxic content in real-time for a line of chat by including chat history and metadata. ToxBuster consistently outperforms conventional toxicity models across popular multiplayer games, including Rainbow Six Siege, For Honor, and DOTA 2. We conduct an ablation study to assess the importance of each model component and explore ToxBuster's transferability across the datasets. Furthermore, we showcase ToxBuster's efficacy in post-game moderation, successfully flagging 82.1% of chat-reported players at a precision level of 90.0%. 
Additionally, we show how an additional 6\\% of unreported toxic players can be proactively moderated.", "keywords": "Real-Time Toxicity Detection;Game Chat Toxicity;Game Chat Moderation", "primary_area": "", "supplementary_material": "", "author": "Zachary Yang;Nicolas Grenon-Godbout;Reihaneh Rabbany", "authorids": "~Zachary_Yang1;~Nicolas_Grenon-Godbout1;~Reihaneh_Rabbany1", "gender": "M;M;F", "homepage": "https://rstzzz.github.io/;https://www.linkedin.com/in/nicolas-grenon-godbout/;http://www.reirab.com/", "dblp": "312/2814;;94/9024", "google_scholar": "ljOxbowAAAAJ;;https://scholar.google.ca/citations?user=Foh_c-QAAAAJ", "or_profile": "~Zachary_Yang1;~Nicolas_Grenon-Godbout1;~Reihaneh_Rabbany1", "aff": "McGill University;;Montreal Institute for Learning Algorithms, University of Montreal, University of Montreal", "aff_domain": "cs.mcgill.ca;;mila.umontreal.ca", "position": "PhD student;;Assistant Professor", "bibtex": "@inproceedings{\nyang2023towards,\ntitle={Towards Detecting Contextual Real-Time Toxicity for In-Game Chat},\nauthor={Zachary Yang and Nicolas Grenon-Godbout and Reihaneh Rabbany},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=O4gELC78Bq}\n}", "github": "", "project": "", "reviewers": "yvYH;BWUQ;KywN", "site": "https://openreview.net/forum?id=O4gELC78Bq", "pdf_size": 0, "rating": "2;2;2", "confidence": "4;3;5", "excitement": "4;3;3", "reproducibility": "4;3;3", "correctness": "4;3;3", "rating_avg": 2.0, "confidence_avg": 4.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0009-0009-6765-2293;;", "linkedin": "zachary-y-647209103/;nicolas-grenon-godbout/;", "aff_unique_index": "0;1", "aff_unique_norm": "McGill University;University of Montreal", "aff_unique_dep": ";Montreal Institute for Learning Algorithms", "aff_unique_url": "https://www.mcgill.ca;https://www.umontreal.ca", "aff_unique_abbr": "McGill;UM", "aff_campus_unique_index": "1", "aff_campus_unique": ";Montreal", "aff_country_unique_index": "0;0", "aff_country_unique": "Canada" }, { "id": "O4kDO3yS9B", "title": "Rethinking the Evaluation for Conversational Recommendation in the Era of Large Language Models", "track": "main", "status": "Long Main", "tldr": "", "abstract": "The recent success of large language models (LLMs) has shown great potential to develop more powerful conversational recommender systems (CRSs), which rely on natural language conversations to satisfy user needs. In this paper, we embark on an investigation into the utilization of ChatGPT for CRSs, revealing the inadequacy of the existing evaluation protocol. It might overemphasize the matching with ground-truth items annotated by humans while neglecting the interactive nature of CRSs.\n\nTo overcome the limitation, we further propose an **i**nteractive **Eva**luation approach based on **L**L**M**s, named **iEvaLM**, which harnesses LLM-based user simulators. Our evaluation approach can simulate various system-user interaction scenarios. Through the experiments on two public CRS datasets, we demonstrate notable improvements compared to the prevailing evaluation protocol. Furthermore, we emphasize the evaluation of explainability, and ChatGPT showcases persuasive explanation generation for its recommendations. 
Our study contributes to a deeper comprehension of the untapped potential of LLMs for CRSs and provides a more flexible and realistic evaluation approach for future research about LLM-based CRSs.", "keywords": "Conversational Recommendation;Large Language Model;Evaluation", "primary_area": "", "supplementary_material": "", "author": "Xiaolei Wang;Xinyu Tang;Xin Zhao;Jingyuan Wang;Ji-Rong Wen", "authorids": "~Xiaolei_Wang1;~Xinyu_Tang2;~Xin_Zhao10;~Jingyuan_Wang2;~Ji-Rong_Wen1", "gender": "M;M;M;M;M", "homepage": "https://scholar.google.com/citations?hl=zh-CN&user=aYpLzewAAAAJ;;https://gsai.ruc.edu.cn/addons/teacher/index/info.html?user_id=5&ruccode=20140041&ln=cn;https://www.bigscity.com;https://gsai.ruc.edu.cn/english/jrwen", "dblp": "09/766-5;65/5518-4;https://dblp.uni-trier.de/pid/52/8700.html;75/5135-1.html;w/JRWen", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com/citations?hl=zh-CN;JNhNacoAAAAJ;qsLImx8AAAAJ;tbxCHJgAAAAJ", "or_profile": "~Xiaolei_Wang1;~Xinyu_Tang2;~Xin_Zhao10;~Jingyuan_Wang2;~Ji-Rong_Wen1", "aff": "Renmin University of China;Beijing Normal University;Renmin University of China;Beihang University;Renmin University of China", "aff_domain": "ruc.edu.cn;bnu.edu.cn;ruc.edu.cn;buaa.edu.cn;ruc.edu.cn", "position": "PhD student;Undergrad student;Full Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nwang2023rethinking,\ntitle={Rethinking the Evaluation for Conversational Recommendation in the Era of Large Language Models},\nauthor={Xiaolei Wang and Xinyu Tang and Xin Zhao and Jingyuan Wang and Ji-Rong Wen},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=O4kDO3yS9B}\n}", "github": "", "project": "", "reviewers": "ANzj;uFwV;P19e;UZ7K", "site": "https://openreview.net/forum?id=O4kDO3yS9B", "pdf_size": 0, "rating": "4;4;4;4", "confidence": "4;4;2;3", "excitement": "4;3;3;3", "reproducibility": "4;4;5;2", "correctness": "4;3;3;2", "rating_avg": 4.0, "confidence_avg": 3.25, "excitement_avg": 3.25, "reproducibility_avg": 3.75, "correctness_avg": 3.0, "replies_avg": 12, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0003-3685-3606;0000-0002-1882-5284;0000-0002-8333-6196;0000-0003-0651-1592;0000-0002-9777-9676", "linkedin": ";;;;", "aff_unique_index": "0;1;0;2;0", "aff_unique_norm": "Renmin University of China;Beijing Normal University;Beihang University", "aff_unique_dep": ";;", "aff_unique_url": "http://www.ruc.edu.cn;https://www.bnu.edu.cn;http://www.buaa.edu.cn/", "aff_unique_abbr": "RUC;BNU;BUAA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "O7eKiJpePJ", "title": "Instruct and Extract: Instruction Tuning for On-Demand Information Extraction", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Large language models with instruction-following capabilities open the door to a wider group of users. However, when it comes to information extraction \u2013 a classic task in natural language processing \u2013 most task-specific systems cannot align well with long-tail ad hoc extraction use cases for non-expert users. To address this, we propose a novel paradigm, termed On-Demand Information Extraction, to fulfill the personalized demands of real-world users. 
Our task aims to follow the instructions to extract the desired content from the associated text and present it in a structured tabular format. The table headers can either be user-specified or inferred contextually by the model. To facilitate research in this emerging area, we present a benchmark named InstructIE, inclusive of both automatically generated training data, as well as the human-annotated test set. Building on InstructIE, we further develop an On-Demand Information Extractor, ODIE. Comprehensive evaluations on our benchmark reveal that ODIE substantially outperforms the existing open-source models of similar size.", "keywords": "information extraction;instruction-tuning;language model;open scenario", "primary_area": "", "supplementary_material": "", "author": "Yizhu Jiao;Ming Zhong;Sha Li;Ruining Zhao;Siru Ouyang;Heng Ji;Jiawei Han", "authorids": "~Yizhu_Jiao1;~Ming_Zhong2;~Sha_Li1;~Ruining_Zhao1;~Siru_Ouyang1;~Heng_Ji3;~Jiawei_Han1", "gender": "F;M;F;F;F;F;M", "homepage": "https://yzjiao.github.io/;https://maszhongming.github.io/;;https://ruining0916.github.io/;https://ozyyshr.github.io;http://blender.cs.illinois.edu/hengji.html;http://hanj.cs.illinois.edu/", "dblp": "https://dblp.uni-trier.de/pid/250/9757;;;;https://dblp.org/search/pid/api?q=author:Siru_Ouyang:;;h/JiaweiHan.html", "google_scholar": "sHgBvMgAAAAJ;mnifqeUAAAAJ;OIo8J2YAAAAJ;;fetoihAAAAAJ;z7GCqT4AAAAJ;https://scholar.google.com.tw/citations?user=Kv9AbjMAAAAJ", "or_profile": "~Yizhu_Jiao1;~Ming_Zhong2;~Sha_Li1;~Ruining_Zhao1;~Siru_Ouyang1;~Heng_Ji3;~Jiawei_Han1", "aff": "UIUC;University of Illinois Urbana Champaign;University of Illinois, Urbana Champaign;University of Illinois Urbana-Champaign;University of Illinois Urbana-Champaign Champaign;University of Illinois, Urbana-Champaign;University of Illinois at Urbana-Champaign (UIUC)", "aff_domain": "illinois.edu;illinois.edu;illinois.edu;cs.illinois.edu;illinois.edu;uiuc.edu;illinois.edu", "position": "PhD student;PhD student;PhD student;Undergrad student;PhD student;Full Professor;Full Professor", "bibtex": "@inproceedings{\njiao2023instruct,\ntitle={Instruct and Extract: Instruction Tuning for On-Demand Information Extraction},\nauthor={Yizhu Jiao and Ming Zhong and Sha Li and Ruining Zhao and Siru Ouyang and Heng Ji and Jiawei Han},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=O7eKiJpePJ}\n}", "github": "", "project": "", "reviewers": "i6Bg;Z3zM;REDP", "site": "https://openreview.net/forum?id=O7eKiJpePJ", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;5", "excitement": "3;4;3", "reproducibility": "3;3;4", "correctness": "3;4;3", "rating_avg": 3.0, "confidence_avg": 4.333333333333333, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;0009-0001-1331-424X;;0000-0002-3629-2696", "linkedin": ";;;;;;", "aff_unique_index": "0;0;0;0;0;1;0", "aff_unique_norm": "University of Illinois Urbana-Champaign;University of Illinois", "aff_unique_dep": ";", "aff_unique_url": "https://www illinois.edu;https://illinois.edu", "aff_unique_abbr": "UIUC;UIUC", "aff_campus_unique_index": "0;0;0;0;1;0;0", "aff_campus_unique": "Urbana-Champaign;Champaign", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "O9zrG7NB3X", "title": "Learn Your Tokens: Word-Pooled 
Tokenization for Language Modeling", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Language models typically tokenize text into subwords, using a deterministic, hand-engineered heuristic of combining characters into longer surface-level strings such as 'ing' or whole words. \nRecent literature has repeatedly shown the limitations of such a tokenization strategy, particularly for documents not written in English and for representing numbers.\nOn the other extreme, byte/character-level language models are much less restricted but suffer from increased sequence description lengths and a subsequent quadratic expansion in self-attention computation.\nRecent attempts to compress and limit these context lengths with fixed size convolutions is helpful but completely ignores the word boundary.\nThis paper considers an alternative 'learn your tokens' scheme which utilizes the word boundary to pool bytes/characters into word representations, which are fed to the primary language model, before again decoding individual characters/bytes per word in parallel.\nWe find that our moderately expressive and moderately fast end-to-end tokenizer outperform by over `300%` both subwords and byte/character models over the intrinsic language modeling metric of next-word prediction across datasets. It particularly outshines on rare words, outperforming by a factor of 30! We extensively study the language modeling setup for all three categories of tokenizers and theoretically analyze how our end-to-end models can also be a strong trade-off in efficiency and robustness.", "keywords": "Tokenization;Language Modeling;BPE;Subword;Segmentation;Numeracy;Multilingual", "primary_area": "", "supplementary_material": "", "author": "Avijit Thawani;Saurabh Ghanekar;Xiaoyuan Zhu;Jay Pujara", "authorids": "~Avijit_Thawani1;~Saurabh_Ghanekar1;~Xiaoyuan_Zhu2;~Jay_Pujara1", "gender": "M;;;", "homepage": "http://avi-jit.github.io;;;https://www.jaypujara.org", "dblp": "208/4386;;;65/10103", "google_scholar": "8KleQAgAAAAJ;;;yvdSr4AAAAAJ", "or_profile": "~Avijit_Thawani1;~Saurabh_Ghanekar1;~Xiaoyuan_Zhu2;~Jay_Pujara1", "aff": "USC/ISI;;University of Southern California;University of Southern California", "aff_domain": "isi.edu;;usc.edu;usc.edu", "position": "PhD student;;Undergrad student;Assistant Professor", "bibtex": "@inproceedings{\nthawani2023learn,\ntitle={Learn Your Tokens: Word-Pooled Tokenization for Language Modeling},\nauthor={Avijit Thawani and Saurabh Ghanekar and Xiaoyuan Zhu and Jay Pujara},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=O9zrG7NB3X}\n}", "github": "", "project": "", "reviewers": "nfbm;kEqs;Exn2", "site": "https://openreview.net/forum?id=O9zrG7NB3X", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;4;4", "excitement": "4;4;3", "reproducibility": "4;5;3", "correctness": "4;3;3", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-4289-3607;;;0000-0001-6921-1744", "linkedin": "avijitthawani/;;xiaoyuan-zhu-38005a224/;pujara", "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Southern California", "aff_unique_dep": "", "aff_unique_url": "https://isi.usc.edu", "aff_unique_abbr": "USC", "aff_campus_unique_index": "0;1;1", "aff_campus_unique": "ISI;Los 
Angeles", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "OC4OLQGtIR", "title": "Reducing Sequence Length by Predicting Edit Spans with Large Language Models", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Large Language Models (LLMs) have demonstrated remarkable performance in various tasks and gained significant attention.\nLLMs are also used for local sequence transduction tasks, including grammatical error correction (GEC) and formality style transfer, where most tokens in a source text are kept unchanged.\nHowever, the models that generate all target tokens in such tasks have a tendency to simply copy the input text as is, without making needed changes, because the difference between input and output texts is minimal in the training data.\nThis is also inefficient because the computational cost grows quadratically with the target sequence length with Transformer.\nThis paper proposes predicting edit spans for the source text for local sequence transduction tasks.\nRepresenting an edit span with a position of the source text and corrected tokens, we can reduce the length of the target sequence and the computational cost for inference.\nWe apply instruction tuning for LLMs on the supervision data of edit spans.\nExperiments show that the proposed method achieves comparable performance to the baseline in four tasks, paraphrasing, formality style transfer, GEC, and text simplification, despite reducing the length of the target text by as small as 21%.\nFurthermore, we report that the task-specific fine-tuning with the proposed method achieved state-of-the-art performance in the four tasks.", "keywords": "Large language model;efficiency;instruction tuning;Local sequence transduction task", "primary_area": "", "supplementary_material": "", "author": "Masahiro Kaneko;Naoaki Okazaki", "authorids": "~Masahiro_Kaneko2;~Naoaki_Okazaki2", "gender": "M;M", "homepage": "https://sites.google.com/view/masahirokaneko;http://www.chokkan.org/", "dblp": "63/4936;49/4018", "google_scholar": "https://scholar.google.co.jp/citations?user=c-CLPD0AAAAJ;", "or_profile": "~Masahiro_Kaneko2;~Naoaki_Okazaki2", "aff": "Tokyo Institute of Technology;Tokyo Institute of Technology", "aff_domain": "titech.ac.jp;titech.ac.jp", "position": "Postdoc;Full Professor", "bibtex": "@inproceedings{\nkaneko2023reducing,\ntitle={Reducing Sequence Length by Predicting Edit Spans with Large Language Models},\nauthor={Masahiro Kaneko and Naoaki Okazaki},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=OC4OLQGtIR}\n}", "github": "", "project": "", "reviewers": "JGA8;R7G8;xjk9", "site": "https://openreview.net/forum?id=OC4OLQGtIR", "pdf_size": 0, "rating": "5;5;5", "confidence": "5;3;4", "excitement": "3;4;3", "reproducibility": "4;3;4", "correctness": "4;4;4", "rating_avg": 5.0, "confidence_avg": 4.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 4.0, "replies_avg": 10, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-5117-5447;", "linkedin": "masahiro-kaneko-06433914a/;", "aff_unique_index": "0;0", "aff_unique_norm": "Tokyo Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.titech.ac.jp", "aff_unique_abbr": "Titech", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Japan" }, { "id": 
"ODeHH5FBwx", "title": "M2C: Towards Automatic Multimodal Manga Complement", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "Multimodal manga analysis focuses on enhancing manga understanding with visual and textual features, which has attracted considerable attention from both natural language processing and computer vision communities. Currently, most comics are hand-drawn and prone to problems such as missing pages, text contamination, and text aging, resulting in missing comic text content and seriously hindering human comprehension. In other words, the Multimodal Manga Complement (\\textbf{M2C}) task has not been investigated, which aims to handle the aforementioned issues by providing a shared semantic space for vision and language understanding. To this end,\nwe first propose the Multimodal Manga Complement task by establishing a new M2C benchmark dataset covering two languages. First, we design a manga argumentation method called MCoT to mine event knowledge in comics with large language models. Then, an effective baseline FVP-M$^{2}$ using fine-grained visual prompts is proposed to support manga complement. Extensive experimental results show the effectiveness of FVP-M$^{2}$ method for Multimodal Mange Complement.", "keywords": "Manga data;Vision and Language;Chain of Thought Prompting", "primary_area": "", "supplementary_material": "", "author": "Hongcheng Guo;Boyang Wang;Jiaqi Bai;Jiaheng Liu;Jian Yang;Zhoujun Li", "authorids": "~Hongcheng_Guo1;~Boyang_Wang3;~Jiaqi_Bai1;~Jiaheng_Liu1;~Jian_Yang10;~Zhoujun_Li1", "gender": "M;;M;M;M;M", "homepage": ";;;https://liujiaheng.github.io/;https://www.researchgate.net/scientific-contributions/Jian-Yang-2146089927;", "dblp": "84/8542;;;225/1962;181/2854-30;76/2866-1", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;;https://scholar.google.com/citations?hl=zh-CN;yFI_RjUAAAAJ;i9opWEgAAAAJ;", "or_profile": "~Hongcheng_Guo1;~Boyang_Wang3;~Jiaqi_Bai1;~Jiaheng_Liu1;~Jian_Yang10;~Zhoujun_Li1", "aff": "Beihang University;;Beihang University;Beihang University;Beihang University;Beihang University", "aff_domain": "buaa.edu.cn;;buaa.edu.cn;buaa.edu.cn;buaa.edu;buaa.edu.cn", "position": "PhD student;;PhD student;PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nguo2023mc,\ntitle={M2C: Towards Automatic Multimodal Manga Complement},\nauthor={Hongcheng Guo and Boyang Wang and Jiaqi Bai and Jiaheng Liu and Jian Yang and Zhoujun Li},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=ODeHH5FBwx}\n}", "github": "", "project": "", "reviewers": "YEKH;4xZw;naWw", "site": "https://openreview.net/forum?id=ODeHH5FBwx", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;4", "excitement": "4;4;3", "reproducibility": "4;3;3", "correctness": "3;3;4", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;0000-0003-1983-012X;", "linkedin": ";;;;;", "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Beihang University", "aff_unique_dep": "", "aff_unique_url": "http://www.buaa.edu.cn/", "aff_unique_abbr": "BUAA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "OETPPc15XG", "title": "Whispering LLaMA: A Cross-Modal 
Generative Error Correction Framework for Speech Recognition", "track": "main", "status": "Short Main", "tldr": "", "abstract": "We introduce a new cross-modal fusion technique designed for generative error correction in automatic speech recognition (ASR). Our methodology leverages both acoustic information and external linguistic representations to generate accurate speech transcription contexts. This marks a step towards a fresh paradigm in generative error correction within the realm of n-best hypotheses. Unlike the existing ranking-based rescoring methods, our approach adeptly uses distinct initialization techniques and parameter-efficient algorithms to boost ASR performance derived from pre-trained speech and text models. Through evaluation across diverse ASR datasets, we assess our fusion technique, demonstrating a 37.66\\% improvement in word error rate (WER) relative performance compared to the n-best Oracle. To encourage future research, we have made our code and pre-trained models open source at [https://github.com/Srijith-rkr/Whispering-LLaMA](https://github.com/Srijith-rkr/Whispering-LLaMA)", "keywords": "Multimodal Learning;Parameter-Efficient Adaptation;Speech Recognition;Generative Error Correction", "primary_area": "", "supplementary_material": "", "author": "Srijith Radhakrishnan;Chao-Han Huck Yang;Sumeer Ahmad Khan;Rohit Kumar;Narsis A. Kiani;David Gomez-Cabrero;Jesper Tegn\u00e9r", "authorids": "~Srijith_Radhakrishnan1;~Chao-Han_Huck_Yang1;~Sumeer_Ahmad_Khan1;~Rohit_Kumar3;~Narsis_A._Kiani1;~David_Gomez-Cabrero1;~Jesper_Tegn\u00e9r1", "gender": "M;M;;M;F;M;M", "homepage": ";https://huckiyang.github.io/;;;https://www.algorithmicdynamics.net/;https://faculty.kaust.edu.sa/en/persons/david-gomez-cabrero;https://www.kaust.edu.sa/en/study/faculty/jesper-tegner", "dblp": ";230/4012;;;;;", "google_scholar": "Rc07xAwAAAAJ;TT3XJW8AAAAJ;https://scholar.google.com/citations?hl=en;;https://scholar.google.se/citations?user=odJXANUAAAAJ;;https://scholar.google.com.tw/citations?user=_DUppAgAAAAJ", "or_profile": "~Srijith_Radhakrishnan1;~Chao-Han_Huck_Yang1;~Sumeer_Ahmad_Khan1;~Rohit_Kumar3;~Narsis_A._Kiani1;~David_Gomez-Cabrero1;~Jesper_Tegn\u00e9r1", "aff": "Manipal Institute of Technology;Amazon AGI;King Abdullah University of Science and Technology;King Abdullah University of Science and Technology;Karolinska Institute Stockholm;King Abdullah University of Science and Technology;King Abdullah University of Science and Technology", "aff_domain": "manipal.edu;amazon.com;kaust.edu.sa;kaust.edu.sa;ki.se;kaust.edu.sa;kaust.edu.sa", "position": "Undergrad student;Researcher;Postdoc;Postdoc;lab leader;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nradhakrishnan2023whispering,\ntitle={Whispering {LL}a{MA}: A Cross-Modal Generative Error Correction Framework for Speech Recognition},\nauthor={Srijith Radhakrishnan and Chao-Han Huck Yang and Sumeer Ahmad Khan and Rohit Kumar and Narsis A. 
Kiani and David Gomez-Cabrero and Jesper Tegn{\\'e}r},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=OETPPc15XG}\n}", "github": "", "project": "", "reviewers": "7p1W;YmwX;v94L", "site": "https://openreview.net/forum?id=OETPPc15XG", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;4;3", "excitement": "4;3;3", "reproducibility": "4;4;2", "correctness": "4;2;3", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0003-2879-8811;;0000-0002-7400-9986;;;", "linkedin": "srijith-radhakrishnan;;;;;;", "aff_unique_index": "0;1;2;2;3;2;2", "aff_unique_norm": "Manipal Institute of Technology;Amazon;King Abdullah University of Science and Technology;Karolinska Institute", "aff_unique_dep": ";Amazon AGI;;", "aff_unique_url": "https://mit manipal.edu;https://www.amazon.com;https://www.kast.kau.edu.sa;https://ki.se", "aff_unique_abbr": "MIT Manipal;Amazon;KAUST;KI", "aff_campus_unique_index": "1", "aff_campus_unique": ";Stockholm", "aff_country_unique_index": "0;1;2;2;3;2;2", "aff_country_unique": "India;United States;Saudi Arabia;Sweden" }, { "id": "OGdl9d3BEC", "title": "Revisiting Block-based Quantisation: What is Important for Sub-8-bit LLM Inference?", "track": "main", "status": "Long Main", "tldr": "", "abstract": "The inference of Large language models (LLMs) requires immense computation and memory resources. To curtail these costs, quantisation has emerged as a promising solution, but existing LLM quantisation mainly focuses on 8-bit. In this work, we explore the statistical and learning properties of the LLM layer and attribute the bottleneck of LLM quantisation to numerical scaling offsets. To address this, we adapt block quantisations for LLMs, a family of methods that share scaling factors across packed numbers. Block quantisations efficiently reduce the numerical scaling offsets solely from an arithmetic perspective, without additional treatments in the computational path. Our nearly-lossless quantised 6-bit LLMs achieve a $19\\times$ higher arithmetic density and $5\\times$ memory density than the float32 baseline, surpassing the prior art 8-bit quantisation by $2.5\\times$ in arithmetic density and $1.2\\times$ in memory density, without requiring any data calibration or re-training. We also share our insights into sub-8-bit LLM quantisation, including the mismatch between activation and weight distributions, optimal fine-tuning strategies, and a lower quantisation granularity inherent in the statistical properties of LLMs. The latter two tricks enable nearly-lossless 4-bit LLMs on downstream tasks. 
Our code is open-sourced.", "keywords": "large language model;LLM;quantization;NLP", "primary_area": "", "supplementary_material": "", "author": "Cheng Zhang;Jianyi Cheng;Ilia Shumailov;George Anthony Constantinides;Yiren Zhao", "authorids": "~Cheng_Zhang21;~Jianyi_Cheng1;~Ilia_Shumailov1;~George_Anthony_Constantinides1;~Yiren_Zhao2", "gender": "M;M;M;M;Unspecified", "homepage": "https://chengzhang-98.github.io/blog/;https://jianyicheng.github.io/;http://cas.ee.ic.ac.uk;https://aaronzhao.me;https://www.cl.cam.ac.uk/~is410/", "dblp": ";190/7785;38/1966;https://dblp.uni-trier.de/pers/hd/z/Zhao:Yiren;213/8587", "google_scholar": "6K-mHPoAAAAJ;HfOtGc0AAAAJ;https://scholar.google.co.uk/citations?user=NTn1NJAAAAAJ;lOOmgEgAAAAJ;https://scholar.google.co.uk/citations?hl=en", "or_profile": "~Cheng_Zhang21;~Jianyi_Cheng1;~George_Anthony_Constantinides1;~Yiren_Zhao2;~I_Shumailov1", "aff": "Imperial College London;Imperial College London;Imperial College London;Imperial College London;University of Oxford", "aff_domain": "ic.ac.uk;imperial.ac.uk;imperial.ac.uk;ic.ac.uk;ox.ac.uk", "position": "PhD student;PhD student;Full Professor;Assistant Professor;Fellowship", "bibtex": "@inproceedings{\nzhang2023revisiting,\ntitle={Revisiting Block-based Quantisation: What is Important for Sub-8-bit {LLM} Inference?},\nauthor={Cheng Zhang and Jianyi Cheng and Ilia Shumailov and George Anthony Constantinides and Yiren Zhao},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=OGdl9d3BEC}\n}", "github": "", "project": "", "reviewers": "F1oA;ra67;Qm4i;JYyW", "site": "https://openreview.net/forum?id=OGdl9d3BEC", "pdf_size": 0, "rating": "5;5;5;5", "confidence": "3;3;3;3", "excitement": "4;4;4;4", "reproducibility": "4;4;3;3", "correctness": "4;4;3;4", "rating_avg": 5.0, "confidence_avg": 3.0, "excitement_avg": 4.0, "reproducibility_avg": 3.5, "correctness_avg": 3.75, "replies_avg": 14, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;", "linkedin": "chengzhang98/;;;yiren-aaron-zhao-baa8b5116/;ilia-shumailov/", "aff_unique_index": "0;0;0;0;1", "aff_unique_norm": "Imperial College London;University of Oxford", "aff_unique_dep": ";", "aff_unique_url": "https://www.imperial.ac.uk;https://www.ox.ac.uk", "aff_unique_abbr": "ICL;Oxford", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United Kingdom" }, { "id": "OK5yv6Fhl9", "title": "MULTITuDE: Large-Scale Multilingual Machine-Generated Text Detection Benchmark", "track": "main", "status": "Long Main", "tldr": "", "abstract": "There is a lack of research into capabilities of recent LLMs to generate convincing text in languages other than English and into performance of detectors of machine-generated text in multilingual settings. This is also reflected in the available benchmarks which lack authentic texts in languages other than English and predominantly cover older generators. To fill this gap, we introduce MULTITuDE, a novel benchmarking dataset for multilingual machine-generated text detection comprising of 74,081 authentic and machine-generated texts in 11 languages (ar, ca, cs, de, en, es, nl, pt, ru, uk, and zh) generated by 8 multilingual LLMs. Using this benchmark, we compare the performance of zero-shot (statistical and black-box) and fine-tuned detectors. 
Considering the multilinguality, we evaluate 1) how these detectors generalize to unseen languages (linguistically similar as well as dissimilar) and unseen LLMs and 2) whether the detectors improve their performance when trained on multiple languages.", "keywords": "text generation;large language models;multilinguality;machine-generated text detection;benchmark", "primary_area": "", "supplementary_material": "", "author": "Dominik Macko;Robert Moro;Adaku Uchendu;Jason S Lucas;Michiharu Yamashita;Mat\u00fa\u0161 Pikuliak;Ivan Srba;Thai Le;Dongwon Lee;Jakub Simko;Maria Bielikova", "authorids": "~Dominik_Macko1;~Robert_Moro1;~Adaku_Uchendu1;~Jason_S_Lucas1;~Michiharu_Yamashita1;~Mat\u00fa\u0161_Pikuliak1;~Ivan_Srba1;~Thai_Le1;~Dongwon_Lee1;~Jakub_Simko1;~Maria_Bielikova1", "gender": ";M;F;M;M;;M;;M;M;F", "homepage": ";https://kinit.sk/member/robert-moro/;https://adauchendu.github.io/;https://www.jasonslucas.com/;https://mickeymst.github.io/;;https://kinit.sk/member/ivan-srba/;https://lethaiq.github.io/tql3/;https://pike.psu.edu/dongwon;https://kinit.sk/member/jakub-simko/;https://kinit.sk/member/maria-bielikova/", "dblp": ";13/10717;244/0488;359/3158.html;234/2706;218/6950;https://dblp.uni-trier.de/pid/06/9076;03/9889;l/DongwonLee;09/8578.html;https://dblp.uni-trier.de/pid/b/MariaBielikova", "google_scholar": ";https://scholar.google.sk/citations?user=dyeyjpQAAAAJ;A4be1l4AAAAJ;XU1WN6YAAAAJ;https://scholar.google.co.jp/citations?user=rHmPeHQAAAAJ;https://scholar.google.sk/citations?user=A6dFdnkAAAAJ;https://scholar.google.sk/citations?hl=sk;Fd8K7kAAAAAJ;MzL-WnEAAAAJ;vjXMG2AAAAAJ;wGB6dnEAAAAJ", "or_profile": "~Dominik_Macko1;~Robert_Moro1;~Adaku_Uchendu1;~Jason_S_Lucas1;~Michiharu_Yamashita1;~Mat\u00fa\u0161_Pikuliak1;~Ivan_Srba1;~Thai_Le1;~Dongwon_Lee1;~Jakub_Simko1;~Maria_Bielikova1", "aff": ";Kempelen Institute of Intelligent Technologies;Pennsylvania State University;Pennsylvania State University;Pennsylvania State University;Kempelen Institute of Intelligent Technologies;Kempelen Institute of Intelligent Technologies;University of Mississippi;The Pennsylvania State University;Kempelen Institute of Intelligent Technologies;Kempelen Institute of Intelligent Technologies", "aff_domain": ";kinit.sk;psu.edu;psu.edu;psu.edu;kinit.sk;kinit.sk;olemiss.edu;psu.edu;kinit.sk;kinit.sk", "position": ";Researcher;PhD student;PhD student;PhD student;Postdoc;Researcher;Assistant Professor;Full Professor;Principal Researcher;Principal Researcher", "bibtex": "@inproceedings{\nmacko2023multitude,\ntitle={{MULTIT}u{DE}: Large-Scale Multilingual Machine-Generated Text Detection Benchmark},\nauthor={Dominik Macko and Robert Moro and Adaku Uchendu and Jason S Lucas and Michiharu Yamashita and Mat{\\'u}{\\v{s}} Pikuliak and Ivan Srba and Thai Le and Dongwon Lee and Jakub Simko and Maria Bielikova},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=OK5yv6Fhl9}\n}", "github": "", "project": "", "reviewers": "45v3;Z4xe;tckt", "site": "https://openreview.net/forum?id=OK5yv6Fhl9", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;4;4", "excitement": "3;3;3", "reproducibility": "5;4;3", "correctness": "4;4;3", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.0, "reproducibility_avg": 4.0, "correctness_avg": 3.6666666666666665, "replies_avg": 10, "authors#_avg": 11, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": 
";0000-0002-3052-8290;;0009-0000-3494-6935;0009-0002-3802-8618;0000-0003-1353-9462;0000-0003-3511-5337;0000-0001-9632-6870;0000-0001-8371-7629;0000-0003-0239-4237;0000-0003-4105-3494", "linkedin": ";;;jslu/;;;;;;;mariabielik/", "aff_unique_index": "0;1;1;1;0;0;2;1;0;0", "aff_unique_norm": "Kempelen Institute of Intelligent Technologies;Pennsylvania State University;University of Mississippi", "aff_unique_dep": ";;", "aff_unique_url": "http://www.kempeleninstitute.com;https://www.psu.edu;https://www.olemiss.edu", "aff_unique_abbr": ";PSU;UM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1;0;0;1;1;0;0", "aff_country_unique": "Hungary;United States" }, { "id": "OLcDbSRjbx", "title": "DiffuSeq-v2: Bridging Discrete and Continuous Text Spaces for Accelerated Seq2Seq Diffusion Models", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "Diffusion models have gained prominence in generating high-quality sequences of text. Nevertheless, current approaches predominantly represent discrete text within a continuous diffusion space, which incurs substantial computational overhead during training and results in slower sampling speeds. In this paper, we introduce a soft absorbing state that facilitates the diffusion model in learning to reconstruct discrete mutations based on the underlying Gaussian space, thereby enhancing its capacity to recover conditional signals. During the sampling phase, we employ state-of-the-art ODE solvers within the continuous space to expedite the sampling process. Comprehensive experimental evaluations reveal that our proposed method effectively accelerates the training convergence by 4x and generates samples of similar quality 800x faster, rendering it significantly closer to practical application. 
\\footnote{The code is released at \\url{https://github.com/Shark-NLP/DiffuSeq/tree/diffuseq-v2}.}", "keywords": "diffusion model;sequence to sequence;text generation", "primary_area": "", "supplementary_material": "", "author": "Shansan Gong;Mukai Li;Jiangtao Feng;Zhiyong Wu;Lingpeng Kong", "authorids": "~Shansan_Gong1;~Mukai_Li2;~Jiangtao_Feng1;~Zhiyong_Wu3;~Lingpeng_Kong1", "gender": "F;;M;;M", "homepage": "https://summmeer.github.io/;;https://jiangtaofeng.github.io/;;https://ikekonglp.github.io/", "dblp": "320/4745;;183/0908;;144/7656", "google_scholar": "F86VNoMAAAAJ;;7ufSFeIAAAAJ;;f1hBi5wAAAAJ", "or_profile": "~Shansan_Gong1;~Mukai_Li2;~Jiangtao_Feng1;~Zhiyong_Wu3;~Lingpeng_Kong1", "aff": "Shanghai AI Lab;;Shanghai AI Lab;;Department of Computer Science, The University of Hong Kong", "aff_domain": "pjlab.org.cn;;pjlab.org.cn;;cs.hku.hk", "position": "Researcher;;Researcher;;Assistant Professor", "bibtex": "@inproceedings{\ngong2023diffuseqv,\ntitle={DiffuSeq-v2: Bridging Discrete and Continuous Text Spaces for Accelerated Seq2Seq Diffusion Models},\nauthor={Shansan Gong and Mukai Li and Jiangtao Feng and Zhiyong Wu and Lingpeng Kong},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=OLcDbSRjbx}\n}", "github": "", "project": "", "reviewers": "6MXf;SUQ8;ZN1G", "site": "https://openreview.net/forum?id=OLcDbSRjbx", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;5;5", "excitement": "3;3;2", "reproducibility": "2;3;3", "correctness": "3;4;3", "rating_avg": 4.0, "confidence_avg": 4.333333333333333, "excitement_avg": 2.6666666666666665, "reproducibility_avg": 2.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 5, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0001-5028-2323;;;;", "linkedin": ";;;;", "aff_unique_index": "0;0;1", "aff_unique_norm": "Shanghai AI Lab;University of Hong Kong", "aff_unique_dep": ";Department of Computer Science", "aff_unique_url": "https://www.shanghaiailab.com;https://www.hku.hk", "aff_unique_abbr": "SAIL;HKU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "ORHg3RKho0", "title": "Auto-Instruct: Automatic Instruction Generation and Ranking for Black-Box Language Models", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Large language models (LLMs) can perform a wide range of tasks by following natural language instructions, without the necessity of task-specific fine-tuning. Unfortunately, the performance of LLMs is greatly influenced by the quality of these instructions, and manually writing effective instructions for each task is a laborious and subjective process. In this paper, we introduce Auto-Instruct, a novel method to automatically improve the quality of instructions provided to LLMs. Our method leverages the inherent generative ability of LLMs to produce diverse candidate instructions for a given task, and then ranks them using a scoring model trained on a variety of 575 existing NLP tasks. In experiments on 118 out-of-domain tasks, Auto-Instruct surpasses both human-written instructions and existing baselines of LLM-generated instructions. 
Furthermore, our method exhibits notable generalizability even with other LLMs that are not incorporated into its training process.", "keywords": "Large Language Model;Instruction Generation;Instruction Ranking;Multi-Task Learning;In-Context Learning", "primary_area": "", "supplementary_material": "", "author": "Zhihan Zhang;Shuohang Wang;Wenhao Yu;Yichong Xu;Dan Iter;Qingkai Zeng;Yang Liu;Chenguang Zhu;Meng Jiang", "authorids": "~Zhihan_Zhang2;~Shuohang_Wang1;~Wenhao_Yu2;~Yichong_Xu1;~Dan_Iter1;~Qingkai_Zeng2;~Yang_Liu50;~Chenguang_Zhu1;~Meng_Jiang3", "gender": ";M;M;M;Not Specified;M;M;M;M", "homepage": ";;https://wyu97.github.io/;http://xycking.wixsite.com/yichongxu;https://daniter-cu.github.io/;https://qingkaizeng.github.io/;https://nlp-yang.github.io/;;http://www.meng-jiang.com/", "dblp": ";173/5469.html;159/8117-2.html;154/6421;63/10689.html;66/3005-1.html;;48/7536-1.html;69/339-1", "google_scholar": ";mN-IO6wAAAAJ;z4qSdX8AAAAJ;sYza2XwAAAAJ;bg8RrSkAAAAJ;Z9R_5-wAAAAJ;HxTr-CtMdrsC;1b2kKWoAAAAJ;LZIPfCkAAAAJ", "or_profile": "~Zhihan_Zhang2;~Shuohang_Wang1;~Wenhao_Yu2;~Yichong_Xu1;~Dan_Iter1;~Qingkai_Zeng2;~Yang_Liu50;~Chenguang_Zhu1;~Meng_Jiang3", "aff": ";Microsoft;University of Notre Dame;Microsoft;Microsoft;Tencent AI Lab;Microsoft;Zoom;University of Notre Dame", "aff_domain": ";microsoft.com;nd.edu;microsoft.com;microsoft.com;tencent.com;microsoft.com;zoom.us;nd.edu", "position": ";Researcher;PhD student;Senior Researcher;Researcher;Intern;Researcher;Principal Researcher;Assistant Professor", "bibtex": "@inproceedings{\nzhang2023autoinstruct,\ntitle={Auto-Instruct: Automatic Instruction Generation and Ranking for Black-Box Language Models},\nauthor={Zhihan Zhang and Shuohang Wang and Wenhao Yu and Yichong Xu and Dan Iter and Qingkai Zeng and Yang Liu and Chenguang Zhu and Meng Jiang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=ORHg3RKho0}\n}", "github": "", "project": "", "reviewers": "MRyL;5KfX;n9EG", "site": "https://openreview.net/forum?id=ORHg3RKho0", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;4;3", "excitement": "4;3;3", "reproducibility": "4;3;4", "correctness": "4;3;4", "rating_avg": 3.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 9, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0002-4075-5980;;;0000-0002-0858-937X;;;0000-0002-3009-519X", "linkedin": ";;;;daniter;;;;meng-jiang-94b10916/", "aff_unique_index": "0;1;0;0;2;0;3;1", "aff_unique_norm": "Microsoft;University of Notre Dame;Tencent;Zoom Video Communications Inc.", "aff_unique_dep": "Microsoft Corporation;;Tencent AI Lab;", "aff_unique_url": "https://www.microsoft.com;https://www.nd.edu;https://ai.tencent.com;https://zoom.us", "aff_unique_abbr": "Microsoft;Notre Dame;Tencent AI Lab;Zoom", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;1;0;0;0", "aff_country_unique": "United States;China" }, { "id": "OUiW2DzpzT", "title": "Characterizing Mechanisms for Factual Recall in Language Models", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Language Models (LMs) often must integrate facts they memorized in pretraining with new information that appears in a given context. 
These two sources can disagree, causing competition within the model, and it is unclear how an LM will resolve the conflict. On a dataset that queries for knowledge of world capitals, we investigate both distributional and mechanistic determinants of LM behavior in such situations. Specifically, we measure the proportion of the time an LM will use a counterfactual prefix (e.g., \"The capital of Poland is London\") to overwrite what it learned in pretraining (\"Warsaw\"). On Pythia and GPT2, the training frequency of both the query country (\"Poland\") and the in-context city (\"London\") highly affect the models' likelihood of using the counterfactual. We then use head attribution to identify individual attention heads that either promote the memorized answer or the in-context answer in the logits. By scaling up or down the value vector of these heads, we can control the likelihood of using the in-context answer on new data. This method can increase the rate of generating the in-context answer to 88% of the time simply by scaling a single head at runtime. Our work contributes to a body of evidence showing that we can often localize model behaviors to specific components and provides a proof of concept for how future methods might control model behavior dynamically at runtime.", "keywords": "Interpretability;NLP;mechanistic interpretability", "primary_area": "", "supplementary_material": "", "author": "Qinan Yu;Jack Merullo;Ellie Pavlick", "authorids": "~Qinan_Yu1;~Jack_Merullo2;~Ellie_Pavlick1", "gender": "F;F;M", "homepage": "https://www.linkedin.com/in/qinan-yu-9b50471b2/;http://cs.brown.edu/people/epavlick/;https://jmerullo.github.io/", "dblp": ";141/4059;248/8361", "google_scholar": ";sFyrSa8AAAAJ;7w0xLF4AAAAJ", "or_profile": "~Qinan_Yu1;~Ellie_Pavlick1;~jack_merullo1", "aff": "Brown University;Brown University;Brown University", "aff_domain": "brown.edu;brown.edu;brown.edu", "position": "Undergrad student;Assistant Professor;PhD student", "bibtex": "@inproceedings{\nyu2023characterizing,\ntitle={Characterizing Mechanisms for Factual Recall in Language Models},\nauthor={Qinan Yu and Jack Merullo and Ellie Pavlick},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=OUiW2DzpzT}\n}", "github": "", "project": "", "reviewers": "ecsM;jXDd;tNke", "site": "https://openreview.net/forum?id=OUiW2DzpzT", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;3;3", "excitement": "4;4;4", "reproducibility": "4;4;4", "correctness": "4;4;4", "rating_avg": 5.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 4.0, "reproducibility_avg": 4.0, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": ";;", "aff_unique_index": "0;0;0", "aff_unique_norm": "Brown University", "aff_unique_dep": "", "aff_unique_url": "https://www.brown.edu", "aff_unique_abbr": "Brown", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "OUmxBN45Gl", "title": "Do All Languages Cost the Same? Tokenization in the Era of Commercial Language Models", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Language models have graduated from being research prototypes to commercialized products offered as web APIs, and recent works have highlighted the multilingual capabilities of these products. 
The API vendors charge their users based on usage, more specifically on the number of ``tokens'' processed or generated by the underlying language models. What constitutes a token, however, is training data and model dependent with a large variance in the number of tokens required to convey the same information in different languages. In this work, we analyze the effect of this non-uniformity on the fairness of an API's pricing policy across languages. We conduct a systematic analysis of the cost and utility of OpenAI's language model API on multilingual benchmarks in 22 typologically diverse languages. We show evidence that speakers of a large number of the supported languages are overcharged while obtaining poorer results. These speakers tend to also come from regions where the APIs are less affordable, to begin with. Through these analyses, we aim to increase transparency around language model APIs' pricing policies and encourage the vendors to make them more equitable.", "keywords": "Tokenization;LLMs;under-resourced languages;equitable LLMs", "primary_area": "", "supplementary_material": "", "author": "Orevaoghene Ahia;Sachin Kumar;Hila Gonen;Jungo Kasai;David R Mortensen;Noah A. Smith;Yulia Tsvetkov", "authorids": "~Orevaoghene_Ahia1;~Sachin_Kumar1;~Hila_Gonen1;~Jungo_Kasai1;~David_R_Mortensen1;~Noah_A._Smith2;~Yulia_Tsvetkov1", "gender": ";M;;M;M;;F", "homepage": ";https://shocheen.com;https://gonenhila.github.io/;https://homes.cs.washington.edu/~jkasai/;http://www.cs.cmu.edu/~dmortens/;;https://homes.cs.washington.edu/~yuliats/", "dblp": ";31/4484-9;167/5312;205/9020;180/5443;;75/8157", "google_scholar": ";qO38fRIAAAAJ;URThmtMAAAAJ;nHCLoIwAAAAJ;https://scholar.google.com/citations?authuser=1;;SEDPkrsAAAAJ", "or_profile": "~Orevaoghene_Ahia1;~Sachin_Kumar1;~Hila_Gonen1;~Jungo_Kasai1;~David_R_Mortensen1;~Noah_A._Smith2;~Yulia_Tsvetkov1", "aff": ";Carnegie Mellon University;Meta Facebook;Paul G. Allen School of Computer Science & Engineering, University of Washington;Carnegie Mellon University;;Department of Computer Science, University of Washington", "aff_domain": ";cmu.edu;facebook.com;cs.washington.edu;cmu.edu;;cs.washington.edu", "position": ";PhD student;Postdoc;PhD student;Systems Scientist;;Assistant Professor", "bibtex": "@inproceedings{\nahia2023do,\ntitle={Do All Languages Cost the Same? Tokenization in the Era of Commercial Language Models},\nauthor={Orevaoghene Ahia and Sachin Kumar and Hila Gonen and Jungo Kasai and David R Mortensen and Noah A. Smith and Yulia Tsvetkov},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=OUmxBN45Gl}\n}", "github": "", "project": "", "reviewers": "8QsM;JDu5;YUzp", "site": "https://openreview.net/forum?id=OUmxBN45Gl", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;2;5", "excitement": "3;4;4", "reproducibility": "4;2;5", "correctness": "2;4;5", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;0000-0002-3927-6851;;0000-0002-4634-7128", "linkedin": ";;;;davidrmortensen/;;", "aff_unique_index": "0;1;2;0;2", "aff_unique_norm": "Carnegie Mellon University;Meta;University of Washington", "aff_unique_dep": ";Meta Platforms, Inc.;Paul G. 
Allen School of Computer Science & Engineering", "aff_unique_url": "https://www.cmu.edu;https://meta.com;https://www.washington.edu", "aff_unique_abbr": "CMU;Meta;UW", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Seattle", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "OVLnZliSHs", "title": "MultiTurnCleanup: A Benchmark for Multi-Turn Spoken Conversational Transcript Cleanup", "track": "main", "status": "Short Main", "tldr": "", "abstract": "Current disfluency detection models focus on individual utterances each from a single speaker. However, numerous discontinuity phenomena in spoken conversational transcripts occur across multiple turns, which can not be identified by disfluency detection models. This study addresses these phenomena by proposing an innovative Multi-Turn Cleanup\u00a0task for spoken conversational transcripts and collecting a new dataset, MultiTurnCleanup. We design a data labeling schema to collect the high-quality dataset and provide extensive data analysis. Furthermore, we leverage two modeling approaches for experimental evaluation as benchmarks for future research.", "keywords": "Multi-Turn Spoken Conversations;Crowdsourcing;Transcript Cleanup;Disfluency Detection", "primary_area": "", "supplementary_material": "", "author": "Hua Shen;Vicky Zayats;Johann C Rocholl;Daniel David Walker;Dirk Padfield", "authorids": "~Hua_Shen1;~Vicky_Zayats1;~Johann_C_Rocholl1;~Daniel_David_Walker1;~Dirk_Padfield1", "gender": "F;F;M;M;", "homepage": "http://hua-shen.org/;;https://google.com;;https://sites.google.com/site/dirkpadfield", "dblp": ";;;61/6112;08/1148", "google_scholar": "zFjlv1sAAAAJ;BVVJvoMAAAAJ;;;", "or_profile": "~Hua_Shen1;~Vicky_Zayats1;~Johann_C_Rocholl1;~Daniel_David_Walker1;~Dirk_R._Padfield1", "aff": "Pennsylvania State University;Google;;;Google", "aff_domain": "psu.edu;google.com;;;google.com", "position": "PhD student;Researcher;;;Researcher", "bibtex": "@inproceedings{\nshen2023multiturncleanup,\ntitle={MultiTurnCleanup: A Benchmark for Multi-Turn Spoken Conversational Transcript Cleanup},\nauthor={Hua Shen and Vicky Zayats and Johann C Rocholl and Daniel David Walker and Dirk Padfield},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=OVLnZliSHs}\n}", "github": "", "project": "", "reviewers": "Hvuk;H2MH;chnj", "site": "https://openreview.net/forum?id=OVLnZliSHs", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;4;4", "excitement": "3;4;3", "reproducibility": "3;5;3", "correctness": "3;4;4", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-4928-525X;;;;", "linkedin": "hua-shen/;;;;", "aff_unique_index": "0;1;1", "aff_unique_norm": "Pennsylvania State University;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.psu.edu;https://www.google.com", "aff_unique_abbr": "PSU;Google", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "OVmOQs85Xb", "title": "Dynamic Open-book Prompt for Conversational Recommender System", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Conversational Recommender System (CRS) aims to deliver 
personalized recommendations through interactive dialogues.\nRecent advances in prompt learning have shed light on this task.\nHowever, the performance of existing methods is confined by the limited context within ongoing conversations.\nMoreover, these methods utilize training samples only for prompt parameter training.\nThe constructed prompt lacks the ability to refer to the training data during inference, which exacerbates the problem of limited context.\nTo solve this problem, we propose a novel Dynamic Open-book Prompt approach, where the open book stores users' experiences in historical data, and we dynamically construct the prompt to memorize the user's current utterance and selectively retrieve relevant contexts from the open book.\nSpecifically, we first build an item-recommendation graph from the open book and convolute on the graph to form a base prompt which contains more information besides the finite dialogue.\nThen, we enhance the representation learning process of the prompt by tailoring similar contexts in the graph into the prompt to meet the user's current need. \nThis ensures the prompt provides targeted suggestions that are both informed and contextually relevant.\nExtensive experimental results on the ReDial dataset demonstrate the significant improvements achieved by our proposed model over the state-of-the-art methods.\nOur code and data are available at https://github.com/NLPWM-WHU/DOP.", "keywords": "Conversational Recommender System;Graph Learning;Prompt Learning", "primary_area": "", "supplementary_material": "", "author": "Xuan Ma;Tieyun Qian;Ke Sun", "authorids": "~Xuan_Ma1;~Tieyun_Qian1;~Ke_Sun11", "gender": ";;M", "homepage": "https://github.com/alittlepony;;", "dblp": "43/275;17/5583;69/476-10", "google_scholar": ";MYTt4EwAAAAJ;q9CjbjIAAAAJ", "or_profile": "~Xuan_Ma1;~Tieyun_Qian1;~Ke_Sun11", "aff": "Wuhan University;Wuhan University;Wuhan University", "aff_domain": "whu.edu.cn;whu.edu.cn;whu.edu.cn", "position": "MS student;Full Professor;PhD student", "bibtex": "@inproceedings{\nma2023dynamic,\ntitle={Dynamic Open-book Prompt for Conversational Recommender System},\nauthor={Xuan Ma and Tieyun Qian and Ke Sun},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=OVmOQs85Xb}\n}", "github": "", "project": "", "reviewers": "MJfa;1wDy;MYtF", "site": "https://openreview.net/forum?id=OVmOQs85Xb", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;5", "excitement": "3;2;3", "reproducibility": "3;3;3", "correctness": "3;2;2", "rating_avg": 3.0, "confidence_avg": 4.333333333333333, "excitement_avg": 2.6666666666666665, "reproducibility_avg": 3.0, "correctness_avg": 2.3333333333333335, "replies_avg": 12, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0003-4230-446X;0000-0003-4667-5794;", "linkedin": ";;", "aff_unique_index": "0;0;0", "aff_unique_norm": "Wuhan University", "aff_unique_dep": "", "aff_unique_url": "http://www.whu.edu.cn/", "aff_unique_abbr": "WHU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "OVt2dIwxR1", "title": "Re$^3$Dial: Retrieve, Reorganize and Rescale Conversations for Long-Turn Open-Domain Dialogue Pre-training", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Pre-training on large-scale open-domain dialogue data can substantially improve the performance of dialogue models. 
However, the pre-trained dialogue model's ability to utilize long-range context is limited due to the scarcity of long-turn dialogue sessions. Most dialogues in existing pre-training corpora contain fewer than three turns of dialogue. To alleviate this issue, we propose the Retrieve, Reorganize and Rescale framework (Re$^3$Dial), which can automatically construct billion-scale long-turn dialogues by reorganizing existing short-turn ones. Given a short-turn session, Re$^3$Dial first employs a session retriever to retrieve coherent consecutive sessions. To this end, we train the retriever to capture semantic and discourse relations within multi-turn dialogues through contrastive training. Next, Re$^3$Dial samples a session from retrieved results following a diversity sampling strategy, which is designed to penalize repetitive or generic sessions. A longer session is then derived by concatenating the original session and the sampled session. By repeating the above process, Re$^3$Dial can yield a coherent long-turn dialogue. Extensive experiments on multiple multi-turn dialogue benchmarks demonstrate that Re$^3$Dial significantly improves the dialogue model's ability to utilize long-range context and thus generate more sensible and informative responses. Finally, we build a toolkit for efficiently rescaling conversations with Re$^3$Dial, which enables us to construct a corpus containing 1B Chinese dialogue sessions with 11.3 turns on average (5X longer than the original corpus). We will release our retriever model, toolkit, and data for public use.", "keywords": "Dialogue;Large-scale Pre-training;Pre-training data;Multi-turn Dialogue", "primary_area": "", "supplementary_material": "", "author": "Jiaxin Wen;Hao Zhou;Jian Guan;Jie Zhou;Minlie Huang", "authorids": "~Jiaxin_Wen2;~Hao_Zhou8;~Jian_Guan1;~Jie_Zhou8;~Minlie_Huang1", "gender": "M;M;M;M;M", "homepage": "https://jiaxin-wen.github.io/;;https://jianguanthu.github.io/;;http://coai.cs.tsinghua.edu.cn/hml", "dblp": "189/3085;;58/2489-2;00/5012-16;", "google_scholar": "jVRL96IAAAAJ;q3WaozcAAAAJ;BWCDa8YAAAAJ;https://scholar.google.com.hk/citations?user=OijxQCMAAAAJ;https://scholar.google.com/citations?hl=zh-CN", "or_profile": "~Jiaxin_Wen2;~Hao_Zhou8;~Jian_Guan1;~Jie_Zhou8;~Minlie_Huang1", "aff": "Tsinghua University;Tencent;Tsinghua University;WeChat AI, Tencent Inc.;Tsinghua University", "aff_domain": "tsinghua.edu.cn;tencent.com;tsinghua.edu.cn;tencent.com;tsinghua.edu.cn", "position": "MS student;Researcher;PhD student;Principal Researcher;Full Professor", "bibtex": "@inproceedings{\nwen2023redial,\ntitle={Re\\${\\textasciicircum}3\\$Dial: Retrieve, Reorganize and Rescale Conversations for Long-Turn Open-Domain Dialogue Pre-training},\nauthor={Jiaxin Wen and Hao Zhou and Jian Guan and Jie Zhou and Minlie Huang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=OVt2dIwxR1}\n}", "github": "", "project": "", "reviewers": "oPUQ;RkmQ;YxhS;2y1L;dkfb", "site": "https://openreview.net/forum?id=OVt2dIwxR1", "pdf_size": 0, "rating": "5;5;5;5;5", "confidence": "3;4;5;3;3", "excitement": "4;4;4;3;3", "reproducibility": "3;4;4;4;4", "correctness": "3;4;4;4;4", "rating_avg": 5.0, "confidence_avg": 3.6, "excitement_avg": 3.6, "reproducibility_avg": 3.8, "correctness_avg": 3.8, "replies_avg": 17, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0002-3597-0176;0000-0002-5899-5165;", "linkedin": ";;;;", 
"aff_unique_index": "0;1;0;1;0", "aff_unique_norm": "Tsinghua University;Tencent", "aff_unique_dep": ";Tencent Holdings Limited", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.tencent.com", "aff_unique_abbr": "THU;Tencent", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "OXQFcwKrTM", "title": "Salespeople vs SalesBot: Exploring the Role of Educational Value in Conversational Recommender Systems", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Making big purchases requires consumers to research or consult a salesperson to gain domain expertise. However, existing conversational recommender systems (CRS) often overlook users' lack of background knowledge, focusing solely on gathering preferences. \nIn this work, we define a new problem space for conversational agents that aim to provide both product recommendations and educational value through mixed-type mixed-initiative dialog. \nWe introduce SalesOps, a framework that facilitates the simulation and evaluation of such systems by leveraging recent advancements in large language models (LLMs).\nWe build SalesBot and ShopperBot, a pair of LLM-powered agents that can simulate either side of the framework. \nA comprehensive human study compares SalesBot against professional salespeople, revealing that although SalesBot approaches professional performance in terms of fluency and informativeness, it lags behind in recommendation quality. We emphasize the distinct limitations both face in providing truthful information, highlighting the challenges of ensuring faithfulness in the CRS context. We release our code and make all data available.", "keywords": "conversational recommender systems;mixed-initiative conversational agents;mixed-type conversational dataset", "primary_area": "", "supplementary_material": "", "author": "Lidiya Murakhovs'ka;Philippe Laban;Tian Xie;Caiming Xiong;Chien-Sheng Wu", "authorids": "~Lidiya_Murakhovs'ka1;~Philippe_Laban1;~Tian_Xie8;~Caiming_Xiong1;~Chien-Sheng_Wu1", "gender": "F;M;M;M;M", "homepage": ";https://people.eecs.berkeley.edu/~phillab/;;http://cmxiong.com/;http://jasonwu0731.github.io", "dblp": "304/3084;220/3590;;80/7282;180/5537", "google_scholar": "6EtJwmgAAAAJ;fR5t200AAAAJ;;vaSdahkAAAAJ;1G4GV2EAAAAJ", "or_profile": "~Lidiya_Murakhovs'ka1;~Philippe_Laban1;~Tian_Xie8;~Caiming_Xiong1;~Chien-Sheng_Wu1", "aff": "SalesForce.com;SalesForce.com;Salesforce Research;Salesforce Research;Salesforce AI", "aff_domain": "salesforce.com;salesforce.com;salesforce.com;salesforce.com;salesforce.com", "position": "Researcher;Researcher;Researcher;Research Scientist;Researcher", "bibtex": "@inproceedings{\nmurakhovs'ka2023salespeople,\ntitle={Salespeople vs SalesBot: Exploring the Role of Educational Value in Conversational Recommender Systems},\nauthor={Lidiya Murakhovs'ka and Philippe Laban and Tian Xie and Caiming Xiong and Chien-Sheng Wu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=OXQFcwKrTM}\n}", "github": "", "project": "", "reviewers": "Uvkj;LBGM;fUC4", "site": "https://openreview.net/forum?id=OXQFcwKrTM", "pdf_size": 0, "rating": "3;3;3", "confidence": "5;4;3", "excitement": "4;3;3", "reproducibility": "4;3;4", "correctness": "5;3;2", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, 
"replies_avg": 12, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;", "linkedin": "lidiyam/;;tian-xie-a13287128/;caiming-xiong-150a1417;chien-sheng-jason-wu/", "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Salesforce", "aff_unique_dep": "", "aff_unique_url": "https://www.salesforce.com", "aff_unique_abbr": "Salesforce", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "OYWeQdQiIn", "title": "Identifying Conspiracy Theories News based on Event Relation Graph", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Conspiracy theories, as a type of misinformation, are narratives that explains an event or situation in an irrational or malicious manner. While most previous work examined conspiracy theory in social media short texts, limited attention was put on such misinformation in long news documents. In this paper, we aim to identify whether a news article contains conspiracy theories. We observe that a conspiracy story can be made up by mixing uncorrelated events together, or by presenting an unusual distribution of relations between events. Achieving a contextualized understanding of events in a story is essential for detecting conspiracy theories. Thus, we propose to incorporate an event relation graph for each article, in which events are nodes, and four common types of event relations, coreference, temporal, causal, and subevent relations, are considered as edges. Then, we integrate the event relation graph into conspiracy theory identification in two ways: an event-aware language model is developed to augment the basic language model with the knowledge of events and event relations via soft labels; further, a heterogeneous graph attention network is designed to derive a graph embedding based on hard labels. 
Experiments on a large benchmark dataset show that our approach based on event relation graph improves both precision and recall of conspiracy theory identification, and generalizes well for new unseen media sources.", "keywords": "conspiracy theory;misinformation;event relation graph", "primary_area": "", "supplementary_material": "", "author": "Yuanyuan Lei;Ruihong Huang", "authorids": "~Yuanyuan_Lei1;~Ruihong_Huang1", "gender": ";F", "homepage": ";https://people.engr.tamu.edu/huangrh/index.html", "dblp": ";42/4811.html", "google_scholar": ";https://scholar.google.com.tw/citations?user=NU2aHWUAAAAJ", "or_profile": "~Yuanyuan_Lei1;~Ruihong_Huang1", "aff": ";Texas A&M University", "aff_domain": ";cse.tamu.edu", "position": ";Associate Professor", "bibtex": "@inproceedings{\nlei2023identifying,\ntitle={Identifying Conspiracy Theories News based on Event Relation Graph},\nauthor={Yuanyuan Lei and Ruihong Huang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=OYWeQdQiIn}\n}", "github": "", "project": "", "reviewers": "dygx;9T2j;nzA4", "site": "https://openreview.net/forum?id=OYWeQdQiIn", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;3", "excitement": "3;3;4", "reproducibility": "4;4;2", "correctness": "2;3;4", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";", "linkedin": ";", "aff_unique_index": "0", "aff_unique_norm": "Texas A&M University", "aff_unique_dep": "", "aff_unique_url": "https://www.tamu.edu", "aff_unique_abbr": "TAMU", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "id": "OZOrQQBDou", "title": "TCRA-LLM: Token Compression Retrieval Augmented Large Language Model for Inference Cost Reduction", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Since ChatGPT released its API for public use, the number of applications built on top of commercial large language models (LLMs) increase exponentially. One popular usage of such models is leveraging its in-context learning ability and generating responses given user queries leveraging knowledge obtained by retrieval augmentation. One problem of deploying commercial retrieval-augmented LLMs is the cost due to the additionally retrieved context that largely increases the input token size of the LLMs. To mitigate this, we propose a token compression scheme that includes two methods: summarization compression and semantic compression. The first method applies a T5-based model that is fine-tuned by datasets generated using self-instruct containing samples with varying lengths and reduce token size by doing summarization. The second method further compresses the token size by removing words with lower impact on the semantic. In order to adequately evaluate the effectiveness of the proposed methods, we propose and utilize a dataset called Food-Recommendation DB (FRDB) focusing on food recommendation for women around pregnancy period or infants. 
Our summarization compression can reduce 65\\% of the retrieval token size with further 0.3\\% improvement on the accuracy; semantic compression provides a more flexible way to trade-off the token size with performance, for which we can reduce the token size by 20\\% with only 1.6\\% of accuracy drop.", "keywords": "Large language model;Retrieval augmented;Knowledge compression;Question Answering", "primary_area": "", "supplementary_material": "", "author": "Junyi Liu;Liangzhi Li;Tong Xiang;Bowen Wang;Yiming Qian", "authorids": "~Junyi_Liu3;~Liangzhi_Li1;~Tong_Xiang1;~Bowen_Wang1;~Yiming_Qian6", "gender": "M;M;M;M;M", "homepage": "https://github.com/westwet;;;https://www.bowen-wang.com/home;", "dblp": ";169/4123;271/4360;64/4732;", "google_scholar": ";JIRw_tMAAAAJ;Qpk3D7kAAAAJ;hB4K5UMAAAAJ;gmpm0a8AAAAJ", "or_profile": "~Junyi_Liu3;~Liangzhi_Li1;~Tong_Xiang1;~Bowen_Wang1;~Yiming_Qian6", "aff": "Southwest University of Finance and Economics;Osaka University;Virginia Polytechnic Institute and State University;Osaka University;Institute of High Performance Computing, Singapore, A*STAR", "aff_domain": "swufe.edu.cn;osaka-u.ac.jp;vt.edu;osaka-u.ac.jp;ihpc.a-star.edu.sg", "position": "MS student;Assistant Professor;PhD student;PhD student;Researcher", "bibtex": "@inproceedings{\nliu2023tcrallm,\ntitle={{TCRA}-{LLM}: Token Compression Retrieval Augmented Large Language Model for Inference Cost Reduction},\nauthor={Junyi Liu and Liangzhi Li and Tong Xiang and Bowen Wang and Yiming Qian},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=OZOrQQBDou}\n}", "github": "", "project": "", "reviewers": "VUd2;9Fyp;Z9sQ", "site": "https://openreview.net/forum?id=OZOrQQBDou", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;4", "excitement": "3;4;4", "reproducibility": "5;3;3", "correctness": "4;4;3", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0002-1559-7757;0000-0002-2911-5595;0000-0002-1795-2038", "linkedin": ";;tong-xiang-50b198157/;;", "aff_unique_index": "0;1;2;1;3", "aff_unique_norm": "Southwest University of Finance and Economics;Osaka University;Virginia Tech;Institute of High Performance Computing", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.swufe.edu.cn;https://www.osaka-u.ac.jp;https://www.vt.edu;https://www.ihpc.a-star.edu.sg", "aff_unique_abbr": "SWUFE;Osaka U;VT;IHPC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2;1;3", "aff_country_unique": "China;Japan;United States;Singapore" }, { "id": "OcaifDZKkA", "title": "Active Learning for Natural Language Generation", "track": "main", "status": "Long Main", "tldr": "", "abstract": "The field of Natural Language Generation (NLG) suffers from a severe shortage of labeled data due to the extremely expensive and time-consuming process involved in manual annotation. A natural approach for coping with this problem is active learning (AL), a well-known machine learning technique for improving annotation efficiency by selectively choosing the most informative examples to label. However, while AL has been well-researched in the context of text classification, its application to NLG remains largely unexplored. 
In this paper, we present a first systematic study of active learning for NLG, considering a diverse set of tasks and multiple leading selection strategies, and harnessing a strong instruction-tuned model. \nOur results indicate that the performance of existing AL strategies is inconsistent, surpassing the baseline of random example selection in some cases but not in others. We highlight some notable differences between the classification and generation scenarios, and analyze the selection behaviors of existing AL strategies. Our findings motivate exploring novel approaches for applying AL to generation tasks.", "keywords": "active learning;NLG;generation", "primary_area": "", "supplementary_material": "", "author": "Yotam Perlitz;Ariel Gera;Michal Shmueli-Scheuer;Dafna Sheinwald;Noam Slonim;Liat Ein-Dor", "authorids": "~Yotam_Perlitz1;~Ariel_Gera1;~Michal_Shmueli-Scheuer2;~Dafna_Sheinwald1;~Noam_Slonim1;~Liat_Ein-Dor2", "gender": ";;;;M;F", "homepage": ";;;;https://researcher.watson.ibm.com/researcher/view.php?person=il-NOAMS;https://researcher.watson.ibm.com/researcher/view.php?person=il-LIATE", "dblp": "298/0781;245/8586;s/MShmueliS.html;98/6723.html;62/7001;78/3923.html", "google_scholar": "n9iywfAAAAAJ;https://scholar.google.co.il/citations?user=ESCkne8AAAAJ;reNMHusAAAAJ;;https://scholar.google.co.il/citations?user=KjvrNGMAAAAJ;V_IZ86YAAAAJ", "or_profile": "~Yotam_Perlitz1;~Ariel_Gera1;~Michal_Shmueli-Scheuer2;~Dafna_Sheinwald1;~Noam_Slonim1;~Liat_Ein-Dor2", "aff": "International Business Machines;International Business Machines;International Business Machines;IBM Research;International Business Machines;International Business Machines", "aff_domain": "ibm.com;ibm.com;ibm.com;il.ibm.com;ibm.com;ibm.com", "position": "Researcher;Researcher;Principal Researcher;Researcher;Principal Researcher;Principal Researcher", "bibtex": "@inproceedings{\nperlitz2023active,\ntitle={Active Learning for Natural Language Generation},\nauthor={Yotam Perlitz and Ariel Gera and Michal Shmueli-Scheuer and Dafna Sheinwald and Noam Slonim and Liat Ein-Dor},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=OcaifDZKkA}\n}", "github": "", "project": "", "reviewers": "74zp;i9ma;hX5h", "site": "https://openreview.net/forum?id=OcaifDZKkA", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;3;4", "excitement": "4;3;4", "reproducibility": "4;5;3", "correctness": "4;4;4", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.0, "correctness_avg": 4.0, "replies_avg": 10, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;", "linkedin": ";;;;noam-slonim-28a80b63/;https://il.linkedin.com/in/liat-ein-dor-2240215", "aff_unique_index": "0;0;0;1;0;0", "aff_unique_norm": "International Business Machines Corporation;IBM", "aff_unique_dep": ";IBM Research", "aff_unique_url": "https://www.ibm.com;https://www.ibm.com/research", "aff_unique_abbr": "IBM;IBM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "Of2xc2GVid", "title": "On the Calibration of Large Language Models and Alignment", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "As large language models attract increasing attention and find widespread application, concurrent challenges of reliability also arise at the same time. 
Confidence calibration, an effective analysis method for gauging the reliability of deep models, serves as a crucial tool for assessing and improving their reliability. However, such investigation has been comparatively underexplored. In this work, we conduct a systematic examination of the calibration of aligned language models throughout the entire construction process, including pretraining and alignment training. At each stage, we investigate how different training settings, such as parameter scales and training data, affect model calibration. To thoroughly assess model calibration, we evaluate models on three most concerned aspects: generation, factuality and understanding. Our work sheds light on whether popular LLMs are well-calibrated and how the training process influences model calibration.", "keywords": "Large Language Models;Calibration", "primary_area": "", "supplementary_material": "", "author": "Chiwei Zhu;Benfeng Xu;Quan Wang;Yongdong Zhang;Zhendong Mao", "authorids": "~Chiwei_Zhu1;~Benfeng_Xu1;~Quan_Wang7;~Yongdong_Zhang2;~Zhendong_Mao1", "gender": "M;M;F;M;", "homepage": ";;;https://imcc.ustc.edu.cn/_upload/tpl/0d/13/3347/template3347/zhangyongdong.html;", "dblp": "361/7071;268/0859;;z/YongdongZhang;", "google_scholar": "xMAxveAAAAAJ;https://scholar.google.com/citations?hl=zh-CN;l2yEbhAAAAAJ;https://scholar.google.com.hk/citations?user=hxGs4ukAAAAJ;", "or_profile": "~Chiwei_Zhu1;~Benfeng_Xu1;~Quan_Wang7;~Yongdong_Zhang2;~Zhendong_Mao1", "aff": "University of Science and Technology of China;University of Science and Technology of China;Beijing University of Posts and Telecommunications;University of Science and Technology of China;", "aff_domain": "ustc.edu.cn;ustc.edu.cn;bupt.edu.cn;ustc.edu.cn;", "position": "PhD student;PhD student;Associate Professor;Full Professor;", "bibtex": "@inproceedings{\nzhu2023on,\ntitle={On the Calibration of Large Language Models and Alignment},\nauthor={Chiwei Zhu and Benfeng Xu and Quan Wang and Yongdong Zhang and Zhendong Mao},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=Of2xc2GVid}\n}", "github": "", "project": "", "reviewers": "kGi5;ojDc;fbVH", "site": "https://openreview.net/forum?id=Of2xc2GVid", "pdf_size": 0, "rating": "4;4;4", "confidence": "5;4;4", "excitement": "2;4;4", "reproducibility": "4;4;4", "correctness": "2;4;3", "rating_avg": 4.0, "confidence_avg": 4.333333333333333, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.0, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0003-0976-1634;;0000-0003-0066-3448;", "linkedin": ";;;;", "aff_unique_index": "0;0;1;0", "aff_unique_norm": "University of Science and Technology of China;Beijing University of Posts and Telecommunications", "aff_unique_dep": ";", "aff_unique_url": "http://www.ustc.edu.cn;http://www.bupt.edu.cn/", "aff_unique_abbr": "USTC;BUPT", "aff_campus_unique_index": "1", "aff_campus_unique": ";Beijing", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "OfBAABKH5X", "title": "Beyond Denouncing Hate: Strategies for Countering Implied Biases and Stereotypes in Language", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Counterspeech, i.e., responses to counteract potential harms of hateful speech, has become an increasingly popular solution to address online hate speech without censorship. 
However, properly countering hateful language requires countering and dispelling the underlying inaccurate stereotypes implied by such language. In this work, we draw from psychology and philosophy literature to craft six psychologically inspired strategies to challenge the underlying stereotypical implications of hateful language. We first examine the convincingness of each of these strategies through a user study, and then compare their usages in both human- and machine-generated counterspeech datasets. Our results show that human-written counterspeech uses countering strategies that are more specific to the implied stereotype (e.g., counter examples to the stereotype, external factors about the stereotype\u2019s origins), whereas machine-generated counterspeech uses less specific strategies (e.g., generally denouncing the hatefulness of speech). Furthermore, machine generated counterspeech often employs strategies that humans deem less convincing compared to human-produced counterspeech. Our findings point to the importance of accounting for the underlying stereotypical implications of speech when generating counterspeech and for better machine reasoning about anti-stereotypical examples.", "keywords": "counterspeech;stereotypes", "primary_area": "", "supplementary_material": "", "author": "Jimin Mun;Emily Allaway;Akhila Yerukola;Laura Vianna;Sarah-Jane Leslie;Maarten Sap", "authorids": "~Jimin_Mun1;~Emily_Allaway1;~Akhila_Yerukola1;~Laura_Vianna1;~Sarah-Jane_Leslie1;~Maarten_Sap1", "gender": "F;F;;F;;M", "homepage": "https://jiminmun.github.io/;https://emilyallaway.github.io/;https://akhila-yerukola.github.io/;;;http://maartensap.com", "dblp": "351/5635;220/4016;249/5606.html;;;153/9519", "google_scholar": "xvq0n50AAAAJ;z27qu0sAAAAJ;Y7j60UQAAAAJ;;;gFN4QUYAAAAJ", "or_profile": "~Jimin_Mun1;~Emily_Allaway1;~Akhila_Yerukola1;~Laura_Vianna1;~Sarah-Jane_Leslie1;~Maarten_Sap1", "aff": "Carnegie Mellon University;Columbia University;Carnegie Mellon University;University of Washington;;Carnegie Mellon University", "aff_domain": "andrew.cmu.edu;cs.columbia.edu;cmu.edu;washington.edu;;cmu.edu", "position": "PhD student;PhD student;PhD student;PhD student;;Assistant Professor", "bibtex": "@inproceedings{\nmun2023beyond,\ntitle={Beyond Denouncing Hate: Strategies for Countering Implied Biases and Stereotypes in Language},\nauthor={Jimin Mun and Emily Allaway and Akhila Yerukola and Laura Vianna and Sarah-Jane Leslie and Maarten Sap},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=OfBAABKH5X}\n}", "github": "", "project": "", "reviewers": "ds7j;PBEi;L28W", "site": "https://openreview.net/forum?id=OfBAABKH5X", "pdf_size": 0, "rating": "3;3;3", "confidence": "5;4;4", "excitement": "3;4;4", "reproducibility": "4;4;2", "correctness": "2;3;4", "rating_avg": 3.0, "confidence_avg": 4.333333333333333, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0009-0007-7231-4550;;;;;", "linkedin": "jimin-mun;;akhilayerukola;viannalaura/;;", "aff_unique_index": "0;1;0;2;0", "aff_unique_norm": "Carnegie Mellon University;Columbia University;University of Washington", "aff_unique_dep": ";;", "aff_unique_url": "https://www.cmu.edu;https://www.columbia.edu;https://www.washington.edu", "aff_unique_abbr": "CMU;Columbia;UW", "aff_campus_unique_index": "", 
"aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "OgK0kMz5Va", "title": "Prompting Scientific Names for Zero-Shot Species Recognition", "track": "main", "status": "Short Main", "tldr": "", "abstract": "Trained on web-scale image-text pairs, Vision-Language Models (VLMs) such as CLIP can recognize images of common objects in a zero-shot fashion. However, it is underexplored how to use CLIP for zero-shot recognition of highly specialized concepts, e.g., species of birds, plants, and animals, for which their scientific names are written in Latin or Greek. Indeed, CLIP performs poorly for zero-shot species recognition with prompts that use scientific names, e.g., ``a photo of Lepus Timidus'' (which is a scientific name in Latin). This is because these names are usually not included in CLIP's training set. To improve performance, we explore using large-language models (LLMs) to generate descriptions (e.g., of species color and shape) and additionally use them in prompts. However, this method improves only marginally. Instead, we are motivated to translate scientific names (e.g., Lepus Timidus) to common English names (e.g., {\\tt mountain hare}) and use such in the prompts. We find that common names are more likely to be included in CLIP's training set, and prompting them achieves 2$\\sim$5 times higher accuracy on benchmarking datasets of fine-grained species recognition.", "keywords": "vision-language model;fine-grained recognition;zero-shot recognition;prompt engineering;species recognition", "primary_area": "", "supplementary_material": "", "author": "Shubham Parashar;Zhiqiu Lin;Yanan Li;Shu Kong", "authorids": "~Shubham_Parashar1;~Zhiqiu_Lin1;~Yanan_Li4;~Shu_Kong1", "gender": "M;M;F;M", "homepage": "https://www.linkedin.com/in/shubhamprshr/;https://linzhiqiu.github.io;https://yananlix1.github.io/;https://aimerykong.github.io/", "dblp": "359/0833;230/4394;61/7498-2.html;26/11141", "google_scholar": "QOMZiUMAAAAJ;https://scholar.google.com/citations?hl=en;9cTdt_kAAAAJ;sm9FdLoAAAAJ", "or_profile": "~Shubham_Parashar1;~Zhiqiu_Lin1;~Yanan_Li4;~Shu_Kong1", "aff": "Texas A&M University - College Station;Carnegie Mellon University;Zhejiang Lab;Texas A&M University - College Station", "aff_domain": "tamu.edu;cmu.edu;zhejianglab.com;tamu.edu", "position": "MS student;PhD student;Researcher;Assistant Professor", "bibtex": "@inproceedings{\nparashar2023prompting,\ntitle={Prompting Scientific Names for Zero-Shot Species Recognition},\nauthor={Shubham Parashar and Zhiqiu Lin and Yanan Li and Shu Kong},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=OgK0kMz5Va}\n}", "github": "", "project": "", "reviewers": "ik9i;tzwz;Duti", "site": "https://openreview.net/forum?id=OgK0kMz5Va", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;5;5", "excitement": "3;3;4", "reproducibility": "4;4;5", "correctness": "3;4;4", "rating_avg": 5.0, "confidence_avg": 4.666666666666667, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.333333333333333, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0001-8482-7221;0000-0002-1362-5937", "linkedin": ";zhiqiu-lin-b49ba7126/;;aimerykong/", "aff_unique_index": "0;1;2;0", "aff_unique_norm": "Texas A&M University;Carnegie Mellon University;Zhejiang Lab", "aff_unique_dep": ";;", "aff_unique_url": 
"https://www.tamu.edu;https://www.cmu.edu;http://www.zhejianglab.com", "aff_unique_abbr": "TAMU;CMU;", "aff_campus_unique_index": "0;0", "aff_campus_unique": "College Station;", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "United States;China" }, { "id": "OhZLO1yunf", "title": "NEWTON: Are Large Language Models Capable of Physical Reasoning?", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Large Language Models (LLMs), through their contextualized representations, have been empirically proven to encapsulate syntactic, semantic, word sense, and common-sense knowledge. However, there has been limited exploration of their physical reasoning abilities, specifically concerning the crucial attributes for comprehending everyday objects. To address this gap, we introduce NEWTON, a repository and benchmark for evaluating the physics reasoning skills of LLMs. Further, to enable domain-specific adaptation of this benchmark, we present a pipeline to enable researchers to generate a variant of this benchmark that has been customized to the objects and attributes relevant for their application. The NEWTON repository comprises a collection of 2800 object-attribute pairs, providing the foundation for generating infinite-scale assessment templates. The NEWTON benchmark consists of 160K QA questions, curated using the NEWTON repository to investigate the physical reasoning capabilities of several mainstream language models across foundational, explicit, and implicit reasoning tasks. Through extensive empirical analysis, our results highlight the capabilities of LLMs for physical reasoning. We find that LLMs like GPT-4 demonstrate strong reasoning capabilities in scenario-based tasks but exhibit less consistency in object-attribute reasoning compared to humans (50\\% vs. 84\\%). Furthermore, the NEWTON platform demonstrates its potential for evaluating and enhancing language models, paving the way for their integration into physically grounded settings, such as robotic manipulation. 
Project site: https://newtonreasoning.github.io", "keywords": "physical reasoning;robotics;object-centric;evaluation;benchmark;reasoning", "primary_area": "", "supplementary_material": "", "author": "Yi Ru Wang;Jiafei Duan;Dieter Fox;Siddhartha Srinivasa", "authorids": "~Yi_Ru_Wang1;~Jiafei_Duan1;~Dieter_Fox1;~Siddhartha_Srinivasa1", "gender": ";M;M;M", "homepage": ";https://duanjiafei.com/;https://homes.cs.washington.edu/~fox/;https://goodrobot.ai", "dblp": "302/0208;275/9973.html;f/DieterFox;", "google_scholar": "OTL-u30AAAAJ;d1WCSJIAAAAJ;DqXsbPAAAAAJ;https://scholar.google.com.tw/citations?user=RCi98EAAAAAJ", "or_profile": "~Yi_Ru_Wang1;~Jiafei_Duan1;~Dieter_Fox1;~Siddhartha_Srinivasa1", "aff": "University of Washington;University of Washington;Department of Computer Science;University of Washington", "aff_domain": "washington.edu;uw.edu;cs.washington.edu;washington.edu", "position": "PhD student;PhD student;Full Professor;Full Professor", "bibtex": "@inproceedings{\nwang2023newton,\ntitle={{NEWTON}: Are Large Language Models Capable of Physical Reasoning?},\nauthor={Yi Ru Wang and Jiafei Duan and Dieter Fox and Siddhartha Srinivasa},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=OhZLO1yunf}\n}", "github": "", "project": "", "reviewers": "G7Xo;EC6n;kDhf", "site": "https://openreview.net/forum?id=OhZLO1yunf", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;3;4", "excitement": "3;3;4", "reproducibility": "4;2;5", "correctness": "3;2;5", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 10, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;", "linkedin": "yi-ru-helen-wang/;jiafei-duan-a69b11112/;;", "aff_unique_index": "0;0;1;0", "aff_unique_norm": "University of Washington;Unknown Institution", "aff_unique_dep": ";Department of Computer Science", "aff_unique_url": "https://www.washington.edu;", "aff_unique_abbr": "UW;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States;" }, { "id": "OkQD6RMUK5", "title": "Label Words are Anchors: An Information Flow Perspective for Understanding In-Context Learning", "track": "main", "status": "Long Main", "tldr": "", "abstract": "In-context learning (ICL) emerges as a promising capability of large language models (LLMs) by providing them with demonstration examples to perform diverse tasks. However, the underlying mechanism of how LLMs learn from the provided context remains under-explored. In this paper, we investigate the working mechanism of ICL through an information flow lens. Our findings reveal that label words in the demonstration examples function as anchors: (1) semantic information aggregates into label word representations during the shallow computation layers' processing; (2) the consolidated information in label words serves as a reference for LLMs' final predictions. Based on these insights, we introduce an anchor re-weighting method to improve ICL performance, a demonstration compression technique to expedite inference, and an analysis framework for diagnosing ICL errors in GPT2-XL. 
The promising applications of our findings again validate the uncovered ICL working mechanism and pave the way for future studies.", "keywords": "in-context learning;label words;anchors;large language models", "primary_area": "", "supplementary_material": "", "author": "Lean Wang;Lei Li;Damai Dai;Deli Chen;Hao Zhou;Fandong Meng;Jie Zhou;Xu Sun", "authorids": "~Lean_Wang1;~Lei_Li14;~Damai_Dai1;~Deli_Chen1;~Hao_Zhou8;~Fandong_Meng3;~Jie_Zhou8;~Xu_Sun1", "gender": "M;M;M;M;M;M;M;M", "homepage": "https://leanwang326.github.io/.github.io/;;;;http://fandongmeng.github.io/;;https://xusun.org/;https://lilei-nlp.github.io", "dblp": ";199/2097;50/2637;;117/4056.html;00/5012-16;37/1971-1;13/7007-39", "google_scholar": "jgyRjQQAAAAJ;8b-ysf0NWVoC;8YpGRDcAAAAJ;q3WaozcAAAAJ;sA8U4S0AAAAJ;https://scholar.google.com.hk/citations?user=OijxQCMAAAAJ;https://scholar.google.com/citations?hl=en;MeV4GGsAAAAJ", "or_profile": "~Lean_Wang1;~Damai_Dai1;~Deli_Chen1;~Hao_Zhou8;~Fandong_Meng3;~Jie_Zhou8;~Xu_Sun1;~Tobias_Lee1", "aff": "Peking University;Peking University;WeChat AI;Tencent;WeChat AI, Tencent Inc.;WeChat AI, Tencent Inc.;Peking University;Peking University", "aff_domain": "pku.edu.cn;pku.edu.cn;tencent.com;tencent.com;tencent.com;tencent.com;pku.edu.cn;pku.edu.cn", "position": "Undergrad student;PhD student;Researcher;Researcher;Principal Researcher;Principal Researcher;Associate Professor;MS student", "bibtex": "@inproceedings{\nwang2023label,\ntitle={Label Words are Anchors: An Information Flow Perspective for Understanding In-Context Learning},\nauthor={Lean Wang and Lei Li and Damai Dai and Deli Chen and Hao Zhou and Fandong Meng and Jie Zhou and Xu Sun},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=OkQD6RMUK5}\n}", "github": "", "project": "", "reviewers": "ghKz;XFQJ;5cZV", "site": "https://openreview.net/forum?id=OkQD6RMUK5", "pdf_size": 0, "rating": "5;5;5", "confidence": "3;3;4", "excitement": "4;4;5", "reproducibility": "4;4;5", "correctness": "5;4;5", "rating_avg": 5.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 4.333333333333333, "reproducibility_avg": 4.333333333333333, "correctness_avg": 4.666666666666667, "replies_avg": 11, "authors#_avg": 8, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;0000-0002-8158-2377;0000-0002-5899-5165;;0009-0008-6984-5104", "linkedin": ";;;;;;;", "aff_unique_index": "0;0;1;2;2;2;0;0", "aff_unique_norm": "Peking University;WeChat;Tencent", "aff_unique_dep": ";WeChat AI;Tencent Holdings Limited", "aff_unique_url": "http://www.pku.edu.cn;https://www.wechat.com;https://www.tencent.com", "aff_unique_abbr": "Peking U;WeChat AI;Tencent", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "Ov6OZ2TFKI", "title": "A Video Is Worth 4096 Tokens: Verbalize Story Videos To Understand Them In Zero Shot", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Multimedia content, such as advertisements and story videos, exhibit a rich blend of creativity and multiple modalities. They incorporate elements like text, visuals, audio, and storytelling techniques, employing devices like emotions, symbolism, and slogans to convey meaning. There is a dearth of large annotated training datasets in the multimedia domain hindering the development of supervised learning models with satisfactory performance for real-world applications. 
On the other hand, the rise of large language models (LLMs) has witnessed remarkable zero-shot performance in various natural language processing (NLP) tasks, such as emotion classification, question-answering, and topic classification. To leverage such advanced techniques to bridge this performance gap in multimedia understanding, we propose verbalizing long videos to generate their descriptions in natural language, followed by performing video-understanding tasks on the generated story as opposed to the original video. Through extensive experiments on fifteen video-understanding tasks, we demonstrate that our method, despite being zero-shot, achieves significantly better results than supervised baselines for video understanding. Furthermore, to alleviate a lack of story understanding benchmarks, we publicly release the first dataset on a crucial task in computational social science on persuasion strategy identification.", "keywords": "video understanding;large language models;persuasion strategies;zero-shot;long video understanding", "primary_area": "", "supplementary_material": "", "author": "Aanisha Bhattacharyya;Yaman K Singla;Balaji Krishnamurthy;Rajiv Ratn Shah;Changyou Chen", "authorids": "~Aanisha_Bhattacharyya2;~Yaman_K_Singla1;~Balaji_Krishnamurthy1;~Rajiv_Ratn_Shah1;~Changyou_Chen1", "gender": "F;M;M;M;M", "homepage": "https://www.linkedin.com/in/aanisha-bhattacharyya/;;https://iiitd.ac.in/rajivratn;https://www.cse.buffalo.edu/~changyou/;https://sites.google.com/view/yaman-kumar/", "dblp": ";79/1076;134/3502;65/2802;239/5601", "google_scholar": "PkCeGdoAAAAJ;n8iUBg8AAAAJ;https://scholar.google.com.sg/citations?hl=en;LtEcKBcAAAAJ;https://scholar.google.co.in/citations?hl=en", "or_profile": "~Aanisha_Bhattacharyya2;~Balaji_Krishnamurthy1;~Rajiv_Ratn_Shah1;~Changyou_Chen1;~Yaman_Kumar1", "aff": "Adobe Systems;Adobe Systems;Indraprastha Institute of Information Technology, Delhi;State University of New York, Buffalo;Adobe ", "aff_domain": "adobe.com;adobe.com;iiitd.ac.in;buffalo.edu;adobe.com", "position": "Researcher;Principal Scientist;Assistant Professor;Assistant Professor;Researcher", "bibtex": "@inproceedings{\nbhattacharyya2023a,\ntitle={A Video Is Worth 4096 Tokens: Verbalize Story Videos To Understand Them In Zero Shot},\nauthor={Aanisha Bhattacharyya and Yaman K Singla and Balaji Krishnamurthy and Rajiv Ratn Shah and Changyou Chen},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=Ov6OZ2TFKI}\n}", "github": "", "project": "", "reviewers": "Rk8R;5637;Jhzm", "site": "https://openreview.net/forum?id=Ov6OZ2TFKI", "pdf_size": 0, "rating": "5;5;5", "confidence": "3;4;4", "excitement": "3;4;4", "reproducibility": "3;4;4", "correctness": "3;4;4", "rating_avg": 5.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-0366-2427;;;0000-0001-7880-8219", "linkedin": "aanisha-bhattacharyya/;balaji-krishnamurthy-4241695/;;;yaman-kumar/", "aff_unique_index": "0;0;1;2;0", "aff_unique_norm": "Adobe;Indraprastha Institute of Information Technology;State University of New York at Buffalo", "aff_unique_dep": "Adobe Systems Incorporated;;", "aff_unique_url": "https://www.adobe.com;http://www.iiitd.ac.in;https://www.buffalo.edu", "aff_unique_abbr": "Adobe;IIIT-D;SUNY Buffalo", 
"aff_campus_unique_index": "1;2", "aff_campus_unique": ";Delhi;Buffalo", "aff_country_unique_index": "0;0;1;0;0", "aff_country_unique": "United States;India" }, { "id": "OwWIl6gb1z", "title": "CRoW: Benchmarking Commonsense Reasoning in Real-World Tasks", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Recent efforts in natural language processing (NLP) commonsense reasoning research have yielded a considerable number of new datasets and benchmarks. However, most of these datasets formulate commonsense reasoning challenges in artificial scenarios that are not reflective of the tasks which real-world NLP systems are designed to solve. In this work, we present CRoW, a manually-curated, multi-task benchmark that evaluates the ability of models to apply commonsense reasoning in the context of six real-world NLP tasks. CRoW is constructed using a multi-stage data collection pipeline that rewrites examples from existing datasets using commonsense-violating perturbations. We use CRoW to study how NLP systems perform across different dimensions of commonsense knowledge, such as physical, temporal, and social reasoning. We find a significant performance gap when NLP systems are evaluated on CRoW compared to humans, showcasing that commonsense reasoning is far from being solved in real-world task settings. We make our dataset and leaderboard available to the research community.", "keywords": "commonsense reasoning;benchmark;real-world task", "primary_area": "", "supplementary_material": "", "author": "Mete Ismayilzada;Debjit Paul;Syrielle Montariol;Mor Geva;Antoine Bosselut", "authorids": "~Mete_Ismayilzada1;~Debjit_Paul2;~Syrielle_Montariol1;~Mor_Geva1;~Antoine_Bosselut1", "gender": "M;M;F;F;M", "homepage": "https://mete.is;https://debjitpaul.github.io/;https://smontariol.github.io/;https://mega002.github.io/;https://atcbosselut.github.io/", "dblp": "334/0281.html;238/1467.html;245/2618;203/9159;184/3742", "google_scholar": ";https://scholar.google.de/citations?user=jJ8MjZMAAAAJ;oM63nTMAAAAJ;https://scholar.google.co.il/citations?user=GxpQbSkAAAAJ;XD9hkJwAAAAJ", "or_profile": "~Mete_Ismayilzada1;~Debjit_Paul2;~Syrielle_Montariol1;~Mor_Geva1;~Antoine_Bosselut1", "aff": "EPFL - EPF Lausanne;EPFL - EPF Lausanne;EPFL - EPF Lausanne;Google DeepMind;Swiss Federal Institute of Technology Lausanne", "aff_domain": "epfl.ch;epfl.ch;epfl.ch;google.com;epfl.ch", "position": "MS student;Postdoc;Postdoc;Postdoc;Assistant Professor", "bibtex": "@inproceedings{\nismayilzada2023crow,\ntitle={{CR}oW: Benchmarking Commonsense Reasoning in Real-World Tasks},\nauthor={Mete Ismayilzada and Debjit Paul and Syrielle Montariol and Mor Geva and Antoine Bosselut},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=OwWIl6gb1z}\n}", "github": "", "project": "", "reviewers": "H1md;x28y;1q4f", "site": "https://openreview.net/forum?id=OwWIl6gb1z", "pdf_size": 0, "rating": "2;2;2", "confidence": "4;4;3", "excitement": "5;4;4", "reproducibility": "3;4;0", "correctness": "3;3;3", "rating_avg": 2.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 4.333333333333333, "reproducibility_avg": 2.3333333333333335, "correctness_avg": 3.0, "replies_avg": 10, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0009-0004-4029-6616;;;", "linkedin": "mismayilzada/;debjit-paul/;;morgeva/;", "aff_unique_index": "0;0;0;1;2", "aff_unique_norm": "EPFL;Google;Swiss Federal Institute of Technology 
Lausanne", "aff_unique_dep": ";Google DeepMind;", "aff_unique_url": "https://www.epfl.ch;https://deepmind.com;https://www.epfl.ch", "aff_unique_abbr": "EPFL;DeepMind;EPFL", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Lausanne;", "aff_country_unique_index": "0;0;0;1;0", "aff_country_unique": "Switzerland;United Kingdom" }, { "id": "OwxjgsX68V", "title": "CASSI: Contextual and Semantic Structure-based Interpolation Augmentation for Low-Resource NER", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "While text augmentation methods have been successful in improving performance in the low-resource setting, they suffer from annotation corruption for a token-level task like NER. Moreover, existing methods cannot reliably add context diversity to the dataset, which has been shown to be crucial for low-resource NER. In this work, we propose Contextual and Semantic Structure-based Interpolation (CASSI), a novel augmentation scheme that generates high-quality contextually diverse augmentations while avoiding annotation corruption by structurally combining a pair of semantically similar sentences to generate a new sentence while maintaining semantic correctness and fluency. To accomplish this, we generate candidate augmentations by performing multiple dependency parsing-based exchanges in a pair of semantically similar sentences that are filtered via scoring with a pretrained Masked Language Model and a metric to promote specificity. Experiments show that CASSI consistently outperforms existing methods at multiple low resource levels, in multiple languages, and for noisy and clean text.", "keywords": "Named Entity Recognition;Text Augmentation;Low-Resource;Structure-Based Augmentation;Language Model;Context Diversity", "primary_area": "", "supplementary_material": "", "author": "Tanmay Surana;Thi-Nga Ho;KYAW ZIN TUN;EngSiong Chng", "authorids": "~Tanmay_Surana1;~Thi-Nga_Ho1;~KYAW_ZIN_TUN1;~EngSiong_Chng1", "gender": ";F;M;M", "homepage": ";https://www.linkedin.com/in/ngaho;;https://personal.ntu.edu.sg/aseschng/intro1.html", "dblp": "329/5302;;226/1900;c/ChngEngSiong", "google_scholar": ";;;https://scholar.google.com.tw/citations?user=FJodrCcAAAAJ", "or_profile": "~Tanmay_Surana1;~Thi-Nga_Ho1;~KYAW_ZIN_TUN1;~EngSiong_Chng1", "aff": "Nanyang Technological University;Nanyang Technological University;National Technological University;Nanyang Technological University", "aff_domain": "ntu.edu.sg;ntu.edu.sg;ntu.edu;ntu.edu.sg", "position": "MS student;Researcher;Researcher;Associate Professor", "bibtex": "@inproceedings{\nsurana2023cassi,\ntitle={{CASSI}: Contextual and Semantic Structure-based Interpolation Augmentation for Low-Resource {NER}},\nauthor={Tanmay Surana and Thi-Nga Ho and KYAW ZIN TUN and EngSiong Chng},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=OwxjgsX68V}\n}", "github": "", "project": "", "reviewers": "sAiY;vXhU;JhLS", "site": "https://openreview.net/forum?id=OwxjgsX68V", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;3;4", "excitement": "4;3;3", "reproducibility": "4;4;5", "correctness": "4;4;4", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.333333333333333, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;", "linkedin": "tanmay-surana-118a79192?original_referer=;;;", "aff_unique_index": 
"0;0;1;0", "aff_unique_norm": "Nanyang Technological University;National Technological University", "aff_unique_dep": ";", "aff_unique_url": "https://www.ntu.edu.sg;https://www.ntu.edu", "aff_unique_abbr": "NTU;NTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "Singapore;United States" }, { "id": "Ox0OoyLass", "title": "How Well Do Text Embedding Models Understand Syntax?", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Text embedding models have significantly contributed to advancements in natural language processing by adeptly capturing semantic properties of textual data. However, the ability of these models to generalize across a wide range of syntactic contexts remains under-explored. In this paper, we first develop an evaluation set, named SR, to scrutinize the capability for syntax understanding of text embedding models from two crucial syntactic aspects: Structural heuristics, and Relational understanding among concepts, as revealed by the performance gaps in previous studies. Our findings reveal that existing text embedding models have not sufficiently addressed these syntactic understanding challenges, and such ineffectiveness becomes even more apparent when evaluated against existing benchmark datasets. Furthermore, we conduct rigorous analysis to unearth factors that lead to such limitations and examine why previous evaluations fail to detect such ineffectiveness. Lastly, we propose strategies to augment the generalization ability of text embedding models in diverse syntactic scenarios. This study serves to highlight the hurdles associated with syntactic generalization and provides pragmatic guidance for boosting model performance across varied syntactic contexts.", "keywords": "sentence embedding;compositional understanding", "primary_area": "", "supplementary_material": "", "author": "Yan Zhang;Zhaopeng Feng;Zhiyang Teng;Zuozhu Liu;Haizhou Li", "authorids": "~Yan_Zhang12;~Zhaopeng_Feng1;~Zhiyang_Teng1;~Zuozhu_Liu1;~Haizhou_Li3", "gender": "M;M;M;M;M", "homepage": ";;https://zeeeyang.github.io;https://person.zju.edu.cn/en/lzz;https://colips.org/~eleliha/", "dblp": ";;136/8660;173/9297;36/4118", "google_scholar": "-oIMVnUAAAAJ;;9wOJrf8AAAAJ;h602wLIAAAAJ;https://scholar.google.com.sg/citations?user=z8_x7C8AAAAJ", "or_profile": "~Yan_Zhang12;~Zhaopeng_Feng1;~Zhiyang_Teng1;~Zuozhu_Liu1;~Haizhou_Li3", "aff": "National University of Singapore;Harbin Institute of Technology (Shenzhen);Nanyang Technological University;Zhejiang University;National University of Singapore", "aff_domain": "nus.edu.sg;hit.edu.cn;ntu.edu.sg;zju.edu.cn;nus.edu.sg", "position": "Researcher;Undergrad student;Researcher;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nzhang2023how,\ntitle={How Well Do Text Embedding Models Understand Syntax?},\nauthor={Yan Zhang and Zhaopeng Feng and Zhiyang Teng and Zuozhu Liu and Haizhou Li},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=Ox0OoyLass}\n}", "github": "", "project": "", "reviewers": "neEv;H1cQ;QKSJ", "site": "https://openreview.net/forum?id=Ox0OoyLass", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;4", "excitement": "3;3;3", "reproducibility": "4;3;4", "correctness": "3;3;3", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 5, 
"corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-6396-3184;;0000-0002-7816-502X;0000-0001-9158-9401", "linkedin": "zhang-yan-1001940/;;;;haizhou-li-4ba74b6/", "aff_unique_index": "0;1;2;3;0", "aff_unique_norm": "National University of Singapore;Harbin Institute of Technology;Nanyang Technological University;Zhejiang University", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.nus.edu.sg;http://en.hhit.edu.cn/;https://www.ntu.edu.sg;https://www.zju.edu.cn", "aff_unique_abbr": "NUS;HIT;NTU;ZJU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Shenzhen", "aff_country_unique_index": "0;1;0;1;0", "aff_country_unique": "Singapore;China" }, { "id": "OxoP1qFotz", "title": "Quality > Quantity: Synthetic Corpora from Foundation Models for Closed-Domain Extractive Question Answering", "track": "main", "status": "Reject", "tldr": "", "abstract": "Domain adaptation, the process of training a model in one domain and applying it to another, has been extensively explored in machine learning. While training a domain-specific foundation model (FM) from scratch is an option, recent methods have focused on adapting pre-trained FMs for domain-specific tasks. However, our experiments reveal that either approach does not consistently achieve state-of-the-art (SOTA) results in the target domain. In this work, we study extractive question answering within closed domains and introduce the concept of targeted pre-training. This involves determining and generating relevant data to further pre-train our models, as opposed to the conventional philosophy of utilizing domain-specific FMs trained on a wide range of data. Our proposed framework uses Galactica to generate synthetic, ``targeted'' corpora that align with specific writing styles and topics, such as research papers and radiology reports. This process can be viewed as a form of knowledge distillation. We apply our method to two biomedical extractive question answering datasets, COVID-QA and RadQA, achieving a new benchmark on the former and demonstrating overall improvements on the latter. 
Code available upon publication.", "keywords": "Closed Domain Question Answering;Prompt Engineering;Foundational Models;Domain Adaptation", "primary_area": "", "supplementary_material": "", "author": "Saptarshi Sengupta;Connor Heaton;Shreya Ghosh;Preslav Nakov;Prasenjit Mitra", "authorids": "~Saptarshi_Sengupta1;~Connor_Heaton1;~Shreya_Ghosh3;~Preslav_Nakov2;~Prasenjit_Mitra1", "gender": "M;;F;M;M", "homepage": ";;;https://mbzuai.ac.ae/study/faculty/preslav-nakov/;http://www.personal.psu.edu/pum10/", "dblp": "211/7768;274/0769.html;;https://dblp.uni-trier.de/pid/19/1947;19/3308", "google_scholar": "L-fgN8MAAAAJ;;https://scholar.google.co.in/citations?user=a5OKo7wAAAAJ;DfXsKZ4AAAAJ;8PbgiPkAAAAJ", "or_profile": "~Saptarshi_Sengupta1;~Connor_Heaton1;~Shreya_Ghosh3;~Preslav_Nakov2;~Prasenjit_Mitra1", "aff": "Pennsylvania State University;Pennsylvania State University;;Mohamed bin Zayed University of Artificial Intelligence;Pennsylvania State University", "aff_domain": "psu.edu;psu.edu;;mbzuai.ac.ae;psu.edu", "position": "PhD student;PhD student;;Full Professor;Full Professor", "bibtex": "@misc{\nsengupta2023quality,\ntitle={Quality \\ensuremath{>} Quantity: Synthetic Corpora from Foundation Models for Closed-Domain Extractive Question Answering},\nauthor={Saptarshi Sengupta and Connor Heaton and Shreya Ghosh and Preslav Nakov and Prasenjit Mitra},\nyear={2023},\nurl={https://openreview.net/forum?id=OxoP1qFotz}\n}", "github": "", "project": "", "reviewers": "grqj;KEWd;TY4M;KF6E", "site": "https://openreview.net/forum?id=OxoP1qFotz", "pdf_size": 0, "rating": "2;2;2;2", "confidence": "3;3;4;4", "excitement": "4;2;4;3", "reproducibility": "3;3;4;4", "correctness": "3;3;4;3", "rating_avg": 2.0, "confidence_avg": 3.5, "excitement_avg": 3.25, "reproducibility_avg": 3.5, "correctness_avg": 3.25, "replies_avg": 12, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0000-0002-3600-1510;", "linkedin": ";;;preslavnakov/;prasenjit-mitra-962471/", "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Pennsylvania State University;Mohamed bin Zayed University of Artificial Intelligence", "aff_unique_dep": ";", "aff_unique_url": "https://www.psu.edu;https://mbzuai.ac.ae", "aff_unique_abbr": "PSU;MBZUAI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "United States;United Arab Emirates" }, { "id": "P04rLpllH7", "title": "A Black-Box Attack on Code Models via Representation Nearest Neighbor Search", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Existing methods for generating adversarial code examples face several challenges: limited availability of substitute variables, high verification costs for these substitutes, and the creation of adversarial samples with noticeable perturbations. To address these concerns, our proposed approach, RNNS, uses a search seed based on historical attacks to find potential adversarial substitutes. Rather than directly using the discrete substitutes, they are mapped to a continuous vector space using a pre-trained variable name encoder. Based on the vector representation, RNNS predicts and selects better substitutes for attacks. We evaluated the performance of RNNS across\nsix coding tasks encompassing three programming languages: Java, Python, and C. We employed three pre-trained code models (CodeBERT, GraphCodeBERT, and CodeT5) that resulted in a cumulative total of 18 victim models.
The results demonstrate that RNNS outperforms baselines in terms of ASR and QT. Furthermore, the perturbation of adversarial examples introduced by RNNS is smaller compared to the baselines in terms of the number of replaced variables and the change in variable length. Lastly, our experiments indicate that RNNS is efficient in attacking defended models and can be employed for adversarial training.", "keywords": "black box attacks;code models;robustness;code adversarial example", "primary_area": "", "supplementary_material": "", "author": "Jie Zhang;Wei Ma;Qiang Hu;Shangqing Liu;Xiaofei Xie;YVES LE TRAON;Yang Liu", "authorids": "~Jie_Zhang29;~Wei_Ma1;~Qiang_Hu3;~Shangqing_Liu1;~Xiaofei_Xie2;~YVES_LE_TRAON1;~Yang_Liu36", "gender": ";M;;;M;M;M", "homepage": "https://superzhang1984.github.io;https://marvinmw.github.io/weima/;https://wellido.github.io/;https://shangqing-liu.github.io/;http://xiaofeixie.bitbucket.io/;https://wwwfr.uni.lu/snt/people/yves_le_traon;https://personal.ntu.edu.sg/yangliu/", "dblp": ";;;207/8653;127/0713;95/5206;51/3710-3", "google_scholar": ";ZubTNs0AAAAJ;UTWWmz4AAAAJ;Rl0-phkAAAAJ;FfcZfJgAAAAJ;DmGlmNEAAAAJ;https://scholar.google.com.sg/citations?hl=en", "or_profile": "~Jie_Zhang29;~Wei_Ma1;~Qiang_Hu3;~Shangqing_Liu1;~Xiaofei_Xie2;~YVES_LE_TRAON1;~Yang_Liu36", "aff": ";Nanyang Technological University;University of Luxembourg;Nanyang Technological University;Singapore Management University;;Nanyang Technological University", "aff_domain": ";ntu.edu.sg;uni.lu;ntu.edu.sg;smu.edu.sg;;ntu.edu.sg", "position": ";Researcher;PhD student;PhD student;Assistant Professor;;Full Professor", "bibtex": "@inproceedings{\nzhang2023a,\ntitle={A Black-Box Attack on Code Models via Representation Nearest Neighbor Search},\nauthor={Jie Zhang and Wei Ma and Qiang Hu and Shangqing Liu and Xiaofei Xie and YVES LE TRAON and Yang Liu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=P04rLpllH7}\n}", "github": "", "project": "", "reviewers": "fokm;N9F5;Wyoa", "site": "https://openreview.net/forum?id=P04rLpllH7", "pdf_size": 0, "rating": "3;3;3", "confidence": "2;4;3", "excitement": "3;3;3", "reproducibility": "3;3;3", "correctness": "3;3;3", "rating_avg": 3.0, "confidence_avg": 3.0, "excitement_avg": 3.0, "reproducibility_avg": 3.0, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;0000-0002-1288-6502;;0000-0001-7300-9215", "linkedin": ";;;;;;", "aff_unique_index": "0;1;0;2;0", "aff_unique_norm": "Nanyang Technological University;University of Luxembourg;Singapore Management University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ntu.edu.sg;https://wwwen.uniluxembourg.lu;https://www.smu.edu.sg", "aff_unique_abbr": "NTU;Uni Lu;SMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;0", "aff_country_unique": "Singapore;Luxembourg" }, { "id": "P2jDML1Ub6", "title": "Are Personalized Stochastic Parrots More Dangerous? Evaluating Persona Biases in Dialogue Systems", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Recent advancements in Large Language Models empower them to follow freeform instructions, including imitating generic or specific demographic personas in conversations. 
We define generic personas to represent demographic groups, such as \u201can Asian person\u201d, whereas specific personas may take the form of specific popular Asian names like \u201cYumi\u201d. While the adoption of personas enriches user experiences by making dialogue systems more engaging and approachable, it also casts a shadow of potential risk by exacerbating social biases within model responses, thereby causing societal harm through interactions with users. In this paper, we systematically study \u201cpersona biases\u201d, which we define to be the sensitivity of dialogue models\u2019 harmful behaviors contingent upon the personas they adopt. We categorize persona biases into biases in harmful expression and harmful agreement, and establish a comprehensive evaluation framework to measure persona biases in five aspects: Offensiveness, Toxic Continuation, Regard, Stereotype Agreement, and Toxic Agreement. Additionally, we propose to investigate persona biases by experimenting with UNIVERSALPERSONA, a systematically constructed persona dataset encompassing various types of both generic and specific model personas. Through benchmarking on four different models- including Blender, ChatGPT, Alpaca, and Vicuna- our study uncovers significant persona biases in dialogue systems. Our findings also underscore the pressing need to revisit the use of personas in dialogue agents to ensure safe application.", "keywords": "dialogue model persona;fairness;evaluation", "primary_area": "", "supplementary_material": "", "author": "Yixin Wan;Jieyu Zhao;Aman Chadha;Nanyun Peng;Kai-Wei Chang", "authorids": "~Yixin_Wan1;~Jieyu_Zhao1;~Aman_Chadha1;~Nanyun_Peng1;~Kai-Wei_Chang1", "gender": "F;F;M;F;M", "homepage": "https://scholar.google.com/citations?user=hZPIICQAAAAJ&hl=en;http://jyzhao.net/;https://aman.ai;https://violetpeng.github.io/;http://kwchang.net", "dblp": "320/5376;59/2379-1;55/10360;117/4036;18/2428", "google_scholar": "hZPIICQAAAAJ;9VaGBCQAAAAJ;gPGQuBQAAAAJ;XxRXvX0AAAAJ;fqDBtzYAAAAJ", "or_profile": "~Yixin_Wan1;~Jieyu_Zhao1;~Aman_Chadha1;~Nanyun_Peng1;~Kai-Wei_Chang1", "aff": "University of California, Los Angeles;University of Maryland, College Park;Amazon Web Services;University of California, Los Angeles;Amazon", "aff_domain": "ucla.edu;umd.edu;amazon.com;ucla.edu;amazon.com", "position": "PhD student;Postdoc;GenAI Science Manager;Assistant Professor;Researcher", "bibtex": "@inproceedings{\nwan2023are,\ntitle={Are Personalized Stochastic Parrots More Dangerous? 
Evaluating Persona Biases in Dialogue Systems},\nauthor={Yixin Wan and Jieyu Zhao and Aman Chadha and Nanyun Peng and Kai-Wei Chang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=P2jDML1Ub6}\n}", "github": "", "project": "", "reviewers": "BKCo;8Dug;YxAC", "site": "https://openreview.net/forum?id=P2jDML1Ub6", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;3;4", "excitement": "4;3;3", "reproducibility": "4;3;3", "correctness": "4;3;3", "rating_avg": 3.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0001-6621-9003;;0000-0001-5365-0072", "linkedin": "elaine-yixin-wan-8032b8136/;;https://linkedin.aman.ai/;;kai-wei-chang-41239040", "aff_unique_index": "0;1;2;0;2", "aff_unique_norm": "University of California, Los Angeles;University of Maryland;Amazon", "aff_unique_dep": ";;Amazon Web Services", "aff_unique_url": "https://www.ucla.edu;https://www/umd.edu;https://aws.amazon.com", "aff_unique_abbr": "UCLA;UMD;AWS", "aff_campus_unique_index": "0;1;0", "aff_campus_unique": "Los Angeles;College Park;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "P5hYS77k10", "title": "Quantifying the redundancy between prosody and text", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Prosody---the suprasegmental component of speech, including pitch, loudness, and tempo---carries critical aspects of meaning.\nHowever, the relationship between the information conveyed by prosody vs. by the words themselves remains poorly understood. We use large language models (LLMs) to estimate how much information is redundant between prosody and the words themselves. Using a large spoken corpus of English audiobooks, we extract prosodic features aligned to individual words and test how well they can be predicted from LLM embeddings, compared to non-contextual word embeddings. We find a high degree of redundancy between the information carried by the words and prosodic information across several prosodic features, including intensity, duration, pauses, and pitch contours. \nFurthermore, a word's prosodic information is redundant with both the word itself and the context preceding as well as following it.\nStill, we observe that prosodic features can not be fully predicted from text, suggesting that prosody carries information above and beyond the words. 
\nAlong with this paper, we release a general-purpose data processing pipeline for quantifying the relationship between linguistic information and extra-linguistic features.", "keywords": "Prosody;Psycholinguistics;Language Models;Information Theory", "primary_area": "", "supplementary_material": "", "author": "Lukas Wolf;Tiago Pimentel;Evelina Fedorenko;Ryan Cotterell;Alex Warstadt;Ethan Wilcox;Tamar I Regev", "authorids": "~Lukas_Wolf1;~Tiago_Pimentel1;~Evelina_Fedorenko1;~Ryan_Cotterell1;~Alex_Warstadt1;~Ethan_Wilcox1;~Tamar_I_Regev1", "gender": "M;M;F;M;;F;Not Specified", "homepage": "https://lu-wo.github.io;https://tpimentelms.github.io/;http://evlab.mit.edu;https://alexwarstadt.github.io;https://wilcoxeg.github.io/;https://www.tamarz.website/;https://rycolab.io/", "dblp": "305/7412;203/8292;;220/5281;227/3505;;146/4361.html", "google_scholar": "_FvMBFIAAAAJ;XjZ8NRsAAAAJ;1CgET20AAAAJ;QJNg79AAAAAJ;5jzLBBwAAAAJ;https://scholar.google.co.il/citations?user=EsvbTeYAAAAJ;DexOqtoAAAAJ", "or_profile": "~Lukas_Wolf1;~Tiago_Pimentel1;~Evelina_Fedorenko1;~Alex_Warstadt1;~Ethan_Wilcox1;~Tamar_I_Regev1;~Ryan_D_Cotterell1", "aff": "Swiss Federal Institute of Technology ;University of Cambridge;Massachusetts Institute of Technology;ETHZ - ETH Zurich;Georgetown University;Massachusetts Institute of Technology;Swiss Federal Institute of Technology", "aff_domain": "ethz.ch;cam.ac.uk;mit.edu;ethz.ch;georgetown.edu;mit.edu;ethz.ch", "position": "MS student;PhD student;Associate Professor;Postdoc;Assistant Professor;Postdoc;Assistant Professor", "bibtex": "@inproceedings{\nwolf2023quantifying,\ntitle={Quantifying the redundancy between prosody and text},\nauthor={Lukas Wolf and Tiago Pimentel and Evelina Fedorenko and Ryan Cotterell and Alex Warstadt and Ethan Wilcox and Tamar I Regev},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=P5hYS77k10}\n}", "github": "", "project": "", "reviewers": "UNb6;AJ81;UEwE", "site": "https://openreview.net/forum?id=P5hYS77k10", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;4;4", "excitement": "4;4;5", "reproducibility": "5;4;5", "correctness": "5;4;5", "rating_avg": 5.0, "confidence_avg": 4.0, "excitement_avg": 4.333333333333333, "reproducibility_avg": 4.666666666666667, "correctness_avg": 4.666666666666667, "replies_avg": 11, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;0000-0002-5128-9890;0000-0003-0639-0890;", "linkedin": "lukas-wolf-14a992143/;;;;;;", "aff_unique_index": "0;1;2;3;4;2;0", "aff_unique_norm": "Swiss Federal Institute of Technology;University of Cambridge;Massachusetts Institute of Technology;ETH Zurich;Georgetown University", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.ethz.ch;https://www.cam.ac.uk;https://web.mit.edu;https://www.ethz.ch;https://www.georgetown.edu", "aff_unique_abbr": "ETH Zurich;Cambridge;MIT;ETHZ;GU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Cambridge", "aff_country_unique_index": "0;1;2;0;2;2;0", "aff_country_unique": "Switzerland;United Kingdom;United States" }, { "id": "P9V2jcotAF", "title": "Beyond Shared Vocabulary: Increasing Representational Word Similarities across Languages for Multilingual Machine Translation", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Using a shared vocabulary is common practice in Multilingual Neural Machine Translation (MNMT). 
In addition to its simple design, shared tokens play an important role in positive knowledge transfer, which manifests naturally when the shared tokens refer to similar meanings across languages. However, when word overlap is small, e.g., using different writing systems, transfer is inhibited. In this paper, we propose a re-parameterized method for building embeddings to alleviate this problem. More specifically, we define word-level information transfer pathways via word equivalence classes and rely on graph networks to fuse word embeddings across languages. Our experiments demonstrate the advantages of our approach: 1) the semantics of embeddings are better aligned across languages, 2) our method achieves evident BLEU improvements on high- and low-resource MNMT, and 3) less than 1.0\\% additional trainable parameters are required with a limited increase in computational costs, while the inference time is identical to baselines.", "keywords": "Multilingual Machine Translation;Shared Vocabulary", "primary_area": "", "supplementary_material": "", "author": "Di Wu;Christof Monz", "authorids": "~Di_Wu8;~Christof_Monz1", "gender": "M;M", "homepage": "https://moore3930.github.io/;https://staff.fnwi.uva.nl/c.monz/", "dblp": "https://dblp.org/rec/conf/emnlp/WuDLX20;m/ChristofMonz", "google_scholar": "OyhaeJQAAAAJ;0r3PWLQAAAAJ", "or_profile": "~Di_Wu8;~Christof_Monz1", "aff": "University of Amsterdam;University of Amsterdam, University of Amsterdam", "aff_domain": "uva.nl;ivi.uva.nl", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\nwu2023beyond,\ntitle={Beyond Shared Vocabulary: Increasing Representational Word Similarities across Languages for Multilingual Machine Translation},\nauthor={Di Wu and Christof Monz},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=P9V2jcotAF}\n}", "github": "", "project": "", "reviewers": "hsEL;CxMx;4Nxw;8KXe", "site": "https://openreview.net/forum?id=P9V2jcotAF", "pdf_size": 0, "rating": "5;5;5;5", "confidence": "4;4;3;4", "excitement": "3;3;4;4", "reproducibility": "4;4;4;5", "correctness": "4;3;4;4", "rating_avg": 5.0, "confidence_avg": 3.75, "excitement_avg": 3.5, "reproducibility_avg": 4.25, "correctness_avg": 3.75, "replies_avg": 14, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";", "linkedin": ";", "aff_unique_index": "0;0", "aff_unique_norm": "University of Amsterdam", "aff_unique_dep": "", "aff_unique_url": "https://www.uva.nl", "aff_unique_abbr": "UvA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Netherlands" }, { "id": "PAByut8fMZ", "title": "A Quality-based Syntactic Template Retriever for Syntactically-Controlled Paraphrase Generation", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Existing syntactically-controlled paraphrase generation (SPG) models perform promisingly with human-annotated or well-chosen syntactic templates. However, the difficulty of obtaining such templates actually hinders the practical application of SPG models. For one thing, the prohibitive cost makes it unfeasible to manually design decent templates for every source sentence. For another, the templates automatically retrieved by current heuristic methods are usually unreliable for SPG models to generate qualified paraphrases.
To escape this dilemma, we propose a novel Quality-based Syntactic Template Retriever (QSTR) to retrieve templates based on the quality of the to-be-generated paraphrases. Furthermore, for situations requiring multiple paraphrases for each source sentence, we design a Diverse Templates Search (DTS) algorithm, which can enhance the diversity between paraphrases without sacrificing quality. Experiments demonstrate that QSTR can significantly surpass existing retrieval methods in generating high-quality paraphrases and even perform comparably with human-annotated templates in terms of reference-free metrics. Additionally, human evaluation and the performance on downstream tasks using our generated paraphrases for data augmentation showcase the potential of our QSTR and DTS algorithm in practical scenarios.", "keywords": "Paraphrase generation;Syntactic template retrievers;Mutual diversity", "primary_area": "", "supplementary_material": "", "author": "Xue Zhang;Songming Zhang;Yunlong Liang;Yufeng Chen;Jian Liu;Wenjuan Han;Jinan Xu", "authorids": "~Xue_Zhang3;~Songming_Zhang1;~Yunlong_Liang1;~Yufeng_Chen1;~Jian_Liu7;~Wenjuan_Han1;~Jinan_Xu1", "gender": ";M;M;F;M;F;M", "homepage": ";;;;http://jianliu-ml.github.io;https://scholar.google.com/citations?user=rfVLLfAAAAAJ;http://faculty.bjtu.edu.cn/8300/", "dblp": ";315/4171;177/5130.html;64/5715;;188/9071;67/3124", "google_scholar": "rh-QHwQAAAAJ;u_bYOuYAAAAJ;P5iDDGIAAAAJ;;https://scholar.google.de/citations?hl=en;rfVLLfAAAAAJ;wMuW0W4AAAAJ", "or_profile": "~Xue_Zhang3;~Songming_Zhang1;~Yunlong_Liang1;~Yufeng_Chen1;~Jian_Liu7;~Wenjuan_Han1;~Xu_Jinan1", "aff": "Beijing Jiaotong University;Beijing Jiaotong University;Beijing Jiaotong University;Beijing jiaotong univercity;Beijing Jiaotong University;Beijing Jiaotong University;Beijing Jiaotong University", "aff_domain": "bjtu.edu.cn;bjtu.edu.cn;bjtu.edu.cn;bjtu.edu.cn;bjtu.edu.cn;bjtu.edu.cn;bjtu.edu.cn", "position": "MS student;PhD student;PhD student;Assistant Professor;Lecturer;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nzhang2023a,\ntitle={A Quality-based Syntactic Template Retriever for Syntactically-Controlled Paraphrase Generation},\nauthor={Xue Zhang and Songming Zhang and Yunlong Liang and Yufeng Chen and Jian Liu and Wenjuan Han and Jinan Xu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=PAByut8fMZ}\n}", "github": "", "project": "", "reviewers": "7mbF;TgL2;h28a", "site": "https://openreview.net/forum?id=PAByut8fMZ", "pdf_size": 0, "rating": "5;5;5", "confidence": "5;4;5", "excitement": "3;4;4", "reproducibility": "3;4;4", "correctness": "3;4;4", "rating_avg": 5.0, "confidence_avg": 4.666666666666667, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.6666666666666665, "replies_avg": 10, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0003-0409-430X;0000-0003-2311-7642;;;0000-0002-2327-0842;", "linkedin": "%E9%9B%AA-%E5%BC%A0-a95045226/;;;;;;jinan-xu-3544b137/", "aff_unique_index": "0;0;0;0;0;0;0", "aff_unique_norm": "Beijing Jiao Tong University", "aff_unique_dep": "", "aff_unique_url": "http://www.njtu.edu.cn/en", "aff_unique_abbr": "BJTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "PBvSGqYCSa", "title": "Bridging Background Knowledge Gaps in Translation with Automatic 
Explicitation", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Translations help people understand content written in another language. However, even correct literal translations do not fulfill that goal when people lack the necessary background to understand them. Professional translators incorporate explicitations to explain the missing context by considering cultural differences between source and target audiences. Despite its potential to help users, NLP research on explicitation is limited because of the dearth of adequate evaluation methods. This work introduces techniques for automatically generating explicitations, motivated by WikiExpl: a dataset that we collect from Wikipedia and annotate with human translators. The resulting explicitations are useful as they help answer questions more accurately in a multilingual question answering framework.", "keywords": "Explicitation;translation;cross\u2011cultural NLP;pragmatic explicitation;multi-cultural NLP;explanatory translation", "primary_area": "", "supplementary_material": "", "author": "HyoJung Han;Jordan Lee Boyd-Graber;Marine Carpuat", "authorids": "~HyoJung_Han1;~Jordan_Lee_Boyd-Graber1;~Marine_Carpuat1", "gender": "F;M;F", "homepage": "https://h-j-han.github.io/;http://boydgraber.org;http://www.cs.umd.edu/~marine/", "dblp": ";57/5950;71/1827", "google_scholar": "https://scholar.google.co.kr/citations?user=ZjakxhkAAAAJ;BT4XTP4AAAAJ;iPAX6jcAAAAJ", "or_profile": "~HyoJung_Han1;~Jordan_Lee_Boyd-Graber1;~Marine_Carpuat1", "aff": "Department of Computer Science, University of Maryland, College Park;University of Maryland, College Park;University of Maryland, College Park", "aff_domain": "cs.umd.edu;umd.edu;umd.edu", "position": "PhD student;Associate Professor;Associate Professor", "bibtex": "@inproceedings{\nhan2023bridging,\ntitle={Bridging Background Knowledge Gaps in Translation with Automatic Explicitation},\nauthor={HyoJung Han and Jordan Lee Boyd-Graber and Marine Carpuat},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=PBvSGqYCSa}\n}", "github": "", "project": "", "reviewers": "CM4F;vd7U;SwKe;5DDw;Se4s", "site": "https://openreview.net/forum?id=PBvSGqYCSa", "pdf_size": 0, "rating": "4;4;4;4;4", "confidence": "3;2;3;3;3", "excitement": "3;4;4;2;4", "reproducibility": "3;4;4;3;3", "correctness": "4;4;4;3;4", "rating_avg": 4.0, "confidence_avg": 2.8, "excitement_avg": 3.4, "reproducibility_avg": 3.4, "correctness_avg": 3.8, "replies_avg": 15, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-7770-4431;", "linkedin": "h-j-han/;jordan-boyd-graber-99a83994;", "aff_unique_index": "0;1;1", "aff_unique_norm": "University of Maryland, College Park;University of Maryland", "aff_unique_dep": "Department of Computer Science;", "aff_unique_url": "https://www/umd.edu;https://www/umd.edu", "aff_unique_abbr": "UMD;UMD", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "College Park", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "PBwotNgvp3", "title": "Zero-shot Topical Text Classification with LLMs - an Experimental Study", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Topical Text Classification (TTC) is an ancient, yet timely research area in natural language processing, with many practical applications. 
The recent dramatic advancements in large LMs raise the question of how well these models can perform in this task in a zero-shot scenario.\nHere, we share a first comprehensive study, comparing the zero-shot performance of a variety of LMs over TTC23, a large benchmark collection of 23 publicly available TTC datasets, covering a wide range of domains and styles. In addition, we leverage this new TTC benchmark to create LMs that are specialized in TTC, by fine-tuning these LMs over a subset of the datasets and evaluating their performance over the remaining, held-out datasets. We show that the TTC-specialized LMs obtain the top performance on our benchmark, by a significant margin. Our code and model are made available for the community. We hope that the results presented in this work will serve as a useful guide for practitioners interested in topical text classification.", "keywords": "topic classification;zero-shot classification;text classification;LLMs", "primary_area": "", "supplementary_material": "", "author": "Shai Gretz;Alon Halfon;Ilya Shnayderman;Orith Toledo-Ronen;Artem Spector;Lena Dankin;Yannis Katsis;Ofir Arviv;Yoav Katz;Noam Slonim;Liat Ein-Dor", "authorids": "~Shai_Gretz1;~Alon_Halfon1;~Ilya_Shnayderman1;~Orith_Toledo-Ronen1;~Artem_Spector2;~Lena_Dankin1;~Yannis_Katsis1;~Ofir_Arviv1;~Yoav_Katz1;~Noam_Slonim1;~Liat_Ein-Dor2", "gender": ";;M;F;;;;M;M;M;F", "homepage": ";;;;;;;;https://researcher.watson.ibm.com/researcher/view.php?person=il-KATZ;https://researcher.watson.ibm.com/researcher/view.php?person=il-NOAMS;https://researcher.watson.ibm.com/researcher/view.php?person=il-LIATE", "dblp": ";219/5539;73/4520;00/9879;;;;;40/21;62/7001;78/3923.html", "google_scholar": ";;;0J_zq00AAAAJ;;;;vMC7k0MAAAAJ;EfW-wnAAAAAJ;https://scholar.google.co.il/citations?user=KjvrNGMAAAAJ;V_IZ86YAAAAJ", "or_profile": "~Shai_Gretz1;~Alon_Halfon1;~Ilya_Shnayderman1;~Orith_Toledo-Ronen1;~Artem_Spector2;~Lena_Dankin1;~Yannis_Katsis1;~Ofir_Arviv1;~Yoav_Katz1;~Noam_Slonim1;~Liat_Ein-Dor2", "aff": ";;International Business Machines;International Business Machines;International Business Machines;;;International Business Machines;International Business Machines;International Business Machines;International Business Machines", "aff_domain": ";;ibm.com;ibm.com;ibm.com;;;ibm.com;ibm.com;ibm.com;ibm.com", "position": ";;Researcher;Research Staff Member;Researcher;;;Researcher;IBM;Principal Researcher;Principal Researcher", "bibtex": "@inproceedings{\ngretz2023zeroshot,\ntitle={Zero-shot Topical Text Classification with {LLM}s - an Experimental Study},\nauthor={Shai Gretz and Alon Halfon and Ilya Shnayderman and Orith Toledo-Ronen and Artem Spector and Lena Dankin and Yannis Katsis and Ofir Arviv and Yoav Katz and Noam Slonim and Liat Ein-Dor},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=PBwotNgvp3}\n}", "github": "", "project": "", "reviewers": "aRE1;UZow;PZ6f", "site": "https://openreview.net/forum?id=PBwotNgvp3", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;3;4", "excitement": "2;4;2", "reproducibility": "4;4;4", "correctness": "3;3;4", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 2.6666666666666665, "reproducibility_avg": 4.0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 11, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;;;;;;", "linkedin": 
";;ilya-shnayderman-6932502/;orith-toledo-ronen-7336a63/;artemspector/;;;ofir-arviv-0523a8b9/;yoav-katz-0326b74/?originalSubdomain=il;noam-slonim-28a80b63/;https://il.linkedin.com/in/liat-ein-dor-2240215", "aff_unique_index": "0;0;0;0;0;0;0", "aff_unique_norm": "International Business Machines Corporation", "aff_unique_dep": "", "aff_unique_url": "https://www.ibm.com", "aff_unique_abbr": "IBM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "PCNsizlhRU", "title": "Towards Conceptualization of ``Fair Explanation'': Disparate Impacts of anti-Asian Hate Speech Explanations on Content Moderators", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Recent research at the intersection of AI explainability and fairness has focused on how explanations can improve human-plus-AI task performance as assessed by fairness measures. We propose to characterize what constitutes an explanation that is itself \"fair\" -- an explanation that does not adversely impact specific populations. We formulate a novel evaluation method of \"fair explanations\" using not just accuracy and label time, but also psychological impact of explanations on different user groups across many metrics (mental discomfort, stereotype activation, and perceived workload). We apply this method in the context of content moderation of potential hate speech, and its differential impact on Asian vs. non-Asian proxy moderators, across explanation approaches (saliency map and counterfactual explanation). We find that saliency maps generally perform better and show less evidence of disparate impact (group) and individual unfairness than counterfactual explanations.\n\nContent warning: This paper contains examples of hate speech and racially discriminatory language. The authors do not support such content. 
Please consider your risk of discomfort carefully before continuing reading!", "keywords": "fairness;explainability;human study;hate speech prediction;content moderators;crowdworkers", "primary_area": "", "supplementary_material": "", "author": "Tin Trung Nguyen;Jiannan Xu;Aayushi Roy;Hal Daum\u00e9 III;Marine Carpuat", "authorids": "~Tin_Trung_Nguyen2;~Jiannan_Xu1;~Aayushi_Roy1;~Hal_Daum\u00e9_III1;~Marine_Carpuat1", "gender": "M;M;F;M;F", "homepage": "https://www.cs.umd.edu/people/tintn;https://jiannan-xu.github.io/;;http://hal3.name;http://www.cs.umd.edu/~marine/", "dblp": ";;;77/2856.html;71/1827", "google_scholar": ";;;PbEw81gAAAAJ;iPAX6jcAAAAJ", "or_profile": "~Tin_Trung_Nguyen2;~Jiannan_Xu1;~Aayushi_Roy1;~Hal_Daum\u00e9_III1;~Marine_Carpuat1", "aff": "Department of Computer Science, University of Maryland, College Park;University of Maryland, College Park;University of Maryland, College Park;Microsoft;University of Maryland, College Park", "aff_domain": "cs.umd.edu;umd.edu;umd.edu;microsoft.com;umd.edu", "position": "PhD student;PhD student;MS student;Senior Principle Researcher;Associate Professor", "bibtex": "@inproceedings{\nnguyen2023towards,\ntitle={Towards Conceptualization of ``Fair Explanation'': Disparate Impacts of anti-Asian Hate Speech Explanations on Content Moderators},\nauthor={Tin Trung Nguyen and Jiannan Xu and Aayushi Roy and Hal Daum{\\'e} III and Marine Carpuat},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=PCNsizlhRU}\n}", "github": "", "project": "", "reviewers": "eKeg;YzLM;JNom", "site": "https://openreview.net/forum?id=PCNsizlhRU", "pdf_size": 0, "rating": "5;5;5", "confidence": "3;3;4", "excitement": "3;4;4", "reproducibility": "2;5;4", "correctness": "2;4;3", "rating_avg": 5.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0003-2807-4936;;", "linkedin": "tin-nguyen-b0b65b188/;jiannan-xu/;;;", "aff_unique_index": "0;1;1;2;1", "aff_unique_norm": "University of Maryland, College Park;University of Maryland;Microsoft", "aff_unique_dep": "Department of Computer Science;;Microsoft Corporation", "aff_unique_url": "https://www/umd.edu;https://www/umd.edu;https://www.microsoft.com", "aff_unique_abbr": "UMD;UMD;Microsoft", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "College Park;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "PCyB5LUF4z", "title": "Learning to Follow Object-Centric Image Editing Instructions Faithfully", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Natural language instructions are a powerful interface for editing the outputs of text-to-image diffusion models. However, several challenges need to be addressed: 1) underspecification (the need to model the implicit meaning of instructions) 2) grounding (the need to localize where the edit has to be performed), 3) faithfulness (the need to preserve the elements of the image not affected by the edit instruction). Current approaches focusing on image editing with natural language instructions rely on automatically generated paired data, which, as shown in our investigation, is noisy and sometimes nonsensical, exacerbating the above issues. 
Building on recent advances in segmentation, Chain-of-Thought prompting, and visual question answering, we significantly improve the quality of the paired data. In addition, we enhance the supervision signal by highlighting parts of the image that need to be changed by the instruction. The model fine-tuned on the improved data is capable of performing fine-grained object-centric edits better than state-of-the-art baselines, mitigating the problems outlined above, as shown by automatic and human evaluations. Moreover, our model is capable of generalizing to domains unseen during training, such as visual metaphors.", "keywords": "natural language instruction;image editing;stable diffusion;diffusion model;text-to-image;multimodal", "primary_area": "", "supplementary_material": "", "author": "Tuhin Chakrabarty;Kanishk Singh;Arkadiy Saakyan;Smaranda Muresan", "authorids": "~Tuhin_Chakrabarty2;~Kanishk_Singh2;~Arkadiy_Saakyan1;~Smaranda_Muresan3", "gender": "M;M;;M", "homepage": "https://tuhinjubcse.github.io/;https://asaakyan.github.io/;http://www.cs.columbia.edu/~smara/;https://cse.iitkgp.ac.in/~kanishks/", "dblp": "227/2812;294/5397;44/70;", "google_scholar": "HCmFuo8AAAAJ;oPegqXQAAAAJ;Esbx2VcAAAAJ;sTtcEZ8AAAAJ", "or_profile": "~Tuhin_Chakrabarty2;~Arkadiy_Saakyan1;~Smaranda_Muresan3;~Kanishk_Singh1", "aff": "Columbia University;Amazon;Columbia University;Columbia University", "aff_domain": "columbia.edu;amazon.com;columbia.edu;columbia.edu", "position": "PhD student;Intern;Principal Researcher;MS student", "bibtex": "@inproceedings{\nchakrabarty2023learning,\ntitle={Learning to Follow Object-Centric Image Editing Instructions Faithfully},\nauthor={Tuhin Chakrabarty and Kanishk Singh and Arkadiy Saakyan and Smaranda Muresan},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=PCyB5LUF4z}\n}", "github": "", "project": "", "reviewers": "Jm5D;1SU6;6zEB;QHP1", "site": "https://openreview.net/forum?id=PCyB5LUF4z", "pdf_size": 0, "rating": "3;3;3;3", "confidence": "4;3;5;4", "excitement": "3;3;3;3", "reproducibility": "2;4;4;4", "correctness": "4;4;4;3", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 3.0, "reproducibility_avg": 3.5, "correctness_avg": 3.75, "replies_avg": 14, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;", "linkedin": ";;;kanishk7777", "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Columbia University;Amazon", "aff_unique_dep": ";Amazon.com, Inc.", "aff_unique_url": "https://www.columbia.edu;https://www.amazon.com", "aff_unique_abbr": "Columbia;Amazon", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "PHh1s8dNlY", "title": "DIVE: Towards Descriptive and Diverse Visual Commonsense Generation", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Towards human-level visual understanding, visual commonsense generation has been introduced to generate commonsense inferences beyond images. However, current research on visual commonsense generation has overlooked an important human cognitive ability: generating descriptive and diverse inferences. In this work, we propose a novel visual commonsense generation framework, called DIVE, which aims to improve the descriptiveness and diversity of generated inferences. 
DIVE involves two methods, generic inference filtering and contrastive retrieval learning, which address the limitations of existing visual commonsense resources and training objectives. Experimental results verify that DIVE outperforms state-of-the-art models for visual commonsense generation in terms of both descriptiveness and diversity, while showing a superior quality in generating unique and novel inferences. Notably, DIVE achieves human-level descriptiveness and diversity on Visual Commonsense Graphs. Furthermore, human evaluations confirm that DIVE aligns closely with human judgments on descriptiveness and diversity.", "keywords": "Visual commonsense generation;descriptive and diverse text generation;commonsense inference;vision-language model", "primary_area": "", "supplementary_material": "", "author": "Jun-Hyung Park;Hyuntae Park;Youjin Kang;Eojin Jeon;SangKeun Lee", "authorids": "~Jun-Hyung_Park1;~Hyuntae_Park1;~Youjin_Kang1;~Eojin_Jeon1;~SangKeun_Lee1", "gender": ";M;F;M;M", "homepage": "https://www.jhpark.info;https://github.com/Park-ing-lot;;http://xai.korea.ac.kr/;http://dilab.korea.ac.kr", "dblp": "16/716;37/877;;339/2527;73/3458-1", "google_scholar": "https://scholar.google.com/citations?hl=en;7CtGXToAAAAJ;;https://scholar.google.com/citations?view_op=list_works;BGSUpLgAAAAJ", "or_profile": "~Jun-Hyung_Park1;~Hyuntae_Park1;~Youjin_Kang1;~Eojin_Jeon1;~SangKeun_Lee1", "aff": "Korea University;Korea University;Korea University;Korea University;Korea University", "aff_domain": "korea.ac.kr;korea.edu;korea.ac.kr;korea.ac.kr;korea.ac.kr", "position": "PhD student;PhD student;PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\npark2023dive,\ntitle={{DIVE}: Towards Descriptive and Diverse Visual Commonsense Generation},\nauthor={Jun-Hyung Park and Hyuntae Park and Youjin Kang and Eojin Jeon and SangKeun Lee},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=PHh1s8dNlY}\n}", "github": "", "project": "", "reviewers": "TWj1;3SLs;kR3u;Ukok;k6e2", "site": "https://openreview.net/forum?id=PHh1s8dNlY", "pdf_size": 0, "rating": "5;5;5;5;5", "confidence": "4;2;3;4;3", "excitement": "2;3;4;4;3", "reproducibility": "4;3;4;5;5", "correctness": "4;3;4;4;4", "rating_avg": 5.0, "confidence_avg": 3.2, "excitement_avg": 3.2, "reproducibility_avg": 4.2, "correctness_avg": 3.8, "replies_avg": 16, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-7900-3743;;0000-0002-6808-5157;;0000-0002-6249-8217", "linkedin": "jun-hyung-park-901a62252;;;;", "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Korea University", "aff_unique_dep": "", "aff_unique_url": "https://www.korea.ac.kr", "aff_unique_abbr": "KU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "South Korea" }, { "id": "PHtXqUNGUA", "title": "SummEdits: Measuring LLM Ability at Factual Reasoning Through The Lens of Summarization", "track": "main", "status": "Long Main", "tldr": "", "abstract": "With the recent appearance of LLMs in practical settings, having methods that can effectively detect factual inconsistencies is crucial to reduce the propagation of misinformation and improve trust in model outputs.\nWhen testing on existing factual consistency benchmarks, we find that a few large language models (LLMs) perform competitively on classification benchmarks for factual inconsistency detection compared to traditional 
non-LLM methods. However, a closer analysis reveals issues with existing evaluation benchmarks, affecting evaluation precision.\nTo address this, we propose a new protocol for inconsistency detection benchmark creation and implement it in a 10-domain benchmark called SummEdits. This new benchmark is 20 times more cost-effective per sample than previous benchmarks and highly reproducible, as we estimate inter-annotator agreement at about 0.9.\nMost LLMs struggle on SummEdits, with performance close to random chance. The best-performing model, GPT-4, is still 8% below estimated human performance, highlighting the gaps in LLMs' ability to reason about facts and detect inconsistencies when they occur.", "keywords": "factual consistency;faithfulness;summarization;LLMs;benchmark", "primary_area": "", "supplementary_material": "", "author": "Philippe Laban;Wojciech Maciej Kryscinski;Divyansh Agarwal;Alexander Fabbri;Caiming Xiong;Shafiq Joty;Chien-Sheng Wu", "authorids": "~Philippe_Laban1;~Wojciech_Maciej_Kryscinski1;~Divyansh_Agarwal1;~Alexander_Fabbri1;~Caiming_Xiong1;~Shafiq_Joty1;~Chien-Sheng_Wu1", "gender": "M;M;M;M;M;M;M", "homepage": "https://people.eecs.berkeley.edu/~phillab/;;;https://alex-fabbri.github.io;http://cmxiong.com/;https://raihanjoty.github.io/;http://jasonwu0731.github.io", "dblp": "220/3590;;180/3005;203/8539;80/7282;62/2078;180/5537", "google_scholar": "fR5t200AAAAJ;;HSLyHrcAAAAJ;GgfJdhwAAAAJ;vaSdahkAAAAJ;hR249csAAAAJ;1G4GV2EAAAAJ", "or_profile": "~Philippe_Laban1;~Wojciech_Maciej_Kryscinski1;~Divyansh_Agarwal1;~Alexander_Fabbri1;~Caiming_Xiong1;~Shafiq_Joty1;~Chien-Sheng_Wu1", "aff": "SalesForce.com;;Salesforce.com;SalesForce.com;Salesforce Research;SalesForce.com;Salesforce AI", "aff_domain": "salesforce.com;;salesforce.com;salesforce.com;salesforce.com;salesforce.com;salesforce.com", "position": "Researcher;;Researcher;Researcher;Research Scientist;Principal Researcher;Researcher", "bibtex": "@inproceedings{\nlaban2023summedits,\ntitle={SummEdits: Measuring {LLM} Ability at Factual Reasoning Through The Lens of Summarization},\nauthor={Philippe Laban and Wojciech Maciej Kryscinski and Divyansh Agarwal and Alexander Fabbri and Caiming Xiong and Shafiq Joty and Chien-Sheng Wu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=PHtXqUNGUA}\n}", "github": "", "project": "", "reviewers": "jN9Y;hL41;eGPn", "site": "https://openreview.net/forum?id=PHtXqUNGUA", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;2;3", "excitement": "4;3;3", "reproducibility": "3;4;4", "correctness": "4;4;3", "rating_avg": 4.0, "confidence_avg": 2.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.6666666666666665, "replies_avg": 10, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;;", "linkedin": ";;agarwal-divyansh/;;caiming-xiong-150a1417;;chien-sheng-jason-wu/", "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "Salesforce", "aff_unique_dep": "", "aff_unique_url": "https://www.salesforce.com", "aff_unique_abbr": "Salesforce", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "PNpRxOhVut", "title": "A Spectral Viewpoint on Continual Relation Extraction", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "Continual Relation Extraction (CRE) aims to continuously train a model 
to learn new relations while preserving its ability on previously learned relations. Similar to other continual learning problems, in CRE, models experience representation shift, where the learned deep space changes during the continual learning process, which degrades performance on the old tasks. In this work, we provide an insight into this phenomenon from a spectral viewpoint. Our key argument is that, for each class shape, if its eigenvectors (or spectral components) do not change much, the shape is well-preserved. We then conduct a spectral experiment and show that, for the shape of each class, the eigenvectors with larger eigenvalues are better preserved after learning new tasks, which means these vectors are good at keeping class shapes. Based on this analysis, we propose a simple yet effective class-wise regularization that improves the eigenvalues in representation learning. We observe that our proposed regularization leads to an increase in the eigenvalues. Extensive experiments on two benchmark datasets, FewRel and TACRED, show the effectiveness of our proposed method with significant improvement in performance compared to the state-of-the-art models. Further analyses also verify our hypothesis that larger eigenvalues lead to better performance and vice versa.", "keywords": "Relation Extraction;Information Extraction;Continual Learning;Spectral Analysis", "primary_area": "", "supplementary_material": "", "author": "Huy Huu Nguyen;Chien Van Nguyen;Linh Ngo Van;Anh Tuan Luu;Thien Huu Nguyen", "authorids": "~Huy_Huu_Nguyen1;~Chien_Van_Nguyen1;~Linh_Ngo_Van1;~Anh_Tuan_Luu2;~Thien_Huu_Nguyen1", "gender": "M;M;;M;M", "homepage": ";https://chiennv2000.github.io/;https://users.soict.hust.edu.vn/linhnv/;https://tuanluu.github.io/;http://ix.cs.uoregon.edu/~thien", "dblp": ";351/5540;125/3578;81/8329.html;17/9407", "google_scholar": ";fW5HEnEAAAAJ;https://scholar.google.com.vn/citations?user=tZ78MoQAAAAJ;https://scholar.google.com.sg/citations?hl=en;Da2FhegAAAAJ", "or_profile": "~Huy_Huu_Nguyen1;~Chien_Van_Nguyen1;~Linh_Ngo_Van1;~Anh_Tuan_Luu2;~Thien_Huu_Nguyen1", "aff": "Hanoi University of Science and Technology;Hanoi University of Science and Technology;Hanoi University of Science and Technology;Nanyang Technological University;University of Oregon", "aff_domain": "hust.edu.vn;hust.edu.vn;hust.edu.vn;ntu.edu.sg;cs.uoregon.edu", "position": "Undergrad student;Student;Assistant Professor;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nnguyen2023a,\ntitle={A Spectral Viewpoint on Continual Relation Extraction},\nauthor={Huy Huu Nguyen and Chien Van Nguyen and Linh Ngo Van and Anh Tuan Luu and Thien Huu Nguyen},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=PNpRxOhVut}\n}", "github": "", "project": "", "reviewers": "rW8k;Nh4p;oY2D", "site": "https://openreview.net/forum?id=PNpRxOhVut", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;3;2", "excitement": "4;3;4", "reproducibility": "4;4;4", "correctness": "3;3;4", "rating_avg": 4.0, "confidence_avg": 3.0, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;", "linkedin": "h%E1%BB%AFu-huy-nguy%E1%BB%85n-97832a1b4/;chiennv2000/;;;thien-huu-nguyen-7a193030/", "aff_unique_index": "0;0;0;1;2", "aff_unique_norm": "Hanoi University of Science and
Technology;Nanyang Technological University;University of Oregon", "aff_unique_dep": ";;", "aff_unique_url": "https://www.hust.edu.vn;https://www.ntu.edu.sg;https://www.uoregon.edu", "aff_unique_abbr": "HUST;NTU;UO", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Hanoi;", "aff_country_unique_index": "0;0;0;1;2", "aff_country_unique": "Vietnam;Singapore;United States" }, { "id": "PPwRa7Wmg1", "title": "VIP5: Towards Multimodal Foundation Models for Recommendation", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Computer Vision (CV), Natural Language Processing (NLP), and Recommender Systems (RecSys) are three prominent AI applications that have traditionally developed independently, resulting in disparate modeling and engineering methodologies. This has impeded the ability for these fields to directly benefit from each other's advancements. With the recent development of foundation models, large language models have emerged as a potential general-purpose interface for unifying different modalities and problem formulations. In light of this, we propose the development of a multimodal foundation model (MFM) considering visual, textual, and personalization modalities under the P5 recommendation paradigm, thus named VIP5 (Visual P5), to unify various modalities and recommendation tasks. This will enable the processing of multiple modalities in a shared architecture for improved recommendations. To achieve this, we introduce multimodal personalized prompts to accommodate multiple modalities under a shared format. Additionally, we propose a parameter-efficient training method for foundation models, which involves freezing the P5 backbone and fine-tuning lightweight adapters, resulting in improved recommendation performance and increased efficiency in terms of training time and memory usage. 
Code and data of VIP5 are available at https://github.com/jeykigung/VIP5.", "keywords": "Multimodal Foundation Model;Recommender Systems;Large Language Model;Parameter-efficient Tuning;Personalized Prompt", "primary_area": "", "supplementary_material": "", "author": "Shijie Geng;Juntao Tan;Shuchang Liu;Zuohui Fu;Yongfeng Zhang", "authorids": "~Shijie_Geng1;~Juntao_Tan1;~Shuchang_Liu1;~Zuohui_Fu1;~Yongfeng_Zhang1", "gender": "M;M;M;M;", "homepage": ";;;;", "dblp": "171/3642;;335/1645;146/6971;", "google_scholar": "wujqvGYAAAAJ;hbrLcKIAAAAJ;kivnB4QAAAAJ;;", "or_profile": "~Shijie_Geng1;~Juntao_Tan1;~Shuchang_Liu1;~Zuohui_Fu1;~Yongfeng_Zhang1", "aff": "ByteDance Inc.;Rutgers University;Kuaishou;Rutgers University;", "aff_domain": "bytedance.com;rutgers.edu;kuaishou.com;rutgers.edu;", "position": "Researcher;PhD student;Researcher;PhD student;", "bibtex": "@inproceedings{\ngeng2023vip,\ntitle={{VIP}5: Towards Multimodal Foundation Models for Recommendation},\nauthor={Shijie Geng and Juntao Tan and Shuchang Liu and Zuohui Fu and Yongfeng Zhang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=PPwRa7Wmg1}\n}", "github": "", "project": "", "reviewers": "AFHM;tDAe;hNgC;gRHd", "site": "https://openreview.net/forum?id=PPwRa7Wmg1", "pdf_size": 0, "rating": "3;3;3;3", "confidence": "4;4;3;4", "excitement": "3;1;3;4", "reproducibility": "3;3;3;3", "correctness": "3;2;3;3", "rating_avg": 3.0, "confidence_avg": 3.75, "excitement_avg": 2.75, "reproducibility_avg": 3.0, "correctness_avg": 2.75, "replies_avg": 13, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0002-1440-911X;;", "linkedin": ";;;;", "aff_unique_index": "0;1;2;1", "aff_unique_norm": "ByteDance;Rutgers University;Kuaishou Technology", "aff_unique_dep": ";;", "aff_unique_url": "https://www.bytedance.com;https://www.rutgers.edu;https://www.kuaishou.com", "aff_unique_abbr": "ByteDance;Rutgers;Kuaishou", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;1", "aff_country_unique": "China;United States" }, { "id": "PSlrVYPTAX", "title": "Conversational Recommender System and Large Language Model Are Made for Each Other in E-commerce Pre-sales Dialogue", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "E-commerce pre-sales dialogue aims to understand and elicit user needs and preferences for the items they are seeking so as to provide appropriate recommendations. Conversational recommender systems (CRSs) learn user representation and provide accurate recommendations based on dialogue context, but rely on external knowledge. Large language models (LLMs) generate responses that mimic pre-sales dialogues after fine-tuning, but lack domain-specific knowledge for accurate recommendations. Intuitively, the strengths of LLM and CRS in E-commerce pre-sales dialogues are complementary, yet no previous work has explored this. This paper investigates the effectiveness of combining LLM and CRS in E-commerce pre-sales dialogues, proposing two collaboration methods: CRS assisting LLM and LLM assisting CRS. We conduct extensive experiments on a real-world dataset of E-commerce pre-sales dialogues. We analyze the impact of two collaborative approaches with two CRSs and two LLMs on four tasks of E-commerce pre-sales dialogue. 
We find that collaborations between CRS and LLM can be very effective in some cases.", "keywords": "Conversational recommendation;large language model;collaboration method", "primary_area": "", "supplementary_material": "", "author": "Yuanxing Liu;Weinan Zhang;Yifan Chen;Yuchi Zhang;Haopeng Bai;Fan Feng;Hengbin Cui;Yongbin Li;Wanxiang Che", "authorids": "~Yuanxing_Liu1;~Weinan_Zhang4;~Yifan_Chen14;~Yuchi_Zhang2;~Haopeng_Bai1;~Fan_Feng3;~Hengbin_Cui1;~Yongbin_Li2;~Wanxiang_Che1", "gender": "M;M;M;M;M;M;M;;M", "homepage": ";https://homepage.hit.edu.cn/zhangweinan;;http://ir.hit.edu.cn/~hpbai/;;https://yongbin-li.github.io/;http://ir.hit.edu.cn/~car/;;https://ys.mihoyo.com/", "dblp": "86/8392-1;28/10261-3;;;13/7763;;https://dblp.uni-trier.de/pers/hd/c/Che:Wanxiang;138/6894;", "google_scholar": "jgSM9f0AAAAJ;DBLdEf4AAAAJ;;;;xF5VrokAAAAJ;SVlQ6IEAAAAJ;https://scholar.google.com/citations?hl=zh-CN;", "or_profile": "~Yuanxing_Liu1;~Weinan_Zhang4;~Yuchi_Zhang2;~Haopeng_Bai1;~Fan_Feng3;~Yongbin_Li2;~Wanxiang_Che1;~CUI_HENGBIN1;~Chen_Yifan1", "aff": "University of Amsterdam;Harbin Institute of Technology;Harbin Institute of Technology;Harbin Institute of Technology;Alibaba Group;Alibaba Group;Harbin Institute of Technology;Alibaba Group;Harbin Institute of Technology", "aff_domain": "uva.nl;hit.edu.cn;hit.edu.cn;hit.edu.cn;alibaba-inc.com;alibaba-inc.com;hit.edu.cn;alibaba-inc.com;hit.edu.cn", "position": "Intern;Full Professor;MS student;MS student;Researcher;Researcher;Full Professor;Researcher;Undergrad student", "bibtex": "@inproceedings{\nliu2023conversational,\ntitle={Conversational Recommender System and Large Language Model Are Made for Each Other in E-commerce Pre-sales Dialogue},\nauthor={Yuanxing Liu and Weinan Zhang and Yifan Chen and Yuchi Zhang and Haopeng Bai and Fan Feng and Hengbin Cui and Yongbin Li and Wanxiang Che},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=PSlrVYPTAX}\n}", "github": "", "project": "", "reviewers": "n8cG;FfbT;MtkJ;j99w", "site": "https://openreview.net/forum?id=PSlrVYPTAX", "pdf_size": 0, "rating": "3;3;3;3", "confidence": "4;3;3;4", "excitement": "2;2;3;4", "reproducibility": "3;4;4;3", "correctness": "3;3;3;4", "rating_avg": 3.0, "confidence_avg": 3.5, "excitement_avg": 2.75, "reproducibility_avg": 3.5, "correctness_avg": 3.25, "replies_avg": 14, "authors#_avg": 9, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-9991-4480;;0009-0006-4492-5189;;;;;;", "linkedin": ";;;;;;;;", "aff_unique_index": "0;1;1;1;2;2;1;2;1", "aff_unique_norm": "University of Amsterdam;Harbin Institute of Technology;Alibaba Group", "aff_unique_dep": ";;", "aff_unique_url": "https://www.uva.nl;http://www.hit.edu.cn/;https://www.alibaba.com", "aff_unique_abbr": "UvA;HIT;Alibaba", "aff_campus_unique_index": "1;1;1;1;1", "aff_campus_unique": ";Harbin", "aff_country_unique_index": "0;1;1;1;1;1;1;1;1", "aff_country_unique": "Netherlands;China" }, { "id": "PT63nNpyKg", "title": "Large Language Models are biased to overestimate profoundness", "track": "main", "status": "Short Main", "tldr": "", "abstract": "Recent advancements in natural language processing by large language models (LLMs), such as GPT-4, have been suggested to approach Artificial General Intelligence. And yet, it is still under dispute whether LLMs possess similar reasoning abilities to humans. 
This study evaluates GPT-4 and various other LLMs in judging the profoundness of mundane, motivational, and pseudo-profound statements. We found a significant statement-to-statement correlation between the LLMs and humans, irrespective of the type of statements and the prompting technique used. However, LLMs systematically overestimate the profoundness of nonsensical statements, with the exception of Tk-instruct, which uniquely underestimates the profoundness of statements. Only few-shot learning prompts, as opposed to chain-of-thought prompting, draw LLMs ratings closer to humans. Furthermore, this work provides insights into the potential biases induced by Reinforcement Learning from Human Feedback (RLHF), inducing an increase in the bias to overestimate the profoundness of statements.", "keywords": "Large language models;reasoning;bias;nonsensical statements", "primary_area": "", "supplementary_material": "", "author": "Eugenio Herrera-Berg;Tom\u00e1s Vergara Browne;Pablo Le\u00f3n-Villagr\u00e1;Marc-Llu\u00eds Vives;Cristian Buc Calderon", "authorids": "~Eugenio_Herrera-Berg1;~Tom\u00e1s_Vergara_Browne1;~Pablo_Le\u00f3n-Villagr\u00e11;~Marc-Llu\u00eds_Vives1;~Cristian_Buc_Calderon1", "gender": ";M;;M;M", "homepage": "https://github.com/ouhenio;https://tvergara.github.io/;https://pabloleonvillagra.com/;;", "dblp": ";359/3796.html;;;", "google_scholar": ";RknbgOkAAAAJ;;zdnBi6gAAAAJ;https://scholar.google.be/citations?hl=en", "or_profile": "~Eugenio_Herrera-Berg1;~Tom\u00e1s_Vergara_Browne1;~Pablo_Le\u00f3n-Villagr\u00e11;~Marc-Llu\u00eds_Vives1;~Cristian_Buc_Calderon1", "aff": "Centro Nacional de Inteligencia Artificial;Pontificia Universidad Catolica de Chile;Brown University;Leiden University;Centro Nacional de Inteligencia Artificial", "aff_domain": "cenia.cl;uc.cl;brown.edu;leidenuniv.nl;cenia.cl", "position": "Researcher;MS student;Postdoc;Assistant Professor;Researcher", "bibtex": "@inproceedings{\nherrera-berg2023large,\ntitle={Large Language Models are biased to overestimate profoundness},\nauthor={Eugenio Herrera-Berg and Tom{\\'a}s Vergara Browne and Pablo Le{\\'o}n-Villagr{\\'a} and Marc-Llu{\\'\\i}s Vives and Cristian Buc Calderon},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=PT63nNpyKg}\n}", "github": "", "project": "", "reviewers": "JFhv;W7L6;T4Lt", "site": "https://openreview.net/forum?id=PT63nNpyKg", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;2;4", "excitement": "3;3;3", "reproducibility": "5;2;4", "correctness": "3;3;4", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0002-2709-7602;;", "linkedin": "ouhenio/;https://linkedin.com/in/tvergara;;;", "aff_unique_index": "0;1;2;3;0", "aff_unique_norm": "Centro Nacional de Inteligencia Artificial;Pontificia Universidad Catolica de Chile;Brown University;Leiden University", "aff_unique_dep": ";;;", "aff_unique_url": ";https://www.puc.cl;https://www.brown.edu;https://www.leidenuniv.nl", "aff_unique_abbr": ";PUC;Brown;LU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2;3;0", "aff_country_unique": "Spain;Chile;United States;Netherlands" }, { "id": "PT6lSdWEgw", "title": "Toxicity in Multilingual Machine Translation at Scale", "track": "main", "status": "Long 
Findings", "tldr": "", "abstract": "Machine Translation systems can produce different types of errors, some of which are characterized as critical or catastrophic due to the specific negative impact that they can have on users. In this paper we focus on one type of critical error: added toxicity. We evaluate and analyze added toxicity when translating a large evaluation dataset (HOLISTICBIAS, over 472k sentences, covering 13 demographic axes) from English into 164 languages. An automatic toxicity evaluation shows that added toxicity across languages varies from 0% to 5%. The output languages with the most added toxicity tend to be low-resource ones, and the demographic axes with the most added toxicity include sexual orientation, gender and sex, and ability. We\nalso perform human evaluation on a subset of 8 translation directions, confirming the prevalence\nof true added toxicity. We use a measurement of the amount of source contribution to the translation, where a low source contribution implies hallucination, to interpret what causes toxicity. Making use of the input attributions allows us to explain toxicity, because the source contributions significantly correlate with toxicity for 84% of languages studied. Given our findings, our recommendations to reduce added toxicity are to curate training data to avoid mistranslations, mitigate hallucination and check unstable translations.", "keywords": "Toxicity;Multilingual Machine Translation;Scale", "primary_area": "", "supplementary_material": "", "author": "Marta R. Costa-juss\u00e0;Eric Michael Smith;Christophe Ropers;Daniel Edward Licht;Jean Maillard;Javier Ferrando;Carlos Escolano", "authorids": "~Marta_R._Costa-juss\u00e01;~Eric_Michael_Smith1;~Christophe_Ropers1;~Daniel_Edward_Licht1;~Jean_Maillard1;~Javier_Ferrando1;~Carlos_Escolano1", "gender": "F;Non-Binary;;M;;M;M", "homepage": "https://www.costa-jussa.com;;http://www.chrisropers.net;;;https://javiferran.github.io/personal/;", "dblp": "17/2183;;324/2505;;;267/5458;51/7736", "google_scholar": "ESqQ7FoAAAAJ;uOK8DfQAAAAJ;;;;ZNsw8ZUAAAAJ;https://scholar.google.es/citations?user=yja1284AAAAJ", "or_profile": "~Marta_R._Costa-juss\u00e01;~Eric_Michael_Smith1;~Christophe_Ropers1;~Daniel_Edward_Licht1;~Jean_Maillard1;~Javier_Ferrando1;~Carlos_Escolano1", "aff": "Meta;Meta AI;Syntexys Inc;Meta [FAIR];;Universidad Polit\u00e9cnica de Cataluna;Universidad Polit\u00e9cnica de Cataluna", "aff_domain": "fb.com;meta.com;syntexys.com;facebook.com;;upc.edu;upc.edu", "position": "Research Scientist;Researcher;Linguist, CRO;Researcher;;PhD student;Postdoc", "bibtex": "@inproceedings{\ncosta-juss{\\`a}2023toxicity,\ntitle={Toxicity in Multilingual Machine Translation at Scale},\nauthor={Marta R. 
Costa-juss{\\`a} and Eric Michael Smith and Christophe Ropers and Daniel Edward Licht and Jean Maillard and Javier Ferrando and Carlos Escolano},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=PT6lSdWEgw}\n}", "github": "", "project": "", "reviewers": "ZjQm;YBu5;Ueh7", "site": "https://openreview.net/forum?id=PT6lSdWEgw", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;2", "excitement": "3;3;4", "reproducibility": "4;5;5", "correctness": "3;2;4", "rating_avg": 3.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.666666666666667, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;;", "linkedin": ";;;lichtphyz/;;javierferrandomonsonis/;https://es.linkedin.com/in/carlos-escolano-ba26549a", "aff_unique_index": "0;0;1;0;2;2", "aff_unique_norm": "Meta;Syntexys Inc;Universitat Polit\u00e8cnica de Catalunya", "aff_unique_dep": "Meta Platforms, Inc.;;", "aff_unique_url": "https://meta.com;;https://www.upc.edu", "aff_unique_abbr": "Meta;;UPC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;2;2", "aff_country_unique": "United States;;Spain" }, { "id": "PTko0qsiA4", "title": "Large Language Models as Source Planner for Personalized Knowledge-grounded Dialogues", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Open-domain dialogue system usually requires different sources of knowledge to generate more informative and evidential responses. However, existing knowledge-grounded dialogue systems either focus on a single knowledge source or overlook the dependency between multiple sources of knowledge, which may result in generating inconsistent or even paradoxical responses. To incorporate multiple knowledge sources and dependencies between them, we propose SAFARI, a novel framework that leverages the exceptional capabilities of large language models (LLMs) in planning, understanding, and incorporating under both supervised and unsupervised settings. Specifically, SAFARI decouples the knowledge grounding into multiple sources and response generation, which allows easy extension to various knowledge sources including the possibility of not using any sources. To study the problem, we construct a personalized knowledge-grounded dialogue dataset Knowledge Behind Persona (KBP), which is the first to consider the dependency between persona and implicit knowledge. 
Experimental results on the KBP dataset demonstrate that the SAFARI framework can effectively produce persona-consistent and knowledge-enhanced responses.", "keywords": "knowledge-grounded dialogue system;personalized dialogue system;large language models", "primary_area": "", "supplementary_material": "", "author": "Hongru WANG;Minda Hu;Yang Deng;Rui Wang;Fei Mi;Weichao Wang;Yasheng Wang;Wai-Chung Kwan;Irwin King;Kam-Fai Wong", "authorids": "~Hongru_WANG1;~Minda_Hu1;~Yang_Deng4;~Rui_Wang30;~Fei_Mi1;~Weichao_Wang3;~Yasheng_Wang1;~Wai-Chung_Kwan2;~Irwin_King1;~Kam-Fai_Wong2", "gender": "M;M;M;M;M;M;M;M;M;", "homepage": "https://rulegreen.github.io/;;https://dengyang17.github.io/;;https://mifei.github.io/;;;https://www.cse.cuhk.edu.hk/irwin.king/;http://www.se.cuhk.edu.hk/~kfwong;https://kwanwaichung.github.io/", "dblp": "72/1462-3;260/5462.html;115/6282-2;06/2293-92;161/0068;;57/8493;k/IrwinKing;w/KamFaiWong;", "google_scholar": "s6UtVYUAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com.hk/citations?user=OshWT3UAAAAJ;https://scholar.google.com/citations?view_op=list_works;gX3493QAAAAJ;HZnZBdcAAAAJ;x-UYeJ4AAAAJ;MXvC7tkAAAAJ;;77Lyt1cAAAAJ", "or_profile": "~Hongru_WANG1;~Minda_Hu1;~Yang_Deng4;~Rui_Wang30;~Fei_Mi1;~Weichao_Wang3;~Yasheng_Wang1;~Irwin_King1;~Kam-Fai_Wong2;~Wai_Chung_Kwan1", "aff": "University of Edinburgh;The Chinese University of Hong Kong;The Chinese University of Hong Kong;Harbin Institute of Technology;;Huawei Technologies Ltd.;;The Chinese University of Hong Kong;The Chinese University of Hong Kong;The Chinese University of Hong Kong", "aff_domain": "ed.ac.uk;cse.cuhk.edu.hk;cuhk.edu.hk;hit.edu.cn;;huawei.com;;cuhk.edu.hk;cuhk.edu.hk;cuhk.edu.hk", "position": "Visiting Student;PhD student;PhD student;MS student;;Researcher;;Full Professor;Full Professor;PhD student", "bibtex": "@inproceedings{\nwang2023large,\ntitle={Large Language Models as Source Planner for Personalized Knowledge-grounded Dialogues},\nauthor={Hongru WANG and Minda Hu and Yang Deng and Rui Wang and Fei Mi and Weichao Wang and Yasheng Wang and Wai-Chung Kwan and Irwin King and Kam-Fai Wong},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=PTko0qsiA4}\n}", "github": "", "project": "", "reviewers": "C1P9;WKZY;EWfx;qWAp", "site": "https://openreview.net/forum?id=PTko0qsiA4", "pdf_size": 0, "rating": "3;3;3;3", "confidence": "4;3;3;4", "excitement": "3;2;3;3", "reproducibility": "4;4;3;4", "correctness": "4;3;3;3", "rating_avg": 3.0, "confidence_avg": 3.5, "excitement_avg": 2.75, "reproducibility_avg": 3.75, "correctness_avg": 3.25, "replies_avg": 14, "authors#_avg": 10, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0001-5027-0138;0000-0003-1048-1998;;;;;;0000-0001-8106-6447;0000-0002-9427-5659;", "linkedin": ";;;;;;;irwinking/;;wai-chung-kwan-46a6bb152", "aff_unique_index": "0;1;1;2;3;1;1;1", "aff_unique_norm": "University of Edinburgh;Chinese University of Hong Kong;Harbin Institute of Technology;Huawei", "aff_unique_dep": ";;;Huawei Technologies", "aff_unique_url": "https://www.ed.ac.uk;https://www.cuhk.edu.hk;http://www.hit.edu.cn/;https://www.huawei.com", "aff_unique_abbr": "Edinburgh;CUHK;HIT;Huawei", "aff_campus_unique_index": "1;1;2;1;1;1", "aff_campus_unique": ";Hong Kong SAR;Harbin", "aff_country_unique_index": "0;1;1;1;1;1;1;1", "aff_country_unique": "United Kingdom;China" }, { "id": "PWWg9q3S0C", "title": "From Multilingual Complexity to Emotional 
Clarity: Leveraging Commonsense to Unveil Emotions in Code-Mixed Dialogues", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Understanding emotions during conversation is a fundamental aspect of human communication, driving NLP research for Emotion Recognition in Conversation (ERC). While considerable research has focused on discerning emotions of individual speakers in monolingual dialogues, understanding the emotional dynamics in code-mixed conversations has received relatively less attention. This motivates our undertaking of ERC for code-mixed conversations in this study. Recognizing that emotional intelligence encompasses a comprehension of worldly knowledge, we propose an innovative approach that integrates commonsense information with dialogue context to facilitate a deeper understanding of emotions. To achieve this, we devise an efficient pipeline that extracts relevant commonsense from existing knowledge graphs based on the code-mixed input. Subsequently, we develop an advanced fusion technique that seamlessly combines the acquired commonsense information with the dialogue representation obtained from a dedicated dialogue understanding module. Our comprehensive experimentation showcases the substantial performance improvement obtained through the systematic incorporation of commonsense in ERC. Both quantitative assessments and qualitative analyses further corroborate the validity of our hypothesis, reaffirming the pivotal role of commonsense integration in enhancing ERC.", "keywords": "Emotion Recognition in Conversation;Code-mix dialogues;Commonsense", "primary_area": "", "supplementary_material": "", "author": "Shivani Kumar;Ramaneswaran S;Md Shad Akhtar;Tanmoy Chakraborty", "authorids": "~Shivani_Kumar1;~Ramaneswaran_S1;~Md_Shad_Akhtar1;~Tanmoy_Chakraborty2", "gender": "F;M;;M", "homepage": "https://kumarshivani.com;;;http://tanmoychak.com", "dblp": "289/0065;;184/8579.html;65/2136-2.html", "google_scholar": "https://scholar.google.co.in/citations?hl=en;YIhHxbwAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.co.in/citations?user=C5S9JnIAAAAJ", "or_profile": "~Shivani_Kumar1;~Ramaneswaran_S1;~Md_Shad_Akhtar1;~Tanmoy_Chakraborty2", "aff": "Indraprastha Institute of Information Technology, Delhi;NVIDIA;Indraprastha Institute of Information Technology, Delhi;Indian Institute of Technology, Delhi", "aff_domain": "iiitd.ac.in;nvidia.com;iiitd.ac.in;iitd.ac.in", "position": "PhD student;Researcher;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nkumar2023from,\ntitle={From Multilingual Complexity to Emotional Clarity: Leveraging Commonsense to Unveil Emotions in Code-Mixed Dialogues},\nauthor={Shivani Kumar and Ramaneswaran S and Md Shad Akhtar and Tanmoy Chakraborty},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=PWWg9q3S0C}\n}", "github": "", "project": "", "reviewers": "FCsy;m45s;QymD", "site": "https://openreview.net/forum?id=PWWg9q3S0C", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;2;5", "excitement": "4;4;4", "reproducibility": "4;4;4", "correctness": "4;4;4", "rating_avg": 5.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 4.0, "reproducibility_avg": 4.0, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0000-0002-0210-0369", "linkedin": ";;;tanmoy-chakraborty-89553324/", "aff_unique_index": "0;1;0;2", "aff_unique_norm": 
"Indraprastha Institute of Information Technology;NVIDIA;Indian Institute of Technology Delhi", "aff_unique_dep": ";NVIDIA Corporation;", "aff_unique_url": "http://www.iiitd.ac.in;https://www.nvidia.com;https://www.iitdelhi.ac.in", "aff_unique_abbr": "IIIT-D;NVIDIA;IIT Delhi", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Delhi;", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "India;United States" }, { "id": "PXkS70nuNp", "title": "CRaSh: Clustering, Removing, and Sharing Enhance Fine-tuning without Full Large Language Model", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Instruction tuning has recently been recognized as an effective way of aligning Large Language Models (LLMs) to enhance their generalization ability across various tasks. However, when tuning publicly accessible, centralized LLMs with private instruction data, privacy concerns are inevitable. While direct transfer of parameterized modules between models is a plausible approach to address this, its implications and effectiveness need further exploration. This paper focuses on Offsite-Tuning (OFT), a representative technique that transfers transformer blocks between centralized LLMs and downstream emulators. Given the limited understanding of the underlying mechanism of OFT, we perform an empirical analysis on LLMs from the perspectives of representation and functional similarity. Interestingly, our findings reveal a unique modular structure within the layers of LLMs that appears to emerge as the model size expands. Simultaneously, we note subtle but potentially significant changes in representation and intermediate predictions across the layers. Inspired by these observations, we propose CRaSh, involving Clustering, Removing, and Sharing, a training-free strategy to derive improved emulators from LLMs. CRaSh significantly boosts performance of OFT with billions of parameters. Furthermore, we investigate the optimal solutions yielded by fine-tuning with and without full model through the lens of loss landscape. 
Our findings demonstrate a linear connectivity among these optima falling over the same basin, thereby highlighting the effectiveness of CRaSh and OFT.", "keywords": "large language models;transfer learning;representation similarity;model privacy", "primary_area": "", "supplementary_material": "", "author": "Kaiyan Zhang;Ning Ding;Biqing Qi;Xuekai Zhu;Xinwei Long;Bowen Zhou", "authorids": "~Kaiyan_Zhang1;~Ning_Ding5;~Biqing_Qi1;~Xuekai_Zhu1;~Xinwei_Long1;~Bowen_Zhou4", "gender": "M;M;M;M;M;", "homepage": "https://iseesaw.github.io/;https://www.stingning.cn/;https://biqing-qi.github.io/;;;", "dblp": ";;233/4949.html;327/9656;;", "google_scholar": "https://scholar.google.com/citations?hl=en-US;uZXQuYAAAAAJ;;plXXtQkAAAAJ;https://scholar.google.cz/citations?hl=cs;https://scholar.google.com/citations?hl=zh-CN", "or_profile": "~Kaiyan_Zhang1;~Ning_Ding5;~Biqing_Qi1;~Xuekai_Zhu1;~Xinwei_Long1;~Bowen_Zhou4", "aff": "Tsinghua University;Tsinghua University;Harbin Institute of Technology;Tsinghua University;Tsinghua University;JD.com", "aff_domain": "mails.tsinghua.edu.cn;tsinghua.edu.cn;hit.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;jd.com", "position": "PhD student;PhD student;PhD student;Intern;PhD student;Vice President", "bibtex": "@inproceedings{\nzhang2023crash,\ntitle={{CR}aSh: Clustering, Removing, and Sharing Enhance Fine-tuning without Full Large Language Model},\nauthor={Kaiyan Zhang and Ning Ding and Biqing Qi and Xuekai Zhu and Xinwei Long and Bowen Zhou},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=PXkS70nuNp}\n}", "github": "", "project": "", "reviewers": "TamK;ZSZD;CWER", "site": "https://openreview.net/forum?id=PXkS70nuNp", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;2;3", "excitement": "4;4;4", "reproducibility": "4;3;4", "correctness": "3;4;4", "rating_avg": 5.0, "confidence_avg": 3.0, "excitement_avg": 4.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0009-0001-0595-3084;;0000-0002-4072-0577;;;", "linkedin": ";;;;;", "aff_unique_index": "0;0;1;0;0;2", "aff_unique_norm": "Tsinghua University;Harbin Institute of Technology;JD.com", "aff_unique_dep": ";;", "aff_unique_url": "https://www.tsinghua.edu.cn;http://www.hit.edu.cn/;https://www.jd.com", "aff_unique_abbr": "THU;HIT;JD", "aff_campus_unique_index": "1", "aff_campus_unique": ";Harbin", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "PaVP2Sc6pJ", "title": "A Novel Contrastive Learning Method for Clickbait Detection on RoCliCo: A Romanian Clickbait Corpus of News Articles", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "To increase revenue, news websites often resort to using deceptive news titles, luring users into clicking on the title and reading the full news. Clickbait detection is the task that aims to automatically detect this form of false advertisement and avoid wasting the precious time of online users. Despite the importance of the task, to the best of our knowledge, there is no publicly available clickbait corpus for the Romanian language. To this end, we introduce a novel Romanian Clickbait Corpus (RoCliCo) comprising 8,313 news samples which are manually annotated with clickbait and non-clickbait labels. 
Furthermore, we conduct experiments with four machine learning methods, ranging from handcrafted models to recurrent and transformer-based neural networks, to establish a line-up of competitive baselines. We also carry out experiments with a weighted voting ensemble. Among the considered baselines, we propose a novel BERT-based contrastive learning model that learns to encode news titles and contents into a deep metric space such that titles and contents of non-clickbait news have high cosine similarity, while titles and contents of clickbait news have low cosine similarity. Our data set and code to reproduce the baselines are publicly available for download at https://github.com/dariabroscoteanu/RoCliCo.", "keywords": "clickbait detection;low-resource language;Romanian corpus;contrastive learning", "primary_area": "", "supplementary_material": "", "author": "Daria Mihaela Broscoteanu;Radu Tudor Ionescu", "authorids": "~Daria_Mihaela_Broscoteanu1;~Radu_Tudor_Ionescu1", "gender": "F;M", "homepage": ";http://raduionescu.herokuapp.com", "dblp": ";120/9006", "google_scholar": ";qVbwC6QAAAAJ", "or_profile": "~Daria_Mihaela_Broscoteanu1;~Radu_Tudor_Ionescu1", "aff": "University of Bucharest;Universitatea Bucuresti", "aff_domain": "unibuc.ro;unibuc.ro", "position": "Undergrad student;Full Professor", "bibtex": "@inproceedings{\nbroscoteanu2023a,\ntitle={A Novel Contrastive Learning Method for Clickbait Detection on RoCliCo: A Romanian Clickbait Corpus of News Articles},\nauthor={Daria Mihaela Broscoteanu and Radu Tudor Ionescu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=PaVP2Sc6pJ}\n}", "github": "", "project": "", "reviewers": "T8m7;X7XQ;dgex;DCj2", "site": "https://openreview.net/forum?id=PaVP2Sc6pJ", "pdf_size": 0, "rating": "4;4;4;4", "confidence": "4;5;4;4", "excitement": "2;3;3;3", "reproducibility": "3;3;4;4", "correctness": "2;3;4;3", "rating_avg": 4.0, "confidence_avg": 4.25, "excitement_avg": 2.75, "reproducibility_avg": 3.5, "correctness_avg": 3.0, "replies_avg": 13, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-9301-1950", "linkedin": "daria-broscoteanu-764186200/;radu-ionescu-5145374b/", "aff_unique_index": "0;0", "aff_unique_norm": "University of Bucharest", "aff_unique_dep": "", "aff_unique_url": "https://www.unibuc.ro", "aff_unique_abbr": "Unibuc", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Romania" }, { "id": "Pb1DhkTVLZ", "title": "Estimating Large Language Model Capabilities without Labeled Test Data", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Large Language Models (LLMs) have exhibited an impressive ability to perform in-context learning (ICL) from only a few examples, but the success of ICL varies widely from task to task. Thus, it is important to quickly determine whether ICL is applicable to a new task, but directly evaluating ICL accuracy can be expensive in situations where test data is expensive to annotate---the exact situations where ICL is most appealing. In this paper, we propose the task of ICL accuracy estimation, in which we predict the accuracy of an LLM when doing in-context learning on a new task given only unlabeled test data for that task. To perform ICL accuracy estimation, we propose a method that trains a meta-model using LLM confidence scores as features. 
We compare our method to several strong accuracy estimation baselines on a new benchmark that covers 4 LLMs and 3 task collections. The meta-model improves over all baselines across 7 out of 12 settings and achieves the same estimation performance as directly evaluating on 40 collected labeled test examples per task. At the same time, no existing approach provides an accurate and reliable ICL accuracy estimation in every setting, highlighting the need for better ways to measure the uncertainty of LLM predictions.", "keywords": "large language model;accuracy prediction;confidence;calibration;in-context learning", "primary_area": "", "supplementary_material": "", "author": "Harvey Yiyun Fu;Qinyuan Ye;Albert Xu;Xiang Ren;Robin Jia", "authorids": "~Harvey_Yiyun_Fu1;~Qinyuan_Ye1;~Albert_Xu1;~Xiang_Ren1;~Robin_Jia1", "gender": "M;F;M;M;M", "homepage": "https://harvey-fin.github.io/;http://yeqy.xyz/;https://albertxu.xyz/;https://shanzhenren.github.io/;https://robinjia.github.io/", "dblp": ";239/5731;290/1589;36/360-1;182/2556", "google_scholar": "0ZBEwDUAAAAJ;g230ERwAAAAJ;u0DUItQAAAAJ;_moJlrIAAAAJ;ajZ-_O0AAAAJ", "or_profile": "~Harvey_Yiyun_Fu1;~Qinyuan_Ye1;~Albert_Xu1;~Xiang_Ren1;~Robin_Jia1", "aff": "University of Southern California;Microsoft;University of Southern California;University of Southern California;University of Southern California", "aff_domain": "usc.edu;microsoft.com;usc.edu;usc.edu;usc.edu", "position": "Undergrad student;Intern;PhD student;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\nfu2023estimating,\ntitle={Estimating Large Language Model Capabilities without Labeled Test Data},\nauthor={Harvey Yiyun Fu and Qinyuan Ye and Albert Xu and Xiang Ren and Robin Jia},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=Pb1DhkTVLZ}\n}", "github": "", "project": "", "reviewers": "cyhj;rur3;8wF1;zZme", "site": "https://openreview.net/forum?id=Pb1DhkTVLZ", "pdf_size": 0, "rating": "3;3;3;3", "confidence": "2;4;4;4", "excitement": "3;4;3;2", "reproducibility": "4;4;4;3", "correctness": "3;3;3;2", "rating_avg": 3.0, "confidence_avg": 3.5, "excitement_avg": 3.0, "reproducibility_avg": 3.75, "correctness_avg": 2.75, "replies_avg": 14, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;", "linkedin": "harvey-fu-yiyun/;;;xren7;", "aff_unique_index": "0;1;0;0;0", "aff_unique_norm": "University of Southern California;Microsoft", "aff_unique_dep": ";Microsoft Corporation", "aff_unique_url": "https://www.usc.edu;https://www.microsoft.com", "aff_unique_abbr": "USC;Microsoft", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Los Angeles;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "PffUQuD8sn", "title": "Statistical Depth for Ranking and Characterizing Transformer-Based Text Embeddings", "track": "main", "status": "Long Main", "tldr": "", "abstract": "The popularity of transformer-based text embeddings calls for better statistical tools for measuring distributions of such embeddings. One such tool would be a method for ranking texts within a corpus by centrality, i.e. assigning each text a number signifying how representative that text is of the corpus as a whole. However, an intrinsic center-outward ordering of high-dimensional text representations is not trivial. 
A $\\textit{statistical depth}$ is a function for ranking $k$-dimensional objects by measuring centrality with respect to some observed $k$-dimensional distribution. We adopt a statistical depth to measure distributions of transformer-based text embeddings, $\\textit{transformer-based text embedding (TTE) depth}$, and introduce the practical use of this depth for both modeling and distributional inference in NLP pipelines. We first define TTE depth and an associated rank sum test for determining whether two corpora differ significantly in embedding space. We then use TTE depth for the task of in-context learning prompt selection, showing that this approach reliably improves performance over statistical baseline approaches across six text classification tasks. Finally, we use TTE depth and the associated rank sum test to characterize the distributions of synthesized and human-generated corpora, showing that five recent synthetic data augmentation processes cause a measurable distributional shift away from associated human-generated text.", "keywords": "text embeddings;transformers;statistical inference;corpus analysis;statistical depth;in-context learning;synthetic data augmentation", "primary_area": "", "supplementary_material": "", "author": "Parker Seegmiller;Sarah Masud Preum", "authorids": "~Parker_Seegmiller1;~Sarah_Masud_Preum1", "gender": "M;", "homepage": "https://pkseeg.com/;https://web.cs.dartmouth.edu/people/sarah-masud-preum", "dblp": "330/9880;165/8174.html", "google_scholar": "bU_Xi10AAAAJ;TyO23NgAAAAJ", "or_profile": "~Parker_Seegmiller1;~Sarah_Masud_Preum1", "aff": "Dartmouth College;Dartmouth College", "aff_domain": "dartmouth.edu;dartmouth.edu", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\nseegmiller2023statistical,\ntitle={Statistical Depth for Ranking and Characterizing Transformer-Based Text Embeddings},\nauthor={Parker Seegmiller and Sarah Masud Preum},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=PffUQuD8sn}\n}", "github": "", "project": "", "reviewers": "cygC;vaeR;bzCu", "site": "https://openreview.net/forum?id=PffUQuD8sn", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;3;3", "excitement": "3;4;2", "reproducibility": "4;4;3", "correctness": "3;4;3", "rating_avg": 3.0, "confidence_avg": 3.0, "excitement_avg": 3.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0001-6783-9773;0000-0002-7771-8323", "linkedin": "parker-seegmiller-8aa583172/;", "aff_unique_index": "0;0", "aff_unique_norm": "Dartmouth College", "aff_unique_dep": "", "aff_unique_url": "https://www.dartmouth.edu", "aff_unique_abbr": "Dartmouth", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "PnAmH1silV", "title": "On Bilingual Lexicon Induction with Large Language Models", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Bilingual Lexicon Induction (BLI) is a core task in multilingual NLP that still, to a large extent, relies on calculating cross-lingual word representations. Inspired by the global paradigm shift in NLP towards Large Language Models (LLMs), we examine the potential of the latest generation of LLMs for the development of bilingual lexicons. 
We ask the following research question: Is it possible to prompt and fine-tune multilingual LLMs (mLLMs) for BLI, and how does this approach compare against and complement current BLI approaches? To this end, we systematically study 1) zero-shot prompting for unsupervised BLI and 2) few-shot in-context prompting with a set of seed translation pairs, both without any LLM fine-tuning, as well as 3) standard BLI-oriented fine-tuning of smaller LLMs. We experiment with 18 open-source text-to-text mLLMs of different sizes (from 0.3B to 13B parameters) on two standard BLI benchmarks covering a range of typologically diverse languages. Our work is the first to demonstrate strong BLI capabilities of text-to-text mLLMs. The results reveal that few-shot prompting with in-context examples from nearest neighbours achieves the best performance, establishing new state-of-the-art BLI scores for many language pairs. We also conduct a series of in-depth analyses and ablation studies, providing more insights on BLI with (m)LLMs, also along with their limitations.", "keywords": "Bilingual Lexicon Induction;Large Language Models", "primary_area": "", "supplementary_material": "", "author": "Yaoyiran Li;Anna Korhonen;Ivan Vuli\u0107", "authorids": "~Yaoyiran_Li1;~Anna_Korhonen1;~Ivan_Vuli\u01071", "gender": ";;M", "homepage": ";https://sites.google.com/site/annakorhonen/;https://sites.google.com/site/ivanvulic/", "dblp": ";14/6532;77/9768", "google_scholar": ";https://scholar.google.co.uk/citations?user=SCoVoOYAAAAJ;ZX8js60AAAAJ", "or_profile": "~Yaoyiran_Li1;~Anna_Korhonen1;~Ivan_Vuli\u01071", "aff": ";University of Cambridge;PolyAI Limited", "aff_domain": ";cam.ac.uk;poly-ai.com", "position": ";Professor;Senior Scientist", "bibtex": "@inproceedings{\nli2023on,\ntitle={On Bilingual Lexicon Induction with Large Language Models},\nauthor={Yaoyiran Li and Anna Korhonen and Ivan Vuli{\\'c}},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=PnAmH1silV}\n}", "github": "", "project": "", "reviewers": "cKpL;om7d;5V65", "site": "https://openreview.net/forum?id=PnAmH1silV", "pdf_size": 0, "rating": "5;5;5", "confidence": "3;4;4", "excitement": "4;4;4", "reproducibility": "4;5;5", "correctness": "4;5;4", "rating_avg": 5.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 4.0, "reproducibility_avg": 4.666666666666667, "correctness_avg": 4.333333333333333, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": ";anna-korhonen-534a9b5/;ivan-vuli%C4%87-286b4a81/", "aff_unique_index": "0;1", "aff_unique_norm": "University of Cambridge;PolyAI Limited", "aff_unique_dep": ";", "aff_unique_url": "https://www.cam.ac.uk;https://www.poly.ai", "aff_unique_abbr": "Cambridge;PolyAI", "aff_campus_unique_index": "0", "aff_campus_unique": "Cambridge;", "aff_country_unique_index": "0;0", "aff_country_unique": "United Kingdom" }, { "id": "PoMCId4iez", "title": "From Dissonance to Insights: Dissecting Disagreements in Rationale Construction for Case Outcome Classification", "track": "main", "status": "Long Main", "tldr": "", "abstract": "In legal NLP, Case Outcome Classification (COC) must not only be accurate but also trustworthy and explainable. Existing work in explainable COC has been limited to annotations by a single expert. However, it is well-known that lawyers may disagree in their assessment of case facts. 
We hence collect a novel dataset RaVE: Rationale Variation in ECHR, which is obtained from two experts in the domain of international human rights law, for whom we observe weak agreement. We study their disagreements and build a two-level task-independent taxonomy, supplemented with COC-specific subcategories. To our knowledge, this is the first work in legal NLP that focuses on human label variation. We quantitatively assess different taxonomy categories and find that disagreements mainly stem from underspecification of the legal context, which poses challenges given the typically limited granularity and noise in COC metadata. We further assess the explainability of state-of-the-art COC models on RaVE and observe limited agreement between models and experts. Overall, our case study reveals hitherto underappreciated complexities in creating benchmark datasets in legal NLP that revolve around identifying aspects of a case's facts supposedly relevant for its outcome.", "keywords": "legal judgement prediction;case outcome classification;disagreement;explainability;rationale dataset", "primary_area": "", "supplementary_material": "", "author": "Shanshan Xu;Santosh T.Y.S.S;Oana Ichim;Isabella Risini;Barbara Plank;Matthias Grabmair", "authorids": "~Shanshan_Xu1;~Santosh_T.Y.S.S1;~Oana_Ichim1;~Isabella_Risini1;~Barbara_Plank2;~Matthias_Grabmair2", "gender": "F;M;F;;;M", "homepage": "https://sxu3.github.io/;;https://www.graduateinstitute.ch/discover-institute/oana-ichim;https://www.ruhr-uni-bochum.de/ls-puttler/cv_risini.html;https://bplank.github.io/;https://www.cs.cit.tum.de/lt/team/matthias-grabmair/", "dblp": ";220/2486;;;46/521;09/1651", "google_scholar": "dSDjjCEAAAAJ;aYytWsAAAAAJ;;;;MroPEGsAAAAJ", "or_profile": "~Shanshan_Xu1;~Santosh_T.Y.S.S1;~Oana_Ichim1;~Isabella_Risini1;~Barbara_Plank2;~Matthias_Grabmair2", "aff": "Technische Universit\u00e4t M\u00fcnchen;Adobe Systems;University of Massachusetts at Amherst;Ruhr-Universit\u00e4t Bochum;IT University of Copenhagen;Technische Universit\u00e4t M\u00fcnchen", "aff_domain": "tum.de;adobe.com;umass.edu;ruhr-uni-bochum.de;itu.dk;tum.de", "position": "PhD student;Research Intern;Postdoc;Postdoc;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nxu2023from,\ntitle={From Dissonance to Insights: Dissecting Disagreements in Rationale Construction for Case Outcome Classification},\nauthor={Shanshan Xu and Santosh T.Y.S.S and Oana Ichim and Isabella Risini and Barbara Plank and Matthias Grabmair},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=PoMCId4iez}\n}", "github": "", "project": "", "reviewers": "CY3m;7qoD;UWAY", "site": "https://openreview.net/forum?id=PoMCId4iez", "pdf_size": 0, "rating": "5;5;5", "confidence": "2;3;5", "excitement": "2;4;4", "reproducibility": "2;4;5", "correctness": "3;4;4", "rating_avg": 5.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0009-0005-1203-1634;;;;;", "linkedin": ";;;;;matthias-grabmair-38216350/", "aff_unique_index": "0;1;2;3;4;0", "aff_unique_norm": "Technische Universit\u00e4t M\u00fcnchen;Adobe;University of Massachusetts Amherst;Ruhr-Universit\u00e4t Bochum;IT University of Copenhagen", "aff_unique_dep": ";Adobe Systems Incorporated;;;", "aff_unique_url": 
"https://www.tum.de;https://www.adobe.com;https://www.umass.edu;https://www.ruhr-uni-bochum.de;https://itu.dk", "aff_unique_abbr": "TUM;Adobe;UMass Amherst;RUB;ITU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Amherst", "aff_country_unique_index": "0;1;1;0;2;0", "aff_country_unique": "Germany;United States;Denmark" }, { "id": "PomhVDrvco", "title": "EpiK-Eval: Evaluation for Language Models as Epistemic Models", "track": "main", "status": "Long Main", "tldr": "", "abstract": "In the age of artificial intelligence, the role of large language models (LLMs) is becoming increasingly central. Despite their growing prevalence, their capacity to consolidate knowledge from different training documents\u2014a crucial ability in numerous applications\u2014remains unexplored. This paper presents the first study examining the capability of LLMs to effectively combine such information within their parameter space. We introduce EpiK-Eval, a novel question-answering benchmark tailored to evaluate LLMs' proficiency in formulating a coherent and consistent knowledge representation from segmented narratives. Evaluations across various LLMs reveal significant weaknesses in this domain. We contend that these shortcomings stem from the intrinsic nature of prevailing training objectives. Consequently, we advocate for refining the approach towards knowledge consolidation, as it harbors the potential to dramatically improve their overall effectiveness and performance. The findings from this study offer insights for developing more robust and reliable LLMs. Our code and benchmark are available at https://github.com/chandar-lab/EpiK-Eval", "keywords": "large language models;large language model;LLMs;LLM;language model;language models;LM;LMs;EpiK-Eval;knowledge consolidation;story;benchmark;knowledge-base;KB;theory-of-mind;epistemic;hallucination;hallucinate;dataset;task;scale;scaling;knowledge representation;reasoning;consolidation;knowledge;context;evaluation;limitations;limitation;narrative;narratives;training objective;causal language modeling;masked language modeling", "primary_area": "", "supplementary_material": "", "author": "Gabriele Prato;Jerry Huang;Prasanna Parthasarathi;Shagun Sodhani;Sarath Chandar", "authorids": "~Gabriele_Prato1;~Jerry_Huang1;~Prasanna_Parthasarathi2;~Shagun_Sodhani1;~Sarath_Chandar1", "gender": ";;M;M;M", "homepage": ";;https://www.cs.mcgill.ca/~pparth2/;https://shagunsodhani.com;http://sarathchandar.in/", "dblp": ";;211/7503;http://dblp.uni-trier.de/pers/hd/s/Sodhani:Shagun;45/8542", "google_scholar": ";;https://scholar.google.co.in/citations?hl=en;ixp-vqMAAAAJ;https://scholar.google.co.in/citations?user=yxWtZLAAAAAJ", "or_profile": "~Gabriele_Prato1;~Jerry_Huang1;~Prasanna_Parthasarathi2;~Shagun_Sodhani1;~Sarath_Chandar1", "aff": ";;Huawei Technologies Ltd.;Meta Facebook;\u00c9cole Polytechnique de Montr\u00e9al", "aff_domain": ";;huawei.com;fb.com;polymtl.ca", "position": ";;Researcher;Researcher;Assistant Professor", "bibtex": "@inproceedings{\nprato2023epikeval,\ntitle={EpiK-Eval: Evaluation for Language Models as Epistemic Models},\nauthor={Gabriele Prato and Jerry Huang and Prasanna Parthasarathi and Shagun Sodhani and Sarath Chandar},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=PomhVDrvco}\n}", "github": "", "project": "", "reviewers": "tdge;1wi7;hoTF", "site": "https://openreview.net/forum?id=PomhVDrvco", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;4;3", 
"excitement": "3;4;4", "reproducibility": "4;4;4", "correctness": "3;3;3", "rating_avg": 5.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.0, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;", "linkedin": ";;prasanna-parthasarathi/;shagun-sodhani-b2239879;", "aff_unique_index": "0;1;2", "aff_unique_norm": "Huawei;Meta;\u00c9cole Polytechnique de Montr\u00e9al", "aff_unique_dep": "Huawei Technologies;Meta Platforms, Inc.;", "aff_unique_url": "https://www.huawei.com;https://meta.com;https://www.polymtl.ca", "aff_unique_abbr": "Huawei;Meta;Polytechnique Montr\u00e9al", "aff_campus_unique_index": "1", "aff_campus_unique": ";Montr\u00e9al", "aff_country_unique_index": "0;1;2", "aff_country_unique": "China;United States;Canada" }, { "id": "Pu5tJykUeT", "title": "ART: rule bAsed futuRe-inference deducTion", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Deductive reasoning is a crucial cognitive ability of humanity, allowing us to derive valid conclusions from premises and observations. However, existing works mainly focus on language-based premises and generally neglect deductive reasoning from visual observations. In this work, we introduce rule bAsed futuRe-inference deducTion (ART), which aims at deducing the correct future event based on the visual phenomenon (a video) and the rule-based premises, along with an explanation of the reasoning process. To advance this field, we construct a large-scale densely annotated dataset (Video-ART), where the premises, future event candidates, the reasoning process explanation, and auxiliary commonsense knowledge (e.g., actions and appearance) are annotated by annotators. Upon Video-ART, we develop a strong baseline named ARTNet. In essence, guided by commonsense knowledge, ARTNet learns to identify the target video character and perceives its visual clues related to the future event. Then, ARTNet rigorously applies the given premises to conduct reasoning from the identified information to future events, through a non-parametric rule reasoning network and a reasoning-path review module. 
Empirical studies validate the rationality of ARTNet in deductive reasoning upon visual observations and the effectiveness over existing works.", "keywords": "cross-modal;deductive reasoning;deep learning", "primary_area": "", "supplementary_material": "", "author": "Mengze Li;Tianqi Zhao;Bai Jionghao;Baoyi He;Jiaxu Miao;Wei Ji;Zheqi Lv;Zhou Zhao;Shengyu Zhang;Wenqiao Zhang;Fei Wu", "authorids": "~Mengze_Li2;~Tianqi_Zhao4;~Bai_Jionghao1;~Baoyi_He1;~Jiaxu_Miao2;~Wei_Ji1;~Zheqi_Lv1;~Zhou_Zhao3;~Shengyu_Zhang2;~Wenqiao_Zhang1;~Fei_Wu1", "gender": "M;M;M;F;M;M;;;M;M;M", "homepage": "https://www.researchgate.net/profile/Mengze-Li-13;;https://www.researchgate.net/profile/Bai-Jionghao;https://www.researchgate.net/profile/Baoyi-He;;https://jiwei0523.github.io/;;;https://shengyuzhang.github.io/;;https://person.zju.edu.cn/wufei", "dblp": "173/5918-1;;356/8993;359/4134;259/5073;52/3220-8;;;47/3459-1;250/4486.html;84/3254-1", "google_scholar": ";https://scholar.google.com.hk/citations?view_op=list_works;https://scholar.google.com/citations?hl=zh-CN;;kQ-FWd8AAAAJ;69OFB-AAAAAJ;;;l4Dyt7EAAAAJ;https://scholar.google.com/citations?hl=zh-CN;XJLn4MYAAAAJ", "or_profile": "~Mengze_Li2;~Tianqi_Zhao4;~Bai_Jionghao1;~Baoyi_He1;~Jiaxu_Miao2;~Wei_Ji1;~Zheqi_Lv1;~Zhou_Zhao3;~Shengyu_Zhang2;~Wenqiao_Zhang1;~Fei_Wu1", "aff": "Zhejiang University, Tsinghua University;Zhejiang University;Zhejiang University;Zhejiang University;Zhejiang University;;;;Zhejiang University;National University of Singapore;Zhejiang University", "aff_domain": "zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn;;;;zju.edu.cn;nus.edu.sg;zju.edu.cn", "position": "PhD student;MS student;Undergrad student;Undergrad student;Postdoc;;;;PhD student;Postdoc;Full Professor", "bibtex": "@inproceedings{\nli2023art,\ntitle={{ART}: rule bAsed futuRe-inference deducTion},\nauthor={Mengze Li and Tianqi Zhao and Bai Jionghao and Baoyi He and Jiaxu Miao and Wei Ji and Zheqi Lv and Zhou Zhao and Shengyu Zhang and Wenqiao Zhang and Fei Wu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=Pu5tJykUeT}\n}", "github": "", "project": "", "reviewers": "urk3;XipY;Gibb", "site": "https://openreview.net/forum?id=Pu5tJykUeT", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;3;3", "excitement": "4;3;4", "reproducibility": "4;4;4", "correctness": "4;4;4", "rating_avg": 5.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.0, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 11, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0009-0005-7106-513X;;0000-0002-4238-8475;0000-0002-8106-9768;;;0000-0002-0030-8289;0000-0002-5988-7609;", "linkedin": ";;;;;;;;;;", "aff_unique_index": "0;0;0;0;0;0;1;0", "aff_unique_norm": "Zhejiang University;National University of Singapore", "aff_unique_dep": ";", "aff_unique_url": "http://www.zju.edu.cn;https://www.nus.edu.sg", "aff_unique_abbr": "ZJU;NUS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;1;0", "aff_country_unique": "China;Singapore" }, { "id": "Pw9vYSPJKk", "title": "DiffusionRet: Diffusion-Enhanced Generative Retriever using Constrained Decoding", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Generative retrieval, which maps from a query to its relevant document identifiers (docids), has recently emerged as a new information retrieval (IR) paradigm, however, having suffered from 
1) the $\\textit{lack of the intermediate reasoning step}$, caused by merely using a query to perform the hierarchical classification, and 2) the $\\textit{pretrain-finetune discrepancy}$, which comes from the use of the artificial symbols of docids. To address these limitations, we propose the novel approach of using document generation from a query as an intermediate step before retrieval, thus presenting $\\underline{diffusion}$-enhanced generative $\\underline{ret}$rieval ($\\textbf{DiffusionRet}$), which consists of two processing steps: 1) $\\textit{diffusion-based document generation}$, which employs a sequence-to-sequence diffusion model to produce a pseudo-document sample from a query that is expected to be semantically close to a relevant document; 2) $\\textit{N-gram-based generative retrieval}$, which uses another sequence-to-sequence model to generate n-grams that appear in the collection index, linking a generated sample to an original document. Experimental results on the MS MARCO and Natural Questions datasets show that the proposed DiffusionRet significantly outperforms all existing generative retrieval methods and achieves state-of-the-art performance, even with a much smaller number of parameters.", "keywords": "Information Retrieval;Diffusion Model;Generative Retrieval;Model-based Retrieval", "primary_area": "", "supplementary_material": "", "author": "Shanbao Qiao;Xuebing Liu;Seung-Hoon Na", "authorids": "~Shanbao_Qiao1;~Xuebing_Liu1;~Seung-Hoon_Na1", "gender": "M;M;M", "homepage": ";;https://nlp.jbnu.ac.kr/", "dblp": "361/3636;154/4716;56/3784", "google_scholar": "zM2cUxUAAAAJ;;vZB0BiQAAAAJ", "or_profile": "~Shanbao_Qiao1;~Xuebing_Liu1;~Seung-Hoon_Na1", "aff": "Chonbuk National University;Chonbuk National University;Chonbuk National University", "aff_domain": "jbnu.ac.kr;jbnu.ac.kr;jbnu.ac.kr", "position": "PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nqiao2023diffusionret,\ntitle={DiffusionRet: Diffusion-Enhanced Generative Retriever using Constrained Decoding},\nauthor={Shanbao Qiao and Xuebing Liu and Seung-Hoon Na},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=Pw9vYSPJKk}\n}", "github": "", "project": "", "reviewers": "qdE4;PVJR;B9MB", "site": "https://openreview.net/forum?id=Pw9vYSPJKk", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;4;5", "excitement": "3;3;3", "reproducibility": "4;3;4", "correctness": "3;3;3", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-9580-2641;0000-0002-9705-5678;0000-0002-4372-7125", "linkedin": ";;", "aff_unique_index": "0;0;0", "aff_unique_norm": "Chonbuk National University", "aff_unique_dep": "", "aff_unique_url": "http://www.cbnu.ac.kr", "aff_unique_abbr": "CBNU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "South Korea" }, { "id": "PxEhoPiBB0", "title": "Is GPT-4 a Good Data Analyst?", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "As large language models (LLMs) have demonstrated their powerful capabilities across many domains and tasks, including context understanding, code generation, language generation, data storytelling, etc., many data analysts may raise concerns about whether their jobs will be
replaced by artificial intelligence (AI). This controversial topic has drawn great attention in public. However, we are still at a stage of divergent opinions without any definitive conclusion. Motivated by this, we raise the research question of \"is GPT-4 a good data analyst?\" in this work and aim to answer it by conducting head-to-head comparative studies. In detail, we regard GPT-4 as a data analyst to perform end-to-end data analysis with databases from a wide range of domains. We propose a framework to tackle the problems by carefully designing the prompts for GPT-4 to conduct experiments. We also design several task-specific evaluation metrics to systematically compare the performance between several professional human data analysts and GPT-4. Experimental results show that GPT-4 can achieve comparable performance to humans. We also provide in-depth discussions about our results to shed light on further studies before reaching the conclusion that GPT-4 can replace data analysts.", "keywords": "data analyst;GPT4", "primary_area": "", "supplementary_material": "", "author": "Liying Cheng;Xingxuan Li;Lidong Bing", "authorids": "~Liying_Cheng1;~Xingxuan_Li1;~Lidong_Bing2", "gender": "F;M;", "homepage": "https://liyingcheng95.github.io/;https://xingxuanli.github.io/;https://lidongbing.github.io", "dblp": "221/0115;222/9407;53/6625", "google_scholar": "https://scholar.google.com.sg/citations?user=xkZCRy0kBHEC;IqVxTDAAAAAJ;https://scholar.google.com/citations?hl=en", "or_profile": "~Liying_Cheng1;~Xingxuan_Li1;~Lidong_Bing3", "aff": "Alibaba Group;Alibaba Group;Alibaba Group", "aff_domain": "alibaba-inc.com;alibaba-inc.com;alibaba-inc.com", "position": "Researcher;PhD student;Scientist", "bibtex": "@inproceedings{\ncheng2023is,\ntitle={Is {GPT}-4 a Good Data Analyst?},\nauthor={Liying Cheng and Xingxuan Li and Lidong Bing},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=PxEhoPiBB0}\n}", "github": "", "project": "", "reviewers": "QeUo;jqtG;pggJ;AeQb", "site": "https://openreview.net/forum?id=PxEhoPiBB0", "pdf_size": 0, "rating": "5;5;5;5", "confidence": "4;4;4;3", "excitement": "3;4;3;3", "reproducibility": "4;4;4;3", "correctness": "3;4;3;3", "rating_avg": 5.0, "confidence_avg": 3.75, "excitement_avg": 3.25, "reproducibility_avg": 3.75, "correctness_avg": 3.25, "replies_avg": 14, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": ";;", "aff_unique_index": "0;0;0", "aff_unique_norm": "Alibaba Group", "aff_unique_dep": "", "aff_unique_url": "https://www.alibaba.com", "aff_unique_abbr": "Alibaba", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "PyAzL6Z802", "title": "Multilingual estimation of political-party positioning: From label aggregation to long-input Transformers", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Scaling analysis is a technique in computational political science that assigns a political actor (e.g. politician or party) a score on a predefined scale based on a (typically long) body of text (e.g. a parliamentary speech or an election manifesto). For example, political scientists have often used the left\u2013right scale to systematically analyse political landscapes of different countries. 
NLP methods for automatic scaling analysis can find broad application provided they (i) are able to deal with long texts and (ii) work robustly across domains and languages. In this work, we implement and compare two approaches to automatic scaling analysis of political-party manifestos: label aggregation, a pipeline strategy relying on annotations of individual statements from the manifestos, and long-input-Transformer-based models, which compute scaling values directly from raw text. We carry out the analysis of the Comparative Manifestos Project dataset across 41 countries and 27 languages and find that the task can be efficiently solved by state-of-the-art models, with label aggregation producing the best results.", "keywords": "computational social science;scaling analysis;political science;political party positioning", "primary_area": "", "supplementary_material": "", "author": "Dmitry Nikolaev;Tanise Ceron;Sebastian Pad\u00f3", "authorids": "~Dmitry_Nikolaev1;~Tanise_Ceron1;~Sebastian_Pad\u00f32", "gender": "M;F;M", "homepage": "https://dnikolaev.com;https://tceron.github.io;https://nlpado.de/~sebastian", "dblp": "264/5979;300/0946;p/SebastianPado", "google_scholar": "Myl8EpkAAAAJ;RvYLVPsAAAAJ;vKqag_AAAAAJ", "or_profile": "~Dmitry_Nikolaev1;~Tanise_Ceron1;~Sebastian_Pado1", "aff": "University of Stuttgart, Universit\u00e4t Stuttgart;University of Stuttgart, Universit\u00e4t Stuttgart;University of Stuttgart, Universit\u00e4t Stuttgart", "aff_domain": "ims.uni-stuttgart.de;ims.uni-stuttgart.de;ims.uni-stuttgart.de", "position": "Postdoc;PhD student;Professor", "bibtex": "@inproceedings{\nnikolaev2023multilingual,\ntitle={Multilingual estimation of political-party positioning: From label aggregation to long-input Transformers},\nauthor={Dmitry Nikolaev and Tanise Ceron and Sebastian Pad{\\'o}},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=PyAzL6Z802}\n}", "github": "", "project": "", "reviewers": "gyFa;VGTx;MK6B", "site": "https://openreview.net/forum?id=PyAzL6Z802", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;3;5", "excitement": "4;4;4", "reproducibility": "5;4;5", "correctness": "4;4;4", "rating_avg": 5.0, "confidence_avg": 4.0, "excitement_avg": 4.0, "reproducibility_avg": 4.666666666666667, "correctness_avg": 4.0, "replies_avg": 10, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-3034-9794;0009-0002-4845-2789;", "linkedin": "dmitry-nikolaev-9421405a/;taniseceron/;", "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Stuttgart", "aff_unique_dep": "", "aff_unique_url": "https://www.uni-stuttgart.de", "aff_unique_abbr": "Uni Stuttgart", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Germany" }, { "id": "PyJ78pUMEE", "title": "Just Adjust One Prompt: Enhancing In-Context Dialogue Scoring via Constructing the Optimal Subgraph of Demonstrations and Prompts", "track": "main", "status": "Long Main", "tldr": "", "abstract": "The use of modern Large Language Models (LLMs) as chatbots still has some problems such as hallucinations and lack of empathy. Identifying these issues can help improve chatbot performance. The community has been continually iterating on reference-free dialogue evaluation methods based on large language models (LLMs) that can be readily applied. 
However, many of these LLM-based metrics require selecting specific datasets and developing specialized training tasks for different evaluation dimensions (e.g., coherence, informative). The developing step can be time-consuming and may need to be repeated for new evaluation dimensions. To enable efficient and flexible adaptation to diverse needs of dialogue evaluation, we propose a dimension-agnostic scoring method that leverages the in-context learning (ICL) capability of LLMs to learn from human scoring to the fullest extent. Our method has three key features. To begin with, rather than manual prompt crafting, we propose automatically generating prompts, allowing the LLM to observe human labels and summarize the most suitable prompt. Additionally, since the LLM has a token limit and ICL is sensitive to demonstration variations, we train a selector to finely customize demonstrations and prompts for each dialogue input. Finally, during inference, we propose to request the LLM multiple times with a subgraph of demonstrations and prompts that are diverse and suitable to maximize ICL from various human scoring. We validate the efficacy of our method on five datasets, even with a small amount of annotated data, our method outperforms all strong baselines. Code is available at https://github.com/iamlxb3/EMNLP2023-ADOROR.", "keywords": "dialogue evaluation;in-context learning;large language models;prompt generation", "primary_area": "", "supplementary_material": "", "author": "Jiashu Pu;ling Cheng;Lu Fan;Tangjie Lv;Rongsheng Zhang", "authorids": "~Jiashu_Pu1;~ling_Cheng1;~Lu_Fan1;~Tangjie_Lv1;~Rongsheng_Zhang1", "gender": "M;M;Not Specified;M;M", "homepage": ";;;;", "dblp": "205/9148;69/764-2;47/137;;", "google_scholar": "DuFytioAAAAJ;_y020noAAAAJ;-zxMk6sAAAAJ;EIuWpJcAAAAJ;H1VQcLAAAAAJ", "or_profile": "~Jiashu_Pu1;~ling_Cheng1;~Lu_Fan1;~Tangjie_Lv1;~Rongsheng_Zhang1", "aff": "NetEase, Inc;Singapore Management University;Hong Kong Polytechnic University;NetEase, Inc.;Fuxi AI Lab, Netease", "aff_domain": "corp.netease.com;smu.edu.sg;polyu.edu.hk;netease.com;netease.com", "position": "Researcher;PhD student;PhD student;Researcher;Researcher", "bibtex": "@inproceedings{\npu2023just,\ntitle={Just Adjust One Prompt: Enhancing In-Context Dialogue Scoring via Constructing the Optimal Subgraph of Demonstrations and Prompts},\nauthor={Jiashu Pu and ling Cheng and Lu Fan and Tangjie Lv and Rongsheng Zhang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=PyJ78pUMEE}\n}", "github": "", "project": "", "reviewers": "yAzT;JKu9;r8LW;Hsj3", "site": "https://openreview.net/forum?id=PyJ78pUMEE", "pdf_size": 0, "rating": "5;5;5;5", "confidence": "3;4;4;3", "excitement": "4;4;4;4", "reproducibility": "3;4;5;5", "correctness": "4;4;4;4", "rating_avg": 5.0, "confidence_avg": 3.5, "excitement_avg": 4.0, "reproducibility_avg": 4.25, "correctness_avg": 4.0, "replies_avg": 14, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-0549-9563;0000-0002-2834-9728;0000-0003-1230-7854;0000-0001-9858-809X;0009-0008-1248-2090", "linkedin": "jiashupu/;ling-cheng-601426197/;;;", "aff_unique_index": "0;1;2;3;4", "aff_unique_norm": "NetEase, Inc;Singapore Management University;Hong Kong Polytechnic University;NetEase, Inc.;Netease", "aff_unique_dep": ";;;;Fuxi AI Lab", "aff_unique_url": 
"https://www.163.com;https://www.smu.edu.sg;https://www.polyu.edu.hk;https://www.163.com;https://www.netease.com", "aff_unique_abbr": "NetEase;SMU;PolyU;NetEase;Netease", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;1;0;0;0", "aff_country_unique": "China;Singapore" }, { "id": "PzINxIyV9o", "title": "InterFair: Debiasing with Natural Language Feedback for Fair Interpretable Predictions", "track": "main", "status": "Short Main", "tldr": "", "abstract": "Debiasing methods in NLP models traditionally focus on isolating information related to a sensitive attribute (e.g., gender or race).\nWe instead argue that a favorable debiasing method should use sensitive information 'fairly,' with explanations, rather than blindly eliminating it. This fair balance is often subjective and can be challenging to achieve algorithmically. We explore two interactive setups with a frozen predictive model and show that users able to provide feedback can achieve a better and \\emph{fairer} balance between task performance and bias mitigation. In one setup, users, by interacting with test examples, further decreased bias in the explanations (5-8%) while maintaining the same prediction accuracy. In the other setup, human feedback was able to disentangle associated bias and predictive information from the input leading to superior bias mitigation and improved task performance (4-5%) simultaneously.", "keywords": "Debiasing;Language Models;Rationale;Interactions;User Interventions", "primary_area": "", "supplementary_material": "", "author": "Bodhisattwa Prasad Majumder;Zexue He;Julian McAuley", "authorids": "~Bodhisattwa_Prasad_Majumder1;~Zexue_He1;~Julian_McAuley1", "gender": ";F;M", "homepage": "https://www.majumderb.com/;https://zexuehe.github.io/;http://cseweb.ucsd.edu/~jmcauley/", "dblp": "138/6177;215/4688;29/3483", "google_scholar": "cEM1a5gAAAAJ;-JrCM0AAAAAJ;icbo4M0AAAAJ", "or_profile": "~Bodhisattwa_Prasad_Majumder1;~Zexue_He1;~Julian_McAuley1", "aff": "University of California, San Diego;University of California, San Diego;University of California, San Diego, University of California, San Diego", "aff_domain": "ucsd.edu;ucsd.edu;eng.ucsd.edu", "position": "PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nmajumder2023interfair,\ntitle={InterFair: Debiasing with Natural Language Feedback for Fair Interpretable Predictions},\nauthor={Bodhisattwa Prasad Majumder and Zexue He and Julian McAuley},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=PzINxIyV9o}\n}", "github": "", "project": "", "reviewers": "mchw;Kj4w;beK8", "site": "https://openreview.net/forum?id=PzINxIyV9o", "pdf_size": 0, "rating": "4;4;4", "confidence": "2;4;3", "excitement": "4;3;3", "reproducibility": "1;4;3", "correctness": "3;4;4", "rating_avg": 4.0, "confidence_avg": 3.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 2.6666666666666665, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0003-0955-7588", "linkedin": ";;", "aff_unique_index": "0;0;0", "aff_unique_norm": "University of California, San Diego", "aff_unique_dep": "", "aff_unique_url": "https://www.ucsd.edu", "aff_unique_abbr": "UCSD", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "San Diego", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": 
"Q2IInBu2kz", "title": "PCMID: Multi-Intent Detection through Supervised Prototypical Contrastive Learning", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Intent detection is a major task in Natural Language Understanding (NLU) and is the component of dialogue systems for interpreting users\u2019 intentions based on their utterances. Many works have explored detecting intents by assuming that each utterance represents only a single intent. Such systems have achieved very good results; however, intent detection is a far more challenging task in typical real-world scenarios, where each user utterance can be highly complex and express multiple intents. Therefore, in this paper, we propose PCMID, a novel Multi-Intent Detection framework enabled by Prototypical Contrastive Learning under a supervised setting. The PCMID model can learn multiple semantic representations of a given user utterance under the context of different intent labels in an optimized semantic space. Our experiments show that PCMID achieves the current state-of-the-art performance on both multiple public benchmark datasets and a private real-world dataset for the multi-intent detection task.", "keywords": "Dialogue System;Multi-Intent Detection", "primary_area": "", "supplementary_material": "", "author": "Yurun song;Junchen Zhao;Spencer B. Koehler;Amir Abdullah;Ian Harris", "authorids": "~Yurun_song1;~Junchen_Zhao1;~Spencer_B._Koehler1;~Amir_Abdullah1;~Ian_Harris1", "gender": "M;M;M;M;M", "homepage": ";https://www.junchenzhao97.com/;;;http://www.ics.uci.edu/~harris/", "dblp": ";;;358/8964;99/4204", "google_scholar": "XMQTUvYAAAAJ;gH3HsTkAAAAJ;;jPEbq5wAAAAJ;NpcLBDsAAAAJ", "or_profile": "~Yurun_song1;~Junchen_Zhao1;~Spencer_B._Koehler1;~Amir_Abdullah1;~Ian_Harris1", "aff": "University of California, Irvine;University of California, Irvine;;GetGuru;University of California-Irvine", "aff_domain": "uci.edu;uci.edu;;getguru.com;ics.uci.edu", "position": "PhD student;PhD student;;Data Scientist;Full Professor", "bibtex": "@inproceedings{\nsong2023pcmid,\ntitle={{PCMID}: Multi-Intent Detection through Supervised Prototypical Contrastive Learning},\nauthor={Yurun song and Junchen Zhao and Spencer B. 
Koehler and Amir Abdullah and Ian Harris},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=Q2IInBu2kz}\n}", "github": "", "project": "", "reviewers": "XUFX;ZoeN;NXkp;G3TU", "site": "https://openreview.net/forum?id=Q2IInBu2kz", "pdf_size": 0, "rating": "3;3;3;3", "confidence": "5;5;4;4", "excitement": "3;2;3;3", "reproducibility": "4;2;4;3", "correctness": "4;2;3;3", "rating_avg": 3.0, "confidence_avg": 4.5, "excitement_avg": 2.75, "reproducibility_avg": 3.25, "correctness_avg": 3.0, "replies_avg": 14, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;", "linkedin": ";junchen-kevin-zhao-8a6674195/;spencerkoehler/;amirali-abdullah-23273314/;", "aff_unique_index": "0;0;1;0", "aff_unique_norm": "University of California, Irvine;GetGuru", "aff_unique_dep": ";", "aff_unique_url": "https://www.uci.edu;", "aff_unique_abbr": "UCI;", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Irvine;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States;" }, { "id": "Q2Wu2Cfp2x", "title": "Practical Computational Power of Linear Transformers and Their Recurrent and Self-Referential Extensions", "track": "main", "status": "Short Main", "tldr": "", "abstract": "Recent studies of the computational power of recurrent neural networks (RNNs) reveal a hierarchy of RNN architectures, given real-time and finite-precision assumptions. Here we study auto-regressive Transformers with linearised attention, a.k.a. linear Transformers (LTs) or Fast Weight Programmers (FWPs). LTs are special in the sense that they are equivalent to RNN-like sequence processors with a fixed-size state, while they can also be expressed as the now-popular self-attention networks. We show that many well-known results for the standard Transformer directly transfer to LTs/FWPs. Our formal language recognition experiments demonstrate how recently proposed FWP extensions such as recurrent FWPs and self-referential weight matrices successfully overcome certain limitations of the LT, e.g., allowing for generalisation on the parity problem. 
Our code is public.", "keywords": "recurrent neural networks;RNNs;transformers;computational power;automata;counter machines;formal languages;linear transformers;self-reference;self-referential weight matrix", "primary_area": "", "supplementary_material": "", "author": "Kazuki Irie;R\u00f3bert Csord\u00e1s;J\u00fcrgen Schmidhuber", "authorids": "~Kazuki_Irie1;~R\u00f3bert_Csord\u00e1s1;~J\u00fcrgen_Schmidhuber1", "gender": ";M;M", "homepage": "https://sites.harvard.edu/kazuki-irie/;https://robertcsordas.github.io/;http://people.idsia.ch/~juergen/", "dblp": "148/9667;166/4773.html;s/JurgenSchmidhuber", "google_scholar": "https://scholar.google.de/citations?user=-gZ-BdwAAAAJ;av1lplwAAAAJ;https://scholar.google.ch/citations?user=gLnCTgIAAAAJ", "or_profile": "~Kazuki_Irie1;~R\u00f3bert_Csord\u00e1s1;~J\u00fcrgen_Schmidhuber1", "aff": "The Swiss AI Lab IDSIA, Dalle Molle Institute for Artificial Intelligence Research;IDSIA;IDSIA", "aff_domain": "idsia.ch;idsia.ch;idsia.ch", "position": "Postdoc;PhD student;Scientific Director", "bibtex": "@inproceedings{\nirie2023practical,\ntitle={Practical Computational Power of Linear Transformers and Their Recurrent and Self-Referential Extensions},\nauthor={Kazuki Irie and R{\\'o}bert Csord{\\'a}s and J{\\\"u}rgen Schmidhuber},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=Q2Wu2Cfp2x}\n}", "github": "", "project": "", "reviewers": "Vzz8;AXki;Dg9f;5khN;RMs4", "site": "https://openreview.net/forum?id=Q2Wu2Cfp2x", "pdf_size": 0, "rating": "4;4;4;4;4", "confidence": "2;1;2;1;2", "excitement": "2;2;4;3;4", "reproducibility": "4;3;2;4;4", "correctness": "4;2;3;3;3", "rating_avg": 4.0, "confidence_avg": 1.6, "excitement_avg": 3.0, "reproducibility_avg": 3.4, "correctness_avg": 3.0, "replies_avg": 17, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0003-0923-691X;;", "linkedin": ";robertcsordas/;", "aff_unique_index": "0;1;1", "aff_unique_norm": "IDSIA;Institute of Digital Technologies", "aff_unique_dep": "Swiss AI Lab;", "aff_unique_url": "https://www.idsia.ch/;https://www.idsia.ch", "aff_unique_abbr": "IDSIA;IDSIA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Switzerland" }, { "id": "Q4u18Ui7YS", "title": "Explore-Instruct: Enhancing Domain-Specific Instruction Coverage through Active Exploration", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Instruction-tuning can be substantially optimized through enhanced diversity, resulting in models capable of handling a broader spectrum of tasks. However, existing data employed for such tuning often exhibit an inadequate coverage of individual domains, limiting the scope for nuanced comprehension and interactions within these areas. To address this deficiency, we propose Explore-Instruct, a novel approach to enhance the data coverage to be used in domain-specific instruction-tuning through active exploration via Large Language Models (LLMs). Built upon representative domain use cases, Explore-Instruct explores a multitude of variations or possibilities by implementing a search algorithm to obtain diversified and domain-focused instruction-tuning data. Our data-centric analysis validates the effectiveness of this proposed approach in improving domain-specific instruction coverage. 
Moreover, our model's performance demonstrates considerable advancements over multiple baselines, including those utilizing domain-specific data enhancement. Our findings offer a promising opportunity to improve instruction coverage, especially in domain-specific contexts, thereby advancing the development of adaptable language models. Our code, model weights, and data are public at \\url{https://github.com/fanqiwan/Explore-Instruct}.", "keywords": "Large Language Models;Instruction-Tuning", "primary_area": "", "supplementary_material": "", "author": "Fanqi Wan;Xinting Huang;Tao Yang;Xiaojun Quan;Wei Bi;Shuming Shi", "authorids": "~Fanqi_Wan1;~Xinting_Huang1;~Tao_Yang13;~Xiaojun_Quan1;~Wei_Bi1;~Shuming_Shi1", "gender": "M;M;M;M;F;M", "homepage": "https://fanqiwan.github.io/;https://timhuang1.github.io/;https://taoyang225.github.io/;https://sites.google.com/site/xiaojunquan/;https://scholar.google.com.hk/citations?hl=en&user=aSJcgQMAAAAJ&view_op=list_works&sortby=pubdate#d=gsc_md_iad&u=%2Fcitations%3Fview_op%3Dimport_lookup%26hl%3Den%26imq%3DWei%2BBi%26json%3D%26btnA%3D1;", "dblp": "347/8267;;;90/5936;38/1163;s/ShumingShi", "google_scholar": "AeS1tmEAAAAJ;QmyPDWQAAAAJ;i3to2x8AAAAJ;dRpg4t8AAAAJ;https://scholar.google.com.hk/citations?hl=en;Lg31AKMAAAAJ", "or_profile": "~Fanqi_Wan1;~Xinting_Huang1;~Tao_Yang13;~Xiaojun_Quan1;~Wei_Bi1;~Shuming_Shi1", "aff": "SUN YAT-SEN UNIVERSITY;Tencent;SUN YAT-SEN UNIVERSITY;SUN YAT-SEN UNIVERSITY;Hong Kong University of Science and Technology;Tencent AI Lab", "aff_domain": "sysu.edu.cn;tencent.com;sysu.edu.cn;sysu.edu.cn;ust.hk;tencent.com", "position": "MS student;Researcher;PhD student;Full Professor;PhD student;Principal Researcher", "bibtex": "@inproceedings{\nwan2023exploreinstruct,\ntitle={Explore-Instruct: Enhancing Domain-Specific Instruction Coverage through Active Exploration},\nauthor={Fanqi Wan and Xinting Huang and Tao Yang and Xiaojun Quan and Wei Bi and Shuming Shi},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=Q4u18Ui7YS}\n}", "github": "", "project": "", "reviewers": "TUPc;Rfvu;Jzc1", "site": "https://openreview.net/forum?id=Q4u18Ui7YS", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;4;3", "excitement": "4;3;4", "reproducibility": "4;3;4", "correctness": "4;4;4", "rating_avg": 5.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 4.0, "replies_avg": 10, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;0000-0001-8457-0630;", "linkedin": "fanqiwan/;xintingh/;;;;", "aff_unique_index": "0;1;0;0;2;1", "aff_unique_norm": "Sun Yat-sen University;Tencent;Hong Kong University of Science and Technology", "aff_unique_dep": ";Tencent Holdings Limited;", "aff_unique_url": "http://www.sysu.edu.cn;https://www.tencent.com;https://www.ust.hk", "aff_unique_abbr": "SYSU;Tencent;HKUST", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "Q5nM3rpiVm", "title": "Towards Better Representations for Multi-Label Text Classification with Multi-granularity Information", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Multi-label text classification (MLTC) aims to assign multiple labels to a given text. 
Previous works have focused on text representation learning and label correlations modeling using pre-trained language models (PLMs). \nHowever, studies have shown that PLMs generate word frequency-oriented text representations, causing texts with different labels to be closely distributed in a narrow region, which is difficult to classify.\nTo address this, we present a novel framework $\\textbf{CL}$($\\underline{C}$ontrastive $\\underline{L}$earning)-$\\textbf{MIL}$ ($\\underline{M}$ulti-granularity $\\underline{I}$nformation $\\underline{L}$earning) to refine the text representation for MLTC task. We first use contrastive learning to generate uniform initial text representation and incorporate label frequency implicitly. Then, we design a multi-task learning module to integrate multi-granularity (diverse text-labels correlations, label-label relations and label frequency) information into text representations, enhancing their discriminative ability. Experimental results demonstrate the complementarity of the modules in CL-MIL, improving the quality of text representations and yielding stable and competitive improvements for MLTC.", "keywords": "Multi-label text classification;Text representation;Contrastive learning;Multi-granularity information", "primary_area": "", "supplementary_material": "", "author": "FangFang Li;PuZhen Su;Junwen Duan;Weidong Xiao", "authorids": "~FangFang_Li1;~PuZhen_Su1;~Junwen_Duan1;~Weidong_Xiao1", "gender": "F;M;M;M", "homepage": "https://faculty.csu.edu.cn/lifangfang/zh_CN/index.htm;https://github.com/Eneverg1veup;;", "dblp": ";362/8590.html;153/9564;87/5207", "google_scholar": ";;rF_NZFAAAAAJ;", "or_profile": "~FangFang_Li1;~PuZhen_Su1;~Junwen_Duan1;~Weidong_Xiao1", "aff": "Central South University;Central South University;Central South University;", "aff_domain": "csu.edu.cn;csu.edu.cn;csu.edu.cn;", "position": "Full Professor;MS student;Associate Professor;", "bibtex": "@inproceedings{\nli2023towards,\ntitle={Towards Better Representations for Multi-Label Text Classification with Multi-granularity Information},\nauthor={FangFang Li and PuZhen Su and Junwen Duan and Weidong Xiao},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=Q5nM3rpiVm}\n}", "github": "", "project": "", "reviewers": "WQiA;xJF7;HWyb", "site": "https://openreview.net/forum?id=Q5nM3rpiVm", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;4", "excitement": "3;4;3", "reproducibility": "3;3;3", "correctness": "3;4;3", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0009-0001-7613-1192;;", "linkedin": ";;;", "aff_unique_index": "0;0;0", "aff_unique_norm": "Central South University", "aff_unique_dep": "", "aff_unique_url": "https://www.csu.edu.cn", "aff_unique_abbr": "CSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "Q93hLxLKLB", "title": "We Need to Talk About Reproducibility in NLP Model Comparison", "track": "main", "status": "Long Main", "tldr": "", "abstract": "NLPers frequently face reproducibility crisis in a comparison of various models of a real-world NLP task. 
Many studies have empirically shown that the standard splits tend to produce poorly reproducible and unreliable conclusions, and have attempted to improve the splits by using more random repetitions. However, the improvement in reproducibility when comparing NLP models remains limited, owing to a lack of investigation into the relationship between reproducibility and the estimator induced by a splitting strategy. In this paper, we formulate the reproducibility of a model comparison as a probabilistic function with regard to a conclusion. Furthermore, we theoretically illustrate that the reproducibility is qualitatively dominated by the signal-to-noise ratio (SNR) of a model performance estimator obtained on a corpus splitting strategy. Specifically, a higher SNR of an estimator probably indicates better reproducibility. On the basis of these theoretical motivations, we develop a novel mixture estimator of the performance of an NLP model with a regularized corpus splitting strategy based on a blocked $3\\times 2$ cross-validation. We conduct numerical experiments on multiple NLP tasks to show that the proposed estimator achieves a high SNR and substantially increases reproducibility. Therefore, we recommend that NLP practitioners use the proposed method to compare NLP models instead of methods based on the widely-used standard splits and random splits with multiple repetitions.", "keywords": "reproducibility;NLP model comparison;corpus splitting strategy", "primary_area": "", "supplementary_material": "", "author": "Yan Xue;Xuefei Cao;Xingli Yang;Yu Wang;Ruibo Wang;Jihong Li", "authorids": "~Yan_Xue1;~Xuefei_Cao2;~Xingli_Yang1;~Yu_Wang28;~Ruibo_Wang1;~Jihong_Li1", "gender": "F;M;F;M;M;M", "homepage": "https://gitee.com/xue_202012407011;;;;https://rambowang.github.io/;", "dblp": ";;;;;", "google_scholar": ";;;;WlMCW88AAAAJ;", "or_profile": "~Yan_Xue1;~Xuefei_Cao2;~Xingli_Yang1;~Yu_Wang28;~Ruibo_Wang1;~Jihong_Li1", "aff": "Shanxi University;Shanxi University;Shanxi university;Shanxi University;Shanxi University;Shanxi University", "aff_domain": "sxu.edu.cn;sxu.edu.cn;sxu.edu.cn;sxu.edu.cn;sxu.edu.cn;sxu.edu.cn", "position": "PhD student;Associate Professor;Lecturer;Full Professor;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nxue2023we,\ntitle={We Need to Talk About Reproducibility in {NLP} Model Comparison},\nauthor={Yan Xue and Xuefei Cao and Xingli Yang and Yu Wang and Ruibo Wang and Jihong Li},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=Q93hLxLKLB}\n}", "github": "", "project": "", "reviewers": "zPdM;wxeB;39w5", "site": "https://openreview.net/forum?id=Q93hLxLKLB", "pdf_size": 0, "rating": "5;5;5", "confidence": "2;3;4", "excitement": "4;4;4", "reproducibility": "4;4;4", "correctness": "4;4;3", "rating_avg": 5.0, "confidence_avg": 3.0, "excitement_avg": 4.0, "reproducibility_avg": 4.0, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0001-5095-5604;0000-0001-9204-0613;0000-0002-5458-5513;0000-0002-9542-5081;0000-0003-0692-7176", "linkedin": ";;;;;", "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "Shanxi University", "aff_unique_dep": "", "aff_unique_url": "http://www.sxu.edu.cn", "aff_unique_abbr": "SXU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique":
"China" }, { "id": "Q9BLbN1p6h", "title": "Mixture-of-Linguistic-Experts Adapters for Improving and Interpreting Pre-trained Language Models", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "In this work, we propose a method that combines two popular research areas by injecting linguistic structures into pre-trained language models in the parameter-efficient fine-tuning (PEFT) setting. In our approach, parallel adapter modules encoding different linguistic structures are combined using a novel Mixture-of-Linguistic-Experts architecture, where Gumbel-Softmax gates are used to determine the importance of these modules at each layer of the model. To reduce the number of parameters, we first train the model for a fixed small number of steps before pruning the experts based on their important scores. Our experiment results with three different pre-trained models show that our approach can outperform state-of-the-art PEFT methods with a comparable number of parameters. In addition, we provide additional analysis to examine the experts selected by each model at each layer to provide insights for future studies.", "keywords": "adapters;graph neural networks;parameter-efficient fine-tuning;interpretability;dependency trees", "primary_area": "", "supplementary_material": "", "author": "Raymond Li;Gabriel Murray;Giuseppe Carenini", "authorids": "~Raymond_Li2;~Gabriel_Murray2;~Giuseppe_Carenini2", "gender": ";M;M", "homepage": ";https://gabrielmurray.ca/;https://www.cs.ubc.ca/~carenini/", "dblp": ";69/4961.html;", "google_scholar": "https://scholar.google.ca/citations?user=NrxW5xwAAAAJ;8dzFxBkAAAAJ;", "or_profile": "~Raymond_Li2;~Gabriel_Murray2;~Giuseppe_Carenini2", "aff": "University of British Columbia;University of the Fraser Valley;, University of British Columbia", "aff_domain": "cs.ubc.ca;ufv.ca;cs.ubc.ca", "position": "PhD student;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nli2023mixtureoflinguisticexperts,\ntitle={Mixture-of-Linguistic-Experts Adapters for Improving and Interpreting Pre-trained Language Models},\nauthor={Raymond Li and Gabriel Murray and Giuseppe Carenini},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=Q9BLbN1p6h}\n}", "github": "", "project": "", "reviewers": "otyM;bbmx;y8mp", "site": "https://openreview.net/forum?id=Q9BLbN1p6h", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;5;3", "excitement": "2;4;3", "reproducibility": "4;5;4", "correctness": "2;4;4", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.0, "reproducibility_avg": 4.333333333333333, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": ";murraygabriel/;", "aff_unique_index": "0;1;0", "aff_unique_norm": "University of British Columbia;University of the Fraser Valley", "aff_unique_dep": ";", "aff_unique_url": "https://www.ubc.ca;https://www.ufv.ca", "aff_unique_abbr": "UBC;UFV", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Canada" }, { "id": "QA1jlb1VG7", "title": "CITB: A Benchmark for Continual Instruction Tuning", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Continual learning (CL) is a paradigm that aims to replicate the human ability to learn and accumulate knowledge continually without forgetting previous knowledge and transferring it to new tasks. 
Recent instruction tuning (IT) involves fine-tuning models to make them more adaptable to solving NLP tasks in general. However, it is still uncertain how instruction tuning works in the context of CL tasks. This challenging yet practical problem is formulated as Continual Instruction Tuning (CIT). In this work, we establish a CIT benchmark consisting of learning and evaluation protocols. We curate two long dialogue task streams of different types, InstrDialog and InstrDialog++, to study various CL methods systematically. Our experiments show that existing CL methods do not effectively leverage the rich natural language instructions, and fine-tuning an instruction-tuned model sequentially can yield similar or better results. We further explore different aspects that might affect the learning of CIT. We hope this benchmark will facilitate more research in this direction.", "keywords": "Instruction tuning;continual learning;benchmark;evaluation", "primary_area": "", "supplementary_material": "", "author": "Zihan Zhang;Meng Fang;Ling Chen;Mohammad Reza Namazi Rad", "authorids": "~Zihan_Zhang3;~Meng_Fang1;~Ling_Chen5;~Mohammad_Reza_Namazi_Rad2", "gender": "M;M;F;M", "homepage": "https://zhangzihangit.github.io/;;https://profiles.uts.edu.au/Ling.Chen;https://www.linkedin.com/in/mo-namazi/", "dblp": ";67/463;17/1237-6;", "google_scholar": "https://scholar.google.com.au/citations?hl=en;IcNYP1oAAAAJ;https://scholar.google.com.au/citations?user=L5aYWQcAAAAJ;https://scholar.google.com.au/citations?user=uoGBVTYAAAAJ", "or_profile": "~Zihan_Zhang3;~Meng_Fang1;~Ling_Chen5;~Mohammad_Reza_Namazi_Rad2", "aff": "University of Technology Sydney;Eindhoven University of Technology;University of Technology Sydney;", "aff_domain": "uts.edu.au;tue.nl;uts.edu.au;", "position": "PhD student;Assistant Professor;Full Professor;", "bibtex": "@inproceedings{\nzhang2023citb,\ntitle={{CITB}: A Benchmark for Continual Instruction Tuning},\nauthor={Zihan Zhang and Meng Fang and Ling Chen and Mohammad Reza Namazi Rad},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=QA1jlb1VG7}\n}", "github": "", "project": "", "reviewers": "eWi8;y4HB;Nb3H;okNW", "site": "https://openreview.net/forum?id=QA1jlb1VG7", "pdf_size": 0, "rating": "2;2;2;2", "confidence": "3;5;3;4", "excitement": "3;4;3;3", "reproducibility": "4;4;3;4", "correctness": "3;4;3;3", "rating_avg": 2.0, "confidence_avg": 3.75, "excitement_avg": 3.25, "reproducibility_avg": 3.75, "correctness_avg": 3.25, "replies_avg": 14, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0002-6468-5729;", "linkedin": "zihan-zhang-a40855172/;;;", "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Technology Sydney;Eindhoven University of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.uts.edu.au;https://www.tue.nl", "aff_unique_abbr": "UTS;TU/e", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "Australia;Netherlands" }, { "id": "QAT5suGpNL", "title": "Breaking the Language Barrier: Improving Cross-Lingual Reasoning with Structured Self-Attention", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "In this work, we study whether multilingual language models (MultiLMs) can transfer logical reasoning abilities to other languages when they are fine-tuned for reasoning in a different language. 
We evaluate the cross-lingual reasoning abilities of MultiLMs in two schemes: (1) where the language of the context and the question remain the same in the new languages that are tested (i.e., the reasoning is still monolingual, but the model must transfer the learned reasoning ability across languages), and (2) where the language of the context and the question is different (which we term code-switched reasoning). On two logical reasoning datasets, RuleTaker and LeapOfThought, we demonstrate that although MultiLMs can transfer reasoning ability across languages in a monolingual setting, they struggle to transfer reasoning abilities in a code-switched setting. Following this observation, we propose a novel attention mechanism that uses a dedicated set of parameters to encourage cross-lingual attention in code-switched sequences, which improves the reasoning performance by up to 14% and 4% on the RuleTaker and LeapOfThought datasets, respectively.", "keywords": "Cross-lingual;Multilingual;Reasoning", "primary_area": "", "supplementary_material": "", "author": "Negar Foroutan;Mohammadreza Banaei;Karl Aberer;Antoine Bosselut", "authorids": "~Negar_Foroutan1;~Mohammadreza_Banaei1;~Karl_Aberer1;~Antoine_Bosselut1", "gender": "F;M;;M", "homepage": "http://negar.foroutan.info/;https://people.epfl.ch/mohammadreza.banaei?lang=en;https://people.epfl.ch/karl.aberer;https://atcbosselut.github.io/", "dblp": "174/4070;266/9645;a/KarlAberer;184/3742", "google_scholar": "jHeHoScAAAAJ;;;XD9hkJwAAAAJ", "or_profile": "~Negar_Foroutan1;~Mohammadreza_Banaei1;~Karl_Aberer1;~Antoine_Bosselut1", "aff": "School of Computer and Communication Sciences, EPFL - EPF Lausanne;EPFL - EPF Lausanne;School of Computer and Communication Sciences, EPFL - EPF Lausanne;Swiss Federal Institute of Technology Lausanne", "aff_domain": "ic.epfl.ch;epfl.ch;ic.epfl.ch;epfl.ch", "position": "PhD student;PhD student;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nforoutan2023breaking,\ntitle={Breaking the Language Barrier: Improving Cross-Lingual Reasoning with Structured Self-Attention},\nauthor={Negar Foroutan and Mohammadreza Banaei and Karl Aberer and Antoine Bosselut},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=QAT5suGpNL}\n}", "github": "", "project": "", "reviewers": "ocuD;SPqx;Gdvn", "site": "https://openreview.net/forum?id=QAT5suGpNL", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;4;4", "excitement": "3;4;4", "reproducibility": "4;4;4", "correctness": "3;4;4", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.0, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;", "linkedin": ";mohammadreza-banaei-45638773/?originalSubdomain=ch;;", "aff_unique_index": "0;0;0;1", "aff_unique_norm": "EPFL;Swiss Federal Institute of Technology Lausanne", "aff_unique_dep": "School of Computer and Communication Sciences;", "aff_unique_url": "https://www.epfl.ch;https://www.epfl.ch", "aff_unique_abbr": "EPFL;EPFL", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Lausanne", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Switzerland" }, { "id": "QAZ2QV8SqN", "title": "KG-GPT: A General Framework for Reasoning on Knowledge Graphs Using Large Language Models", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "While large 
language models (LLMs) have made considerable advancements in understanding and generating unstructured text, their application in structured data remains underexplored. Particularly, using LLMs for complex reasoning tasks on knowledge graphs (KGs) remains largely untouched. To address this, we propose KG-GPT, a multi-purpose framework leveraging LLMs for tasks employing KGs. KG-GPT comprises three steps: Sentence Segmentation, Graph Retrieval, and Inference, each aimed at partitioning sentences, retrieving relevant graph components, and deriving logical conclusions, respectively. We evaluate KG-GPT using KG-based fact verification and KGQA benchmarks, with the model showing competitive and robust performance, even outperforming several fully-supervised models. Our work, therefore, marks a significant step in unifying structured and unstructured data processing within the realm of LLMs.", "keywords": "Large Language Model;Knowledge Graph;Reasoning;Question Answering;Fact Verification", "primary_area": "", "supplementary_material": "", "author": "Jiho Kim;Yeonsu Kwon;Yohan Jo;Edward Choi", "authorids": "~Jiho_Kim1;~Yeonsu_Kwon1;~Yohan_Jo1;~Edward_Choi1", "gender": ";;;M", "homepage": ";https://sites.google.com/view/yeonsukwon;https://yohanjo.github.io/;http://mp2893.com", "dblp": ";;40/8877;41/3886", "google_scholar": "https://scholar.google.com/citations?hl=en;;xp3LGRQAAAAJ;GUlGIPkAAAAJ", "or_profile": "~Jiho_Kim1;~Yeonsu_Kwon1;~Yohan_Jo1;~Edward_Choi1", "aff": "Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Amazon;Korea Advanced Institute of Science & Technology", "aff_domain": "kaist.ac.kr;kaist.edu;amazon.com;kaist.ac.kr", "position": "PhD student;MS student;Applied Scientist;Associate Professor", "bibtex": "@inproceedings{\nkim2023kggpt,\ntitle={{KG}-{GPT}: A General Framework for Reasoning on Knowledge Graphs Using Large Language Models},\nauthor={Jiho Kim and Yeonsu Kwon and Yohan Jo and Edward Choi},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=QAZ2QV8SqN}\n}", "github": "", "project": "", "reviewers": "Z28W;PVWa;USSs", "site": "https://openreview.net/forum?id=QAZ2QV8SqN", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;4", "excitement": "3;3;3", "reproducibility": "3;4;2", "correctness": "3;3;4", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 3.0, "reproducibility_avg": 3.0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;", "linkedin": ";;;", "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology;Amazon", "aff_unique_dep": ";Amazon.com, Inc.", "aff_unique_url": "https://www.kaist.ac.kr;https://www.amazon.com", "aff_unique_abbr": "KAIST;Amazon", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "South Korea;United States" }, { "id": "QEf1MyZGZu", "title": "Target-Aware Spatio-Temporal Reasoning via Answering Questions in Dynamic Audio-Visual Scenarios", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Audio-visual question answering (AVQA) is a challenging task that requires multistep spatio-temporal reasoning over multimodal contexts. 
Recent works rely on elaborate target-agnostic parsing of audio-visual scenes for spatial grounding while mistreating audio and video as separate entities for temporal grounding. This paper proposes a new target-aware joint spatio-temporal grounding network for AVQA. It consists of two key components: the target-aware spatial grounding module (TSG) and the single-stream joint audio-visual temporal grounding module (JTG). The TSG can focus on audio-visual cues relevant to the query subject by utilizing explicit semantics from the question. Unlike previous two-stream temporal grounding modules that required an additional audio-visual fusion module, JTG incorporates audio-visual fusion and question-aware temporal grounding into one module with a simpler single-stream architecture. The temporal synchronization between audio and video in the JTG is facilitated by our proposed cross-modal synchrony loss (CSL). Extensive experiments verified the effectiveness of our proposed method over existing state-of-the-art methods.", "keywords": "audio-visual question answering;spatio-temporal reasoning;multimodal learning;video question answering", "primary_area": "", "supplementary_material": "", "author": "Yuanyuan Jiang;Jianqin Yin", "authorids": "~Yuanyuan_Jiang4;~Jianqin_Yin1", "gender": "F;F", "homepage": ";https://teacher.bupt.edu.cn/yinjianqin/zh_CN/index.htm", "dblp": ";21/2631.html", "google_scholar": "Q88Bf_0AAAAJ;QK5K52MAAAAJ", "or_profile": "~Yuanyuan_Jiang4;~Jianqin_Yin1", "aff": "Beijing University of Posts and Telecommunications;Beijing University of Posts and Telecommunications", "aff_domain": "bupt.edu.cn;bupt.edu.cn", "position": "MS student;Full Professor", "bibtex": "@inproceedings{\njiang2023targetaware,\ntitle={Target-Aware Spatio-Temporal Reasoning via Answering Questions in Dynamic Audio-Visual Scenarios},\nauthor={Yuanyuan Jiang and Jianqin Yin},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=QEf1MyZGZu}\n}", "github": "", "project": "", "reviewers": "dLRt;ctyk;jPEZ;gs1d", "site": "https://openreview.net/forum?id=QEf1MyZGZu", "pdf_size": 0, "rating": "2;2;2;2", "confidence": "4;3;4;3", "excitement": "3;3;1;3", "reproducibility": "4;2;1;3", "correctness": "3;3;1;3", "rating_avg": 2.0, "confidence_avg": 3.5, "excitement_avg": 2.5, "reproducibility_avg": 2.5, "correctness_avg": 2.5, "replies_avg": 14, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-3494-3117;0000-0002-1595-2499", "linkedin": ";", "aff_unique_index": "0;0", "aff_unique_norm": "Beijing University of Posts and Telecommunications", "aff_unique_dep": "", "aff_unique_url": "http://www.bupt.edu.cn/", "aff_unique_abbr": "BUPT", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Beijing", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "id": "QG4BWnsX6m", "title": "Multilingual Lottery Tickets to Pretrain Language Models", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "The curse of multilinguality in training multilingual pretrained language models (mPLMs) refers to the negative interference between languages, especially when the capacity is limited. While increasing the capacity may appear intuitive for overcoming this curse,\nit negatively affects both training and inference costs. Our distinction is pursuing the competing goals of reducing negative interference, while keeping capacity per each language more or less the same. 
Specifically, we first scale the model to reduce interference, then search for a per-language subnetwork, or a lottery ticket, with comparable performance to the full model. According to lottery ticket hypothesis, this scale-then-find-ticket approach alleviates interfering signals as in the scaled model, but redistributes parameters to keep the parameters reduced. Finally, to avoid the cost of multiple retraining for searching multilingual tickets, we explore zero-shot neural architecture search (NAS) methods. We investigate the most appropriate zero-shot NAS method to find multilingual tickets. Our proposed multilingual tickets reduce the inference cost of models for each languages, while boosting the performances. The ticket search cost is negligible and tickets found qualitatively preserve linguistic similarity. Our code is publicly available.", "keywords": "lottery ticket hypothesis;negative interference;zero-shot neural architecture search;multilingual pretrained language model", "primary_area": "", "supplementary_material": "", "author": "Jaeseong Lee;seung-won hwang", "authorids": "~Jaeseong_Lee1;~seung-won_hwang2", "gender": ";", "homepage": ";http://seungwonh.github.io", "dblp": "141/9456-2;h/SeungwonHwang", "google_scholar": ";63bBmc3mYrAC", "or_profile": "~Jaeseong_Lee1;~seung-won_hwang2", "aff": "Seoul National University;Seoul National University", "aff_domain": "snu.ac.kr;snu.ac.kr", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\nlee2023multilingual,\ntitle={Multilingual Lottery Tickets to Pretrain Language Models},\nauthor={Jaeseong Lee and seung-won hwang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=QG4BWnsX6m}\n}", "github": "", "project": "", "reviewers": "kJa9;spkU;TbBM", "site": "https://openreview.net/forum?id=QG4BWnsX6m", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;1;2", "excitement": "3;5;4", "reproducibility": "4;5;3", "correctness": "3;5;3", "rating_avg": 3.0, "confidence_avg": 2.3333333333333335, "excitement_avg": 4.0, "reproducibility_avg": 4.0, "correctness_avg": 3.6666666666666665, "replies_avg": 10, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";", "linkedin": ";", "aff_unique_index": "0;0", "aff_unique_norm": "Seoul National University", "aff_unique_dep": "", "aff_unique_url": "https://www.snu.ac.kr", "aff_unique_abbr": "SNU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "South Korea" }, { "id": "QH19wfJrX1", "title": "mLongT5: A Multilingual and Efficient Text-To-Text Transformer for Longer Sequences", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "We present our work on developing a multilingual, efficient text-to-text transformer that is suitable for handling long inputs. This model, called mLongT5, builds upon the architecture of LongT5, while leveraging the multilingual datasets used for pretraining mT5 and the pretraining tasks of UL2. 
We evaluate this model on a variety of multilingual summarization and question-answering tasks, and the results show stronger performance for mLongT5 when compared to existing multilingual models such as mBART or M-BERT.", "keywords": "Multilinguality;Efficient model;Long inputs", "primary_area": "", "supplementary_material": "", "author": "David Uthus;Santiago Ontanon;Joshua Ainslie;Mandy Guo", "authorids": "~David_Uthus1;~Santiago_Ontanon1;~Joshua_Ainslie1;~Mandy_Guo2", "gender": ";;;F", "homepage": ";https://sites.google.com/site/santiagoontanonvillar/;;", "dblp": "09/2971.html;https://dblp.org/pers/o/Onta=ntilde==oacute=n:Santiago.html;263/3363;", "google_scholar": "9k31iVQAAAAJ;aS-DrOwAAAAJ;;qOiCKewAAAAJ", "or_profile": "~David_Uthus1;~Santiago_Ontanon1;~Joshua_Ainslie1;~Xiaoyue_Guo1", "aff": "Google;Drexel University;Google;", "aff_domain": "google.com;drexel.edu;google.com;", "position": "Software Engineer;Associate Professor;Software Engineer;", "bibtex": "@inproceedings{\nuthus2023mlongt,\ntitle={mLongT5: A Multilingual and Efficient Text-To-Text Transformer for Longer Sequences},\nauthor={David Uthus and Santiago Ontanon and Joshua Ainslie and Mandy Guo},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=QH19wfJrX1}\n}", "github": "", "project": "", "reviewers": "bAoP;5oDq;NgeM", "site": "https://openreview.net/forum?id=QH19wfJrX1", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;4", "excitement": "3;3;3", "reproducibility": "4;5;3", "correctness": "4;4;4", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 3.0, "reproducibility_avg": 4.0, "correctness_avg": 4.0, "replies_avg": 10, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;", "linkedin": ";;;", "aff_unique_index": "0;1;0", "aff_unique_norm": "Google;Drexel University", "aff_unique_dep": "Google;", "aff_unique_url": "https://www.google.com;https://www.drexel.edu", "aff_unique_abbr": "Google;Drexel", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "QH4EMvwF8I", "title": "Query2doc: Query Expansion with Large Language Models", "track": "main", "status": "Short Main", "tldr": "", "abstract": "This paper introduces a simple yet effective query expansion approach, denoted as query2doc, to improve both sparse and dense retrieval systems. The proposed method first generates pseudo-documents by few-shot prompting large language models (LLMs), and then expands the query with generated pseudo documents. LLMs are trained on web-scale text corpora and are adept at knowledge memorization. The pseudo-documents from LLMs often contain highly relevant information that can aid in query disambiguation and guide the retrievers. Experimental results demonstrate that query2doc boosts the performance of BM25 by 3% to 15% on ad-hoc IR datasets, such as MS-MARCO and TREC DL, without any model fine-tuning. 
Furthermore, our method also benefits state-of-the-art dense retrievers in terms of both in-domain and out-of-domain results.", "keywords": "query expansion;large language models;information retrieval", "primary_area": "", "supplementary_material": "", "author": "Liang Wang;Nan Yang;Furu Wei", "authorids": "~Liang_Wang2;~Nan_Yang5;~Furu_Wei1", "gender": "M;;M", "homepage": "https://github.com/intfloat;;https://www.microsoft.com/en-us/research/people/fuwei/", "dblp": "56/4499;https://dblp.uni-trier.de/pers/hd/y/Yang_0002:Nan;72/5870", "google_scholar": "NfJbKJ4AAAAJ;InAQ3o0AAAAJ;G-V1VpwAAAAJ", "or_profile": "~Liang_Wang2;~Nan_Yang5;~Furu_Wei1", "aff": "Microsoft Research;Microsoft Research Asia;Microsoft Research", "aff_domain": "microsoft.com;microsoft.com;microsoft.com", "position": "Researcher;Researcher;Distinguished Scientist", "bibtex": "@inproceedings{\nwang2023querydoc,\ntitle={Query2doc: Query Expansion with Large Language Models},\nauthor={Liang Wang and Nan Yang and Furu Wei},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=QH4EMvwF8I}\n}", "github": "", "project": "", "reviewers": "VHFs;hWUX;dbnL", "site": "https://openreview.net/forum?id=QH4EMvwF8I", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;3;4", "excitement": "3;4;4", "reproducibility": "3;4;4", "correctness": "3;4;4", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0003-4664-7136;;", "linkedin": ";;", "aff_unique_index": "0;0;0", "aff_unique_norm": "Microsoft", "aff_unique_dep": "Microsoft Research", "aff_unique_url": "https://www.microsoft.com/en-us/research", "aff_unique_abbr": "MSR", "aff_campus_unique_index": "1", "aff_campus_unique": ";Asia", "aff_country_unique_index": "0;1;0", "aff_country_unique": "United States;China" }, { "id": "QMCjppVJbB", "title": "SEAHORSE: A Multilingual, Multifaceted Dataset for Summarization Evaluation", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Reliable automatic evaluation of summarization systems is challenging due to the multifaceted and subjective nature of the task. This is especially the case for languages other than English, where human evaluations are scarce. In this work, we introduce SEAHORSE, a dataset for multilingual, multifaceted summarization evaluation. SEAHORSE consists of 96K summaries with human ratings along 6 dimensions of text quality: comprehensibility, repetition, grammar, attribution, main ideas, and conciseness, covering 6 languages, 9 systems, and 4 datasets. As a result of its size and scope, SEAHORSE can serve both as a benchmark to evaluate learnt metrics, as well as a large-scale resource for training such metrics. We show that metrics trained with SEAHORSE achieve strong performance on the out-of-domain meta-evaluation benchmarks TRUE (Honovich et al., 2022) and mFACE (Aharoni et al., 2022). 
We make the SEAHORSE dataset and metrics publicly available for future research on multilingual and multifaceted summarization evaluation.", "keywords": "summarization;evaluation;multilingual;human evaluation;automatic metrics;NLG", "primary_area": "", "supplementary_material": "", "author": "Elizabeth Clark;Shruti Rijhwani;Sebastian Gehrmann;Joshua Maynez;Roee Aharoni;Vitaly Nikolaev;Thibault Sellam;Aditya Siddhant;Dipanjan Das;Ankur P Parikh", "authorids": "~Elizabeth_Clark2;~Shruti_Rijhwani1;~Sebastian_Gehrmann1;~Joshua_Maynez1;~Roee_Aharoni1;~Vitaly_Nikolaev1;~Thibault_Sellam2;~Aditya_Siddhant1;~Dipanjan_Das1;~Ankur_P_Parikh1", "gender": ";;M;M;M;;M;M;M;M", "homepage": "https://eaclark07.github.io/;https://shrutirij.github.io;https://sebastiangehrmann.com;;http://www.roeeaharoni.com;;http://sellam.me;;http://www.dipanjandas.com;", "dblp": "148/6935;188/9080;131/1378;220/3863;148/9506;;14/11515;211/7727;90/3182-1;80/8411", "google_scholar": ";_MQ_lNgAAAAJ;R401sNwAAAAJ;ZOYd-0oAAAAJ;https://scholar.google.co.il/citations?user=wV0mHWgAAAAJ;m2UQEwwAAAAJ;1UDO7B4AAAAJ;YMaEuzoAAAAJ;Xlv5PDYAAAAJ;bRpjhycAAAAJ", "or_profile": "~Elizabeth_Clark2;~Shruti_Rijhwani1;~Sebastian_Gehrmann1;~Joshua_Maynez1;~Roee_Aharoni1;~Vitaly_Nikolaev1;~Thibault_Sellam2;~Aditya_Siddhant1;~Dipanjan_Das1;~Ankur_P_Parikh1", "aff": "Google;Google DeepMind;Bloomberg;Google;Google;Research, Google;Google;Google;Google Deepmind;Google", "aff_domain": "google.com;google.com;bloomberg.com;google.com;google.com;research.google.com;google.com;google.com;google.com;google.com", "position": "Researcher;Researcher;Researcher;Researcher;Researcher;Researcher;Researcher;Research Engineer;Researcher;Research Scientist", "bibtex": "@inproceedings{\nclark2023seahorse,\ntitle={{SEAHORSE}: A Multilingual, Multifaceted Dataset for Summarization Evaluation},\nauthor={Elizabeth Clark and Shruti Rijhwani and Sebastian Gehrmann and Joshua Maynez and Roee Aharoni and Vitaly Nikolaev and Thibault Sellam and Aditya Siddhant and Dipanjan Das and Ankur P Parikh},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=QMCjppVJbB}\n}", "github": "", "project": "", "reviewers": "SFUF;8cDd;A7j7", "site": "https://openreview.net/forum?id=QMCjppVJbB", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;4;4", "excitement": "4;4;4", "reproducibility": "5;3;3", "correctness": "5;4;4", "rating_avg": 5.0, "confidence_avg": 4.0, "excitement_avg": 4.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 4.333333333333333, "replies_avg": 11, "authors#_avg": 10, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;;;;;", "linkedin": ";;;;roeeaharoni;;;asiddhant/;dipdas;ankur-parikh-2a240979", "aff_unique_index": "0;0;1;0;0;0;0;0;2;0", "aff_unique_norm": "Google;Bloomberg;DeepMind", "aff_unique_dep": "Google;;DeepMind", "aff_unique_url": "https://www.google.com;https://www.bloomberg.com;https://deepmind.com", "aff_unique_abbr": "Google;Bloomberg;DeepMind", "aff_campus_unique_index": "0;0;0;0;0;0;0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;1;0;0;0;0;0;0;1;0", "aff_country_unique": "United States;United Kingdom" }, { "id": "QPP8wNMBBk", "title": "Unsupervised Lexical Simplification with Context Augmentation", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "We propose a new unsupervised lexical simplification method that uses only monolingual data and pre-trained language models. 
Given a target word and its context, our method generates substitutes based on the target context and also additional contexts sampled from monolingual data. We conduct experiments in English, Portuguese, and Spanish on the TSAR-2022 shared task, and show that our model substantially outperforms other unsupervised systems across all languages. We also establish a new state-of-the-art by ensembling our model with GPT-3.5. Lastly, we evaluate our model on the SWORDS lexical substitution data set, achieving a state-of-the-art result.", "keywords": "lexical simplification;lexical substitution;lexical semantics;unsupervised", "primary_area": "", "supplementary_material": "", "author": "Takashi Wada;Timothy Baldwin;Jey Han Lau", "authorids": "~Takashi_Wada1;~Timothy_Baldwin1;~Jey_Han_Lau2", "gender": "M;;", "homepage": "https://twadada.github.io/;https://eltimster.github.io/www/;https://jeyhan.my/", "dblp": "92/5752-1;65/4863;32/9014.html", "google_scholar": "RLwFtDsAAAAJ;wjBD1dkAAAAJ;https://scholar.google.com.au/citations?user=MFi65f4AAAAJ", "or_profile": "~Takashi_Wada1;~Timothy_Baldwin1;~Jey_Han_Lau2", "aff": "Mohamed bin Zayed University of Artificial Intelligence;The University of Melbourne;The University of Melbourne", "aff_domain": "mbzuai.ac.ae;unimelb.edu.au;unimelb.edu.au", "position": "Researcher;Full Professor;Senior Lecturer", "bibtex": "@inproceedings{\nwada2023unsupervised,\ntitle={Unsupervised Lexical Simplification with Context Augmentation},\nauthor={Takashi Wada and Timothy Baldwin and Jey Han Lau},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=QPP8wNMBBk}\n}", "github": "", "project": "", "reviewers": "LjvW;iLSG;vBu5", "site": "https://openreview.net/forum?id=QPP8wNMBBk", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;3;3", "excitement": "3;4;4", "reproducibility": "3;4;4", "correctness": "4;3;4", "rating_avg": 4.0, "confidence_avg": 3.0, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0003-4525-6950;0000-0002-1647-4628", "linkedin": ";;", "aff_unique_index": "0;1;1", "aff_unique_norm": "Mohamed bin Zayed University of Artificial Intelligence;University of Melbourne", "aff_unique_dep": ";", "aff_unique_url": "https://mbzuai.ac.ae;https://www.unimelb.edu.au", "aff_unique_abbr": "MBZUAI;UniMelb", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1", "aff_country_unique": "United Arab Emirates;Australia" }, { "id": "QV79qiKAjD", "title": "On the Benefits of Learning to Route in Mixture-of-Experts Models", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Mixture-of-Expert (MoE) Transformer models, such as the Switch Transformer, allow us to successfully scale up model sizes while keeping the amount of compute time fixed. Prior work has established the computational efficiency benefits of using these models. A core component of these models is a router that routes input tokens to different experts in a layer. We show theoretical and empirical evidence that the router's ability to route tokens intelligently confers a significant advantage to MoE models. We study synthetic settings where the input data is distributed in clusters and show theoretically and empirically that the router learns to route the inputs according to these clusters. 
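To make the routing component concrete, here is a minimal PyTorch sketch of a learned top-1 router in the style of the Switch Transformer; the layer sizes, the plain softmax gate, and the absence of load-balancing terms are simplifying assumptions rather than the configuration studied in the paper.

```python
# Minimal sketch of a learned top-1 router over experts (Switch Transformer style).
# Dimensions and the lack of auxiliary load-balancing losses are simplifying assumptions.
import torch
import torch.nn as nn

class TopOneMoELayer(nn.Module):
    def __init__(self, d_model: int = 64, d_ff: int = 256, num_experts: int = 4):
        super().__init__()
        self.router = nn.Linear(d_model, num_experts)  # trainable routing weights
        self.experts = nn.ModuleList([
            nn.Sequential(nn.Linear(d_model, d_ff), nn.ReLU(), nn.Linear(d_ff, d_model))
            for _ in range(num_experts)
        ])

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # x: (num_tokens, d_model). Each token is sent to its single best expert.
        gate_probs = torch.softmax(self.router(x), dim=-1)   # (tokens, experts)
        top_prob, top_idx = gate_probs.max(dim=-1)           # winning expert per token
        out = torch.zeros_like(x)
        for e, expert in enumerate(self.experts):
            mask = top_idx == e
            if mask.any():
                # Scale by the gate probability so the router receives gradient signal.
                out[mask] = top_prob[mask].unsqueeze(-1) * expert(x[mask])
        return out

tokens = torch.randn(10, 64)
print(TopOneMoELayer()(tokens).shape)  # torch.Size([10, 64])
```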
Then we perform experiments on real data using the T5X library, where we observe that a trainable router confers a non-trivial benefit over a non-trainable router.", "keywords": "mixture-of-experts;transformer;router;efficiency;conditional compute;sparsely activated models;theory", "primary_area": "", "supplementary_material": "", "author": "Nishanth Dikkala;Nikhil Ghosh;Raghu Meka;Rina Panigrahy;Nikhil Vyas;Xin Wang", "authorids": "~Nishanth_Dikkala1;~Nikhil_Ghosh1;~Raghu_Meka1;~Rina_Panigrahy1;~Nikhil_Vyas1;~Xin_Wang30", "gender": "M;M;M;;M;M", "homepage": "http://people.csail.mit.edu/nishanthd/;;http://raghumeka.org;;https://nikhilvyas.github.io/;", "dblp": "138/8092;251/8779;76/1906;p/RinaPanigrahy;176/1074;", "google_scholar": "CMZoOTIAAAAJ;0Fv4bikAAAAJ;xuDZ9-sAAAAJ;;;7BjA8ccAAAAJ", "or_profile": "~Nishanth_Dikkala1;~Nikhil_Ghosh1;~Raghu_Meka1;~Rina_Panigrahy1;~Nikhil_Vyas1;~Xin_Wang30", "aff": "Google;University of California, Berkeley;University of California, Los Angeles;Google;Harvard University;Google", "aff_domain": "google.com;berkeley.edu;ucla.edu;google.com;harvard.edu;google.com", "position": "Google Research;PhD student;Associate Professor;Research Scientist;Postdoc;Software Engineer", "bibtex": "@inproceedings{\ndikkala2023on,\ntitle={On the Benefits of Learning to Route in Mixture-of-Experts Models},\nauthor={Nishanth Dikkala and Nikhil Ghosh and Raghu Meka and Rina Panigrahy and Nikhil Vyas and Xin Wang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=QV79qiKAjD}\n}", "github": "", "project": "", "reviewers": "qHJT;iWp2;6A1b", "site": "https://openreview.net/forum?id=QV79qiKAjD", "pdf_size": 0, "rating": "5;5;5", "confidence": "3;3;2", "excitement": "3;3;3", "reproducibility": "4;4;3", "correctness": "3;4;3", "rating_avg": 5.0, "confidence_avg": 2.6666666666666665, "excitement_avg": 3.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;", "linkedin": ";nikhil-ghosh-03389199/;;;;", "aff_unique_index": "0;1;2;0;3;0", "aff_unique_norm": "Google;University of California, Berkeley;University of California, Los Angeles;Harvard University", "aff_unique_dep": "Google;;;", "aff_unique_url": "https://www.google.com;https://www.berkeley.edu;https://www.ucla.edu;https://www.harvard.edu", "aff_unique_abbr": "Google;UC Berkeley;UCLA;Harvard", "aff_campus_unique_index": "0;1;2;0;0", "aff_campus_unique": "Mountain View;Berkeley;Los Angeles;", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "QVnlBmGrWS", "title": "ORCHID: A Chinese Debate Corpus for Target-Independent Stance Detection and Argumentative Dialogue Summarization", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Dialogue agents have been receiving increasing attention for years, and this trend has been further boosted by the recent progress of large language models (LLMs). Stance detection and dialogue summarization are two core tasks of dialogue agents in application scenarios that involve argumentative dialogues. However, research on these tasks is limited by the insufficiency of public datasets, especially for non-English languages. 
To address this language resource gap in Chinese, we present ORCHID (Oral Chinese Debate), the first Chinese dataset for benchmarking target-independent stance detection and debate summarization. Our dataset consists of 1,218 real-world debates that were conducted in Chinese on 476 unique topics, containing 2,436 stance-specific summaries and 14,133 fully annotated utterances. Besides providing a versatile testbed for future research, we also conduct an empirical study on the dataset and propose an integrated task. The results show the challenging nature of the dataset and suggest the potential of incorporating stance detection in summarization for argumentative dialogue.", "keywords": "dialogue dataset;long dialogue summarization;target-independent stance detection", "primary_area": "", "supplementary_material": "", "author": "Xiutian Zhao;Ke Wang;Wei Peng", "authorids": "~Xiutian_Zhao1;~Ke_Wang2;~Wei_Peng6", "gender": "M;M;M", "homepage": "https://xiutian.github.io;;https://www.rmit.edu.au/profiles/p/wei-peng3", "dblp": "362/7856;https://dblp.uni-trier.de/pid/181/2613.html;", "google_scholar": "HfOmKncAAAAJ;https://scholar.google.com/citations?hl=en;", "or_profile": "~Xiutian_Zhao1;~Ke_Wang2;~Wei_Peng6", "aff": ";Huawei Technologies Ltd.;Huawei Technologies Ltd.", "aff_domain": ";huawei.com;huawei.com", "position": ";Researcher;Principal Researcher", "bibtex": "@inproceedings{\nzhao2023orchid,\ntitle={{ORCHID}: A Chinese Debate Corpus for Target-Independent Stance Detection and Argumentative Dialogue Summarization},\nauthor={Xiutian Zhao and Ke Wang and Wei Peng},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=QVnlBmGrWS}\n}", "github": "", "project": "", "reviewers": "rcmE;B1gR;LX9i", "site": "https://openreview.net/forum?id=QVnlBmGrWS", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;4", "excitement": "4;3;3", "reproducibility": "4;3;4", "correctness": "4;4;4", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0003-2300-0743;", "linkedin": ";;wei-peng-phd-in-ai-4515ba22/?originalSubdomain=au", "aff_unique_index": "0;0", "aff_unique_norm": "Huawei", "aff_unique_dep": "Huawei Technologies", "aff_unique_url": "https://www.huawei.com", "aff_unique_abbr": "Huawei", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "id": "QVuVwt1QLh", "title": "Unifying Text, Tables, and Images for Multimodal Question Answering", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Multimodal question answering (MMQA), which aims to derive the answer from multiple knowledge modalities (e.g., text, tables, and images), has received increasing attention due to its broad applications. Current approaches to MMQA often rely on single-modal or bi-modal QA models, which limits their ability to effectively integrate information across all modalities and leverage the power of pre-trained language models. To address these limitations, we propose a novel framework called UniMMQA, which unifies three different input modalities into a text-to-text format by employing position-enhanced table linearization and diversified image captioning techniques. 
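The paper's exact "position-enhanced" scheme is not spelled out here, so the following is only a generic illustration of linearizing a table into text with explicit row and column position markers that a text-to-text model can consume.

```python
# Illustrative (not the paper's exact scheme) linearization of a table into text,
# with explicit row/column position markers preserved for each cell.
def linearize_table(header: list[str], rows: list[list[str]]) -> str:
    pieces = []
    for r, row in enumerate(rows, start=1):
        for c, (col, cell) in enumerate(zip(header, row), start=1):
            pieces.append(f"row {r} col {c} | {col} : {cell}")
    return " ; ".join(pieces)

header = ["Team", "Wins"]
rows = [["Falcons", "11"], ["Hawks", "9"]]
print(linearize_table(header, rows))
# row 1 col 1 | Team : Falcons ; row 1 col 2 | Wins : 11 ; row 2 col 1 | Team : Hawks ; ...
```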
Additionally, we enhance cross-modal reasoning by incorporating a multimodal rationale generator, which produces textual descriptions of cross-modal relations for adaptation into the text-to-text generation process. Experimental results on three MMQA benchmark datasets show the superiority of UniMMQA in both supervised and unsupervised settings.", "keywords": "Multimodal Question Answering;Modality Unification;Image Caption", "primary_area": "", "supplementary_material": "", "author": "Haohao Luo;Ying Shen;Yang Deng", "authorids": "~Haohao_Luo1;~Ying_Shen1;~Yang_Deng4", "gender": "M;F;M", "homepage": ";http://ise.sysu.edu.cn/teacher/teacher02/1371452.htm;https://dengyang17.github.io/", "dblp": "362/8696;01/8558-1;115/6282-2", "google_scholar": "nDL-qb8AAAAJ;rVpl7SIAAAAJ;https://scholar.google.com.hk/citations?user=OshWT3UAAAAJ", "or_profile": "~Haohao_Luo1;~Ying_Shen1;~Yang_Deng4", "aff": "SUN YAT-SEN UNIVERSITY;SUN YAT-SEN UNIVERSITY, Tsinghua University;The Chinese University of Hong Kong", "aff_domain": "sysu.edu.cn;sysu.edu.cn;cuhk.edu.hk", "position": "Undergrad student;Associate Professor;PhD student", "bibtex": "@inproceedings{\nluo2023unifying,\ntitle={Unifying Text, Tables, and Images for Multimodal Question Answering},\nauthor={Haohao Luo and Ying Shen and Yang Deng},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=QVuVwt1QLh}\n}", "github": "", "project": "", "reviewers": "smpP;htMw;T6GB", "site": "https://openreview.net/forum?id=QVuVwt1QLh", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;4", "excitement": "2;3;4", "reproducibility": "3;3;4", "correctness": "2;3;4", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 3.0, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.0, "replies_avg": 10, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-3220-904X;", "linkedin": ";;", "aff_unique_index": "0;0;1", "aff_unique_norm": "Sun Yat-sen University;Chinese University of Hong Kong", "aff_unique_dep": ";", "aff_unique_url": "http://www.sysu.edu.cn;https://www.cuhk.edu.hk", "aff_unique_abbr": "SYSU;CUHK", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "QYSPlIZ6bV", "title": "TalkUp: Paving the Way for Understanding Empowering Language", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Empowering language is important in many real-world contexts, from education to workplace dynamics to healthcare. Though language technologies are growing more prevalent in these contexts, empowerment has seldom been studied in NLP, and moreover, it is inherently challenging to operationalize because of its implicit nature. This work builds from linguistic and social psychology literature to explore what characterizes empowering language. We then crowdsource a novel dataset of Reddit posts labeled for empowerment, reasons why these posts are empowering to readers, and the social relationships between posters and readers. Our preliminary analyses show that this dataset, which we call TalkUp, can be used to train language models that capture empowering and disempowering language. 
More broadly, TalkUp provides an avenue to explore implication, presuppositions, and how social context influences the meaning of language.", "keywords": "natural language processing;empowerment", "primary_area": "", "supplementary_material": "", "author": "Lucille Njoo;Chan Young Park;Octavia Stappart;Marvin Thielk;Yi Chu;Yulia Tsvetkov", "authorids": "~Lucille_Njoo1;~Chan_Young_Park1;~Octavia_Stappart1;~Marvin_Thielk1;~Yi_Chu1;~Yulia_Tsvetkov1", "gender": "F;F;F;;F;F", "homepage": ";https://chan0park.github.io;;https://neuralcoder.science/;;https://homes.cs.washington.edu/~yuliats/", "dblp": ";15/480;;;86/6573;75/8157", "google_scholar": "https://scholar.google.com/citations?view_op=list_works;https://scholar.google.com/citations?hl=en;;;;SEDPkrsAAAAJ", "or_profile": "~Lucille_Njoo1;~Chan_Young_Park1;~Octavia_Stappart1;~Marvin_Thielk1;~Yi_Chu1;~Yulia_Tsvetkov1", "aff": "University of Washington;School of Computer Science, Carnegie Mellon University;Department of Computer Science;;;Department of Computer Science, University of Washington", "aff_domain": "uw.edu;cs.cmu.edu;cs.washington.edu;;;cs.washington.edu", "position": "PhD student;PhD student;Undergrad student;;;Assistant Professor", "bibtex": "@inproceedings{\nnjoo2023talkup,\ntitle={TalkUp: Paving the Way for Understanding Empowering Language},\nauthor={Lucille Njoo and Chan Young Park and Octavia Stappart and Marvin Thielk and Yi Chu and Yulia Tsvetkov},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=QYSPlIZ6bV}\n}", "github": "", "project": "", "reviewers": "PeP9;8biv;83gy", "site": "https://openreview.net/forum?id=QYSPlIZ6bV", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;3;3", "excitement": "3;4;3", "reproducibility": "4;3;5", "correctness": "3;4;4", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.0, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;0000-0002-4634-7128", "linkedin": ";;octavia-s/;;yi-chu-05a0a816/;", "aff_unique_index": "0;1;2;0", "aff_unique_norm": "University of Washington;Carnegie Mellon University;Unknown Institution", "aff_unique_dep": ";School of Computer Science;Department of Computer Science", "aff_unique_url": "https://www.washington.edu;https://www.cmu.edu;", "aff_unique_abbr": "UW;CMU;", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Pittsburgh;Seattle", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States;" }, { "id": "QYvFUlF19n", "title": "In-Context Learning Creates Task Vectors", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "In-context learning (ICL) in Large Language Models (LLMs) has emerged as a powerful new learning paradigm. However, its underlying mechanism is still not well understood.\nIn particular, it is challenging to map it to the \"standard\" machine learning framework, where one uses a training set $S$ to find a best-fitting function $f(x)$ in some hypothesis class. Here we make progress on this problem by showing that the functions learned by ICL often have a very simple structure: they correspond to the transformer LLM whose only inputs are the query $x$ and a single \"task vector\" calculated from the training set. 
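One way to make such a task vector concrete: the sketch below, which assumes a HuggingFace-style causal LM (GPT-2 as a small stand-in), runs the demonstrations through the model and reads off an intermediate hidden state at the final token position. The choice of layer and the use of the last-token state are illustrative assumptions, and the step that patches the vector back into a query-only forward pass is omitted.

```python
# Rough sketch of extracting a task-vector-like representation from in-context
# demonstrations with a HuggingFace causal LM. The layer index and the last-token
# hidden state are illustrative assumptions; injecting the vector into a query-only
# forward pass (the "modulation" step) is not shown.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "gpt2"  # small stand-in model
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name).eval()

demos = [("apple", "fruit"), ("carrot", "vegetable"), ("salmon", "fish")]
prompt = "".join(f"{x} -> {y}\n" for x, y in demos) + "tiger ->"

with torch.no_grad():
    inputs = tokenizer(prompt, return_tensors="pt")
    outputs = model(**inputs, output_hidden_states=True)

layer = 6  # assumed intermediate layer
task_vector = outputs.hidden_states[layer][0, -1]  # hidden state at the final position
print(task_vector.shape)  # torch.Size([768]) for gpt2
```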
Thus, ICL can be seen as compressing $S$ into a single task vector $\\boldsymbol{\\theta}(S)$ and then using this task vector to modulate the transformer to produce the output.\nWe support the above claim via comprehensive experiments across a range of models and tasks.", "keywords": "Large Language Models;In-Context Learning;Interpretability", "primary_area": "", "supplementary_material": "", "author": "Roee Hendel;Mor Geva;Amir Globerson", "authorids": "~Roee_Hendel1;~Mor_Geva1;~Amir_Globerson1", "gender": "M;F;M", "homepage": "https://roeehendel.github.io/;https://mega002.github.io/;http://www.cs.tau.ac.il/~gamir/", "dblp": ";203/9159;08/4162.html", "google_scholar": ";https://scholar.google.co.il/citations?user=GxpQbSkAAAAJ;https://scholar.google.com.tw/citations?user=5JserkUAAAAJ", "or_profile": "~Roee_Hendel1;~Mor_Geva1;~Amir_Globerson1", "aff": "Tel Aviv University;Google DeepMind;Tel Aviv University", "aff_domain": "tau.ac.il;google.com;tau.ac.il", "position": "MS student;Postdoc;Associate Professor", "bibtex": "@inproceedings{\nhendel2023incontext,\ntitle={In-Context Learning Creates Task Vectors},\nauthor={Roee Hendel and Mor Geva and Amir Globerson},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=QYvFUlF19n}\n}", "github": "", "project": "", "reviewers": "qoQT;PY9i;s9kR", "site": "https://openreview.net/forum?id=QYvFUlF19n", "pdf_size": 0, "rating": "3;3;3", "confidence": "5;3;3", "excitement": "3;4;3", "reproducibility": "2;4;3", "correctness": "3;3;3", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.0, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": ";morgeva/;", "aff_unique_index": "0;1;0", "aff_unique_norm": "Tel Aviv University;Google", "aff_unique_dep": ";Google DeepMind", "aff_unique_url": "https://www.tau.ac.il;https://deepmind.com", "aff_unique_abbr": "TAU;DeepMind", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "Israel;United Kingdom" }, { "id": "QdhjuI19nv", "title": "Compositional Generalization for Data-to-Text Generation", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Data-to-text generation involves transforming structured data, often represented as predicate-argument tuples, into coherent textual descriptions. Despite recent advances, systems still struggle when confronted with unseen combinations of predicates, producing unfaithful descriptions (e.g.,hallucinations or omissions). We refer to this issue as compositional generalisation, and it encouraged us to create a benchmark for assessing the performance of different approaches on this specific problem. Furthermore, we propose a novel model that addresses compositional generalization by clustering predicates into groups. Our model generates text in a sentence-by-sentence manner, relying on one cluster of predicates at a time. This approach significantly outperforms T5-baselines across all evaluation metrics. 
Notably, it achieved a 31% improvement over T5 in terms of a metric focused on maintaining faithfulness to the input.", "keywords": "Compositional Generalization;Data-to-Text Generation;Natural Language Generation;Clustering;Reinforcement Learning;Benchmark", "primary_area": "", "supplementary_material": "", "author": "Xinnuo Xu;Ivan Titov;Mirella Lapata", "authorids": "~Xinnuo_Xu1;~Ivan_Titov1;~Mirella_Lapata1", "gender": "F;;F", "homepage": ";http://ivan-titov.org;https://homepages.inf.ed.ac.uk/mlap/", "dblp": "211/7908;08/5391;59/6701", "google_scholar": "osgiI-AAAAAJ;https://scholar.google.nl/citations?user=FKUc3vsAAAAJ;j67B9Q4AAAAJ", "or_profile": "~Xinnuo_Xu1;~Ivan_Titov1;~Mirella_Lapata1", "aff": "University of Edinburgh, University of Edinburgh;University of Amsterdam;Edinburgh University, University of Edinburgh", "aff_domain": "ed.ac.uk;uva.nl;inf.ed.ac.uk", "position": "Postdoc;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nxu2023compositional,\ntitle={Compositional Generalization for Data-to-Text Generation},\nauthor={Xinnuo Xu and Ivan Titov and Mirella Lapata},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=QdhjuI19nv}\n}", "github": "", "project": "", "reviewers": "7idM;Yzid;GZ7d;1UpM", "site": "https://openreview.net/forum?id=QdhjuI19nv", "pdf_size": 0, "rating": "3;3;3;3", "confidence": "2;4;2;4", "excitement": "3;4;3;3", "reproducibility": "4;4;3;5", "correctness": "4;4;3;4", "rating_avg": 3.0, "confidence_avg": 3.0, "excitement_avg": 3.25, "reproducibility_avg": 4.0, "correctness_avg": 3.75, "replies_avg": 13, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": ";;", "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Edinburgh;University of Amsterdam", "aff_unique_dep": ";", "aff_unique_url": "https://www.ed.ac.uk;https://www.uva.nl", "aff_unique_abbr": "Edinburgh;UvA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "United Kingdom;Netherlands" }, { "id": "QkCYv3TlGk", "title": "Non-parallel Accent Transfer based on Fine-grained Controllable Accent Modelling", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Existing accent transfer works rely on parallel data or speech recognition models. This paper focuses on the practical application of accent transfer and aims to implement accent transfer using non-parallel datasets. The study has encountered the challenge of speech representation disentanglement and modeling accents. In our accent modeling transfer framework, we manage to solve these problems by two proposed methods. First, we learn the suprasegmental information associated with tone to finely model the accents in terms of tone and rhythm. Second, we propose to use mutual information learning to disentangle the accent features and control the accent of the generated speech during the inference time. 
Experiments show that the proposed framework attains superior performance to the baseline models in terms of accentedness and audio quality.", "keywords": "Accent Transfer\uff0cFine-grained Controllable Accent Modelling\uff0cNon-parallel", "primary_area": "", "supplementary_material": "", "author": "linqin wang;Zhengtao Yu;Yuanzhang Yang;Shengxiang Gao;Cunli Mao;Yuxin Huang", "authorids": "~linqin_wang1;~Zhengtao_Yu2;~Yuanzhang_Yang1;~Shengxiang_Gao1;~Cunli_Mao1;~Yuxin_Huang6", "gender": "M;M;M;F;M;M", "homepage": "https://liip.kust.edu.cn/QTGL/CyxxView.do?xh=00322;http://rsc.kmust.edu.cn/info/1181/1081.htm;https://github.com/yyz845935161;https://xzy.kmust.edu.cn/info/1159/3051.htm;http://xzy.kmust.edu.cn/info/1127/1559.htm;https://xzy.kmust.edu.cn/info/1129/2020.htm", "dblp": ";03/6757;;47/10188.html;35/2229.html;", "google_scholar": ";;;;;", "or_profile": "~linqin_wang1;~Zhengtao_Yu2;~Yuanzhang_Yang1;~Shengxiang_Gao1;~Cunli_Mao1;~Yuxin_Huang6", "aff": "Kunmimg University of Science and Technology;Kunming University of Science and Technology;;Kunming University of Science and Technology;Kunmimg University of Science and Technology;Kunming university science and technology", "aff_domain": "kmust.edu.cn;kmust.edu.cn;;kust.edu;kmust.edu.cn;kust.edu.cn", "position": "PhD student;Full Professor;;Full Professor;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nwang2023nonparallel,\ntitle={Non-parallel Accent Transfer based on Fine-grained Controllable Accent Modelling},\nauthor={linqin wang and Zhengtao Yu and Yuanzhang Yang and Shengxiang Gao and Cunli Mao and Yuxin Huang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=QkCYv3TlGk}\n}", "github": "", "project": "", "reviewers": "sM6F;4Wqo;hXtk", "site": "https://openreview.net/forum?id=QkCYv3TlGk", "pdf_size": 0, "rating": "3;3;3", "confidence": "5;2;3", "excitement": "3;3;3", "reproducibility": "4;3;4", "correctness": "3;3;2", "rating_avg": 3.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 2.6666666666666665, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0001-8952-8984;;;;0000-0003-1277-6212", "linkedin": ";;;;;", "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Kunming University of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "http://www.kust.edu.cn", "aff_unique_abbr": "", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Kunming", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "QlY0TSxVIl", "title": "Revisiting Automated Topic Model Evaluation with Large Language Models", "track": "main", "status": "Short Main", "tldr": "", "abstract": "Topic models help us make sense of large text collections. Automatically evaluating their output and determining the optimal number of topics are both longstanding challenges, with no effective automated solutions to date. This paper proposes using large language models (LLMs) for these tasks. We find that LLMs appropriately assess the resulting topics, correlating more strongly with human judgments than existing automated metrics. However, the setup of the evaluation task is crucial \u2014 LLMs perform better on coherence ratings of word sets than on intrusion detection. We find that LLMs can also guide us towards a reasonable number of topics. 
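As a rough sketch of how such an LLM-based rating might look in practice (the prompt wording and the 1-3 scale are assumptions, not the paper's exact setup), a topic's top words can be sent to a placeholder `ask_llm` callable and the returned rating parsed:

```python
# Illustrative sketch of using an LLM to rate topic coherence from a topic's top words.
# The prompt and the 1-3 scale are assumptions; `ask_llm` stands in for any
# chat/completions API call that returns a string.
import re

def rate_topic_coherence(ask_llm, top_words: list[str]) -> int:
    prompt = (
        "Rate how related the following words are to each other on a scale of 1-3 "
        "(1 = not related, 3 = very related). Reply with a single number.\n"
        f"Words: {', '.join(top_words)}\nRating:"
    )
    reply = ask_llm(prompt)
    match = re.search(r"[123]", reply)
    return int(match.group()) if match else 1  # fall back to the lowest rating

# Toy usage with a stubbed LLM.
stub_llm = lambda prompt: "3"
print(rate_topic_coherence(stub_llm, ["river", "lake", "water", "boat", "fish"]))  # 3
```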
In actual applications, topic models are typically used to answer a research question related to a collection of texts. We can incorporate this research question in the prompt to the LLM, which helps estimating the optimal number of topics.", "keywords": "topic model evaluation;interpretability;large language models;text clustering", "primary_area": "", "supplementary_material": "", "author": "Dominik Stammbach;Vil\u00e9m Zouhar;Alexander Hoyle;Mrinmaya Sachan;Elliott Ash", "authorids": "~Dominik_Stammbach1;~Vil\u00e9m_Zouhar1;~Alexander_Hoyle1;~Mrinmaya_Sachan3;~Elliott_Ash1", "gender": "M;Not Specified;M;;M", "homepage": "https://lawecon.ethz.ch/group/scientific-team/stammbach.html;https://vilda.net;https://alexanderhoyle.com;https://elliottash.com;https://sites.google.com/site/mrinsachan/", "dblp": "242/4666;254/1832;297/8769;271/7737;86/10440.html", "google_scholar": "J6RHVgYAAAAJ;2EUDwtkAAAAJ;NpK0IXgAAAAJ;o5uDfHMAAAAJ;Tpp9ZjoAAAAJ", "or_profile": "~Dominik_Stammbach1;~Vil\u00e9m_Zouhar1;~Alexander_Hoyle1;~Elliott_Ash1;~MRINMAYA_SACHAN2", "aff": "ETHZ - ETH Zurich;Amazon;University of Maryland, College Park;Swiss Federal Institute of Technology;Swiss Federal Institute of Technology", "aff_domain": "ethz.ch;amazon.com;umd.edu;ethz.ch;ethz.ch", "position": "PhD student;Intern;PhD student;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nstammbach2023revisiting,\ntitle={Revisiting Automated Topic Model Evaluation with Large Language Models},\nauthor={Dominik Stammbach and Vil{\\'e}m Zouhar and Alexander Hoyle and Mrinmaya Sachan and Elliott Ash},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=QlY0TSxVIl}\n}", "github": "", "project": "", "reviewers": "2esg;aBQ5;sobM", "site": "https://openreview.net/forum?id=QlY0TSxVIl", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;2;3", "excitement": "3;4;4", "reproducibility": "4;4;3", "correctness": "4;4;4", "rating_avg": 5.0, "confidence_avg": 3.0, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0003-1631-3020;;;0000-0002-6817-7529;", "linkedin": ";vil%C3%A9m-zouhar-192988288/;;;", "aff_unique_index": "0;1;2;3;3", "aff_unique_norm": "ETH Zurich;Amazon;University of Maryland;Swiss Federal Institute of Technology", "aff_unique_dep": ";Amazon.com, Inc.;;", "aff_unique_url": "https://www.ethz.ch;https://www.amazon.com;https://www/umd.edu;https://www.ethz.ch", "aff_unique_abbr": "ETHZ;Amazon;UMD;ETH Zurich", "aff_campus_unique_index": "1", "aff_campus_unique": ";College Park", "aff_country_unique_index": "0;1;1;0;0", "aff_country_unique": "Switzerland;United States" }, { "id": "QnXfnQ3MFe", "title": "Dynamic Low-rank Estimation for Transformer-based Language Models", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Matrix decomposition methods, such as Singular Value Decomposition (SVD) and its importance-weighted variants, have been widely used for compressing Transformer-based language models. While importance-weighted decomposition methods alleviate the strong assumption of equal importance for each parameter in SVD, they still rely on two fundamental assumptions: 1) unchanged importance distribution during further fine-tuning, 2) equal importance across weight matrices in different layers. 
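For readers unfamiliar with this family of methods, the sketch below shows the plain, unweighted truncated-SVD compression they start from, with one fixed rank applied to a single weight matrix; this is the static setting that importance weighting and, later, dynamic rank allocation aim to improve on.

```python
# Minimal sketch of SVD-based weight compression: factor a weight matrix W into two
# low-rank matrices with a fixed rank r. Plain, unweighted truncated SVD with the same
# rank for every layer, i.e., the static baseline that dynamic rank allocation relaxes.
import torch

def truncated_svd_factors(weight: torch.Tensor, rank: int):
    U, S, Vh = torch.linalg.svd(weight, full_matrices=False)
    A = U[:, :rank] * S[:rank]   # (out_dim, rank)
    B = Vh[:rank, :]             # (rank, in_dim)
    return A, B                  # W is approximated by A @ B, storing (out+in)*rank values

W = torch.randn(768, 768)
A, B = truncated_svd_factors(W, rank=64)
print((W - A @ B).norm() / W.norm())  # relative reconstruction error
```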
\nFurthermore, these methods necessitate a well-trained task-specific model as the starting point and require additional fine-tuning after compression.\nIn this work, we proposed RankDyna, a matrix decomposition method that enables dynamic rank resource allocation among matrices across different layers during the training process.\nStarting from a general pre-trained model, RankDyna accomplishes the dual goals of compression and adaptation to the downstream task, all within a single round of fine-tuning.\nThe extensive evaluations demonstrate that RankDyna can outperform current SOTA methods under various parameter budget levels, and the advantage of RankDyna is further enhanced with higher compression rates.", "keywords": "low-rank estimation; matrix factorization;", "primary_area": "", "supplementary_material": "", "author": "Ting Hua;Xiao Li;Shangqian Gao;Yen-Chang Hsu;Yilin Shen;Hongxia Jin", "authorids": "~Ting_Hua1;~Xiao_Li8;~Shangqian_Gao1;~Yen-Chang_Hsu1;~Yilin_Shen1;~Hongxia_Jin1", "gender": ";;;M;M;", "homepage": ";https://heimine.github.io/;;;;", "dblp": ";66/2069-26.html;195/2523;172/1140;30/383;", "google_scholar": ";aAX0au8AAAAJ;9mNI83oAAAAJ;7QWAiigAAAAJ;9PSFMzAAAAAJ;", "or_profile": "~Ting_Hua1;~Xiao_Li8;~Shangqian_Gao1;~Yen-Chang_Hsu1;~Yilin_Shen1;~Hongxia_Jin1", "aff": ";University of Michigan;University of Pittsburgh;Samsung Research America;Samsung Research America;", "aff_domain": ";umich.edu;pitt.edu;samsung.com;gmail.com;", "position": ";PhD student;PhD student;Research Scientist;Principal Researcher;", "bibtex": "@inproceedings{\nhua2023dynamic,\ntitle={Dynamic Low-rank Estimation for Transformer-based Language Models},\nauthor={Ting Hua and Xiao Li and Shangqian Gao and Yen-Chang Hsu and Yilin Shen and Hongxia Jin},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=QnXfnQ3MFe}\n}", "github": "", "project": "", "reviewers": "YbZE;WKgh;ncwh;NNuk", "site": "https://openreview.net/forum?id=QnXfnQ3MFe", "pdf_size": 0, "rating": "3;3;3;3", "confidence": "3;2;3;4", "excitement": "3;3;3;2", "reproducibility": "4;3;4;3", "correctness": "3;3;3;4", "rating_avg": 3.0, "confidence_avg": 3.0, "excitement_avg": 2.75, "reproducibility_avg": 3.5, "correctness_avg": 3.25, "replies_avg": 14, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;", "linkedin": ";;;yenchanghsu/;;", "aff_unique_index": "0;1;2;2", "aff_unique_norm": "University of Michigan;University of Pittsburgh;Samsung", "aff_unique_dep": ";;Samsung Research America", "aff_unique_url": "https://www.umich.edu;https://www.pitt.edu;https://www.samsung.com/us/careers/research/", "aff_unique_abbr": "UM;Pitt;SRA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "QoiOmXy3A7", "title": "Describe Me an Auklet: Generating Grounded Perceptual Category Descriptions", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Human speakers can generate descriptions of perceptual concepts, abstracted from the instance-level. Moreover, such descriptions can be used by other speakers to learn provisional representations of those concepts. Learning and using abstract perceptual concepts is under-investigated in the language-and-vision field. The problem is also highly relevant to the field of representation learning in multi-modal NLP. 
In this paper, we introduce a framework for testing category-level perceptual grounding in multi-modal language models. In particular, we train separate neural networks to **generate** and **interpret** descriptions of visual categories. We measure the *communicative success* of the two models with the zero-shot classification performance of the interpretation model, which we argue is an indicator of perceptual grounding. Using this framework, we compare the performance of *prototype*- and *exemplar*-based representations. Finally, we show that communicative success exposes performance issues in the generation model, not captured by traditional intrinsic NLG evaluation metrics, and argue that these issues stem from a failure to properly ground language in vision at the category level.", "keywords": "language-and-vision;grounding;zero-shot;cognitive theories of categorisation;natural language generation;natural language interpretation", "primary_area": "", "supplementary_material": "", "author": "Bill Noble;Nikolai Ilinykh", "authorids": "~Bill_Noble1;~Nikolai_Ilinykh1", "gender": ";M", "homepage": "https://winobes.github.io;", "dblp": ";229/2498", "google_scholar": ";xvVbd4EAAAAJ", "or_profile": "~Bill_Noble1;~Nikolai_Ilinykh1", "aff": "G\u00f6teborg University;G\u00f6teborg University", "aff_domain": "gu.se;gu.se", "position": "Researcher;PhD student", "bibtex": "@inproceedings{\nnoble2023describe,\ntitle={Describe Me an Auklet: Generating Grounded Perceptual Category Descriptions},\nauthor={Bill Noble and Nikolai Ilinykh},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=QoiOmXy3A7}\n}", "github": "", "project": "", "reviewers": "kZxL;cpaf;uNB9", "site": "https://openreview.net/forum?id=QoiOmXy3A7", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;4", "excitement": "4;3;2", "reproducibility": "4;4;4", "correctness": "2;2;2", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.0, "reproducibility_avg": 4.0, "correctness_avg": 2.0, "replies_avg": 11, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-8323-4149;0000-0001-9048-5467", "linkedin": ";xilini/", "aff_unique_index": "0;0", "aff_unique_norm": "University of Gothenburg", "aff_unique_dep": "", "aff_unique_url": "https://www.gu.se", "aff_unique_abbr": "GU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Sweden" }, { "id": "QtOybganmT", "title": "Enhancing Retrieval-Augmented Large Language Models with Iterative Retrieval-Generation Synergy", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Retrieval-augmented generation has attracted extensive attention as it promises to address the limitations of large language models, including outdated knowledge and hallucinations.\nHowever, retrievers struggle to capture relevance, especially for queries with complex information needs.\nRecent work has proposed to improve relevance modeling by having large language models actively involved in retrieval, i.e., to guide retrieval with generation.\nIn this paper, we show that strong performance can be achieved by a method we call Iter-RetGen, which synergizes retrieval and generation in an iterative manner:\na model's response to a task input shows what might be needed to finish the task, and thus can serve as an informative context for retrieving more relevant knowledge which in turn helps generate a better
response in another iteration.\nCompared with recent work which interleaves retrieval with generation when completing a single output, Iter-RetGen processes all retrieved knowledge as a whole and largely preserves the flexibility in generation without structural constraints.\nWe evaluate Iter-RetGen on multi-hop question answering, fact verification, and commonsense reasoning, and show that it can flexibly leverage parametric knowledge and non-parametric knowledge, and is superior to or competitive with state-of-the-art retrieval-augmented baselines while causing fewer overheads of retrieval and generation.\nWe can further improve performance via generation-augmented retrieval adaptation.", "keywords": "retrieval-augmented language model;prompting;retrieval", "primary_area": "", "supplementary_material": "", "author": "Zhihong Shao;Yeyun Gong;yelong shen;Minlie Huang;Nan Duan;Weizhu Chen", "authorids": "~Zhihong_Shao1;~Yeyun_Gong2;~yelong_shen1;~Minlie_Huang1;~Nan_Duan1;~Weizhu_Chen1", "gender": "M;M;;M;M;M", "homepage": ";;;http://coai.cs.tsinghua.edu.cn/hml;https://nanduan.github.io/;https://www.microsoft.com/en-us/research/people/wzchen/", "dblp": "247/5748;06/10400.html;;;;79/2536", "google_scholar": "PZy4HEIAAAAJ;piUkwMYAAAAJ;;https://scholar.google.com/citations?hl=zh-CN;Qaa6OxIAAAAJ;LG_E-4EAAAAJ", "or_profile": "~Zhihong_Shao1;~Yeyun_Gong2;~yelong_shen1;~Minlie_Huang1;~Nan_Duan1;~Weizhu_Chen1", "aff": "Tsinghua University;Microsoft;;Tsinghua University;Microsoft Research Asia;Microsoft GenAI", "aff_domain": "tsinghua.edu.cn;microsoft.com;;tsinghua.edu.cn;microsoft.com;microsoft.com", "position": "PhD student;Researcher;;Full Professor;Principal Researcher;Vice President", "bibtex": "@inproceedings{\nshao2023enhancing,\ntitle={Enhancing Retrieval-Augmented Large Language Models with Iterative Retrieval-Generation Synergy},\nauthor={Zhihong Shao and Yeyun Gong and yelong shen and Minlie Huang and Nan Duan and Weizhu Chen},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=QtOybganmT}\n}", "github": "", "project": "", "reviewers": "ij2Q;yJLF;vbix", "site": "https://openreview.net/forum?id=QtOybganmT", "pdf_size": 0, "rating": "2;2;2", "confidence": "4;4;4", "excitement": "3;3;3", "reproducibility": "3;4;4", "correctness": "4;3;3", "rating_avg": 2.0, "confidence_avg": 4.0, "excitement_avg": 3.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 10, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;", "linkedin": ";;;;;", "aff_unique_index": "0;1;0;1;1", "aff_unique_norm": "Tsinghua University;Microsoft", "aff_unique_dep": ";Microsoft Corporation", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.microsoft.com", "aff_unique_abbr": "THU;Microsoft", "aff_campus_unique_index": "1", "aff_campus_unique": ";Asia", "aff_country_unique_index": "0;1;0;0;1", "aff_country_unique": "China;United States" }, { "id": "QtyJZe9Sfz", "title": "When and Why Does Bias Mitigation Work?", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Neural models have been shown to exploit shallow surface features to perform language understanding tasks, rather than learning the deeper language understanding and reasoning skills that practitioners desire. 
Previous work has developed debiasing techniques to pressure models away from spurious features or artifacts in datasets, with the goal of having models instead learn useful, task-relevant representations. However, what do models actually learn as a result of such debiasing procedures? In this work, we evaluate three model debiasing strategies, and through a set of carefully designed tests we show how debiasing can actually increase the model's reliance on hidden biases, instead of learning robust features that help it solve a task. Further, we demonstrate how even debiasing models against all shallow features in a dataset may still not help models address NLP tasks. As a result, we suggest that debiasing existing models may not be sufficient for many language understanding tasks, and future work should consider new learning paradigms, to address complex challenges such as commonsense reasoning and inference.", "keywords": "debiasing;lexical biases;natural language understanding", "primary_area": "", "supplementary_material": "", "author": "Abhilasha Ravichander;Joe Stacey;Marek Rei", "authorids": "~Abhilasha_Ravichander2;~Joe_Stacey1;~Marek_Rei1", "gender": ";M;M", "homepage": "https://www.cs.cmu.edu/~aravicha/;;https://www.marekrei.com/", "dblp": "170/4795.html;263/2589;136/9233", "google_scholar": "6vLsKGsAAAAJ;QCHR82MAAAAJ;https://scholar.google.co.uk/citations?user=nNeD95EAAAAJ", "or_profile": "~Abhilasha_Ravichander2;~Joe_Stacey1;~Marek_Rei1", "aff": "Allen Institute for Artificial Intelligence;Apple;Imperial College London", "aff_domain": "allenai.org;apple.com;imperial.ac.uk", "position": "Postdoc;Intern;Associate Professor", "bibtex": "@inproceedings{\nravichander2023when,\ntitle={When and Why Does Bias Mitigation Work?},\nauthor={Abhilasha Ravichander and Joe Stacey and Marek Rei},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=QtyJZe9Sfz}\n}", "github": "", "project": "", "reviewers": "BTLE;FLhX;BvPV", "site": "https://openreview.net/forum?id=QtyJZe9Sfz", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;3;1", "excitement": "4;3;3", "reproducibility": "4;5;3", "correctness": "4;3;3", "rating_avg": 4.0, "confidence_avg": 2.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": "abhilasha-ravichander-57524958;joe-stacey-74572754/;marek-rei-2a462341/", "aff_unique_index": "0;1;2", "aff_unique_norm": "Allen Institute for Artificial Intelligence;Apple;Imperial College London", "aff_unique_dep": ";Apple Inc.;", "aff_unique_url": "https://allenai.org;https://www.apple.com;https://www.imperial.ac.uk", "aff_unique_abbr": "AI2;Apple;ICL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1", "aff_country_unique": "United States;United Kingdom" }, { "id": "Qu0OZXL29t", "title": "Centering the Margins: Outlier-Based Identification of Harmed Populations in Toxicity Detection", "track": "main", "status": "Long Main", "tldr": "", "abstract": "The impact of AI models on marginalized communities has traditionally been measured by identifying performance differences between specified demographic subgroups. Though this approach aims to center vulnerable groups, it risks obscuring patterns of harm faced by intersectional subgroups or shared across multiple groups. 
To address this, we draw on theories of marginalization from disability studies and related disciplines, which state that people farther from the norm face greater adversity, to consider the \"margins\" in the domain of toxicity detection. We operationalize the \"margins\" of a dataset by employing outlier detection to identify text about people with demographic attributes distant from the \"norm\". We find that model performance is consistently worse for demographic outliers, with mean squared error (MSE) between outliers and non-outliers up to 70.4% worse across toxicity types. It is also worse for text outliers, with a MSE up to 68.4% higher for outliers than non-outliers. We also find text and demographic outliers to be particularly susceptible to errors in the classification of severe toxicity and identity attacks. Compared to analysis of disparities using traditional demographic breakdowns, we find that our outlier analysis frequently surfaces greater harms faced by a larger, more intersectional group, which suggests that outlier analysis is particularly beneficial for identifying harms against those groups.", "keywords": "AI fairness;AI ethics;toxicity detection;hate speech detection;outlier analysis;marginalization;harm measurement", "primary_area": "", "supplementary_material": "", "author": "Vyoma Raman;Eve Fleisig;Dan Klein", "authorids": "~Vyoma_Raman1;~Eve_Fleisig1;~Dan_Klein1", "gender": ";F;", "homepage": ";https://www.efleisig.com;http://people.eecs.berkeley.edu/~klein/", "dblp": ";276/0223;", "google_scholar": ";NHlxXzwAAAAJ;", "or_profile": "~Vyoma_Raman1;~Eve_Fleisig1;~Dan_Klein1", "aff": ";University of California, Berkeley;University of California, Berkeley", "aff_domain": ";berkeley.edu;berkeley.edu", "position": ";PhD student;Full Professor", "bibtex": "@inproceedings{\nraman2023centering,\ntitle={Centering the Margins: Outlier-Based Identification of Harmed Populations in Toxicity Detection},\nauthor={Vyoma Raman and Eve Fleisig and Dan Klein},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=Qu0OZXL29t}\n}", "github": "", "project": "", "reviewers": "KWca;xD9v;SNHy", "site": "https://openreview.net/forum?id=Qu0OZXL29t", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;5;4", "excitement": "4;3;4", "reproducibility": "2;3;5", "correctness": "4;2;4", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.3333333333333335, "replies_avg": 10, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": ";eve-fleisig/;dan-klein/", "aff_unique_index": "0;0", "aff_unique_norm": "University of California, Berkeley", "aff_unique_dep": "", "aff_unique_url": "https://www.berkeley.edu", "aff_unique_abbr": "UC Berkeley", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Berkeley", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "Qv2CTIcCPJ", "title": "The language of prompting: What linguistic properties make a prompt successful?", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "The latest generation of LLMs can be prompted to achieve impressive zero-shot or few-shot performance in many NLP tasks. However, since performance is highly sensitive to the choice of prompts, considerable effort has been devoted to crowd-sourcing prompts or designing methods for prompt optimisation. 
Yet, we still lack a systematic understanding of how linguistic properties of prompts correlate with the task performance. In this work, we investigate how LLMs of different sizes, pre-trained and instruction-tuned, perform on prompts that are semantically equivalent, but vary in linguistic structure. We investigate both grammatical properties such as mood, tense, aspect and modality, as well as lexico-semantic variation through the use of synonyms. Our findings contradict the common assumption that LLMs achieve optimal performance on prompts which reflect language use in pretraining or instruction-tuning data. Prompts transfer poorly between datasets or models, and performance cannot generally be explained by perplexity, word frequency, word sense ambiguity or prompt length. Based on our results, we put forward a proposal for a more robust and comprehensive evaluation standard for prompting research.", "keywords": "prompting;evaluation;LLMs;zero-shot;robustness;instability;instruction-tuning", "primary_area": "", "supplementary_material": "", "author": "Alina Leidinger;Robert Van Rooij;Ekaterina Shutova", "authorids": "~Alina_Leidinger1;~Robert_Van_Rooij1;~Ekaterina_Shutova1", "gender": ";M;F", "homepage": ";https://www.uva.nl/profiel/r/o/r.a.m.vanrooij/r.a.m.vanrooij.html;https://www.shutova.org/", "dblp": ";51/303.html;33/8156", "google_scholar": ";https://scholar.google.nl/citations?user=J3b3yXYAAAAJ;jqOFBGoAAAAJ", "or_profile": "~Alina_Leidinger1;~Robert_Van_Rooij1;~Ekaterina_Shutova1", "aff": ";University of Amsterdam, University of Amsterdam;University of Amsterdam", "aff_domain": ";illc.uva.nl;uva.nl", "position": ";Full Professor;Associate Professor", "bibtex": "@inproceedings{\nleidinger2023the,\ntitle={The language of prompting: What linguistic properties make a prompt successful?},\nauthor={Alina Leidinger and Robert Van Rooij and Ekaterina Shutova},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=Qv2CTIcCPJ}\n}", "github": "", "project": "", "reviewers": "z3Ht;Nfib;4Bot", "site": "https://openreview.net/forum?id=Qv2CTIcCPJ", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;4", "excitement": "2;2;4", "reproducibility": "4;4;4", "correctness": "3;4;4", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 2.6666666666666665, "reproducibility_avg": 4.0, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": ";;", "aff_unique_index": "0;0", "aff_unique_norm": "University of Amsterdam", "aff_unique_dep": "", "aff_unique_url": "https://www.uva.nl", "aff_unique_abbr": "UvA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Netherlands" }, { "id": "QwejPcX96r", "title": "Large language models effectively leverage document-level context for literary translation, but critical errors persist", "track": "main", "status": "Reject", "tldr": "", "abstract": "Large language models (LLMs) are competitive with the state of the art on a wide range of sentence-level translation datasets. However, their ability to translate paragraphs and documents remains unexplored because evaluation in these settings is costly and difficult. 
\nWe show through a rigorous human evaluation that asking the Gpt-3.5 (text-davinci-003) LLM to translate an entire literary paragraph (e.g., from a novel) at once results in higher-quality translations than standard sentence-by-sentence translation across 18 linguistically-diverse language pairs (e.g., translating into and out of Japanese, Polish, and English). Our evaluation, which took approximately 350 hours of effort for annotation and analysis, is conducted by hiring translators fluent in both the source and target language and asking them to provide both span-level error annotations as well as preference judgments of which system's translations are better. We observe that discourse-level LLM translators commit fewer mistranslations, grammar errors, and stylistic inconsistencies than sentence-level approaches. With that said, critical errors still abound, including occasional content omissions, and a human translator's intervention remains necessary to ensure that the author's voice remains intact.\nWe publicly release our dataset and error annotations to spur future research on the evaluation of document-level literary translation.", "keywords": "machine translation;paragraph-level translation;literary translation;large language models", "primary_area": "", "supplementary_material": "", "author": "Marzena Karpinska;Mohit Iyyer", "authorids": "~Marzena_Karpinska1;~Mohit_Iyyer1", "gender": ";M", "homepage": ";http://cs.umass.edu/~miyyer", "dblp": ";148/9178", "google_scholar": ";rBVA5tcAAAAJ", "or_profile": "~Marzena_Karpinska1;~Mohit_Iyyer1", "aff": ";University of Massachusetts Amherst", "aff_domain": ";cs.umass.edu", "position": ";Assistant Professor", "bibtex": "@misc{\nkarpinska2023large,\ntitle={Large language models effectively leverage document-level context for literary translation, but critical errors persist},\nauthor={Marzena Karpinska and Mohit Iyyer},\nyear={2023},\nurl={https://openreview.net/forum?id=QwejPcX96r}\n}", "github": "", "project": "", "reviewers": "2yxk;KN7M;cWsQ", "site": "https://openreview.net/forum?id=QwejPcX96r", "pdf_size": 0, "rating": "1;1;1", "confidence": "2;4;3", "excitement": "3;2;4", "reproducibility": "2;2;3", "correctness": "3;2;4", "rating_avg": 1.0, "confidence_avg": 3.0, "excitement_avg": 3.0, "reproducibility_avg": 2.3333333333333335, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";", "linkedin": ";", "aff_unique_index": "0", "aff_unique_norm": "University of Massachusetts Amherst", "aff_unique_dep": "", "aff_unique_url": "https://www.umass.edu", "aff_unique_abbr": "UMass Amherst", "aff_campus_unique_index": "0", "aff_campus_unique": "Amherst", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "id": "Qyw4k4ohgr", "title": "Epsilon Sampling Rocks: Investigating Sampling Strategies for Minimum Bayes Risk Decoding for Machine Translation", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Recent advances in machine translation (MT) have shown that Minimum Bayes Risk (MBR) decoding can be a powerful alternative to beam search decoding, especially when combined with neural-based utility functions. However, the performance of MBR decoding depends heavily on how and how many candidates are sampled from the model. In this paper, we explore how different sampling approaches for generating candidate lists for MBR decoding affect performance. 
We evaluate popular sampling approaches, such as ancestral, nucleus, and top-k sampling. Based on our insights into their limitations, we experiment with the recently proposed epsilon-sampling approach, which prunes away all tokens with a probability smaller than epsilon, ensuring that each token in a sample receives a fair probability mass. Through extensive human evaluations, we demonstrate that MBR decoding based on epsilon-sampling significantly outperforms not only beam search decoding, but also MBR decoding with all other tested sampling methods across four language pairs.", "keywords": "machine translation;mbr decoding;decoding strategies;bleurt;automatic evaluation", "primary_area": "", "supplementary_material": "", "author": "Markus Freitag;Behrooz Ghorbani;Patrick Fernandes", "authorids": "~Markus_Freitag2;~Behrooz_Ghorbani1;~Patrick_Fernandes1", "gender": "M;;", "homepage": ";;https://coderpat.github.io", "dblp": "57/8503;162/0166;207/6964.html", "google_scholar": "https://scholar.google.com/citations?hl=en;;", "or_profile": "~Markus_Freitag2;~Behrooz_Ghorbani1;~Patrick_Fernandes1", "aff": "Google;Google;Instituto Superior T\u00e9cnico", "aff_domain": "google.com;google.com;tecnico.ulisboa.pt", "position": "Researcher;Researcher;PhD student", "bibtex": "@inproceedings{\nfreitag2023epsilon,\ntitle={Epsilon Sampling Rocks: Investigating Sampling Strategies for Minimum Bayes Risk Decoding for Machine Translation},\nauthor={Markus Freitag and Behrooz Ghorbani and Patrick Fernandes},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=Qyw4k4ohgr}\n}", "github": "", "project": "", "reviewers": "FrLm;b3eK;YyrA", "site": "https://openreview.net/forum?id=Qyw4k4ohgr", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;4;3", "excitement": "4;5;2", "reproducibility": "4;5;3", "correctness": "4;5;4", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.0, "correctness_avg": 4.333333333333333, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": "markus-freitag-7b17b4101/;;", "aff_unique_index": "0;0;1", "aff_unique_norm": "Google;Instituto Superior T\u00e9cnico", "aff_unique_dep": "Google;", "aff_unique_url": "https://www.google.com;https://www.ist.utl.pt", "aff_unique_abbr": "Google;IST", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;0;1", "aff_country_unique": "United States;Portugal" }, { "id": "R0XABYPVKI", "title": "Knowledge Corpus Error in Question Answering", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "Recent works in open-domain question answering (QA) have explored generating context passages from large language models (LLMs), replacing the traditional retrieval step in the QA pipeline. However, it is not well understood why generated passages can be more effective than retrieved ones. This study revisits the conventional formulation of QA and introduces the concept of $\\textit{knowledge corpus error}$. This error arises when the knowledge corpus used for retrieval is only a subset of the entire string space, potentially excluding more helpful passages that exist outside the corpus. LLMs may mitigate this shortcoming by generating passages in a larger space. 
We design an experiment that paraphrases human-annotated gold context using LLMs, allowing us to observe knowledge corpus error empirically. Our results across three QA benchmarks reveal increased performance (10\\% - 13\\%) when using paraphrased passages, indicating a signal for the existence of knowledge corpus error.", "keywords": "large language models;open-domain question answering;retrieval;qa;odqa", "primary_area": "", "supplementary_material": "", "author": "Yejoon Lee;Philhoon Oh;James Thorne", "authorids": "~Yejoon_Lee1;~Philhoon_Oh1;~James_Thorne1", "gender": "M;M;", "homepage": "https://yejoon-lee.github.io/;https://github.com/philhoonoh;https://jamesthorne.com", "dblp": "359/5791;;204/1380", "google_scholar": "lpsQnFkAAAAJ;;hao9RrgAAAAJ", "or_profile": "~Yejoon_Lee1;~Philhoon_Oh1;~James_Thorne1", "aff": "Seoul National University;Korea Advanced Institute of Science & Technology;KAIST", "aff_domain": "snu.ac.kr;kaist.edu;kaist.ac.kr", "position": "Undergrad student;MS student;Assistant Professor", "bibtex": "@inproceedings{\nlee2023knowledge,\ntitle={Knowledge Corpus Error in Question Answering},\nauthor={Yejoon Lee and Philhoon Oh and James Thorne},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=R0XABYPVKI}\n}", "github": "", "project": "", "reviewers": "JpPN;dTRR;bwTs", "site": "https://openreview.net/forum?id=R0XABYPVKI", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;4", "excitement": "3;4;2", "reproducibility": "4;4;3", "correctness": "2;3;2", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 3.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 2.3333333333333335, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": "leeyejoon/;;", "aff_unique_index": "0;1;1", "aff_unique_norm": "Seoul National University;Korea Advanced Institute of Science and Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.snu.ac.kr;https://www.kaist.ac.kr", "aff_unique_abbr": "SNU;KAIST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "South Korea" }, { "id": "R2PwXB08i4", "title": "WordNet Is All You Need: A Surprisingly Effective Unsupervised Method for Graded Lexical Entailment", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "We propose a simple unsupervised approach which exclusively relies on WordNet (Miller,1995) for predicting graded lexical entailment (GLE) in English. Inspired by the seminal work of Resnik (1995), our method models GLE as the sum of two information-theoretic scores: a symmetric semantic similarity score and an asymmetric specificity loss score, both exploiting the hierarchical synset structure of WordNet. Our approach also includes a simple disambiguation mechanism to handle polysemy in a given word pair. 
Despite its simplicity, our method achieves performance above the state of the art (Spearman \u03c1 = 0.75) on HyperLex (Vulic et al., 2017), the largest GLE dataset, outperforming all previous methods, including specialized word embeddings approaches that use WordNet as weak supervision.", "keywords": "Graded Lexical Entailment;WordNet", "primary_area": "", "supplementary_material": "", "author": "Joseph Renner;Pascal Denis;R\u00e9mi GILLERON", "authorids": "~Joseph_Renner1;~Pascal_Denis1;~R\u00e9mi_GILLERON1", "gender": ";M;M", "homepage": ";http://researchers.lille.inria.fr/~pdenis/;https://sites.google.com/view/remi-gilleron", "dblp": ";18/4078;http://dblp.uni-trier.de/pers/hd/g/Gilleron:R=eacute=mi.html", "google_scholar": "zNIVbeEAAAAJ;Y1nQ6eUAAAAJ;", "or_profile": "~Joseph_Renner1;~Pascal_Denis1;~R\u00e9mi_GILLERON1", "aff": "INRIA;INRIA;", "aff_domain": "inria.fr;inria.fr;", "position": "Researcher;Researcher;", "bibtex": "@inproceedings{\nrenner2023wordnet,\ntitle={WordNet Is All You Need: A Surprisingly Effective Unsupervised Method for Graded Lexical Entailment},\nauthor={Joseph Renner and Pascal Denis and R{\\'e}mi GILLERON},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=R2PwXB08i4}\n}", "github": "", "project": "", "reviewers": "6et5;rz8W;pJrc", "site": "https://openreview.net/forum?id=R2PwXB08i4", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;3", "excitement": "3;2;4", "reproducibility": "4;3;4", "correctness": "3;2;4", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.0, "replies_avg": 10, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0003-4121-6337;", "linkedin": "joseph-renner-501946b4/;;", "aff_unique_index": "0;0", "aff_unique_norm": "INRIA", "aff_unique_dep": "", "aff_unique_url": "https://www.inria.fr", "aff_unique_abbr": "INRIA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "France" }, { "id": "R4N3RNBNzJ", "title": "STINMatch: Semi-Supervised Semantic-Topological Iteration Network for Financial Risk Detection via News Label Diffusion", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Commercial news provide rich semantics and timely information for automated financial risk detection. However, unaffordable large-scale annotation as well as training data sparseness barrier the full exploitation of commercial news in risk detection. To address this problem, we propose a semi-supervised Semantic-Topological Iteration Network, STINMatch, along with a news-enterprise knowledge graph (NEKG) to endorse the risk detection enhancement. The proposed model incorporates a label correlation matrix and interactive consistency regularization techniques into the iterative joint learning framework of text and graph modules. The carefully designed framework takes full advantage of the labeled and unlabeled data as well as their interrelations, enabling deep label diffusion coordination between article-level semantics and label correlations following the topological structure. 
Extensive experiments demonstrate the superior effectiveness and generalization ability of STINMatch.", "keywords": "semi-supervised;text-graph joint learning;risk detection", "primary_area": "", "supplementary_material": "", "author": "Xurui Li;Yue Qin;Rui Zhu;tianqianjin lin;Yongming Fan;Yangyang Kang;Kaisong Song;Fubang Zhao;Changlong Sun;Haixu Tang;Xiaozhong Liu", "authorids": "~Xurui_Li3;~Yue_Qin1;~Rui_Zhu10;~tianqianjin_lin1;~Yongming_Fan1;~Yangyang_Kang1;~Kaisong_Song1;~Fubang_Zhao3;~Changlong_Sun2;~Haixu_Tang1;~Xiaozhong_Liu2", "gender": ";F;M;M;M;M;M;;M;M;M", "homepage": ";;https://www.zhurui.pro/bio;;https://www.cs.purdue.edu/people/graduate-students/fan322.html;;https://sites.google.com/site/kaisongsong;;;https://luddy.indiana.edu/contact/profile/?Haixu_Tang;https://www.wpi.edu/people/faculty/xliu14", "dblp": ";142/1169;;;;162/0109;30/11037;;https://dblp.uni-trier.de/pers/hd/s/Sun:Changlong;90/3951.html;11/6389.html", "google_scholar": ";;;;;https://scholar.google.com/citations?hl=zh-CN;Ms678voAAAAJ;;https://scholar.google.com/citations?;https://scholar.google.com.tw/citations?user=4Hywr5UAAAAJ;1BUByMcAAAAJ", "or_profile": "~Xurui_Li3;~Yue_Qin1;~Rui_Zhu10;~tianqianjin_lin1;~Yongming_Fan1;~Yangyang_Kang1;~Kaisong_Song1;~Fubang_Zhao3;~Changlong_Sun2;~Haixu_Tang1;~Xiaozhong_Liu2", "aff": ";Indiana University;Indiana University;Alibaba Group;Purdue University;Alibaba Group;Alibaba Group;;Alibaba Group;Indiana University;Worcester Polytechnic Institute", "aff_domain": ";iu.edu;iu.edu;alibaba-inc.com;purdue.edu;alibaba.com;alibaba-inc.com;;alibaba-inc.com;iu.edu;wpi.edu", "position": ";PhD student;PhD student;Intern;PhD student;Staff Algorithm Engineer;Algorithm Expert;;Researcher;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nli2023stinmatch,\ntitle={{STINM}atch: Semi-Supervised Semantic-Topological Iteration Network for Financial Risk Detection via News Label Diffusion},\nauthor={Xurui Li and Yue Qin and Rui Zhu and tianqianjin lin and Yongming Fan and Yangyang Kang and Kaisong Song and Fubang Zhao and Changlong Sun and Haixu Tang and Xiaozhong Liu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=R4N3RNBNzJ}\n}", "github": "", "project": "", "reviewers": "CCXt;BEjE;oisY;zFLA", "site": "https://openreview.net/forum?id=R4N3RNBNzJ", "pdf_size": 0, "rating": "4;4;4;4", "confidence": "3;3;4;3", "excitement": "3;3;3;4", "reproducibility": "3;4;4;4", "correctness": "3;4;4;4", "rating_avg": 4.0, "confidence_avg": 3.25, "excitement_avg": 3.25, "reproducibility_avg": 3.75, "correctness_avg": 3.75, "replies_avg": 13, "authors#_avg": 11, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;;0000-0002-5979-7769;;;0000-0001-8963-8155;", "linkedin": ";;;tianqianjin-lin-2a2900223/;;;;;;;", "aff_unique_index": "0;0;1;2;1;1;1;0;3", "aff_unique_norm": "Indiana University;Alibaba Group;Purdue University;Worcester Polytechnic Institute", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.indiana.edu;https://www.alibaba.com;https://www.purdue.edu;https://www.wpi.edu", "aff_unique_abbr": "IU;Alibaba;Purdue;WPI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;1;1;1;0;0", "aff_country_unique": "United States;China" }, { "id": "R4VfYDluYi", "title": "Learning Co-Speech Gesture for Multimodal Aphasia Type Detection", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Aphasia, a language disorder resulting from 
brain damage, requires accurate identification of specific aphasia types, such as Broca's and Wernicke's aphasia, for effective treatment. However, little attention has been paid to developing methods to detect different types of aphasia. Recognizing the importance of analyzing co-speech gestures for distinguishing aphasia types, we propose a multimodal graph neural network for aphasia type detection using speech and corresponding gesture patterns. By learning the correlation between the speech and gesture modalities for each aphasia type, our model can generate textual representations sensitive to gesture information, leading to accurate aphasia type detection. Extensive experiments demonstrate the superiority of our approach over existing methods, achieving state-of-the-art results (F1 84.2%). We also show that gesture features outperform acoustic features, highlighting the significance of gesture expression in detecting aphasia types. We provide the code for reproducibility purposes.", "keywords": "Aphasia;NLP;Applications;Speech;Multimodality", "primary_area": "", "supplementary_material": "", "author": "Daeun Lee;Sejung Son;Hyolim Jeon;Seungbae Kim;Jinyoung Han", "authorids": "~Daeun_Lee1;~Sejung_Son1;~Hyolim_Jeon1;~Seungbae_Kim1;~Jinyoung_Han2", "gender": ";F;F;M;M", "homepage": ";https://aaissj.github.io/;https://sites.google.com/g.skku.edu/hyolimjeon/gyfla;https://sites.google.com/site/sbkimcv/;http://dsail.skku.edu", "dblp": ";;;14/8398.html;94/7996.html", "google_scholar": ";https://scholar.google.com/citations?hl=en;;gFZlotAAAAAJ;https://scholar.google.co.kr/citations?user=4rkPSC8AAAAJ", "or_profile": "~Daeun_Lee1;~Sejung_Son1;~Hyolim_Jeon1;~Seungbae_Kim1;~Jinyoung_Han2", "aff": ";Sungkyunkwan University;Sungkyunkwan University;University of South Florida;Sungkyunkwan University", "aff_domain": ";skku.edu;skku.edu;usf.edu;skku.edu", "position": ";MS student;MS student;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nlee2023learning,\ntitle={Learning Co-Speech Gesture for Multimodal Aphasia Type Detection},\nauthor={Daeun Lee and Sejung Son and Hyolim Jeon and Seungbae Kim and Jinyoung Han},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=R4VfYDluYi}\n}", "github": "", "project": "", "reviewers": "xF2x;kFrs;q4Gj", "site": "https://openreview.net/forum?id=R4VfYDluYi", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;4;4", "excitement": "4;4;4", "reproducibility": "3;3;3", "correctness": "3;4;3", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 4.0, "reproducibility_avg": 3.0, "correctness_avg": 3.3333333333333335, "replies_avg": 10, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0000-0001-5667-3560;", "linkedin": ";sejung-son-39695a244/;;;", "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Sungkyunkwan University;University of South Florida", "aff_unique_dep": ";", "aff_unique_url": "https://www.skku.edu;https://www.usf.edu", "aff_unique_abbr": "SKKU;USF", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "South Korea;United States" }, { "id": "R4yb4m7Nus", "title": "Model-tuning Via Prompts Makes NLP Models Adversarially Robust", "track": "main", "status": "Long Main", "tldr": "", "abstract": "In recent years,\nNLP practitioners have converged\non the following practice:\n(i) import an off-the-shelf pretrained (masked) language 
model;\n(ii) append a multilayer perceptron atop the CLS token's hidden representation\n(with randomly initialized weights);\nand (iii) fine-tune the entire model on a downstream task (MLP-FT).\nThis \nprocedure \nhas \nproduced massive gains \non standard NLP benchmarks,\nbut these models remain brittle, even to \nmild adversarial perturbations.\nIn this work, we demonstrate surprising gains \nin adversarial robustness enjoyed by \nModel-tuning Via Prompts (MVP),\nan alternative method of adapting to downstream tasks.\nRather than appending an MLP head to make output prediction, MVP appends a prompt template to the input, and makes prediction via text infilling/completion.\nAcross 5 NLP datasets, 4 adversarial attacks, and 3 different models, \nMVP improves performance against adversarial \nsubstitutions by an average of 8% \nover standard methods and even outperforms \nadversarial training-based state-of-art defenses by 3.5%.\nBy combining MVP with adversarial training, \nwe achieve further improvements in adversarial robustness\nwhile maintaining performance on unperturbed examples. \nFinally, we conduct ablations to investigate \nthe mechanism underlying these gains.\nNotably, we find that the main causes of vulnerability of MLP-FT \ncan be attributed to the misalignment between pre-training and fine-tuning tasks, \nand the randomly initialized MLP parameters.", "keywords": "Natural Language Processing;Language Models;BERT;RoBERTa;Prompting;Adversarial Robustness", "primary_area": "", "supplementary_material": "", "author": "Mrigank Raman;Pratyush Maini;J Zico Kolter;Zachary Chase Lipton;Danish Pruthi", "authorids": "~Mrigank_Raman2;~Pratyush_Maini1;~J_Zico_Kolter1;~Zachary_Chase_Lipton1;~Danish_Pruthi1", "gender": "M;M;Unspecified;M;M", "homepage": "https://zuluzazu.github.io/;https://pratyushmaini.github.io/;http://zacklipton.com;https://danishpruthi.com/;http://www.zicokolter.com", "dblp": ";248/8071;;192/7349;67/2526", "google_scholar": ";;MN9Kfg8AAAAJ;JpSx3EMAAAAJ;UXh1I6UAAAAJ", "or_profile": "~Mrigank_Raman2;~Pratyush_Maini1;~Zachary_Chase_Lipton1;~Danish_Pruthi1;~Zico_Kolter1", "aff": "Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University;Indian Institute of Science, Bangalore ;Carnegie Mellon University", "aff_domain": "cmu.edu;cmu.edu;cmu.edu;iisc.ac.in;cmu.edu", "position": "MS student;PhD student;Assistant Professor;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nraman2023modeltuning,\ntitle={Model-tuning Via Prompts Makes {NLP} Models Adversarially Robust},\nauthor={Mrigank Raman and Pratyush Maini and J Zico Kolter and Zachary Chase Lipton and Danish Pruthi},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=R4yb4m7Nus}\n}", "github": "", "project": "", "reviewers": "QuR4;nbR2;CusV", "site": "https://openreview.net/forum?id=R4yb4m7Nus", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;4", "excitement": "3;4;3", "reproducibility": "4;5;4", "correctness": "4;4;4", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.333333333333333, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;", "linkedin": "mrigank-raman-3b9b42177/;;;;", "aff_unique_index": "0;0;0;1;0", "aff_unique_norm": "Carnegie Mellon University;Indian Institute of Science", "aff_unique_dep": ";", "aff_unique_url": 
"https://www.cmu.edu;https://www.iisc.ac.in", "aff_unique_abbr": "CMU;IISc", "aff_campus_unique_index": "1", "aff_campus_unique": ";Bangalore", "aff_country_unique_index": "0;0;0;1;0", "aff_country_unique": "United States;India" }, { "id": "R5NzXYY7S2", "title": "Modeling Legal Reasoning: LM Annotation at the Edge of Human Agreement", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Generative language models (LMs) are increasingly used for document class-prediction tasks and promise enormous improvements in cost and efficiency. Existing research often examines simple classification tasks, but the capability of LMs to classify on complex or specialized tasks is less well understood. We consider a highly complex task that is challenging even for humans: the classification of legal reasoning according to jurisprudential philosophy. Using a novel dataset of historical United States Supreme Court opinions annotated by a team of domain experts, we systematically test the performance of a variety of LMs. We find that generative models perform poorly when given instructions (i.e. prompts) equal to the instructions presented to human annotators through our codebook. Our strongest results derive from fine-tuning models on the annotated dataset; the best performing model is an in-domain model, LEGAL-BERT. We apply predictions from this fine-tuned model to study historical trends in jurisprudence, an exercise that both aligns with prominent qualitative historical accounts and points to areas of possible refinement in those accounts. Our findings generally sound a note of caution in the use of generative LMs on complex tasks without fine-tuning and point to the continued relevance of human annotation-intensive classification methods.", "keywords": "language modeling;annotation;legal reasoning;United States Supreme Court", "primary_area": "", "supplementary_material": "", "author": "Rosamond Elizabeth Thalken;Edward Stiglitz;David Mimno;Matthew Wilkens", "authorids": "~Rosamond_Elizabeth_Thalken1;~Edward_Stiglitz1;~David_Mimno1;~Matthew_Wilkens1", "gender": "F;M;M;M", "homepage": "https://rosamondthalken.com/;https://jlexcode.github.io/;https://mimno.infosci.cornell.edu/;https://mattwilkens.com", "dblp": ";;39/5487;222/8742", "google_scholar": ";kj6cAiQAAAAJ;uBFV6SUAAAAJ;FSBPSigAAAAJ", "or_profile": "~Rosamond_Elizabeth_Thalken1;~Edward_Stiglitz1;~David_Mimno1;~Matthew_Wilkens1", "aff": "Cornell University;Cornell University;Cornell University;Cornell University", "aff_domain": "cornell.edu;cornell.edu;cornell.edu;cornell.edu", "position": "PhD student;Full Professor;Associate Professor;Associate Professor", "bibtex": "@inproceedings{\nthalken2023modeling,\ntitle={Modeling Legal Reasoning: {LM} Annotation at the Edge of Human Agreement},\nauthor={Rosamond Elizabeth Thalken and Edward Stiglitz and David Mimno and Matthew Wilkens},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=R5NzXYY7S2}\n}", "github": "", "project": "", "reviewers": "TywZ;WWog;ikuS", "site": "https://openreview.net/forum?id=R5NzXYY7S2", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;4;4", "excitement": "4;4;4", "reproducibility": "5;4;4", "correctness": "4;4;5", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 4.0, "reproducibility_avg": 4.333333333333333, "correctness_avg": 4.333333333333333, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, 
"orcid": "0000-0001-6290-074X;;;0000-0001-6749-9318", "linkedin": ";;;", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Cornell University", "aff_unique_dep": "", "aff_unique_url": "https://www.cornell.edu", "aff_unique_abbr": "Cornell", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "R635gF7lXD", "title": "StructGPT: A General Framework for Large Language Model to Reason over Structured Data", "track": "main", "status": "Long Main", "tldr": "", "abstract": "In this paper, we aim to improve the reasoning ability of large language models (LLMs) over structured data in a unified way. Inspired by the studies on tool augmentation for LLMs, we develop an Iterative Reading-then-Reasoning (IRR) framework to solve question answering tasks based on structured data, called StructGPT. In this framework, we construct the specialized interfaces to collect relevant evidence from structured data (i.e., reading), and let LLMs concentrate on the reasoning task based on the collected information (i.e., reasoning). Specially, we propose an invoking-linearization-generation procedure to support LLMs in reasoning on the structured data with the help of the interfaces. By iterating this procedure with provided interfaces, our approach can gradually approach the target answers to a given query. Experiments conducted on three types of structured data show that StructGPT greatly improves the performance of LLMs, under the few-shot and zero-shot settings.", "keywords": "Large Language Model;Structured Data", "primary_area": "", "supplementary_material": "", "author": "Jinhao Jiang;Kun Zhou;zican Dong;KeMing Ye;Xin Zhao;Ji-Rong Wen", "authorids": "~Jinhao_Jiang1;~Kun_Zhou2;~zican_Dong1;~KeMing_Ye1;~Xin_Zhao10;~Ji-Rong_Wen1", "gender": ";M;M;M;M;M", "homepage": ";https://lancelot39.github.io/;;;https://gsai.ruc.edu.cn/addons/teacher/index/info.html?user_id=5&ruccode=20140041&ln=cn;https://gsai.ruc.edu.cn/english/jrwen", "dblp": "261/6942;48/3927-2.html;336/7105;;https://dblp.uni-trier.de/pid/52/8700.html;w/JRWen", "google_scholar": ";bmRJVjwAAAAJ;https://scholar.google.com/citations?hl=zh-CN;zGNU7b8AAAAJ;JNhNacoAAAAJ;tbxCHJgAAAAJ", "or_profile": "~Jinhao_Jiang1;~Kun_Zhou2;~zican_Dong1;~KeMing_Ye1;~Xin_Zhao10;~Ji-Rong_Wen1", "aff": "Renmin University of China;Renmin University of China;Harbin Institute of Technology;University of Electronic Science and Technology of China;Renmin University of China;Renmin University of China", "aff_domain": "ruc.edu.cn;ruc.edu.cn;hit.edu.cn;uestc.edu.cn;ruc.edu.cn;ruc.edu.cn", "position": "PhD student;PhD student;Undergrad student;Undergrad student;Full Professor;Full Professor", "bibtex": "@inproceedings{\njiang2023structgpt,\ntitle={Struct{GPT}: A General Framework for Large Language Model to Reason over Structured Data},\nauthor={Jinhao Jiang and Kun Zhou and zican Dong and KeMing Ye and Xin Zhao and Ji-Rong Wen},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=R635gF7lXD}\n}", "github": "", "project": "", "reviewers": "KDgm;1ymw;b2MA", "site": "https://openreview.net/forum?id=R635gF7lXD", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;5;3", "excitement": "3;3;3", "reproducibility": "3;5;4", "correctness": "3;5;5", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.0, "reproducibility_avg": 4.0, "correctness_avg": 4.333333333333333, "replies_avg": 11, 
"authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;0000-0002-8333-6196;0000-0002-9777-9676", "linkedin": ";;;;;", "aff_unique_index": "0;0;1;2;0;0", "aff_unique_norm": "Renmin University of China;Harbin Institute of Technology;University of Electronic Science and Technology of China", "aff_unique_dep": ";;", "aff_unique_url": "http://www.ruc.edu.cn;http://www.hit.edu.cn/;https://www.uestc.edu.cn", "aff_unique_abbr": "RUC;HIT;UESTC", "aff_campus_unique_index": "1", "aff_campus_unique": ";Harbin", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "R7Op9CHdPz", "title": "Causal Reasoning through Two Cognition Layers for Improving Generalization in Visual Question Answering", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Generalization in Visual Question Answering (VQA) requires models to answer questions about images with contexts beyond the training distribution. Existing attempts primarily refine unimodal aspects, overlooking enhancements in multimodal aspects. Besides, diverse interpretations of the input lead to various modes of answer generation, highlighting the role of causal reasoning between interpreting and answering steps in VQA. Through this lens, we propose Cognitive pathways VQA (CopVQA) improving the multimodal predictions by emphasizing causal reasoning factors. CopVQA first operates a pool of pathways that capture diverse causal reasoning flows through interpreting and answering stages. Mirroring human cognition, we decompose the responsibility of each stage into distinct experts and a cognition-enabled component (CC). The two CCs strategically execute one expert for each stage at a time. Finally, we prioritize answer predictions governed by pathways involving both CCs while disregarding answers produced by either CC, thereby emphasizing causal reasoning and supporting generalization. Our experiments on real-life and medical data consistently verify that CopVQA improves VQA performance and generalization across baselines and domains. 
Notably, CopVQA achieves a new state-of-the-art (SOTA) on the PathVQA dataset and comparable accuracy to the current SOTA on VQA-CPv2, VQAv2, and VQA-RAD, with one-fourth of the model size.", "keywords": "visual question answering;generalization;causal reasoning;human cognition", "primary_area": "", "supplementary_material": "", "author": "Trang Nguyen;Naoaki Okazaki", "authorids": "~Trang_Nguyen1;~Naoaki_Okazaki2", "gender": "F;M", "homepage": "https://baileytrang.github.io/;http://www.chokkan.org/", "dblp": "290/5980;49/4018", "google_scholar": "-yVY9T4AAAAJ;", "or_profile": "~Trang_Nguyen1;~Naoaki_Okazaki2", "aff": "Mila Institute;Tokyo Institute of Technology", "aff_domain": "mila.quebec;titech.ac.jp", "position": "Intern;Full Professor", "bibtex": "@inproceedings{\nnguyen2023causal,\ntitle={Causal Reasoning through Two Cognition Layers for Improving Generalization in Visual Question Answering},\nauthor={Trang Nguyen and Naoaki Okazaki},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=R7Op9CHdPz}\n}", "github": "", "project": "", "reviewers": "YXHL;ecEB;1oDT;UGPV", "site": "https://openreview.net/forum?id=R7Op9CHdPz", "pdf_size": 0, "rating": "5;5;5;5", "confidence": "2;3;4;3", "excitement": "4;4;3;4", "reproducibility": "3;3;2;4", "correctness": "4;4;3;4", "rating_avg": 5.0, "confidence_avg": 3.0, "excitement_avg": 3.75, "reproducibility_avg": 3.0, "correctness_avg": 3.75, "replies_avg": 14, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";", "linkedin": "baileytrang/;", "aff_unique_index": "0;1", "aff_unique_norm": "Mila Institute for Quantum Computing;Tokyo Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://mila.quebec;https://www.titech.ac.jp", "aff_unique_abbr": "Mila;Titech", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "Canada;Japan" }, { "id": "R7f5euZ9RA", "title": "Ranking LLM-Generated Loop Invariants for Program Verification", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "Synthesizing inductive loop invariants is fundamental to automating program verification. In this work we observe that Large Language Models (such as {gpt-3.5} or {gpt-4}) are capable of synthesizing loop invariants for a class of programs in a 0-shot setting, yet require several samples to generate the correct invariants. This can lead to a large number of calls to a program verifier to establish an invariant. To address this issue, we propose a {re-ranking} approach for the generated results of LLMs. We have designed a ranker that can distinguish between correct inductive invariants and incorrect attempts based on the problem definition. The ranker is optimized as a contrastive ranker. 
Experimental results demonstrate that this re-ranking mechanism significantly improves the ranking of correct invariants among the generated candidates, leading to a notable reduction in the number of calls to a verifier.", "keywords": "Large Language Model;Loop Invariant Synthesis;Re-ranking", "primary_area": "", "supplementary_material": "", "author": "Saikat Chakraborty;Shuvendu K Lahiri;Sarah Fakhoury;Akash Lal;Madanlal Musuvathi;Aseem Rastogi;Aditya Senthilnathan;Rahul Sharma;Nikhil Swamy", "authorids": "~Saikat_Chakraborty1;~Shuvendu_K_Lahiri1;~Sarah_Fakhoury1;~Akash_Lal1;~Madanlal_Musuvathi1;~Aseem_Rastogi1;~Aditya_Senthilnathan1;~Rahul_Sharma5;~Nikhil_Swamy1", "gender": "M;M;F;;M;M;M;;", "homepage": "https://saikatc.info;https://www.microsoft.com/en-us/research/people/shuvendu/;https://www.microsoft.com/en-us/research/people/sfakhoury/;https://www.microsoft.com/en-us/research/people/akashl/;;https://www.microsoft.com/en-us/research/people/aseemr/;https://adityanathan.github.io;;", "dblp": "137/5220;32/2903.html;;27/1008.html;95/6578;;;22/846-1;11/5568", "google_scholar": "Hl_6OwwAAAAJ;https://scholar.google.com/citations?hl=en;I_U63_AAAAAJ;https://scholar.google.com.sg/citations?user=oaHK7IQAAAAJ;;;uHURUYUAAAAJ;;", "or_profile": "~Saikat_Chakraborty1;~Shuvendu_K_Lahiri1;~Sarah_Fakhoury1;~Akash_Lal1;~Madanlal_Musuvathi1;~Aseem_Rastogi1;~Aditya_Senthilnathan1;~Rahul_Sharma5;~Nikhil_Swamy1", "aff": "Microsoft Research, Redmond, WA, USA;Microsoft Research;Microsoft Research;Microsoft Research;;Microsoft Research;Microsoft Research;Microsoft;", "aff_domain": "research.microsoft.com;research.microsoft.com;research.microsoft.com;research.microsoft.com;;research.microsoft.com;research.microsoft.com;microsoft.com;", "position": "Senior Researcher;Principal Researcher;Postdoc;Principal Researcher;;Researcher;Intern;Principal Researcher;", "bibtex": "@inproceedings{\nchakraborty2023ranking,\ntitle={Ranking {LLM}-Generated Loop Invariants for Program Verification},\nauthor={Saikat Chakraborty and Shuvendu K Lahiri and Sarah Fakhoury and Akash Lal and Madanlal Musuvathi and Aseem Rastogi and Aditya Senthilnathan and Rahul Sharma and Nikhil Swamy},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=R7f5euZ9RA}\n}", "github": "", "project": "", "reviewers": "p5MA;P8En;ZsSr", "site": "https://openreview.net/forum?id=R7f5euZ9RA", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;3;4", "excitement": "3;3;4", "reproducibility": "4;3;4", "correctness": "3;3;4", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 10, "authors#_avg": 9, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-6889-7171;;;0009-0002-4359-9378;;;;;", "linkedin": "saikatch107/;shuvendu-lahiri-9a35151/;;;;;;;", "aff_unique_index": "0;0;0;0;0;0;0", "aff_unique_norm": "Microsoft", "aff_unique_dep": "Microsoft Research", "aff_unique_url": "https://www.microsoft.com/en-us/research", "aff_unique_abbr": "MSR", "aff_campus_unique_index": "0", "aff_campus_unique": "Redmond;", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "RAtrnAtAsM", "title": "LEGO: A Multi-agent Collaborative Framework with Role-playing and Iterative Feedback for Causality Explanation Generation", "track": "main", "status": "Long Findings", "tldr": "", "abstract": 
"Causality Explanation Generation refers to generate an explanation in natural language given an initial cause-effect pair. It demands rigorous explicit rationales to demonstrate the acquisition of implicit commonsense knowledge, which is unlikely to be easily memorized, making it challenging for large language models since they are often suffering from spurious causal associations when they encounter the content that does not exist in their memory. In this work, we introduce LEGO, a Multi-agent Collaborative Framework with Role-playing and Iterative Feedback for causality explanation generation. Specifically, we treat LLM as character malleable LEGO block and utilize role-playing to assign specific roles to five LLMs. We firstly devise a Fine-grained World Knowledge Integration Module to augment information about tasks for alleviating the phenomenon of spurious causal associations. Then, we leverage an Iterative Feedback and Refinement Module to improve the generated explanation by multi-aspect feedback. Extensive experiments on widely used WIKIWHY and e-CARE datasets show the superiority of our multi-agent framework in terms of reasoning about the causality among cause and effect.", "keywords": "Causality explanation generation; Commonsense reasoning; Large language model", "primary_area": "", "supplementary_material": "", "author": "Zhitao He;Pengfei Cao;Yubo Chen;Kang Liu;ruopeng li;Mengshu Sun;Jun Zhao", "authorids": "~Zhitao_He1;~Pengfei_Cao1;~Yubo_Chen1;~Kang_Liu1;~ruopeng_li1;~Mengshu_Sun2;~Jun_Zhao4", "gender": "M;;M;M;M;F;M", "homepage": ";https://cpf-nlpr.github.io/;http://www.nlpr.ia.ac.cn/cip/yubochen/index.html;http://www.nlpr.ia.ac.cn/cip/~liukang/index.html;;;http://nlpr-web.ia.ac.cn/cip/english/~junzhao/index.html", "dblp": ";182/7941;https://dblp.uni-trier.de/pid/90/7879.html;42/4903.html;;;https://dblp.uni-trier.de/pid/47/2026-1.html", "google_scholar": "ULvoYXgAAAAJ;lP5_LJIAAAAJ;https://scholar.google.com.hk/citations?user=9z7GPxIAAAAJ;DtZCfl0AAAAJ;;https://scholar.google.com.hk/citations?view_op=list_works;https://scholar.google.com.hk/citations?user=HljRttwAAAAJ", "or_profile": "~Zhitao_He1;~Pengfei_Cao1;~Yubo_Chen1;~Kang_Liu1;~ruopeng_li1;~Mengshu_Sun2;~Jun_Zhao4", "aff": "Institute of Automation, Chinese Academy of Sciences;Institute of Automation, Chinese Academy of Sciences;Institute of automation, Chinese academy of science;Institute of Automation, Chinese Academy of Sciences;Ant Group;antgroup;Institute of automation, Chinese academy of science", "aff_domain": "ia.cas.cn;ia.ac.cn;nlpr.ia.ac.cn;ia.ac.cn;antgroup.com;antgroup.com;nlpr.ia.ac.cn", "position": "MS student;PhD student;Associate Professor;Professor;Researcher;Researcher;Full Professor", "bibtex": "@inproceedings{\nhe2023lego,\ntitle={{LEGO}: A Multi-agent Collaborative Framework with Role-playing and Iterative Feedback for Causality Explanation Generation},\nauthor={Zhitao He and Pengfei Cao and Yubo Chen and Kang Liu and ruopeng li and Mengshu Sun and Jun Zhao},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=RAtrnAtAsM}\n}", "github": "", "project": "", "reviewers": "dYrk;3dKC;8kHz", "site": "https://openreview.net/forum?id=RAtrnAtAsM", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;3;4", "excitement": "4;2;4", "reproducibility": "4;3;4", "correctness": "4;3;3", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.6666666666666665, 
"correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0009-0003-3317-1260;;;;0000-0003-3329-2348;;", "linkedin": ";;;;ruopengli/;;", "aff_unique_index": "0;0;0;0;1;1;0", "aff_unique_norm": "Chinese Academy of Sciences;Ant Group", "aff_unique_dep": "Institute of Automation;", "aff_unique_url": "http://www.ia.cas.cn;https://www.antgroup.com", "aff_unique_abbr": "CAS;Ant Group", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "RE4oyAdAvM", "title": "Domain Adaptation for Conversational Query Production with the RAG Model Feedback", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Conversational query production is an emerging fundamental task for the dialogue system, where search queries are generated to explore the vast and continually updating knowledge from a search engine. To accelerate this line of research, previous studies have released several datasets with human-annotated search queries. However, the limited annotations still can not cover conversations of various domains. To solve this challenge, we propose a novel domain adaptation framework. It is inspired by a weakly supervised learning algorithm from previous work that guides a model using reinforcement learning with BM25 scores as feedback. Though effective, it is fragile facing noisy content on webpages from a commercial search engine and variance in conversations because of ignoring deep semantic information of dialogue contexts. Thus, we improve the algorithm by taking the advance of retrieval-augmented generation (RAG) and exploring several practical techniques such as knowledge distillation for stable training. We conduct experiments in multiple settings across different languages. 
Guided by the RAG model feedback, our model is more robust and performs significantly better especially in a more challenging setting over strong baselines.", "keywords": "conversational query production;knowledge-aided dialogue system;text generation", "primary_area": "", "supplementary_material": "", "author": "Ante Wang;Linfeng Song;Ge Xu;Jinsong Su", "authorids": "~Ante_Wang1;~Linfeng_Song1;~Ge_Xu1;~Jinsong_Su1", "gender": ";M;M;M", "homepage": "https://freesunshine0316.github.io/;;https://cdmc.xmu.edu.cn/info/1010/1054.htm;", "dblp": "136/3610;06/8656;05/9013;268/1405", "google_scholar": "yWZdmLYAAAAJ;;;", "or_profile": "~Linfeng_Song1;~Ge_Xu1;~Jinsong_Su1;~Wang_Ante1", "aff": "Tencent AI Lab;College of Computer and Control Engineering, Minjiang University;Xiamen University;Xiamen University", "aff_domain": "tencent.com;mju.edu.cn;xmu.edu.cn;xmu.edu.cn", "position": "Researcher;Full Professor;Researcher;PhD student", "bibtex": "@inproceedings{\nwang2023domain,\ntitle={Domain Adaptation for Conversational Query Production with the {RAG} Model Feedback},\nauthor={Ante Wang and Linfeng Song and Ge Xu and Jinsong Su},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=RE4oyAdAvM}\n}", "github": "", "project": "", "reviewers": "HE7F;NqYR;VdLz", "site": "https://openreview.net/forum?id=RE4oyAdAvM", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;3;4", "excitement": "3;4;3", "reproducibility": "3;3;3", "correctness": "3;3;3", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.0, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;", "linkedin": ";;;", "aff_unique_index": "0;1;2;2", "aff_unique_norm": "Tencent;Minjiang University;Xiamen University", "aff_unique_dep": "Tencent AI Lab;College of Computer and Control Engineering;", "aff_unique_url": "https://ai.tencent.com;http://www.mju.edu.cn;https://www.xmu.edu.cn", "aff_unique_abbr": "Tencent AI Lab;;XMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "RGmQOhSGp0", "title": "Beneath the Surface: Unveiling Harmful Memes with Multimodal Reasoning Distilled from Large Language Models", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "The age of social media is rife with memes. Understanding and detecting harmful memes pose a significant challenge due to their implicit meaning that is not explicitly conveyed through the surface text and image. However, existing harmful meme detection approaches only recognize superficial harm-indicative signals in an end-to-end classification manner but ignore in-depth cognition of the meme text and image. In this paper, we attempt to detect harmful memes based on advanced reasoning over the interplay of multimodal information in memes. Inspired by the success of Large Language Models (LLMs) on complex reasoning, we first conduct abductive reasoning with LLMs. Then we propose a novel generative framework to learn reasonable thoughts from LLMs for better multimodal fusion and lightweight fine-tuning, which consists of two training stages: 1) Distill multimodal reasoning knowledge from LLMs; and 2) Fine-tune the generative framework to infer harmfulness. 
Extensive experiments conducted on three meme datasets demonstrate that our proposed approach achieves superior performance than state-of-the-art methods on the harmful meme detection task.", "keywords": "Harmful meme detection;multimodal reasoning;knowledge distillation;large language models", "primary_area": "", "supplementary_material": "", "author": "Hongzhan Lin;Ziyang Luo;Jing Ma;Long Chen", "authorids": "~Hongzhan_Lin1;~Ziyang_Luo2;~Jing_Ma4;~Long_Chen8", "gender": "M;M;F;M", "homepage": "https://daniellin97.github.io;https://chiyeunglaw.github.io/;https://majingcuhk.github.io/;https://zjuchenlong.github.io/", "dblp": "292/1751-1;;96/6129-4.html;64/5725-16", "google_scholar": "https://scholar.google.com.hk/citations?user=hOF1SLoAAAAJ;VI8NeJEAAAAJ;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com.sg/citations?user=-gtmMpIAAAAJ", "or_profile": "~Hongzhan_Lin1;~Ziyang_Luo2;~Jing_Ma4;~Long_Chen8", "aff": "Hong Kong Baptist University;Microsoft;Hong Kong Baptist University;Columbia University", "aff_domain": "hkbu.edu.hk;microsoft.com;hkbu.edu.hk;columbia.edu", "position": "PhD student;Intern;Assistant Professor;Postdoc", "bibtex": "@inproceedings{\nlin2023beneath,\ntitle={Beneath the Surface: Unveiling Harmful Memes with Multimodal Reasoning Distilled from Large Language Models},\nauthor={Hongzhan Lin and Ziyang Luo and Jing Ma and Long Chen},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=RGmQOhSGp0}\n}", "github": "", "project": "", "reviewers": "3phz;f6a5;ue2E", "site": "https://openreview.net/forum?id=RGmQOhSGp0", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;5;4", "excitement": "3;2;3", "reproducibility": "3;4;4", "correctness": "4;3;4", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 2.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.6666666666666665, "replies_avg": 10, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-4111-8334;;;0000-0001-6148-9709", "linkedin": ";ziyang-luo-681a17192/;;", "aff_unique_index": "0;1;0;2", "aff_unique_norm": "Hong Kong Baptist University;Microsoft;Columbia University", "aff_unique_dep": ";Microsoft Corporation;", "aff_unique_url": "https://www.hkbu.edu.hk;https://www.microsoft.com;https://www.columbia.edu", "aff_unique_abbr": "HKBU;Microsoft;Columbia", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;1;0;1", "aff_country_unique": "China;United States" }, { "id": "RJRCWXGtds", "title": "Understanding Computational Models of Semantic Change: New Insights from the Speech Community", "track": "main", "status": "Short Main", "tldr": "", "abstract": "We investigate the descriptive relevance of widely used semantic change models in linguistic descriptions of present-day speech communities. We focus on the sociolinguistic issue of contact-induced semantic shifts in Quebec English, and analyze 40 target words using type-level and token-level word embeddings, empirical linguistic properties, and \u2013 crucially \u2013 acceptability ratings and qualitative remarks by 15 speakers from Montreal. Our results confirm the overall relevance of the computational approaches, but also highlight practical issues and the complementary nature of different semantic change estimates. 
To our knowledge, this is the first study to substantively engage with the speech community being described using semantic change models.", "keywords": "semantic change detection;semantic shifts;word embeddings;BERT;language contact", "primary_area": "", "supplementary_material": "", "author": "Filip Miletic;Anne PRZEWOZNY;Ludovic Tanguy", "authorids": "~Filip_Miletic1;~Anne_PRZEWOZNY1;~Ludovic_Tanguy1", "gender": "M;F;M", "homepage": ";https://clle.univ-tlse2.fr/accueil/annuaire/anne-przewozny-desriaux#/;http://w3.erss.univ-tlse2.fr/membre/tanguy/index-en.html", "dblp": "00/1182-2;;91/1807", "google_scholar": "https://scholar.google.co.uk/citations?user=w_abOCkAAAAJ;https://scholar.google.com/citations?hl=fr;https://scholar.google.fr/citations?user=7keS28oAAAAJ", "or_profile": "~Filip_Miletic1;~Anne_PRZEWOZNY1;~Ludovic_Tanguy1", "aff": "University of Stuttgart;Universit\u00e9 de Toulouse;University of Toulouse", "aff_domain": "ims.uni-stuttgart.de;univ-toulouse.fr;univ-tlse2.fr", "position": "Postdoc;Full Professor;Full Professor", "bibtex": "@inproceedings{\nmiletic2023understanding,\ntitle={Understanding Computational Models of Semantic Change: New Insights from the Speech Community},\nauthor={Filip Miletic and Anne PRZEWOZNY and Ludovic Tanguy},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=RJRCWXGtds}\n}", "github": "", "project": "", "reviewers": "TUJP;a31m;3VVP", "site": "https://openreview.net/forum?id=RJRCWXGtds", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;5;4", "excitement": "4;3;4", "reproducibility": "4;4;4", "correctness": "4;3;3", "rating_avg": 5.0, "confidence_avg": 4.333333333333333, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0003-1147-196X;;", "linkedin": ";;", "aff_unique_index": "0;1;2", "aff_unique_norm": "University of Stuttgart;Universit\u00e9 de Toulouse;University of Toulouse", "aff_unique_dep": ";;", "aff_unique_url": "https://www.uni-stuttgart.de;https://www.univ-toulouse.fr;https://www.univ-toulouse.fr", "aff_unique_abbr": "USTuttgart;UT;UT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1", "aff_country_unique": "Germany;France" }, { "id": "RJq3hJlK6w", "title": "Example-based Hypernetworks for Multi-source Adaptation to Unseen Domains", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "As Natural Language Processing (NLP) algorithms continually achieve new milestones, out-of-distribution generalization remains a significant challenge. This paper addresses the issue of multi-source adaptation for unfamiliar domains: We leverage labeled data from multiple source domains to generalize to unknown target domains at training. Our innovative framework employs example-based Hypernetwork adaptation: a T5 encoder-decoder initially generates a unique signature from an input example, embedding it within the source domains' semantic space. This signature is subsequently utilized by a Hypernetwork to generate the task classifier's weights. In an advanced version, the signature also enriches the input example's representation. We evaluated our method across two tasks\u2014sentiment classification and natural language inference\u2014in 29 adaptation scenarios, where it outpaced established algorithms. 
We also compare our finetuned architecture to few-shot GPT-3, demonstrating its effectiveness in essential use cases. To the best of our knowledge, this marks the first application of Hypernetworks to the adaptation for unknown domains.", "keywords": "Domain adaptation;our of distribution;cross-lingual;hypernetworks;prompting", "primary_area": "", "supplementary_material": "", "author": "Tomer Volk;Eyal Ben-David;Ohad Amosy;Gal Chechik;Roi Reichart", "authorids": "~Tomer_Volk1;~Eyal_Ben-David1;~Ohad_Amosy1;~Gal_Chechik1;~Roi_Reichart1", "gender": "M;M;;;M", "homepage": ";https://eyalbd2.github.io/;;https://chechiklab.biu.ac.il/~gal/;https://roireichart.com/", "dblp": "317/5308;234/9089;;c/GalChechik;96/5429", "google_scholar": "DsbXW88AAAAJ;ArqbkI4AAAAJ;;Wk2gAZUAAAAJ;https://scholar.google.co.il/citations?user=xXJIsh4AAAAJ", "or_profile": "~Tomer_Volk1;~Eyal_Ben-David1;~Ohad_Amosy1;~Gal_Chechik1;~Roi_Reichart1", "aff": ";Technion - Israel Institute of Technology, Technion;Bar Ilan University, Technion;NVIDIA;Technion, Israel Institute of Technology", "aff_domain": ";technion.ac.il;biu.ac.il;nvidia.com;technion.ac.il", "position": ";PhD student;PhD student;Principal Researcher;Associate Professor", "bibtex": "@inproceedings{\nvolk2023examplebased,\ntitle={Example-based Hypernetworks for Multi-source Adaptation to Unseen Domains},\nauthor={Tomer Volk and Eyal Ben-David and Ohad Amosy and Gal Chechik and Roi Reichart},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=RJq3hJlK6w}\n}", "github": "", "project": "", "reviewers": "suhr;4JUK;StMx;cySR", "site": "https://openreview.net/forum?id=RJq3hJlK6w", "pdf_size": 0, "rating": "4;4;4;4", "confidence": "3;3;3;2", "excitement": "3;3;3;3", "reproducibility": "3;4;3;4", "correctness": "2;3;3;2", "rating_avg": 4.0, "confidence_avg": 2.75, "excitement_avg": 3.0, "reproducibility_avg": 3.5, "correctness_avg": 2.5, "replies_avg": 14, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0000-0001-9164-5303;", "linkedin": ";eyal-bd/;;;roi-reichart-ba2a8a7/", "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Technion - Israel Institute of Technology;Bar-Ilan University;NVIDIA;Israel Institute of Technology", "aff_unique_dep": ";;NVIDIA Corporation;", "aff_unique_url": "https://www.technion.ac.il;https://www.biu.ac.il;https://www.nvidia.com;https://www.technion.ac.il/en/", "aff_unique_abbr": "Technion;BIU;NVIDIA;Technion", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "Israel;United States" }, { "id": "RKqtOoMC1M", "title": "Multilingual \\textit{k}-Nearest-Neighbor Machine Translation", "track": "main", "status": "Short Main", "tldr": "", "abstract": "\\textit{k}-nearest-neighbor machine translation has demonstrated remarkable improvements in machine translation quality by creating a datastore of cached examples. However, these improvements have been limited to high-resource language pairs, with large datastores, and remain a challenge for low-resource languages. In this paper, we address this issue by combining representations from multiple languages into a single datastore. Our results consistently demonstrate substantial improvements not only in low-resource translation quality (up to $+3.6$ BLEU), but also for high-resource translation quality (up to $+0.5$ BLEU). 
Our experiments show that it is possible to create multilingual datastores that are a quarter of the size, achieving a 5.3x speed improvement, by using linguistic similarities for datastore creation.\\footnote{We will release our code upon acceptance.}", "keywords": "multilingual machine translation;semi-parametric;kNN-MT", "primary_area": "", "supplementary_material": "", "author": "David Stap;Christof Monz", "authorids": "~David_Stap1;~Christof_Monz1", "gender": "M;M", "homepage": "https://davidstap.github.io;https://staff.fnwi.uva.nl/c.monz/", "dblp": ";m/ChristofMonz", "google_scholar": "u7c1llgAAAAJ;0r3PWLQAAAAJ", "or_profile": "~David_Stap1;~Christof_Monz1", "aff": "University of Amsterdam;University of Amsterdam, University of Amsterdam", "aff_domain": "uva.nl;ivi.uva.nl", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\nstap2023multilingual,\ntitle={Multilingual {\\textbackslash}textit\\{k\\}-Nearest-Neighbor Machine Translation},\nauthor={David Stap and Christof Monz},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=RKqtOoMC1M}\n}", "github": "", "project": "", "reviewers": "aszQ;oA3E;oGvF", "site": "https://openreview.net/forum?id=RKqtOoMC1M", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;5;4", "excitement": "3;4;3", "reproducibility": "3;3;5", "correctness": "4;4;4", "rating_avg": 4.0, "confidence_avg": 4.333333333333333, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";", "linkedin": ";", "aff_unique_index": "0;0", "aff_unique_norm": "University of Amsterdam", "aff_unique_dep": "", "aff_unique_url": "https://www.uva.nl", "aff_unique_abbr": "UvA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Netherlands" }, { "id": "RLmpJ4xol2", "title": "Learning Preference Model for LLMs via Automatic Preference Data Generation", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Despite the advanced capacities of the state-of-the-art large language models (LLMs), they suffer from issues of hallucination, stereotype, etc. Preference models play an important role in LLM alignment, yet training preference models predominantly rely on human-annotated data. This reliance limits their versatility and scalability. In this paper, we propose learning the preference model for LLMs via automatic preference data generation (AutoPM). Our approach involves both In-Breadth Data Generation, which elicits pairwise preference data from LLMs following the helpful-honest-harmless (HHH) criteria, and In-Depth Data Generation, which enriches the dataset with responses spanning a wide quality range. With HHH-guided preference data, our approach simultaneously enables the LLMs to learn human preferences and align with human values. 
Quantitative assessments on five benchmark datasets demonstrate the reliability and potential of AutoPM, pointing out a more general and scalable way to improve LLM performance.", "keywords": "Large Language Model;Reward Model;Preference Model", "primary_area": "", "supplementary_material": "", "author": "Shijia Huang;Jianqiao Zhao;Yanyang Li;Liwei Wang", "authorids": "~Shijia_Huang1;~Jianqiao_Zhao1;~Yanyang_Li1;~Liwei_Wang6", "gender": "M;M;M;M", "homepage": ";;;https://lwwangcse.github.io/", "dblp": "211/5807;;208/4741;47/1798-9", "google_scholar": ";ufLiZ4QAAAAJ;https://scholar.google.com/citations?hl=zh-CN;qnbdnZEAAAAJ", "or_profile": "~Shijia_Huang1;~Jianqiao_Zhao1;~Yanyang_Li1;~Liwei_Wang6", "aff": "The Chinese University of Hong Kong;The Chinese University of Hong Kong;SenseTime;The Chinese University of Hong Kong", "aff_domain": "cuhk.edu.hk;cuhk.edu.hk;sensetime.com;cuhk.edu.hk", "position": "PhD student;PhD student;Intern;Assistant Professor", "bibtex": "@inproceedings{\nhuang2023learning,\ntitle={Learning Preference Model for {LLM}s via Automatic Preference Data Generation},\nauthor={Shijia Huang and Jianqiao Zhao and Yanyang Li and Liwei Wang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=RLmpJ4xol2}\n}", "github": "", "project": "", "reviewers": "Dan7;D9n7;s7CG", "site": "https://openreview.net/forum?id=RLmpJ4xol2", "pdf_size": 0, "rating": "5;5;5", "confidence": "3;3;4", "excitement": "3;4;4", "reproducibility": "4;4;4", "correctness": "4;4;4", "rating_avg": 5.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.0, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0000-0003-3264-1294", "linkedin": ";jianqiao-zhao/;;", "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Chinese University of Hong Kong;SenseTime", "aff_unique_dep": ";", "aff_unique_url": "https://www.cuhk.edu.hk;https://www.sensetime.com", "aff_unique_abbr": "CUHK;SenseTime", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "RMDZNIjTt7", "title": "IBADR: an Iterative Bias-Aware Dataset Refinement Framework for Debiasing NLU models", "track": "main", "status": "Long Main", "tldr": "", "abstract": "As commonly-used methods for debiasing natural language understanding (NLU) models, dataset refinement approaches heavily rely on manual data analysis, and thus maybe unable to cover all the potential biased features. In this paper, we propose IBADR, an Iterative Bias-Aware Dataset Refinement framework, which debiases NLU models without predefining biased features. We maintain an iteratively expanded sample pool. Specifically, at each iteration, we first train a shallow model to quantify the bias degree of samples in the pool. Then, we pair each sample with a bias indicator representing its bias degree, and use these extended samples to train a sample generator. In this way, this generator can effectively learn the correspondence relationship between bias indicators and samples. Furthermore, we employ the generator to produce pseudo samples with fewer biased features by feeding specific bias indicators. Finally,\nwe incorporate the generated pseudo samples into the pool. 
Experimental results and in-depth analyses on two NLU tasks show that IBADR not only significantly outperforms existing dataset refinement approaches, achieving SOTA, but also is compatible with model-centric methods.", "keywords": "debiased models; dataset refinement; spurious correlation", "primary_area": "", "supplementary_material": "", "author": "Xiaoyue Wang;Xin Liu;Lijie Wang;Yaoxiang Wang;Jinsong Su;Hua Wu", "authorids": "~Xiaoyue_Wang2;~Xin_Liu18;~Lijie_Wang2;~Yaoxiang_Wang1;~Jinsong_Su1;~Hua_Wu4", "gender": "F;M;M;M;F;F", "homepage": ";https://xinliu-cs.github.io;https://github.com/yxwang8775;https://cdmc.xmu.edu.cn/info/1010/1054.htm;https://wuhuanlp.github.io/;", "dblp": ";;;05/9013;27/6045-3;96/1435", "google_scholar": "https://scholar.google.com.hk/citations?user=KFMUkf8AAAAJ;AUUYG0QAAAAJ;https://scholar.google.com/citations?hl=zh-CN;;9X2ThuAAAAAJ;bMsGAi0AAAAJ", "or_profile": "~Xiaoyue_Wang2;~Xin_Liu18;~Yaoxiang_Wang1;~Jinsong_Su1;~hua_wu1;~Wang_Lijie1", "aff": "Xiamen University;University of Michigan - Ann Arbor;Xiamen University;Xiamen University;Baidu;Baidu", "aff_domain": "xmu.edu.cn;umich.edu;xmu.edu.cn;xmu.edu.cn;baidu.com;baidu.com", "position": "PhD student;PhD student;Undergrad student;Researcher;Principal Researcher;Researcher", "bibtex": "@inproceedings{\nwang2023ibadr,\ntitle={{IBADR}: an Iterative Bias-Aware Dataset Refinement Framework for Debiasing {NLU} models},\nauthor={Xiaoyue Wang and Xin Liu and Lijie Wang and Yaoxiang Wang and Jinsong Su and Hua Wu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=RMDZNIjTt7}\n}", "github": "", "project": "", "reviewers": "XyJx;HAyt;NJeA", "site": "https://openreview.net/forum?id=RMDZNIjTt7", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;3", "excitement": "3;4;3", "reproducibility": "4;4;4", "correctness": "4;3;4", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.0, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;0000-0001-8254-1561;", "linkedin": ";;;;;", "aff_unique_index": "0;1;0;0;2;2", "aff_unique_norm": "Xiamen University;University of Michigan;Baidu", "aff_unique_dep": ";;Baidu, Inc.", "aff_unique_url": "https://www.xmu.edu.cn;https://www.umich.edu;https://www.baidu.com", "aff_unique_abbr": "XMU;UM;Baidu", "aff_campus_unique_index": "1", "aff_campus_unique": ";Ann Arbor", "aff_country_unique_index": "0;1;0;0;0;0", "aff_country_unique": "China;United States" }, { "id": "RN5KLywTll", "title": "What's \"up\" with vision-language models? Investigating their struggle with spatial reasoning", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Recent vision-language (VL) models are powerful, but can they reliably distinguish \"right\" from \"left\"? We curate three new corpora to quantify model comprehension of such basic spatial relations. These tests isolate spatial reasoning more precisely than existing datasets like VQAv2, e.g., our What'sUp benchmark contains sets of photographs varying only the spatial relations of objects, keeping their identity fixed (see Figure 1: models must comprehend not only the usual case of a dog under a table, but also, the same dog on top of the same table). 
We evaluate 18 VL models, finding that all perform poorly, e.g., BLIP finetuned on VQAv2, which nears human parity on VQAv2, achieves 56% accuracy on our benchmarks vs. humans at 99%. We conclude by studying causes of this surprising behavior, finding: 1) that popular vision-language pretraining corpora like LAION-2B contain little reliable data for learning spatial relationships; and 2) that basic modeling interventions like up-weighting preposition-containing instances or fine-tuning on our corpora are not sufficient to address the challenges our benchmarks pose. We are hopeful that these corpora will facilitate further research, and we release our data and code at https://github.com/amitakamath/whatsup_vlms.", "keywords": "vision-language;spatial relations;interpretability", "primary_area": "", "supplementary_material": "", "author": "Amita Kamath;Jack Hessel;Kai-Wei Chang", "authorids": "~Amita_Kamath1;~Jack_Hessel1;~Kai-Wei_Chang1", "gender": "F;M;M", "homepage": "https://amitakamath.github.io/;https://www.jmhessel.com;http://kwchang.net", "dblp": "267/9823;https://dblp.uni-trier.de/pid/132/5250.html;18/2428", "google_scholar": "B_ek5IIAAAAJ;SxQQ1msAAAAJ;fqDBtzYAAAAJ", "or_profile": "~Amita_Kamath1;~Jack_Hessel1;~Kai-Wei_Chang1", "aff": "UCLA Computer Science Department, University of California, Los Angeles;Allen Institute for Artificial Intelligence;Amazon", "aff_domain": "cs.ucla.edu;allenai.org;amazon.com", "position": "PhD student;Researcher;Researcher", "bibtex": "@inproceedings{\nkamath2023whats,\ntitle={What's ''up'' with vision-language models? Investigating their struggle with spatial reasoning},\nauthor={Amita Kamath and Jack Hessel and Kai-Wei Chang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=RN5KLywTll}\n}", "github": "", "project": "", "reviewers": "sZnW;4kKj;ciVg", "site": "https://openreview.net/forum?id=RN5KLywTll", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;5;4", "excitement": "4;3;4", "reproducibility": "3;4;5", "correctness": "4;4;4", "rating_avg": 4.0, "confidence_avg": 4.333333333333333, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.0, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-4012-8979;0000-0001-5365-0072", "linkedin": ";;kai-wei-chang-41239040", "aff_unique_index": "0;1;2", "aff_unique_norm": "University of California, Los Angeles;Allen Institute for Artificial Intelligence;Amazon", "aff_unique_dep": "Computer Science Department;;Amazon.com, Inc.", "aff_unique_url": "https://www.ucla.edu;https://allenai.org;https://www.amazon.com", "aff_unique_abbr": "UCLA;AI2;Amazon", "aff_campus_unique_index": "0", "aff_campus_unique": "Los Angeles;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "RO460OVpev", "title": "Chinese Metaphorical Relation Extraction", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Metaphors are linguistic expressions that convey non-literal meanings, as well as cognitive mappings that establish connections between distinct domains of experience or knowledge.\nThis paper proposes a novel formulation of metaphor identification as a relation extraction problem. \nWe introduce metaphorical relations as links between two spans in text, a target span and a source-related span. 
\nWe create a dataset for Chinese metaphorical relation extraction, with more than 4,200 sentences annotated with metaphorical relations, corresponding target/source-related spans, and fine-grained span types. \nMetaphorical relation extraction is a process that detects metaphorical expressions and builds connections between target and source domains.\nWe develop a span-based end-to-end model for metaphorical relation extraction and demonstrate its effectiveness.\nWe expect that metaphorical relation extraction can serve as a bridge between linguistic metaphor identification and conceptual metaphor identification.\nOur data and code are available at https://github.com/cnunlp/CMRE.", "keywords": "Metaphor understanding;metaphorical relation extraction;linguistic metaphor;cognitive metaphor", "primary_area": "", "supplementary_material": "", "author": "Guihua Chen;Tiantian Wu;MiaoMiao Cheng;Xu Han;Jiefu Gong;Shijin Wang;Wei Song", "authorids": "~Guihua_Chen1;~Tiantian_Wu1;~MiaoMiao_Cheng1;~Xu_Han11;~Jiefu_Gong1;~Shijin_Wang1;~Wei_Song3", "gender": "F;F;;;M;M;M", "homepage": "https://blog.csdn.net/weixin_42625825?type=lately;https://github.com/sweet0405;;;;;https://cnunlp.github.io", "dblp": ";;;;223/2473.html;74/5750-1.html;62/1539-10", "google_scholar": ";;;;;;MdWnyicAAAAJ", "or_profile": "~Guihua_Chen1;~Tiantian_Wu1;~MiaoMiao_Cheng1;~Xu_Han11;~Jiefu_Gong1;~Shijin_Wang1;~Wei_Song3", "aff": "Capital Normal University;Capital Normal University;;;IFLYTEK CO.LTD.;State Key Laboratory of Cognitive Intelligence;Capital Normal University", "aff_domain": "cnu.edu.cn;cnu.edu.cn;;;iflytek.com;iflytek.com;cnu.edu.cn", "position": "Undergrad student;MS student;;;Researcher;Vice Dean;Full Professor", "bibtex": "@inproceedings{\nchen2023chinese,\ntitle={Chinese Metaphorical Relation Extraction},\nauthor={Guihua Chen and Tiantian Wu and MiaoMiao Cheng and Xu Han and Jiefu Gong and Shijin Wang and Wei Song},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=RO460OVpev}\n}", "github": "", "project": "", "reviewers": "oJ38;SXfz;7vFy", "site": "https://openreview.net/forum?id=RO460OVpev", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;3;4", "excitement": "3;4;3", "reproducibility": "3;3;3", "correctness": "3;4;4", "rating_avg": 3.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.0, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;0000-0002-9202-7678;", "linkedin": ";;;;;;", "aff_unique_index": "0;0;1;2;0", "aff_unique_norm": "Capital Normal University;iFLYTEK;State Key Laboratory of Cognitive Intelligence", "aff_unique_dep": ";;", "aff_unique_url": "http://www.cnu.edu.cn;https://www.iflytek.com;", "aff_unique_abbr": "CNU;iFLYTEK;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "RSuN6p3wXR", "title": "APrompt: Attention Prompt Tuning for Efficient Adaptation of Pre-trained Language Models", "track": "main", "status": "Long Main", "tldr": "", "abstract": "With the continuous growth of large language models, the process of fine-tuning these models for new tasks has become increasingly parameter-intensive. Prompt tuning, a method that involves tuning a small set of soft prompts, has emerged as an effective and efficient approach for adapting large pre-trained language models. 
However, most existing prompt tuning approaches only introduce prompts at the input layer, limiting their performance and leaving large rooms for improvement. In this work, we propose a novel Attention Prompt tuning method, namely APrompt, for efficient adaptation of pre-trained language models. We first demonstrate that existing prompt tuning can be considered as a special case of attention prompt tuning. We then formally introduce APrompt, which incorporates query, key, and value prompts into the attention layer to guide the attention computation during fine-tuning. Experimental results on the SuperGLUE benchmark consistently demonstrate that our proposed approach outperforms state-of-the-art baselines and full fine-tuning method with pre-trained models at different scales. In addition, a comprehensive set of ablation studies validate the effectiveness of the prompt design, as well as the efficiency of our approach.", "keywords": "Prompt Tuning;Parameter Efficient Learning;Attention Prompt", "primary_area": "", "supplementary_material": "", "author": "Qifan Wang;Yuning Mao;Jingang Wang;Hanchao Yu;Shaoliang Nie;Sinong Wang;Fuli Feng;Lifu Huang;Xiaojun Quan;Zenglin Xu;Dongfang Liu", "authorids": "~Qifan_Wang2;~Yuning_Mao1;~Jingang_Wang1;~Hanchao_Yu1;~Shaoliang_Nie1;~Sinong_Wang1;~Fuli_Feng1;~Lifu_Huang1;~Xiaojun_Quan1;~Zenglin_Xu2;~Dongfang_Liu1", "gender": "M;;M;M;M;M;M;M;M;;M", "homepage": "https://wqfcr.github.io/;https://morningmoni.github.io/;https://sites.google.com/site/bitwjg/;https://www.linkedin.com/in/hanchao-yu-9a9381a7/;https://snie2012.github.io;https://sites.google.com/site/snongwang/;https://fulifeng.github.io/;https://wilburone.github.io/;https://sites.google.com/site/xiaojunquan/;https://www.rit.edu/directory/dxleec-dongfang-liu;https://faculty.fudan.edu.cn/xuzenglin/en/index.htm", "dblp": "33/8610;178/3692;59/7807;69/9936;213/7860;140/0795;183/9198;127/0072;90/5936;;68/1538", "google_scholar": "LrSyLosAAAAJ;steJe6IAAAAJ;janU39IAAAAJ;vBkncqgAAAAJ;https://scholar.google.com/citations?hl=en;CYMAfxsAAAAJ;https://scholar.google.com.sg/citations?user=QePM4u8AAAAJ;76IEGtYAAAAJ;dRpg4t8AAAAJ;uICY0vEAAAAJ;gF0H9nEAAAAJ", "or_profile": "~Qifan_Wang2;~Yuning_Mao1;~Jingang_Wang1;~Hanchao_Yu1;~Shaoliang_Nie1;~Sinong_Wang1;~Fuli_Feng1;~Lifu_Huang1;~Xiaojun_Quan1;~Dongfang_Liu1;~Zenglin_Xu1", "aff": "Meta AI;Meta;Meituan;Meta Facebook;Meta Inc;Meta Facebook;University of Science and Technology of China;Virginia Tech;SUN YAT-SEN UNIVERSITY;Rochester Institute of Technology;Harbin Institute of Technology Shenzhen", "aff_domain": "fb.com;meta.com;meituan.com;fb.com;meta.com;fb.com;ustc.edu.cn;vt.edu;sysu.edu.cn;rit.edu;hit.edu.cn", "position": "Principal Researcher;Researcher;Researcher;Researcher;Researcher;Research scientist;Full Professor;Assistant Professor;Full Professor;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nwang2023aprompt,\ntitle={{AP}rompt: Attention Prompt Tuning for Efficient Adaptation of Pre-trained Language Models},\nauthor={Qifan Wang and Yuning Mao and Jingang Wang and Hanchao Yu and Shaoliang Nie and Sinong Wang and Fuli Feng and Lifu Huang and Xiaojun Quan and Zenglin Xu and Dongfang Liu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=RSuN6p3wXR}\n}", "github": "", "project": "", "reviewers": "S6E8;97Ph;tGWP", "site": "https://openreview.net/forum?id=RSuN6p3wXR", "pdf_size": 0, "rating": "4;4;4", "confidence": "5;4;5", "excitement": "4;3;3", 
"reproducibility": "4;4;4", "correctness": "4;4;4", "rating_avg": 4.0, "confidence_avg": 4.666666666666667, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.0, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 11, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-7570-5756;;;0009-0000-4407-7796;;;0000-0002-5828-9842;;;;0000-0001-5550-6461", "linkedin": ";morningmoni/;;hanchao-yu-9a9381a7/;shaoliang-nie/;wang-s-simon-194512a7;;;;;", "aff_unique_index": "0;0;1;0;0;0;2;3;4;5;6", "aff_unique_norm": "Meta;Meituan;University of Science and Technology of China;Virginia Tech;Sun Yat-sen University;Rochester Institute of Technology;Harbin Institute of Technology", "aff_unique_dep": "Meta AI;;;;;;", "aff_unique_url": "https://meta.com;https://www.meituan.com;http://www.ustc.edu.cn;https://www.vt.edu;http://www.sysu.edu.cn;https://www.rit.edu;https://www.hit.edu.cn/", "aff_unique_abbr": "Meta;Meituan;USTC;VT;SYSU;RIT;HIT", "aff_campus_unique_index": "1", "aff_campus_unique": ";Shenzhen", "aff_country_unique_index": "0;0;1;0;0;0;1;0;1;0;1", "aff_country_unique": "United States;China" }, { "id": "RVQccn8rcr", "title": "Polar Ducks and Where to Find Them: Enhancing Entity Linking with Duck Typing and Polar Box Embeddings", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Entity linking methods based on dense retrieval are widely adopted in large-scale applications for their efficiency, but they can fall short of generative models, as they are sensitive to the structure of the embedding space. To address this issue, this paper introduces DUCK, an approach to infusing structural information in the space of entity representations, using prior knowledge of entity types. Inspired by duck typing in programming languages, we define the type of an entity based on its relations with other entities in a knowledge graph. Then, porting the concept of box embeddings to spherical polar coordinates, we represent relations as boxes on the hypersphere. We optimize the model to place entities inside the boxes corresponding to their relations, thereby clustering together entities of similar type. Our experiments show that our method sets new state-of-the-art results on standard entity-disambiguation benchmarks. 
It improves the performance of the model by up to 7.9 F1 points, outperforms other type-aware approaches, and matches the results of generative models with 18 times more parameters.", "keywords": "Entity linking;Entity disambiguation;Box embeddings", "primary_area": "", "supplementary_material": "", "author": "Mattia Atzeni;Mikhail Plekhanov;Frederic A Dreyer;Nora Kassner;Simone Merello;Louis Martin;Nicola Cancedda", "authorids": "~Mattia_Atzeni1;~Mikhail_Plekhanov1;~Frederic_A_Dreyer1;~Nora_Kassner1;~Simone_Merello1;~Louis_Martin1;~Nicola_Cancedda1", "gender": ";M;;;;;M", "homepage": ";;;;https://github.com/Simosound94;https://louismartin.eu;", "dblp": "204/8455.html;;;;;05/6214;19/2610", "google_scholar": "GxcjDq0AAAAJ;Nty9hAYAAAAJ;;;;W_Y6OKAAAAAJ;PXGsctkAAAAJ", "or_profile": "~Mattia_Atzeni1;~Mikhail_Plekhanov1;~Frederic_A_Dreyer1;~Nora_Kassner1;~Simone_Merello1;~Louis_Martin1;~Nicola_Cancedda1", "aff": "Meta;Meta Facebook;;;;Meta Facebook;Meta", "aff_domain": "meta.com;meta.com;;;;fb.com;meta.com", "position": "Intern;Researcher;;;;Researcher;Researcher", "bibtex": "@inproceedings{\natzeni2023polar,\ntitle={Polar Ducks and Where to Find Them: Enhancing Entity Linking with Duck Typing and Polar Box Embeddings},\nauthor={Mattia Atzeni and Mikhail Plekhanov and Frederic A Dreyer and Nora Kassner and Simone Merello and Louis Martin and Nicola Cancedda},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=RVQccn8rcr}\n}", "github": "", "project": "", "reviewers": "F4hB;89ts;VEWh", "site": "https://openreview.net/forum?id=RVQccn8rcr", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;3;4", "excitement": "4;4;4", "reproducibility": "2;3;4", "correctness": "4;4;4", "rating_avg": 5.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 4.0, "reproducibility_avg": 3.0, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;;", "linkedin": ";mikeplekhanov/;;;;;nicola-cancedda-a085261/", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Meta", "aff_unique_dep": "Meta Platforms, Inc.", "aff_unique_url": "https://meta.com", "aff_unique_abbr": "Meta", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "RWH1WazQqE", "title": "Democratizing LLMs: An Exploration of Cost-Performance Trade-offs in Self-Refined Open-Source Models", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "The dominance of proprietary LLMs has led to restricted access and raised information privacy concerns. The SoTA open-source alternatives are crucial for information-sensitive and high-volume applications but often lag behind in performance. To address this gap, we propose (1) A generalized variant of iterative self-critique and self-refinement devoid of external influence. (2) A novel ranking metric - Performance, Refinement, and Inference Cost Score (PeRFICS) - to find the optimal model for a given task considering refined performance and cost. Our experiments show that SoTA open source models of varying sizes from 7B - 65B, on average, improve 8.2\\% from their baseline performance. Strikingly, even models with extremely small memory footprints, such as Vicuna-7B, show a 11.74\\% improvement overall and up to a 25.39\\% improvement in high-creativity, open ended tasks on the Vicuna benchmark. 
Vicuna-13B takes it a step further and outperforms ChatGPT post-refinement. This work has profound implications for resource-constrained and information-sensitive environments seeking to leverage LLMs without incurring prohibitive costs, compromising on performance and privacy. The domain-agnostic self-refinement process coupled with our novel ranking metric facilitates informed decision-making in model selection, thereby reducing costs and democratizing access to high-performing language models, as evidenced by three case studies on personal computing, gaming and enterprise solutions.", "keywords": "large-language-model;open-source;self-refinement;ranking-metric;cost-analysis", "primary_area": "", "supplementary_material": "", "author": "Sumuk Shashidhar;Abhinav Chinta;Vaibhav Sahai;Zhenhailong Wang;Heng Ji", "authorids": "~Sumuk_Shashidhar1;~Abhinav_Chinta1;~Vaibhav_Sahai1;~Zhenhailong_Wang1;~Heng_Ji3", "gender": "M;M;M;M;F", "homepage": "https://sumuk.org;https://abhinavchinta.com/;;https://mikewangwzhl.github.io/;http://blender.cs.illinois.edu/hengji.html", "dblp": "358/8879.html;;;290/1319;", "google_scholar": "https://scholar.google.com/citations?hl=en;LVXh1vAAAAAJ;97d-oyIAAAAJ;arzvOlgAAAAJ;z7GCqT4AAAAJ", "or_profile": "~Sumuk_Shashidhar1;~Abhinav_Chinta1;~Vaibhav_Sahai1;~Zhenhailong_Wang1;~Heng_Ji3", "aff": "University of Illinois, Urbana-Champaign;University of Illinois Urbana Champaign;University of Illinois, Urbana-Champaign;University of Illinois Urbana-Champaign;University of Illinois, Urbana-Champaign", "aff_domain": "cs.illinois.edu;cs.illinois.edu;cs.illinois.edu;illinois.edu;uiuc.edu", "position": "Undergrad student;Undergrad student;Undergrad student;MS student;Full Professor", "bibtex": "@inproceedings{\nshashidhar2023democratizing,\ntitle={Democratizing {LLM}s: An Exploration of Cost-Performance Trade-offs in Self-Refined Open-Source Models},\nauthor={Sumuk Shashidhar and Abhinav Chinta and Vaibhav Sahai and Zhenhailong Wang and Heng Ji},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=RWH1WazQqE}\n}", "github": "", "project": "", "reviewers": "27dr;7UkX;9ySn", "site": "https://openreview.net/forum?id=RWH1WazQqE", "pdf_size": 0, "rating": "3;3;3", "confidence": "1;4;3", "excitement": "3;3;4", "reproducibility": "4;4;4", "correctness": "3;2;4", "rating_avg": 3.0, "confidence_avg": 2.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.0, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-8265-9946;;;0000-0002-4704-5455;", "linkedin": "https://linkedin.com/in/sumuks;abhinavchinta/;vaibhav-sahai/;zhenhailong-wang-7952111b2/;", "aff_unique_index": "0;1;0;1;0", "aff_unique_norm": "University of Illinois;University of Illinois Urbana-Champaign", "aff_unique_dep": ";", "aff_unique_url": "https://illinois.edu;https://illinois.edu", "aff_unique_abbr": "UIUC;UIUC", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Urbana-Champaign", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "RWJYEeaW1d", "title": "EasyQuant: An Efficient Data-free Quantization Algorithm for LLMs", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Large language models (LLMs) have proven to be very superior to conventional methods in various tasks.\nHowever, their expensive computations and high memory requirements are prohibitive 
for deployment.\nModel quantization is an effective method for reducing this overhead. The problem is that in most\nprevious works, the quantized model was calibrated using few samples from the training data, which\nmight affect the generalization of the quantized LLMs to unknown cases and tasks. Hence in this work,\nwe explore an important question: Can we design a data-independent quantization method for LLMs to\nguarantee its generalization performance?\nIn this work, we propose EasyQuant, a training-free and data-independent weight-only quantization\nalgorithm for LLMs. Our observation indicates that two factors: outliers in the weight and quantization\nranges, are essential for reducing the quantization error. Therefore, in EasyQuant, we leave the outliers\n(less than 1%) unchanged and optimize the quantization range to reduce the reconstruction error. With\nthese methods, we surprisingly find that EasyQuant achieves comparable performance to the original model.\nSince EasyQuant does not depend on any training data, the generalization performance of quantized\nLLMs is safely guaranteed. Moreover, EasyQuant can be implemented in parallel so that the quantized\nmodel could be attained in a few minutes even for LLMs over 100B. To our best knowledge, we are the\nfirst work that achieves almost lossless quantization performance for LLMs under a data-independent\nsetting and our algorithm runs over 10 times faster than the data-dependent methods.", "keywords": "model quantization", "primary_area": "", "supplementary_material": "", "author": "Hanlin Tang;Yifu Sun;Decheng Wu;Kai Liu;Jianchen Zhu;Zhanhui Kang", "authorids": "~Hanlin_Tang2;~Yifu_Sun2;~Decheng_Wu2;~Kai_Liu15;~Jianchen_Zhu1;~Zhanhui_Kang1", "gender": ";F;M;M;M;M", "homepage": ";https://github.com/ifif-S;https://www.baidu.com/;http://www.qq.com;https://llm.hunyuan.tencent.com/;https://github.com/WOODchen7", "dblp": ";;;;157/6432;", "google_scholar": "RCGyfecAAAAJ;;;;;", "or_profile": "~Hanlin_Tang2;~Yifu_Sun2;~Kai_Liu15;~Jianchen_Zhu1;~Zhanhui_Kang1;~Wu_Decheng1", "aff": "Huawei Technologies Ltd.;;;;Tencent;", "aff_domain": "huawei.com;;;;tencent.com;", "position": "Researcher;;;;Researcher;", "bibtex": "@inproceedings{\ntang2023easyquant,\ntitle={EasyQuant: An Efficient Data-free Quantization Algorithm for {LLM}s},\nauthor={Hanlin Tang and Yifu Sun and Decheng Wu and Kai Liu and Jianchen Zhu and Zhanhui Kang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=RWJYEeaW1d}\n}", "github": "", "project": "", "reviewers": "uuQc;1vKT;Mg7b", "site": "https://openreview.net/forum?id=RWJYEeaW1d", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;3;4", "excitement": "3;4;2", "reproducibility": "3;4;3", "correctness": "3;4;3", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.0, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;0009-0006-5151-4222;", "linkedin": ";;;;kang-kego-628b1b28/;", "aff_unique_index": "0;1", "aff_unique_norm": "Huawei;Tencent", "aff_unique_dep": "Huawei Technologies;Tencent Holdings Limited", "aff_unique_url": "https://www.huawei.com;https://www.tencent.com", "aff_unique_abbr": "Huawei;Tencent", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "id": "RXIYmRUWGD", "title": "Improved 
Unsupervised Chinese Word Segmentation Using Pre-trained Knowledge and Pseudo-labeling Transfer", "track": "main", "status": "Short Main", "tldr": "", "abstract": "Unsupervised Chinese word segmentation (UCWS) has made progress by incorporating linguistic knowledge from pre-trained language models using parameter-free probing techniques. However, such approaches suffer from increased training time due to the need for multiple inferences using a pre-trained language model to perform word segmentation. This work introduces a novel way to enhance UCWS performance while maintaining training efficiency. Our proposed method integrates the segmentation signal from the unsupervised segmental language model to the pre-trained BERT classifier under a pseudo-labeling framework. Experimental results demonstrate that our approach achieves state-of-the-art performance on the eight UCWS tasks while considerably reducing the training time compared to previous approaches.", "keywords": "Unsupervised Chinese Word Segmentation", "primary_area": "", "supplementary_material": "", "author": "Hsiu-Wen Li;Ying-Jia Lin;Yi-Ting Li;Chun Yi Lin;Hung-Yu Kao", "authorids": "~Hsiu-Wen_Li1;~Ying-Jia_Lin1;~Yi-Ting_Li1;~Chun_Yi_Lin1;~Hung-Yu_Kao1", "gender": "M;M;M;M;M", "homepage": ";https://mcps5601.github.io/about/;;;http://140.116.245.107/advisor.html", "dblp": ";257/6587;;;64/5833.html", "google_scholar": ";TM4JxJkAAAAJ;;https://scholar.google.com.tw/citations?user=8I_uKDAAAAAJ;https://scholar.google.com.tw/citations?user=X5Is2lAAAAAJ", "or_profile": "~Hsiu-Wen_Li1;~Ying-Jia_Lin1;~Yi-Ting_Li1;~Chun_Yi_Lin1;~Hung-Yu_Kao1", "aff": "National Cheng Kung University;National Cheng Kung University;National Cheng Kung University;National Cheng Kung University;CSIE", "aff_domain": "ncku.edu.tw;ncku.edu.tw;ncku.edu.tw;ncku.edu.tw;csie.ncku.edu.tw", "position": "MS student;PhD student;MS student;MS student;Full Professor", "bibtex": "@inproceedings{\nli2023improved,\ntitle={Improved Unsupervised Chinese Word Segmentation Using Pre-trained Knowledge and Pseudo-labeling Transfer},\nauthor={Hsiu-Wen Li and Ying-Jia Lin and Yi-Ting Li and Chun Yi Lin and Hung-Yu Kao},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=RXIYmRUWGD}\n}", "github": "", "project": "", "reviewers": "CMUv;9gJq;sybf", "site": "https://openreview.net/forum?id=RXIYmRUWGD", "pdf_size": 0, "rating": "4;4;4", "confidence": "5;5;3", "excitement": "2;4;2", "reproducibility": "5;4;4", "correctness": "4;4;3", "rating_avg": 4.0, "confidence_avg": 4.333333333333333, "excitement_avg": 2.6666666666666665, "reproducibility_avg": 4.333333333333333, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0003-4347-0232;;0000-0002-2582-2356;0000-0002-8890-8544", "linkedin": "%E4%BF%AE%E6%96%87-%E9%BB%8E-335701213/;ying-jia-lin-0a1b1413b/;yi-ting-li-38a0b4232/;%E6%9E%97-%E5%B3%BB%E6%AF%85-7ba67b268/;", "aff_unique_index": "0;0;0;0;1", "aff_unique_norm": "National Cheng Kung University;College of Computer Science and Information Engineering", "aff_unique_dep": ";", "aff_unique_url": "https://www.ncku.edu.tw;", "aff_unique_abbr": "NCKU;CSIE", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Taiwan;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China;" }, { "id": "RYvNvCU109", "title": "Energy and Carbon Considerations of Fine-Tuning BERT", "track": "main", 
"status": "Short Findings", "tldr": "", "abstract": "Despite the popularity of the pre-train then fine-tune paradigm in the NLP community, existing work quantifying energy costs and associated carbon emissions has largely focused on language model pre-training. Although a single pre-training run draws substantially more energy than fine-tuning, fine-tuning is performed more frequently by many more individual actors, and thus must be accounted for when considering the energy and carbon footprint of NLP. In order to better characterize the role of fine-tuning in the landscape of energy and carbon emissions in NLP, we perform a careful empirical study of the computational costs of fine-tuning across tasks, datasets, hardware infrastructure and measurement modalities. Our experimental results allow us to place fine-tuning energy and carbon costs into perspective with respect to pre-training and inference, and outline recommendations to NLP researchers and practitioners who wish to improve their fine-tuning energy efficiency.", "keywords": "energy costs;fine-tuning;efficiency evaluation;efficiency;BERT;transformer", "primary_area": "", "supplementary_material": "", "author": "Xiaorong Wang;Clara Na;Emma Strubell;Sorelle Friedler;Sasha Luccioni", "authorids": "~Xiaorong_Wang2;~Clara_Na1;~Emma_Strubell1;~Sorelle_Friedler1;~Sasha_Luccioni1", "gender": "F;;Non-Binary;;", "homepage": ";;http://strubell.github.io;https://sorelle.friedler.net;", "dblp": ";;153/2253;59/7202;", "google_scholar": ";;UCDMtM0AAAAJ;XDHr1VIAAAAJ;", "or_profile": "~Xiaorong_Wang2;~Clara_Na1;~Emma_Strubell1;~Sorelle_Friedler1;~Sasha_Luccioni1", "aff": "Haverford College in Pennsylvania;;Allen Institute for Artificial Intelligence;Haverford College;", "aff_domain": "haverford.edu;;allenai.org;haverford.edu;", "position": "Undergrad student;;Visiting Researcher;Full Professor;", "bibtex": "@inproceedings{\nwang2023energy,\ntitle={Energy and Carbon Considerations of Fine-Tuning {BERT}},\nauthor={Xiaorong Wang and Clara Na and Emma Strubell and Sorelle Friedler and Sasha Luccioni},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=RYvNvCU109}\n}", "github": "", "project": "", "reviewers": "ikfR;NXfn;9NCQ", "site": "https://openreview.net/forum?id=RYvNvCU109", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;3;3", "excitement": "2;2;3", "reproducibility": "4;4;4", "correctness": "3;3;4", "rating_avg": 3.0, "confidence_avg": 3.0, "excitement_avg": 2.3333333333333335, "reproducibility_avg": 4.0, "correctness_avg": 3.3333333333333335, "replies_avg": 10, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0000-0001-6023-1597;", "linkedin": "xiaorong-wang-1362201ba/;;;;", "aff_unique_index": "0;1;0", "aff_unique_norm": "Haverford College;Allen Institute for Artificial Intelligence", "aff_unique_dep": ";", "aff_unique_url": "https://www.haverford.edu;https://allenai.org", "aff_unique_abbr": "Haverford;AI2", "aff_campus_unique_index": "0", "aff_campus_unique": "Pennsylvania;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "Ra6gfR3XuI", "title": "Understanding the Role of Input Token Characters in Language Models: How Does Information Loss Affect Performance?", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Understanding how and what pre-trained language models (PLMs) learn about language is an open challenge in natural language processing. 
Previous work has focused on identifying whether they capture semantic and syntactic information, and how the data or the pre-training objective affects their performance. However, to the best of our knowledge, no previous work has specifically examined how information loss in input token characters affects the performance of PLMs. In this study, we address this gap by pre-training language models using small subsets of characters from individual tokens. Surprisingly, we find that even when pre-training under extreme settings, i.e., using only one character of each token, the performance retention in standard NLU benchmarks and probing tasks compared to full-token models is high. For instance, a model pre-trained only on single first characters from tokens achieves performance retention of approximately 90% and 77% of the full-token model in SuperGLUE and GLUE tasks, respectively.", "keywords": "LLMs;Interpretability;Pretraining", "primary_area": "", "supplementary_material": "", "author": "Ahmed Alajrami;Katerina Margatina;Nikolaos Aletras", "authorids": "~Ahmed_Alajrami1;~Katerina_Margatina1;~Nikolaos_Aletras1", "gender": "M;F;", "homepage": "https://www.linkedin.com/in/aajrami/;https://katerinamargatina.github.io/;", "dblp": ";227/2313;118/9116", "google_scholar": "https://scholar.google.co.uk/citations?hl=en;517t5gEAAAAJ;https://scholar.google.co.uk/citations?user=uxRWFhoAAAAJ", "or_profile": "~Ahmed_Alajrami1;~Katerina_Margatina1;~Nikolaos_Aletras1", "aff": "University of Sheffield;University of Sheffield;Amazon", "aff_domain": "sheffield.ac.uk;sheffield.ac.uk;amazon.com", "position": "PhD student;PhD student;Researcher", "bibtex": "@inproceedings{\nalajrami2023understanding,\ntitle={Understanding the Role of Input Token Characters in Language Models: How Does Information Loss Affect Performance?},\nauthor={Ahmed Alajrami and Katerina Margatina and Nikolaos Aletras},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=Ra6gfR3XuI}\n}", "github": "", "project": "", "reviewers": "Ed3M;tAA1;77VM", "site": "https://openreview.net/forum?id=Ra6gfR3XuI", "pdf_size": 0, "rating": "5;5;5", "confidence": "5;4;5", "excitement": "4;4;3", "reproducibility": "5;5;4", "correctness": "3;5;3", "rating_avg": 5.0, "confidence_avg": 4.666666666666667, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.666666666666667, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": ";katerina-margatina/;", "aff_unique_index": "0;0;1", "aff_unique_norm": "University of Sheffield;Amazon", "aff_unique_dep": ";Amazon.com, Inc.", "aff_unique_url": "https://www.sheffield.ac.uk;https://www.amazon.com", "aff_unique_abbr": "Sheffield;Amazon", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1", "aff_country_unique": "United Kingdom;United States" }, { "id": "RbE83Pmtfk", "title": "DeTiME: Diffusion-Enhanced Topic Modeling using Encoder-decoder based LLM", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "In the burgeoning field of natural language processing, Neural Topic Models (NTMs) and Large Language Models (LLMs) have emerged as areas of significant research interest. Despite this, NTMs primarily utilize contextual embeddings from LLMs, which are not optimal for clustering or capable of topic generation. 
Our study addresses this gap by introducing a novel framework named Diffusion-Enhanced Topic Modeling using Encoder-Decoder-based LLMs (DeTiME). DeTiME leverages Encoder-Decoder-based LLMs to produce highly clusterable embeddings that could generate topics that exhibit both superior clusterability and enhanced semantic coherence compared to existing methods. Additionally, by exploiting the power of diffusion, our framework also provides the capability to generate content relevant to the identified topics. This dual functionality allows users to efficiently produce highly clustered topics and related content simultaneously. DeTiME's potential extends to generating clustered embeddings as well. Notably, our proposed framework proves to be efficient to train and exhibits high adaptability, demonstrating its potential for a wide array of applications.", "keywords": "Topic Modeling;Diffusion;Encoder-Decoder LLM;FlanT5;CNN", "primary_area": "", "supplementary_material": "", "author": "Weijie Xu;Wenxiang Hu;Fanyou Wu;Srinivasan H. Sengamedu", "authorids": "~Weijie_Xu1;~Wenxiang_Hu2;~Fanyou_Wu1;~Srinivasan_H._Sengamedu1", "gender": "M;M;M;", "homepage": "https://www.weijiexu.com;;http://www.wufanyou.com;", "dblp": "195/1675;;229/8090;38/2372", "google_scholar": "lWjp-dQAAAAJ;PYC84mkAAAAJ;C8WYCTAAAAAJ;X9fVMRUAAAAJ", "or_profile": "~Weijie_Xu1;~Wenxiang_Hu2;~Fanyou_Wu1;~Srinivasan_H._Sengamedu1", "aff": "Amazon;Amazon;Amazon;Amazon", "aff_domain": "amazon.com;amazon.com;amazon.com;amazon.com", "position": "Researcher;Researcher;Researcher;Applied Science Manager", "bibtex": "@inproceedings{\nxu2023detime,\ntitle={DeTi{ME}: Diffusion-Enhanced Topic Modeling using Encoder-decoder based {LLM}},\nauthor={Weijie Xu and Wenxiang Hu and Fanyou Wu and Srinivasan H. Sengamedu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=RbE83Pmtfk}\n}", "github": "", "project": "", "reviewers": "WMyA;q8m2;Y61q", "site": "https://openreview.net/forum?id=RbE83Pmtfk", "pdf_size": 0, "rating": "3;3;3", "confidence": "5;3;4", "excitement": "2;4;3", "reproducibility": "4;4;3", "correctness": "3;4;3", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 3.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 10, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0001-9958-797X;;0000-0003-1847-8398", "linkedin": "weijie-xu-936b23101/;;;srinivasan-h-sengamedu", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Amazon", "aff_unique_dep": "Amazon.com, Inc.", "aff_unique_url": "https://www.amazon.com", "aff_unique_abbr": "Amazon", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "RcvJnskt0n", "title": "Detection of Multiple Mental Disorders from Social Media with Two-Stream Psychiatric Experts", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Existing Mental Disease Detection (MDD) research largely studies the detection of a single disorder, overlooking the fact that mental diseases might occur in tandem. Many approaches are not backed by domain knowledge (e.g., psychiatric symptoms) and thus fail to produce interpretable results. \nTo tackle these issues, we propose an MDD framework that is capable of learning the shared clues of all diseases, while also capturing the specificity of each single disease. 
The two-stream architecture which simultaneously processes text and symptom features can combine \nthe strength of both modalities and offer knowledge-based explainability. Experiments on the detection of 7 diseases show that our model can boost detection performance by more than 10\\%, especially in relatively rare classes.", "keywords": "Mental disease detection;symptom;multi-task learning;interpretability;social media", "primary_area": "", "supplementary_material": "", "author": "Siyuan Chen;Zhiling Zhang;Mengyue Wu;Kenny Q. Zhu", "authorids": "~Siyuan_Chen1;~Zhiling_Zhang1;~Mengyue_Wu1;~Kenny_Q._Zhu1", "gender": "F;;F;M", "homepage": "https://chesiy.github.io;;https://speechlab.sjtu.edu.cn/members/mengyue-wu;http://www.cs.sjtu.edu.cn/~kzhu/", "dblp": "84/5999;;82/2416;z/KennyQiliZhu", "google_scholar": "SPngdHIAAAAJ;;https://scholar.google.com/citations?hl=en;https://scholar.google.com.tw/citations?user=ZIRJ6lIAAAAJ", "or_profile": "~Siyuan_Chen1;~Zhiling_Zhang1;~Mengyue_Wu1;~Kenny_Q._Zhu1", "aff": "Shanghai Jiaotong University;;Shanghai Jiaotong University;Shanghai Jiaotong University", "aff_domain": "sjtu.edu.cn;;sjtu.edu.cn;cs.sjtu.edu.cn", "position": "MS student;;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nchen2023detection,\ntitle={Detection of Multiple Mental Disorders from Social Media with Two-Stream Psychiatric Experts},\nauthor={Siyuan Chen and Zhiling Zhang and Mengyue Wu and Kenny Q. Zhu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=RcvJnskt0n}\n}", "github": "", "project": "", "reviewers": "CNuF;qJms;hhgj", "site": "https://openreview.net/forum?id=RcvJnskt0n", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;5;4", "excitement": "3;4;4", "reproducibility": "4;3;3", "correctness": "3;3;4", "rating_avg": 5.0, "confidence_avg": 4.333333333333333, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;", "linkedin": ";;;", "aff_unique_index": "0;0;0", "aff_unique_norm": "Shanghai Jiao Tong University", "aff_unique_dep": "", "aff_unique_url": "https://www.sjtu.edu.cn", "aff_unique_abbr": "SJTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "ReGzwoL3Sl", "title": "Semi-Structured Object Sequence Encoders", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "In this paper we explore the task of modeling semi-structured object sequences; in particular, we focus our attention on the problem of developing a structure-aware input representation for such sequences. Examples of such data include user activity on websites, machine logs, and many others. This type of data is often represented as a sequence of sets of key-value pairs over time and can present modeling challenges due to an ever-increasing sequence length. \nWe propose a two-part approach, which first considers each key independently and encodes a representation of its values over time; we then self-attend over these value-aware key representations to accomplish a downstream task. This allows us to operate on longer object sequences than existing methods. 
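To give a concrete picture of the two-part design just described, here is a minimal PyTorch-style sketch under assumed shapes: each key's values are first encoded over time, and the resulting value-aware key representations are then self-attended for a downstream prediction. Module and parameter names (`TwoPartObjectSequenceEncoder`, `num_keys`, `value_dim`) are illustrative assumptions, not the paper's implementation (which, as the abstract goes on to note, additionally shares attention heads between the two modules).

```python
# Minimal sketch of a two-part encoder for semi-structured object sequences.
# Part 1 encodes each key's values over time; part 2 self-attends over the
# resulting key representations. Shapes and names are illustrative assumptions.
import torch
import torch.nn as nn

class TwoPartObjectSequenceEncoder(nn.Module):
    def __init__(self, num_keys, value_dim, hidden_dim, num_classes, num_heads=4):
        super().__init__()
        # Part 1: a temporal encoder applied independently to each key (a GRU for brevity).
        self.value_encoder = nn.GRU(value_dim, hidden_dim, batch_first=True)
        self.key_embedding = nn.Embedding(num_keys, hidden_dim)
        # Part 2: self-attention over the value-aware key representations.
        self.key_attention = nn.TransformerEncoderLayer(
            d_model=hidden_dim, nhead=num_heads, batch_first=True)
        self.classifier = nn.Linear(hidden_dim, num_classes)

    def forward(self, values):
        # values: (batch, num_keys, seq_len, value_dim) -- each key's values over time.
        b, k, t, d = values.shape
        _, last_state = self.value_encoder(values.reshape(b * k, t, d))
        key_repr = last_state.squeeze(0).reshape(b, k, -1)    # (batch, num_keys, hidden)
        key_repr = key_repr + self.key_embedding.weight        # make each representation key-aware
        attended = self.key_attention(key_repr)                 # attend across keys
        return self.classifier(attended.mean(dim=1))            # pooled downstream prediction
```

Encoding values per key before attending across keys is what keeps the attention cost independent of the raw sequence length, which is the property the abstract appeals to for long object sequences.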
We introduce a novel shared-attention-head architecture between the two modules and present an innovative training schedule that interleaves the training of both modules with shared weights for some attention heads. Our experiments on multiple prediction tasks using real-world data demonstrate that our approach outperforms a unified network with hierarchical encoding, as well as other methods including a {\\em record-centric} representation and a {\\em flattened} representation of the sequence.", "keywords": "Structured object encoders;long sequences", "primary_area": "", "supplementary_material": "", "author": "Rudra Murthy;Riyaz Ahmad Bhat;Chulaka Gunasekara;Siva Sankalp Patel;Hui Wan;Tejas Indulal Dhamecha;Danish Contractor;Marina Danilevsky", "authorids": "~Rudra_Murthy1;~Riyaz_Ahmad_Bhat1;~Chulaka_Gunasekara2;~Siva_Sankalp_Patel1;~Hui_Wan1;~Tejas_Indulal_Dhamecha1;~Danish_Contractor2;~Marina_Danilevsky1", "gender": "M;M;M;M;F;F;;M", "homepage": "http://murthyrudra.github.io;https://sites.google.com/site/riyazahbhat/;https://researcher.watson.ibm.com/researcher/view.php?person=ibm-chulaka.gunasekara;;;http://marinadanilevsky.com/;;", "dblp": "216/7282;146/3952;139/2323;228/8428.html;80/4275;22/8355;93/9012;120/8497", "google_scholar": "5bjj_9cAAAAJ;BGC4b-sAAAAJ;UqzJBpIAAAAJ;zUvL46cAAAAJ;nO12Ns8AAAAJ;3JQr0NYAAAAJ;https://scholar.google.co.uk/citations?hl=en;Fln8oI8AAAAJ", "or_profile": "~Rudra_Murthy1;~Riyaz_Ahmad_Bhat1;~Chulaka_Gunasekara2;~Siva_Sankalp_Patel1;~Hui_Wan1;~Marina_Danilevsky1;~Danish_Contractor1;~Tejas_Dhamecha1", "aff": "IBM India Pvt Ltd;International Business Machines;International Business Machines;International Business Machines;IBM Research AI;International Business Machines;International Business Machines;Microsoft", "aff_domain": "in.ibm.com;ibm.com;ibm.com;ibm.com;researcher.watson.ibm.com;ibm.com;ibm.com;microsoft.com", "position": "Researcher;Researcher;Researcher;Researcher;Researcher;Researcher;Principal Researcher;Principal Data and Applied Scientist", "bibtex": "@inproceedings{\nmurthy2023semistructured,\ntitle={Semi-Structured Object Sequence Encoders},\nauthor={Rudra Murthy and Riyaz Ahmad Bhat and Chulaka Gunasekara and Siva Sankalp Patel and Hui Wan and Tejas Indulal Dhamecha and Danish Contractor and Marina Danilevsky},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=ReGzwoL3Sl}\n}", "github": "", "project": "", "reviewers": "ZUvN;sSHy;A9A6", "site": "https://openreview.net/forum?id=ReGzwoL3Sl", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;3;3", "excitement": "4;4;3", "reproducibility": "2;3;4", "correctness": "3;3;2", "rating_avg": 4.0, "confidence_avg": 3.0, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.0, "correctness_avg": 2.6666666666666665, "replies_avg": 10, "authors#_avg": 8, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-6236-1931;;;;;;;", "linkedin": ";riyaz-a-bhat-51828423/;;sivasankalp/;;marina-danilevsky/;;", "aff_unique_index": "0;1;1;1;0;1;1;2", "aff_unique_norm": "IBM;International Business Machines Corporation;Microsoft", "aff_unique_dep": "IBM India Pvt Ltd;;Microsoft Corporation", "aff_unique_url": "https://www.ibm.com/in-en;https://www.ibm.com;https://www.microsoft.com", "aff_unique_abbr": "IBM India;IBM;Microsoft", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1;1;1;1;1", "aff_country_unique": "India;United States" }, { "id": "RenTc1sUb7", 
"title": "On Task-personalized Multimodal Few-shot Learning for Visually-rich Document Entity Retrieval", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Visually-rich document entity retrieval (VDER), which extracts key information (e.g. date, address) from document images like invoices and receipts, has become an important topic in industrial NLP applications. The emergence of new document types at a constant pace, each with its unique entity types, presents a unique challenge: many documents contain unseen entity types that occur only a couple of times. Addressing this challenge requires models to have the ability of learning entities in a few-shot manner. However, prior works for Few-shot VDER mainly address the problem at the document level with a predefined global entity space, which doesn't account for the entity-level few-shot scenario: target entity types are locally personalized by each task and entity occurrences vary significantly among documents. To address this unexplored scenario, this paper studies a novel entity-level few-shot VDER task. The challenges lie in the uniqueness of the label space for each task and the increased complexity of out-of-distribution (OOD) contents. To tackle this novel task, we present a task-aware meta-learning based framework, with a central focus on achieving effective task personalization that distinguishes between in-task and out-of-task distribution. Specifically, we adopt a hierarchical decoder (HC) and employ contrastive learning (ContrastProtoNet) to achieve this goal. Furthermore, we introduce a new dataset, FewVEX, to boost future research in the field of entity-level few-shot VDER. Experimental results demonstrate our approaches significantly improve the robustness of popular meta-learning baselines.", "keywords": "document understanding;multiple modalities;entity retrieval;few shots;meta learning;out of distribution", "primary_area": "", "supplementary_material": "", "author": "Jiayi Chen;Hanjun Dai;Bo Dai;Aidong Zhang;Wei Wei", "authorids": "~Jiayi_Chen4;~Hanjun_Dai1;~Bo_Dai1;~Aidong_Zhang2;~Wei_Wei15", "gender": "F;M;;F;M", "homepage": "https://jia-yi-chen.github.io/;https://hanjun-dai.github.io;https://bo-dai.github.io/;https://engineering.virginia.edu/faculty/aidong-zhang;http://www.weiwei.one", "dblp": "42/1159;144/7311;64/2903;z/AidongZhang.html;", "google_scholar": "f3Iz6qoAAAAJ;obpl7GQAAAAJ;TIKl_foAAAAJ;O8XxkE4AAAAJ;https://scholar.google.com/citations?hl=en", "or_profile": "~Jiayi_Chen4;~Hanjun_Dai1;~Bo_Dai1;~Aidong_Zhang2;~wei_wei3", "aff": "University of Virginia;Google Research;Google Brain;University of Virginia;Google", "aff_domain": "cs.virginia.edu;google.com;google.com;virginia.edu;google.com", "position": "PhD student;Researcher;Research Scientist;Full Professor;Research Scientist", "bibtex": "@inproceedings{\nchen2023on,\ntitle={On Task-personalized Multimodal Few-shot Learning for Visually-rich Document Entity Retrieval},\nauthor={Jiayi Chen and Hanjun Dai and Bo Dai and Aidong Zhang and Wei Wei},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=RenTc1sUb7}\n}", "github": "", "project": "", "reviewers": "1N1p;BXH7;KbGr", "site": "https://openreview.net/forum?id=RenTc1sUb7", "pdf_size": 0, "rating": "2;2;2", "confidence": "1;4;3", "excitement": "3;3;3", "reproducibility": "2;4;4", "correctness": "4;3;3", "rating_avg": 2.0, "confidence_avg": 2.6666666666666665, "excitement_avg": 3.0, "reproducibility_avg": 
3.3333333333333335, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0003-0217-6352;;0009-0002-8070-574X;0000-0001-9723-3246;", "linkedin": ";hanjun-dai;;;", "aff_unique_index": "0;1;1;0;1", "aff_unique_norm": "University of Virginia;Google", "aff_unique_dep": ";Google Research", "aff_unique_url": "https://www.virginia.edu;https://research.google", "aff_unique_abbr": "UVA;Google Research", "aff_campus_unique_index": "1;1;1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "RgA1tcrxan", "title": "M2DF: Multi-grained Multi-curriculum Denoising Framework for Multimodal Aspect-based Sentiment Analysis", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Multimodal Aspect-based Sentiment Analysis (MABSA) is a fine-grained Sentiment Analysis task, which has attracted growing research interests recently. Existing work mainly utilizes image information to improve the performance of MABSA task. However, most of the studies overestimate the importance of images since there are many noise images unrelated to the text in the dataset, which will have a negative impact on model learning. Although some work attempts to filter low-quality noise images by setting thresholds, relying on thresholds will inevitably filter out a lot of useful image information. Therefore, in this work, we focus on whether the negative impact of noisy images can be reduced without modifying the data. To achieve this goal, we borrow the idea of Curriculum Learning and propose a Multi-grained Multi-curriculum Denoising Framework (M2DF), which can achieve denoising by adjusting the order of training data. 
Extensive experimental results show that our framework consistently outperforms state-of-the-art work on three sub-tasks of MABSA.", "keywords": "aspect-based sentiment analysis", "primary_area": "", "supplementary_material": "", "author": "Fei Zhao;Chunhui Li;Zhen Wu;Yawen Ouyang;Jianbing Zhang;Xinyu Dai", "authorids": "~Fei_Zhao5;~Chunhui_Li2;~Zhen_Wu2;~Yawen_Ouyang1;~Jianbing_Zhang1;~Xinyu_Dai1", "gender": ";M;M;M;M;M", "homepage": ";https://chunhui99.github.io/;https://wuzhen247.github.io/;https://yawenouyang.github.io/about/;https://cs.nju.edu.cn/zhangjb/;http://cs.nju.edu.cn/daixinyu", "dblp": ";;16/4485-2;;11/6084;39/5815", "google_scholar": ";;IoGlgtoAAAAJ;;;https://scholar.google.com/citations?hl=en", "or_profile": "~Fei_Zhao5;~Chunhui_Li2;~Zhen_Wu2;~Yawen_Ouyang1;~Jianbing_Zhang1;~Xinyu_Dai1", "aff": ";Nanjing University;Nanjing University;Nanjing University;Nanjing University;Nanjing University", "aff_domain": ";nju.edu.cn;nju.edu.cn;nju.edu.cn;nju.edu.cn;nju.edu.cn", "position": ";MS student;Researcher;PhD student;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nzhao2023mdf,\ntitle={M2{DF}: Multi-grained Multi-curriculum Denoising Framework for Multimodal Aspect-based Sentiment Analysis},\nauthor={Fei Zhao and Chunhui Li and Zhen Wu and Yawen Ouyang and Jianbing Zhang and Xinyu Dai},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=RgA1tcrxan}\n}", "github": "", "project": "", "reviewers": "Szeq;sBBK;ANSt", "site": "https://openreview.net/forum?id=RgA1tcrxan", "pdf_size": 0, "rating": "4;4;4", "confidence": "5;4;3", "excitement": "3;3;4", "reproducibility": "4;3;4", "correctness": "3;3;4", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 10, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0002-7678-103X;;;", "linkedin": ";;;;;", "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Nanjing University", "aff_unique_dep": "", "aff_unique_url": "https://www.nju.edu.cn", "aff_unique_abbr": "Nanjing U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "RkqyZj5QNN", "title": "Text Classification via Large Language Models", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Despite the remarkable success of large-scale Language Models (LLMs) such as GPT-3, their performances still significantly underperform \nfine-tuned models in the task of text classification. This is due to (1) the lack of reasoning ability in addressing complex linguistic phenomena (e.g., intensification, contrast, irony, etc.); (2) the limited number of tokens allowed in in-context learning. \n\nIn this paper, we introduce \\textbf{C}lue \\textbf{A}nd \\textbf{R}easoning \\textbf{P}rompting (CARP). CARP adopts a progressive reasoning strategy tailored to addressing the complex linguistic phenomena involved in text classification: CARP first prompts LLMs to find superficial clues (e.g., keywords, tones, semantic relations, references, etc.), based on which a diagnostic reasoning process is induced for final decisions. 
To further address the limited-token issue, CARP uses a fine-tuned model on the supervised dataset for $k$NN demonstration search in in-context learning, allowing the model to take advantage of both the LLM's generalization ability and the task-specific evidence provided by the full labeled dataset. Remarkably, CARP yields new SOTA performances on 4 out of 5 widely-used text-classification benchmarks, 97.39 (+1.24) on SST-2, 96.40 (+0.72) on AGNews, 98.78 (+0.25) on R8 and 96.95 (+0.6) on R52, and a performance comparable to SOTA on MR (92.39 vs. 93.3). More importantly, we find that CARP delivers impressive abilities on low-resource and domain-adaptation setups. Specifically, using 16 examples per class, CARP achieves comparable performances to supervised models with 1,024 examples per class.", "keywords": "Large Language Model;Text Classification;Intermediate Rationale Explanations", "primary_area": "", "supplementary_material": "", "author": "Xiaofei Sun;Xiaoya Li;Jiwei Li;Fei Wu;Shangwei Guo;Tianwei Zhang;Guoyin Wang", "authorids": "~Xiaofei_Sun3;~Xiaoya_Li1;~Jiwei_Li1;~Fei_Wu1;~Shangwei_Guo1;~Tianwei_Zhang1;~Guoyin_Wang1", "gender": "M;F;M;M;M;M;M", "homepage": ";;https://nlp.stanford.edu/~bdlijiwei/;https://person.zju.edu.cn/wufei;http://www.cs.cqu.edu.cn/info/1332/5290.htm;https://personal.ntu.edu.sg/tianwei.zhang/index.html;", "dblp": "87/7297-1.html;77/5121-1;73/5746-1;84/3254-1;176/6479;77/7902-4;05/3838-2", "google_scholar": "hIokU_IAAAAJ;QMuveu8AAAAJ;PwU16JEAAAAJ;XJLn4MYAAAAJ;wQrVkBYAAAAJ;9vpiYDIAAAAJ;https://scholar.google.com/citations?hl=en", "or_profile": "~Xiaofei_Sun3;~Xiaoya_Li1;~Jiwei_Li1;~Fei_Wu1;~Shangwei_Guo1;~Tianwei_Zhang1;~Guoyin_Wang1", "aff": "Zhejiang University;Shannon.AI;Zhejiang University;Zhejiang University;Chongqing University;Nanyang Technological University;Amazon", "aff_domain": "zju.edu.cn;shannonai.com;zju.edu.cn;zju.edu.cn;cqu.edu.cn;ntu.edu.sg;amazon.com", "position": "PhD student;Researcher;Assistant Professor;Full Professor;Associate Professor;Assistant Professor;Researcher", "bibtex": "@inproceedings{\nsun2023text,\ntitle={Text Classification via Large Language Models},\nauthor={Xiaofei Sun and Xiaoya Li and Jiwei Li and Fei Wu and Shangwei Guo and Tianwei Zhang and Guoyin Wang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=RkqyZj5QNN}\n}", "github": "", "project": "", "reviewers": "jdhr;9Y4M;uxYB", "site": "https://openreview.net/forum?id=RkqyZj5QNN", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;4;4", "excitement": "4;3;2", "reproducibility": "4;4;4", "correctness": "4;4;2", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.0, "reproducibility_avg": 4.0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;;", "linkedin": ";;;;;;", "aff_unique_index": "0;1;0;0;2;3;4", "aff_unique_norm": "Zhejiang University;Shannon.AI;Chongqing University;Nanyang Technological University;Amazon", "aff_unique_dep": ";;;;Amazon.com, Inc.", "aff_unique_url": "https://www.zju.edu.cn;https://www.shannon.ai;https://www.cqu.edu.cn;https://www.ntu.edu.sg;https://www.amazon.com", "aff_unique_abbr": "ZJU;Shannon.AI;CQU;NTU;Amazon", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;0;2;1", "aff_country_unique": "China;United States;Singapore" }, { "id": "RlPI6mERbr", "title": "Pre-trained Speech 
Processing Models Contain Human-Like Biases that Propagate to Speech Emotion Recognition", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Previous work has established that a person's demographics and speech style affect how well speech processing models perform for them. But where does this bias come from? \nIn this work, we present the Speech Embedding Association Test (SpEAT), a method for detecting bias in one type of model used for many speech tasks: pre-trained models. The SpEAT is inspired by word embedding association tests in natural language processing, which quantify intrinsic bias in a model's representations of different concepts, such as race or valence\u2014something's pleasantness or unpleasantness\u2014and capture the extent to which a model trained on large-scale socio-cultural data has learned human-like biases. Using the SpEAT, we test for six types of bias in 16 English speech models (including 4 models also trained on multilingual data), which come from the wav2vec 2.0, HuBERT, WavLM, and Whisper model families. We find that 14 or more models reveal positive valence (pleasantness) associations with abled people over disabled people, with European-Americans over African-Americans, with females over males, with U.S.-accented speakers over non-U.S.-accented speakers, and with younger people over older people. Beyond establishing that pre-trained speech models contain these biases, we also show that they can have real-world effects. We compare biases found in pre-trained models to biases in downstream models adapted to the task of Speech Emotion Recognition (SER) and find that in 66 of the 96 tests performed (69\\%), the group that is more associated with positive valence as indicated by the SpEAT also tends to be predicted as speaking with higher valence by the downstream model. Our work provides evidence that, like text and image-based models, pre-trained speech-based models frequently learn human-like biases when trained on large-scale socio-cultural datasets. 
Our work also shows that bias found in pre-trained models can propagate to the downstream task of SER.", "keywords": "AI bias;speech processing;embeddings;representation learning;bias propagation", "primary_area": "", "supplementary_material": "", "author": "Isaac Slaughter;Craig Greenberg;Reva Schwartz;Aylin Caliskan", "authorids": "~Isaac_Slaughter1;~Craig_Greenberg1;~Reva_Schwartz1;~Aylin_Caliskan1", "gender": ";M;F;Unspecified", "homepage": ";https://ciir.cs.umass.edu/graduate_students;https://www.nist.gov/people/reva-schwartz;https://faculty.washington.edu/aylin/", "dblp": ";94/9230;;116/4680", "google_scholar": ";XXPetHMAAAAJ;;zxzZAi0AAAAJ", "or_profile": "~Isaac_Slaughter1;~Craig_Greenberg1;~Reva_Schwartz1;~Aylin_Caliskan1", "aff": ";National Institute of Standards and Technology;NIST;University of Washington", "aff_domain": ";nist.gov;nist.gov;uw.edu", "position": ";Mathematician;Researcher;Assistant Professor", "bibtex": "@inproceedings{\nslaughter2023pretrained,\ntitle={Pre-trained Speech Processing Models Contain Human-Like Biases that Propagate to Speech Emotion Recognition},\nauthor={Isaac Slaughter and Craig Greenberg and Reva Schwartz and Aylin Caliskan},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=RlPI6mERbr}\n}", "github": "", "project": "", "reviewers": "PR28;FQKD;QLo4;b7KZ", "site": "https://openreview.net/forum?id=RlPI6mERbr", "pdf_size": 0, "rating": "4;4;4;4", "confidence": "3;2;3;3", "excitement": "4;4;4;4", "reproducibility": "4;4;3;3", "correctness": "4;4;3;3", "rating_avg": 4.0, "confidence_avg": 2.75, "excitement_avg": 4.0, "reproducibility_avg": 3.5, "correctness_avg": 3.5, "replies_avg": 12, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0002-9012-6306;", "linkedin": ";;reva-schwartz/;", "aff_unique_index": "0;0;1", "aff_unique_norm": "National Institute of Standards and Technology;University of Washington", "aff_unique_dep": ";", "aff_unique_url": "https://www.nist.gov;https://www.washington.edu", "aff_unique_abbr": "NIST;UW", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "Rn1k3Na4Cn", "title": "CLAD-ST: Contrastive Learning with Adversarial Data for Robust Speech Translation", "track": "main", "status": "Short Main", "tldr": "", "abstract": "The cascaded approach continues to be the most popular choice for speech translation (ST). This approach consists of an automatic speech recognition (ASR) model and a machine translation (MT) model that are used in a pipeline to translate speech in one language to text in another language. MT models are often trained on the well-formed text and therefore lack robustness while translating noisy ASR outputs in the cascaded approach, degrading the overall translation quality significantly. We address this robustness problem in downstream MT models by forcing the MT encoder to bring the representations of a noisy input closer to its clean version in the semantic space. This is achieved by introducing a contrastive learning method that leverages adversarial examples in the form of ASR outputs paired with their corresponding human transcripts to optimize the network parameters. In addition, a curriculum learning strategy is then used to stabilize the training by alternating the standard MT log-likelihood loss and the contrastive losses. 
Our approach achieves significant gains of up to 3 BLEU scores in English-German and English-French speech translation without hurting the translation quality on clean text.", "keywords": "robust speech translation;contrastive learning", "primary_area": "", "supplementary_material": "", "author": "Sathish Reddy Indurthi;Shamil Chollampatt;Ravi Agrawal;Marco Turchi", "authorids": "~Sathish_Reddy_Indurthi2;~Shamil_Chollampatt1;~Ravi_Agrawal1;~Marco_Turchi2", "gender": "M;M;M;M", "homepage": ";https://shamil.github.io;;http://marcoturchi.com", "dblp": "223/2379;182/2351;;96/4886", "google_scholar": "xZrGdhgAAAAJ;b1B1DpYAAAAJ;OHRO1joAAAAJ;loHH3HcAAAAJ", "or_profile": "~Sathish_Reddy_Indurthi2;~Shamil_Chollampatt1;~Ravi_Agrawal1;~Marco_Turchi2", "aff": "Zoom Video Communications;Zoom Video Communications;Zoom Video Communications ;Zoom", "aff_domain": "zoom.us;zoom.us;zoom.us;zoom.us", "position": "Senior Research Scientist;Research Scientist;Researcher;Principal Researcher", "bibtex": "@inproceedings{\nindurthi2023cladst,\ntitle={{CLAD}-{ST}: Contrastive Learning with Adversarial Data for Robust Speech Translation},\nauthor={Sathish Reddy Indurthi and Shamil Chollampatt and Ravi Agrawal and Marco Turchi},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=Rn1k3Na4Cn}\n}", "github": "", "project": "", "reviewers": "1msj;ZhJE;FsQ5", "site": "https://openreview.net/forum?id=Rn1k3Na4Cn", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;4;4", "excitement": "4;3;4", "reproducibility": "4;4;4", "correctness": "3;4;4", "rating_avg": 5.0, "confidence_avg": 4.0, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.0, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0000-0002-5899-4496", "linkedin": "sathishindurthi/;shamilcm/;umass-ravi/;", "aff_unique_index": "0;0;0;1", "aff_unique_norm": "Zoom Video Communications;Zoom Video Communications Inc.", "aff_unique_dep": ";", "aff_unique_url": "https://zoom.us;https://zoom.us", "aff_unique_abbr": "Zoom;Zoom", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "RndkyLWLHc", "title": "Natural Language Annotations for Reasoning about Program Semantics", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "By grounding natural language inference in code (and vice versa), researchers aim to create programming assistants that explain their work, are \"coachable\" and can surface any gaps in their reasoning.\nCan we deduce automatically interesting properties of programs from their syntax and common-sense annotations alone, without resorting to static analysis? How much of program logic and behaviour can be captured in natural language? 
\nTo stimulate research in this direction and attempt to answer these questions we propose HTL, a dataset and protocol for annotating programs with natural language predicates at a finer granularity than code comments and without relying on internal compiler representations.\n\nThe dataset is available at the following address: https://doi.org/10.5281/zenodo.7893113 .", "keywords": "program understanding;natural language reasoning;dataset", "primary_area": "", "supplementary_material": "", "author": "Marco Zocca", "authorids": "~Marco_Zocca2", "gender": "", "homepage": "https://unfoldml.com", "dblp": "245/6118", "google_scholar": "", "or_profile": "~Marco_Zocca2", "aff": "UnfoldML", "aff_domain": "unfoldml.com", "position": "Founder", "bibtex": "@inproceedings{\nzocca2023natural,\ntitle={Natural Language Annotations for Reasoning about Program Semantics},\nauthor={Marco Zocca},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=RndkyLWLHc}\n}", "github": "", "project": "", "reviewers": "Q8NV;9DAE;nCxi", "site": "https://openreview.net/forum?id=RndkyLWLHc", "pdf_size": 0, "rating": "3;3;3", "confidence": "2;3;3", "excitement": "2;3;3", "reproducibility": "0;5;0", "correctness": "4;3;3", "rating_avg": 3.0, "confidence_avg": 2.6666666666666665, "excitement_avg": 2.6666666666666665, "reproducibility_avg": 1.6666666666666667, "correctness_avg": 3.3333333333333335, "replies_avg": 10, "authors#_avg": 1, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "", "linkedin": "marcozocca/", "aff_unique_index": "0", "aff_unique_norm": "UnfoldML", "aff_unique_dep": "", "aff_unique_url": "", "aff_unique_abbr": "" }, { "id": "Ro3x3mCAkD", "title": "Connecting the Dots: What Graph-Based Text Representations Work Best for Text Classification using Graph Neural Networks?", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Given the success of Graph Neural Networks (GNNs) for structure-aware machine learning, many studies have explored their use for text classification, but mostly in specific domains with limited data characteristics. Moreover, some strategies prior to GNNs relied on graph mining and classical machine learning, making it difficult to assess their effectiveness in modern settings.\nThis work extensively investigates graph representation methods for text classification, identifying practical implications and open challenges.\nWe compare different graph construction schemes using a variety of GNN architectures and setups across five datasets, encompassing short and long documents as well as unbalanced scenarios in diverse domains. \nTwo Transformer-based large language models are also included to complement the study. 
\nThe results show that i) although the effectiveness of graphs depends on the textual input features and domain, simple graph constructions perform better the longer the documents are, ii) graph representations are especially beneficial for longer documents, outperforming Transformer-based models, iii) graph methods are particularly efficient for solving the task.", "keywords": "Graph-Based Text Representation;Graph Neural Networks;Text Classification", "primary_area": "", "supplementary_material": "", "author": "Margarita Bugue\u00f1o;Gerard de Melo", "authorids": "~Margarita_Bugue\u00f1o1;~Gerard_de_Melo3", "gender": "F;M", "homepage": "https://hpi.de/en/research-schools/hpi-dse/mitglieder/research-pages/margarita-bugueno.html;http://gerard.demelo.org/", "dblp": ";86/1747", "google_scholar": "7mD2fyMAAAAJ;https://scholar.google.com.tw/citations?user=WCQXaGkAAAAJ", "or_profile": "~Margarita_Bugue\u00f1o1;~Gerard_Melo1", "aff": "Hasso Plattner Institute;University of Potsdam", "aff_domain": "hpi.de;uni-potsdam.de", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\nbugue{\\~n}o2023connecting,\ntitle={Connecting the Dots: What Graph-Based Text Representations Work Best for Text Classification using Graph Neural Networks?},\nauthor={Margarita Bugue{\\~n}o and Gerard de Melo},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=Ro3x3mCAkD}\n}", "github": "", "project": "", "reviewers": "LsjL;KR2u;bG3X", "site": "https://openreview.net/forum?id=Ro3x3mCAkD", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;4", "excitement": "4;3;4", "reproducibility": "4;3;3", "correctness": "3;3;3", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.0, "replies_avg": 10, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-3703-6387;0000-0002-2930-2059", "linkedin": "margarita-bugueno/;gdemelo/", "aff_unique_index": "0;1", "aff_unique_norm": "Hasso Plattner Institute;University of Potsdam", "aff_unique_dep": ";", "aff_unique_url": "https://www.hpi.de;https://www.uni-potsdam.de", "aff_unique_abbr": "HPI;UP", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Germany" }, { "id": "Ror9xJhbdc", "title": "Revisiting Instruction Fine-tuned Model Evaluation to Guide Industrial Applications", "track": "main", "status": "Short Main", "tldr": "", "abstract": "Instruction Fine-Tuning (IFT) is a powerful paradigm that strengthens the zero-shot capabilities of Large Language Models (LLMs), but in doing so induces new evaluation metric requirements. We show LLM-based metrics to be well adapted to these requirements, and leverage them to conduct an investigation of task-specialization strategies, quantifying the trade-offs that emerge in practical industrial settings. 
Our findings offer practitioners actionable insights for real-world IFT model deployment.", "keywords": "Instruction Finetuning;Evaluation Metrics;Large Language Models", "primary_area": "", "supplementary_material": "", "author": "Manuel Faysse;Gautier Viaud;CELINE HUDELOT;Pierre Colombo", "authorids": "~Manuel_Faysse1;~Gautier_Viaud1;~CELINE_HUDELOT1;~Pierre_Colombo2", "gender": "M;M;F;M", "homepage": "https://manuelfay.github.io/;https://www.illuin.tech/;http://perso.ecp.fr/~hudelotc/;https://pierrecolombo.github.io/", "dblp": "359/3589;149/2249;https://dblp.uni-trier.de/pers/hd/h/Hudelot:C=eacute=line;", "google_scholar": "ew4xsR4AAAAJ;;https://scholar.google.fr/citations?user=gFlAh6MAAAAJ;yPoMt8gAAAAJ", "or_profile": "~Manuel_Faysse1;~Gautier_Viaud1;~CELINE_HUDELOT1;~Pierre_Colombo2", "aff": "CentraleSupelec;ILLUIN Technology;CentraleSupelec;CentraleSupelec", "aff_domain": "centralesupelec.fr;illuin.tech;centralesupelec.fr;centralesupelec.fr", "position": "PhD student;Principal Researcher;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nfaysse2023revisiting,\ntitle={Revisiting Instruction Fine-tuned Model Evaluation to Guide Industrial Applications},\nauthor={Manuel Faysse and Gautier Viaud and CELINE HUDELOT and Pierre Colombo},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=Ror9xJhbdc}\n}", "github": "", "project": "", "reviewers": "52Xn;NrxC;9tn7", "site": "https://openreview.net/forum?id=Ror9xJhbdc", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;2;4", "excitement": "4;4;3", "reproducibility": "4;4;4", "correctness": "3;3;3", "rating_avg": 4.0, "confidence_avg": 3.0, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.0, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0003-3849-4133;", "linkedin": "manuel-faysse/;gautier-viaud/;;", "aff_unique_index": "0;1;0;0", "aff_unique_norm": "CentraleSup\u00e9lec;Illuin Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.centralesupelec.fr;", "aff_unique_abbr": "CS;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "France;" }, { "id": "RsK483IRuO", "title": "A Closer Look into Using Large Language Models for Automatic Evaluation", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "Using large language models (LLMs) to evaluate text quality has recently gained popularity.\nSome existing prior works explore the idea of using LLMs for evaluation, while they differ in some details of the evaluation process.\nIn this paper, we analyze *LLM evaluation* and *G-Eval*, and we discuss how those details in the evaluation process change how well the ratings given by LLMs correlate with human ratings.\nWe find that the auto Chain-of-Thought (CoT) used in G-Eval does not always make G-Eval more aligned with human ratings.\nWe also show that forcing the LLM to output only a numeric rating, as in G-Eval, is suboptimal.\nLast, we reveal that asking the LLM to explain its own ratings consistently improves the correlation between the ChatGPT and human ratings and pushes state-of-the-art (SoTA) correlations on two meta-evaluation datasets.", "keywords": "LLM;automatic evaluation;LLM evaluaiton", "primary_area": "", "supplementary_material": "", "author": "Cheng-Han Chiang;Hung-yi Lee", "authorids": "~Cheng-Han_Chiang1;~Hung-yi_Lee2", "gender": 
";Non-Binary", "homepage": "https://github.com/d223302;https://speech.ee.ntu.edu.tw/~hylee/index.html", "dblp": "276/0431;81/8056", "google_scholar": "https://scholar.google.com.tw/citations?user=_DYQvPYAAAAJ;DxLO11IAAAAJ", "or_profile": "~Cheng-Han_Chiang1;~Hung-yi_Lee2", "aff": "National Taiwan University;National Taiwan University", "aff_domain": "ntu.edu.tw;ntu.edu.tw", "position": "PhD student;Associate Professor", "bibtex": "@inproceedings{\nchiang2023a,\ntitle={A Closer Look into Using Large Language Models for Automatic Evaluation},\nauthor={Cheng-Han Chiang and Hung-yi Lee},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=RsK483IRuO}\n}", "github": "", "project": "", "reviewers": "f4MA;Efov;fsws", "site": "https://openreview.net/forum?id=RsK483IRuO", "pdf_size": 0, "rating": "2;2;2", "confidence": "4;5;3", "excitement": "4;3;3", "reproducibility": "4;5;4", "correctness": "3;3;3", "rating_avg": 2.0, "confidence_avg": 4.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.333333333333333, "correctness_avg": 3.0, "replies_avg": 10, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";", "linkedin": ";", "aff_unique_index": "0;0", "aff_unique_norm": "National Taiwan University", "aff_unique_dep": "", "aff_unique_url": "https://www.ntu.edu.tw", "aff_unique_abbr": "NTU", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Taiwan", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "id": "RubWYFBZbG", "title": "Fair Without Leveling Down: A New Intersectional Fairness Definition", "track": "main", "status": "Long Main", "tldr": "", "abstract": "In this work, we consider the problem of intersectional group fairness in the classification setting, where the objective is to learn discrimination-free models in the presence of several intersecting sensitive groups.\nFirst, we illustrate various shortcomings of existing fairness measures commonly used to capture intersectional fairness. \nThen, we propose a new definition called the $\\alpha$-Intersectional Fairness, which combines the absolute and the relative performance across sensitive groups and can be seen as a generalization of the notion of differential fairness. \nWe highlight several desirable properties of the proposed definition and analyze its relation to other fairness measures.\nFinally, we benchmark multiple popular in-processing fair machine learning approaches using our new fairness definition and show that they do not achieve any improvement over a simple baseline. 
\nOur results reveal that the increase in fairness measured by previous definitions hides a ``leveling down'' effect, i.e., degrading the best performance over groups rather than improving the worst one.", "keywords": "Fairness; Intersectional; Leveling Down", "primary_area": "", "supplementary_material": "", "author": "Gaurav Maheshwari;Aur\u00e9lien Bellet;Pascal Denis;Mikaela Keller", "authorids": "~Gaurav_Maheshwari1;~Aur\u00e9lien_Bellet1;~Pascal_Denis1;~Mikaela_Keller1", "gender": "M;;M;F", "homepage": "https://gauravm.gitbook.io/about/;http://researchers.lille.inria.fr/abellet/;http://researchers.lille.inria.fr/~pdenis/;https://www.cristal.univ-lille.fr/profil/kellerm/", "dblp": "67/10152-1.html;61/8017;18/4078;14/4746", "google_scholar": "4dlGzdcAAAAJ;https://scholar.google.fr/citations?user=j8svx3IAAAAJ;Y1nQ6eUAAAAJ;", "or_profile": "~Gaurav_Maheshwari1;~Aur\u00e9lien_Bellet1;~Pascal_Denis1;~Mikaela_Keller1", "aff": "INRIA;INRIA;INRIA;Universit\u00e9 de Lille", "aff_domain": "inria.fr;inria.fr;inria.fr;univ-lille.fr", "position": "PhD student;Tenured researcher;Researcher;Associate Professor", "bibtex": "@inproceedings{\nmaheshwari2023fair,\ntitle={Fair Without Leveling Down: A New Intersectional Fairness Definition},\nauthor={Gaurav Maheshwari and Aur{\\'e}lien Bellet and Pascal Denis and Mikaela Keller},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=RubWYFBZbG}\n}", "github": "", "project": "", "reviewers": "w78s;Msnu;JvEV", "site": "https://openreview.net/forum?id=RubWYFBZbG", "pdf_size": 0, "rating": "5;5;5", "confidence": "2;3;3", "excitement": "3;3;3", "reproducibility": "4;5;4", "correctness": "3;4;4", "rating_avg": 5.0, "confidence_avg": 2.6666666666666665, "excitement_avg": 3.0, "reproducibility_avg": 4.333333333333333, "correctness_avg": 3.6666666666666665, "replies_avg": 10, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0003-3440-1251;0000-0003-4121-6337;", "linkedin": ";;;", "aff_unique_index": "0;0;0;1", "aff_unique_norm": "INRIA;Universit\u00e9 de Lille", "aff_unique_dep": ";", "aff_unique_url": "https://www.inria.fr;https://www.univ-lille.fr", "aff_unique_abbr": "INRIA;UdeL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "France" }, { "id": "Rvz7LvHcdX", "title": "Type-Aware Decomposed Framework for Few-Shot Named Entity Recognition", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Despite the recent success achieved by several two-stage prototypical networks in few-shot named entity recognition (NER) task, the over-detected false spans at span detection stage and the inaccurate and unstable prototypes at type classification stage remain to be challenging problems.\nIn this paper, we propose a novel Type-Aware Decomposed framework, namely TadNER, to solve these problems.\nWe first present a type-aware span filtering strategy to filter out false spans by removing those semantically far away from type names. 
We then present a type-aware contrastive learning strategy to construct more accurate and stable prototypes by jointly exploiting support samples and type names as references.\nExtensive experiments on various benchmarks prove that our proposed TadNER framework yields a new state-of-the-art performance.", "keywords": "Named Entity Recognition;Few-Shot Learning", "primary_area": "", "supplementary_material": "", "author": "Yongqi Li;Yu Yu;Tieyun Qian", "authorids": "~Yongqi_Li3;~Yu_Yu4;~Tieyun_Qian1", "gender": "M;F;", "homepage": "https://liyongqi2002.github.io/;https://github.com/YuYuSG;", "dblp": "249/4156-2;;17/5583", "google_scholar": "2R_eMkkAAAAJ;;MYTt4EwAAAAJ", "or_profile": "~Yongqi_Li3;~Yu_Yu4;~Tieyun_Qian1", "aff": "Wuhan University;Wuhan University;Wuhan University", "aff_domain": "whu.edu.cn;whu.edu.cn;whu.edu.cn", "position": "Undergrad student;Undergrad student;Full Professor", "bibtex": "@inproceedings{\nli2023typeaware,\ntitle={Type-Aware Decomposed Framework for Few-Shot Named Entity Recognition},\nauthor={Yongqi Li and Yu Yu and Tieyun Qian},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=Rvz7LvHcdX}\n}", "github": "", "project": "", "reviewers": "NUFY;RZMJ;WiN1", "site": "https://openreview.net/forum?id=Rvz7LvHcdX", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;3;5", "excitement": "4;3;3", "reproducibility": "5;4;2", "correctness": "4;4;3", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0003-4667-5794", "linkedin": ";;", "aff_unique_index": "0;0;0", "aff_unique_norm": "Wuhan University", "aff_unique_dep": "", "aff_unique_url": "http://www.whu.edu.cn/", "aff_unique_abbr": "WHU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "RwzFNbJ3Ez", "title": "SelfCheckGPT: Zero-Resource Black-Box Hallucination Detection for Generative Large Language Models", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Generative Large Language Models (LLMs) such as GPT-3 are capable of generating highly fluent responses to a wide variety of user prompts. However, LLMs are known to hallucinate facts and make non-factual statements which can undermine trust in their output. Existing fact-checking approaches either require access to the output probability distribution (which may not be available for systems such as ChatGPT) or external databases that are interfaced via separate, often complex, modules. In this work, we propose \"SelfCheckGPT\", a simple sampling-based approach that can be used to fact-check the responses of black-box models in a zero-resource fashion, i.e. without an external database. SelfCheckGPT leverages the simple idea that if an LLM has knowledge of a given concept, sampled responses are likely to be similar and contain consistent facts. However, for hallucinated facts, stochastically sampled responses are likely to diverge and contradict one another. We investigate this approach by using GPT-3 to generate passages about individuals from the WikiBio dataset, and manually annotate the factuality of the generated passages. We demonstrate that SelfCheckGPT can: i) detect non-factual and factual sentences; and ii) rank passages in terms of factuality. 
We compare our approach to several baselines and show that our approach has considerably higher AUC-PR scores in sentence-level hallucination detection and higher correlation scores in passage-level factuality assessment compared to grey-box methods.", "keywords": "generative-AI hallucination;fact-checking;trustworthy artificial intelligence", "primary_area": "", "supplementary_material": "", "author": "Potsawee Manakul;Adian Liusie;Mark Gales", "authorids": "~Potsawee_Manakul1;~Adian_Liusie1;~Mark_Gales1", "gender": "M;M;M", "homepage": "https://potsawee.github.io/;;http://mi.eng.cam.ac.uk/~mjfg/index.html", "dblp": "243/6654;333/0793;74/4419.html", "google_scholar": "https://scholar.google.com/citations?hl=en;dYtKMOgAAAAJ;https://scholar.google.co.uk/citations?hl=en", "or_profile": "~Potsawee_Manakul1;~Adian_Liusie1;~Mark_Gales1", "aff": "University of Cambridge;University of Cambridge;University of Cambridge", "aff_domain": "cam.ac.uk;cam.ac.uk;cam.ac.uk", "position": "PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nmanakul2023selfcheckgpt,\ntitle={SelfCheck{GPT}: Zero-Resource Black-Box Hallucination Detection for Generative Large Language Models},\nauthor={Potsawee Manakul and Adian Liusie and Mark Gales},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=RwzFNbJ3Ez}\n}", "github": "", "project": "", "reviewers": "XVG4;JhFY;91B1;3S1N", "site": "https://openreview.net/forum?id=RwzFNbJ3Ez", "pdf_size": 0, "rating": "5;5;5;5", "confidence": "4;4;2;5", "excitement": "4;4;3;4", "reproducibility": "4;5;4;5", "correctness": "4;3;3;4", "rating_avg": 5.0, "confidence_avg": 3.75, "excitement_avg": 3.75, "reproducibility_avg": 4.5, "correctness_avg": 3.5, "replies_avg": 14, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": ";adian-liusie-00b60511a/;", "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Cambridge", "aff_unique_dep": "", "aff_unique_url": "https://www.cam.ac.uk", "aff_unique_abbr": "Cambridge", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Cambridge", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United Kingdom" }, { "id": "RxvMKDgZH6", "title": "Accelerating Multiple Intent Detection and Slot Filling via Targeted Knowledge Distillation", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Recent non-autoregressive Spoken Language Understanding (SLU) models attracts increasing attention owing to the high inference speed. However, most of them still (1) suffer from the multi-modality problem since the prior knowledge about the reference is relatively poor during inference; (2) fail to achieve a satisfactory inference speed limited by their complex frameworks. To tackle these problems, in this paper, we propose a $\\textbf{T}$argeted $\\textbf{K}$nowledge $\\textbf{D}$istillation $\\textbf{F}$ramework (TKDF), which applies knowledge distillation to improve the performance. Specifically, we first train an SLU model as a teacher model, which has higher accuracy while slower inference speed. Then we introduce an evaluator and utilize the curriculum learning strategy to select proper targets for the student model. 
Experiment results on two public multi-intent SLU datasets demonstrate that our method can realize a flexible trade-off between inference speed and accuracy, achieving comparable performance to the state-of-the-art models while speeding up by over 4.5 times.", "keywords": "Multiple Intent Detection and Slot Filling;Knowledge Distillation;Non-Autoregressive", "primary_area": "", "supplementary_material": "", "author": "Xuxin Cheng;Zhihong Zhu;Wanshi Xu;Yaowei Li;Hongxiang Li;Yuexian Zou", "authorids": "~Xuxin_Cheng3;~Zhihong_Zhu1;~Wanshi_Xu1;~Yaowei_Li2;~Hongxiang_Li3;~Yuexian_Zou2", "gender": ";;;;;", "homepage": ";;http://wanshi.com;;;", "dblp": ";;;;;", "google_scholar": ";;;;;", "or_profile": "~Xuxin_Cheng3;~Zhihong_Zhu1;~Wanshi_Xu1;~Yaowei_Li2;~Hongxiang_Li3;~Yuexian_Zou2", "aff": ";;Peking University;;;", "aff_domain": ";;pku.edu.cn;;;", "position": ";;MS student;;;", "bibtex": "@inproceedings{\ncheng2023accelerating,\ntitle={Accelerating Multiple Intent Detection and Slot Filling via Targeted Knowledge Distillation},\nauthor={Xuxin Cheng and Zhihong Zhu and Wanshi Xu and Yaowei Li and Hongxiang Li and Yuexian Zou},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=RxvMKDgZH6}\n}", "github": "", "project": "", "reviewers": "3Dd1;y4ok;bSiD", "site": "https://openreview.net/forum?id=RxvMKDgZH6", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;3;5", "excitement": "3;3;3", "reproducibility": "3;3;4", "correctness": "2;3;3", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.0, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 2.6666666666666665, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;", "linkedin": ";;;;;", "aff_unique_index": "0", "aff_unique_norm": "Peking University", "aff_unique_dep": "", "aff_unique_url": "http://www.pku.edu.cn", "aff_unique_abbr": "Peking U", "aff_country_unique_index": "0", "aff_country_unique": "China" }, { "id": "Rz5eVgy8Sd", "title": "An Intent-based and Annotation-free Method for Duplicate Question Detection in CQA Forums", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "With the advent of large language models (LLMs), Community Question Answering (CQA) forums offer well-curated questions and answers that can be utilized for instruction-tuning, effectively training LLMs to be aligned with human intents. However, the issue of duplicate questions arises as the volume of content within CQA continues to grow, posing a threat to content quality. Recent research highlights the benefits of detecting and eliminating duplicate content. It not only enhances the LLMs' ability to generalize across diverse intents but also improves the efficiency of training data utilization while addressing concerns related to information leakage. However, existing methods for detecting duplicate questions in CQA typically rely on generic text-pair matching models, overlooking the intent behind the questions. In this paper, we propose a novel intent-based duplication detector named Intent-DQD that comprehensively leverages intent information to address the problem of duplicate question detection in CQA. Intent-DQD first leverages the characteristics in CQA forums and extracts training labels to recognize and match intents without human annotation. 
Intent-DQD then effectively aggregates intent-level relations and establishes question-level relations to enable intent-aware duplication detection. Experimental results on fifteen distinct domains from both CQADupStack and Stack Overflow datasets demonstrate the effectiveness of Intent-DQD. Reproducible codes and datasets will be released upon publication of the paper.", "keywords": "Sentence-level Semantics;Textual Inference;Data deduplicating;Instruct-tuning", "primary_area": "", "supplementary_material": "", "author": "Yubo Shu;Hansu Gu;Peng Zhang;Tun Lu;Ning Gu", "authorids": "~Yubo_Shu1;~Hansu_Gu1;~Peng_Zhang31;~Tun_Lu1;~Ning_Gu2", "gender": ";;M;M;M", "homepage": "https://github.com/BruceStayHungry;;https://cscw.fudan.edu.cn/pengzhang/list.htm;;https://cscw.fudan.edu.cn/", "dblp": ";00/7447;;41/2472;", "google_scholar": ";;;;https://scholar.google.com.au/citations?user=AUnPpaUAAAAJ", "or_profile": "~Yubo_Shu1;~Hansu_Gu1;~Peng_Zhang31;~Tun_Lu1;~Ning_Gu2", "aff": "Fudan University;Amazon;;Fudan University;Fudan University", "aff_domain": "fudan.edu.cn;amazon.com;;fudan.edu.cn;fudan.edu.cn", "position": "PhD student;Researcher;;Full Professor;Full Professor", "bibtex": "@inproceedings{\nshu2023an,\ntitle={An Intent-based and Annotation-free Method for Duplicate Question Detection in {CQA} Forums},\nauthor={Yubo Shu and Hansu Gu and Peng Zhang and Tun Lu and Ning Gu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=Rz5eVgy8Sd}\n}", "github": "", "project": "", "reviewers": "xaBD;118e;Q6eK", "site": "https://openreview.net/forum?id=Rz5eVgy8Sd", "pdf_size": 0, "rating": "3;3;3", "confidence": "5;5;4", "excitement": "3;3;4", "reproducibility": "4;5;4", "correctness": "3;3;4", "rating_avg": 3.0, "confidence_avg": 4.666666666666667, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.333333333333333, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0000-0002-6633-4826;0000-0002-2915-974X", "linkedin": ";;;;", "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Fudan University;Amazon", "aff_unique_dep": ";Amazon.com, Inc.", "aff_unique_url": "https://www.fudan.edu.cn;https://www.amazon.com", "aff_unique_abbr": "Fudan;Amazon", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "China;United States" }, { "id": "RzWrY4KYg8", "title": "Uncovering Limitations in Text-to-Image Generation: A Contrastive Approach with Structured Semantic Alignment", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Despite significant advancements in text-to-image generation models, they still face challenges when it comes to producing highly detailed or complex images based on textual descriptions. In order to explore these limitations, we propose a Structured Semantic Alignment (SSA) method for evaluating text-to-image generation models. SSA focuses on learning structured semantic embeddings across different modalities and aligning them in a joint space. 
The method employs the following steps to achieve its objective: (i) Generating mutated prompts by substituting words with semantically equivalent or nonequivalent alternatives while preserving the original syntax; (ii) Representing the sentence structure through parsing trees obtained via syntax parsing; (iii) Learning fine-grained structured embeddings that project semantic features from different modalities into a shared embedding space; (iv) Evaluating the semantic consistency between the structured text embeddings and the corresponding visual embeddings. Through experiments conducted on various benchmarks, we have demonstrated that SSA offers improved measurement of semantic consistency of text-to-image generation models. Additionally, it unveils a wide range of generation errors including under-generation, incorrect constituency, incorrect dependency, and semantic confusion. By uncovering these biases and limitations embedded within the models, our proposed method provides valuable insights into their shortcomings when applied to real-world scenarios.", "keywords": "Multi-modal generation;Semantic consistency;Structure information learning", "primary_area": "", "supplementary_material": "", "author": "Qianyu Feng;Yulei Sui;Hongyu Zhang", "authorids": "~Qianyu_Feng1;~Yulei_Sui1;~Hongyu_Zhang1", "gender": "F;M;M", "homepage": ";http://yuleisui.github.io;https://sites.google.com/site/hongyujohn", "dblp": "246/4698;58/10567.html;29/2726-2", "google_scholar": "https://scholar.google.com.au/citations?user=d0EHVf0AAAAJ;https://scholar.google.com.au/citations?user=wGHqq1cAAAAJ;https://scholar.google.com.au/citations?user=zsUN6PkAAAAJ", "or_profile": "~Qianyu_Feng1;~Yulei_Sui1;~Hongyu_Zhang1", "aff": "University of Newcastle;University of New South Wales;University of Newcastle, Australia", "aff_domain": "newcastle.edu.au;unsw.edu.au;newcastle.edu.au", "position": "Postdoc;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nfeng2023uncovering,\ntitle={Uncovering Limitations in Text-to-Image Generation: A Contrastive Approach with Structured Semantic Alignment},\nauthor={Qianyu Feng and Yulei Sui and Hongyu Zhang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=RzWrY4KYg8}\n}", "github": "", "project": "", "reviewers": "4Ntp;L4hu;kaoX", "site": "https://openreview.net/forum?id=RzWrY4KYg8", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;3;4", "excitement": "4;3;4", "reproducibility": "3;4;3", "correctness": "3;2;3", "rating_avg": 3.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 2.6666666666666665, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0003-1014-6081;;0000-0002-3063-9425", "linkedin": ";;", "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Newcastle;University of New South Wales", "aff_unique_dep": ";", "aff_unique_url": "https://www.newcastle.edu.au;https://www.unsw.edu.au", "aff_unique_abbr": "UON;UNSW", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Australia" }, { "id": "S0eqbM16k2", "title": "Instances and Labels: Hierarchy-aware Joint Supervised Contrastive Learning for Hierarchical Multi-Label Text Classification", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Hierarchical multi-label text classification (HMTC) 
aims at utilizing a label hierarchy in multi-label classification. Recent approaches to HMTC deal with the problem of imposing an overconstrained premise on the output space by using contrastive learning on generated samples in a semi-supervised manner to bring text and label embeddings closer. However, the generation of samples tends to introduce noise as it ignores the correlation between similar samples in the same batch. One solution to this issue is supervised contrastive learning, but it remains an underexplored topic in HMTC due to its complex structured labels. To overcome this challenge, we propose **HJCL**, a **H**ierarchy-aware **J**oint Supervised **C**ontrastive **L**earning method that bridges the gap between supervised contrastive learning and HMTC. Specifically, we employ both instance-wise and label-wise contrastive learning techniques and carefully construct batches to fulfill the contrastive learning objective. Extensive experiments on four multi-path HMTC datasets demonstrate that HJCL achieves promising results and the effectiveness of Contrastive Learning on HMTC. Code and data are available at https://github.com/simonucl/HJCL.", "keywords": "Natural language processing;Multi-label Classification;Contrastive Learning in NLP", "primary_area": "", "supplementary_material": "", "author": "Simon Chi Lok Yu;Jie He;Victor Gutierrez Basulto;Jeff Z. Pan", "authorids": "~Simon_Chi_Lok_Yu1;~Jie_He3;~Victor_Gutierrez_Basulto1;~Jeff_Z._Pan1", "gender": "M;M;;M", "homepage": "https://simonucl.github.io/;;;https://knowledge-representation.org/j.z.pan/", "dblp": ";28/4019-4;;59/6490", "google_scholar": "https://scholar.google.co.uk/citations?hl=en;VMD_HuYAAAAJ;;https://scholar.google.co.uk/citations?hl=en", "or_profile": "~Simon_Chi_Lok_Yu1;~Jie_He3;~Victor_Gutierrez_Basulto1;~Jeff_Z._Pan1", "aff": "University of Edinburgh, University of Edinburgh;University of Edinburgh;;University of Edinburgh, University of Edinburgh", "aff_domain": "ed.ac.uk;ed.ac.uk;;ed.ac.uk", "position": "Undergrad student;PhD student;;Full Professor", "bibtex": "@inproceedings{\nyu2023instances,\ntitle={Instances and Labels: Hierarchy-aware Joint Supervised Contrastive Learning for Hierarchical Multi-Label Text Classification},\nauthor={Simon Chi Lok Yu and Jie He and Victor Gutierrez Basulto and Jeff Z. 
Pan},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=S0eqbM16k2}\n}", "github": "", "project": "", "reviewers": "9pSu;46re;Jpbo", "site": "https://openreview.net/forum?id=S0eqbM16k2", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;4;2", "excitement": "3;3;2", "reproducibility": "4;4;4", "correctness": "4;4;3", "rating_avg": 3.0, "confidence_avg": 3.0, "excitement_avg": 2.6666666666666665, "reproducibility_avg": 4.0, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0000-0002-9779-2088", "linkedin": ";;;", "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Edinburgh", "aff_unique_dep": "", "aff_unique_url": "https://www.ed.ac.uk", "aff_unique_abbr": "Edinburgh", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United Kingdom" }, { "id": "S5eTDhfjHM", "title": "tagE: Enabling an Embodied Agent to Understand Human Instructions", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Natural language serves as the primary mode of communication when an intelligent agent with a physical presence engages with human beings. While a plethora of research focuses on natural language understanding (NLU), encompassing endeavors such as sentiment analysis, intent prediction, question answering, and summarization, the scope of NLU directed at situations necessitating tangible actions by an embodied agent remains limited. The inherent ambiguity and incompleteness inherent in natural language present challenges for intelligent agents striving to decipher human intention. To tackle this predicament head-on, we introduce a novel system known as task and argument grounding for Embodied agents (tagE). At its core, our system employs an inventive neural network model designed to extract a series of tasks from complex task instructions expressed in natural language. Our proposed model adopts an encoder-decoder framework enriched with nested decoding to effectively extract tasks and their corresponding arguments from these intricate instructions. These extracted tasks are then mapped (or grounded) to the robot's established collection of skills, while the arguments find grounding in objects present within the environment. To facilitate the training and evaluation of our system, we have curated a dataset featuring complex instructions. 
The results of our experiments underscore the prowess of our approach, as it outperforms robust baseline models.", "keywords": "human-robot interaction; NLP for robotics; task and argument extraction; task and argument grounding;", "primary_area": "", "supplementary_material": "", "author": "Chayan Sarkar;Avik Mitra;Pradip Pramanick;Tapas Nayak", "authorids": "~Chayan_Sarkar1;~Avik_Mitra1;~Pradip_Pramanick1;~Tapas_Nayak1", "gender": "M;M;M;M", "homepage": "https://www.chayansarkar.com/;;;", "dblp": "01/10806;;219/8193;184/8833", "google_scholar": "MMOS7xsAAAAJ;;o9PXpR0AAAAJ;4AgZ2VYAAAAJ", "or_profile": "~Chayan_Sarkar1;~Avik_Mitra1;~Pradip_Pramanick1;~Tapas_Nayak1", "aff": "Tata Consultancy Services Limited, India;Tata Consultancy Services Limited, India;Tata Consultancy Services Limited, India;Tata Consultancy Services Limited, India", "aff_domain": "tcs.com;tcs.com;tcs.com;tcs.com", "position": "Scientist;Researcher;Researcher;Scientist", "bibtex": "@inproceedings{\nsarkar2023tage,\ntitle={tagE: Enabling an Embodied Agent to Understand Human Instructions},\nauthor={Chayan Sarkar and Avik Mitra and Pradip Pramanick and Tapas Nayak},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=S5eTDhfjHM}\n}", "github": "", "project": "", "reviewers": "MBSB;Tmep;vT9w", "site": "https://openreview.net/forum?id=S5eTDhfjHM", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;4;4", "excitement": "3;3;3", "reproducibility": "3;3;3", "correctness": "4;3;2", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.0, "reproducibility_avg": 3.0, "correctness_avg": 3.0, "replies_avg": 10, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0003-4777-2086;;;", "linkedin": "csarkar87/;avikmitra1998;;", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Tata Consultancy Services Limited", "aff_unique_dep": "", "aff_unique_url": "https://www.tcs.com", "aff_unique_abbr": "TCS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "India" }, { "id": "S81zso7Imh", "title": "IC3: Image Captioning by Committee Consensus", "track": "main", "status": "Long Main", "tldr": "", "abstract": "If you ask a human to describe an image, they might do so in a thousand different ways. Traditionally, image captioning models are trained to generate a single \"best\" (most like a reference) image caption. Unfortunately, doing so encourages captions that are \"informationally impoverished,\" and focus on only a subset of the possible details, while ignoring other potentially useful information in the scene. In this work, we introduce a simple, yet novel, method: \"Image Captioning by Committee Consensus\" (IC3), designed to generate a single caption that captures high-level details from several annotator viewpoints. Humans rate captions produced by IC3 at least as helpful as baseline SOTA models more than two thirds of the time, and IC3 can improve the performance of SOTA automated recall systems by up to 84%, outperforming single human-generated reference captions, and indicating significant improvements over SOTA approaches for visual description. 
Code is available at [https://davidmchan.github.io/caption-by-committee/](https://davidmchan.github.io/caption-by-committee/)", "keywords": "Image Captioning;Large Language Models;Prompt Engineering;Visual Description", "primary_area": "", "supplementary_material": "", "author": "David Chan;Austin Myers;Sudheendra Vijayanarasimhan;David A Ross;John Canny", "authorids": "~David_Chan3;~Austin_Myers1;~Sudheendra_Vijayanarasimhan1;~David_A_Ross1;~John_Canny1", "gender": "M;M;M;M;M", "homepage": "https://people.eecs.berkeley.edu/~davidchan/;;https://research.google.com/pubs/105363.html;http://www.cs.berkeley.edu/~jfc/;http://www.cs.toronto.edu/~dross/", "dblp": "80/9659;135/8626;;;68/2171", "google_scholar": "qa4M89wAAAAJ;Tw8DY-cAAAAJ;y5fsjDAAAAAJ;https://scholar.google.com.tw/citations?user=LAv0HTEAAAAJ;RqOzJR0AAAAJ", "or_profile": "~David_Chan3;~Austin_Myers1;~Sudheendra_Vijayanarasimhan1;~John_Canny1;~David_Alexander_Ross1", "aff": "University of California, Berkeley;Google;Research, Google;University of California, Berkeley;Research, Google", "aff_domain": "berkeley.edu;google.com;research.google.com;berkeley.edu;research.google.com", "position": "PhD student;Researcher;Researcher;Full Professor;Software Engineer", "bibtex": "@inproceedings{\nchan2023ic,\ntitle={{IC}3: Image Captioning by Committee Consensus},\nauthor={David Chan and Austin Myers and Sudheendra Vijayanarasimhan and David A Ross and John Canny},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=S81zso7Imh}\n}", "github": "", "project": "", "reviewers": "WL9Q;5Y4s;k3RY;9GXH", "site": "https://openreview.net/forum?id=S81zso7Imh", "pdf_size": 0, "rating": "5;5;5;5", "confidence": "3;3;4;3", "excitement": "4;4;3;3", "reproducibility": "3;5;3;5", "correctness": "4;3;3;3", "rating_avg": 5.0, "confidence_avg": 3.25, "excitement_avg": 3.5, "reproducibility_avg": 4.0, "correctness_avg": 3.25, "replies_avg": 13, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;", "linkedin": ";;;;", "aff_unique_index": "0;1;1;0;1", "aff_unique_norm": "University of California, Berkeley;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.berkeley.edu;https://www.google.com", "aff_unique_abbr": "UC Berkeley;Google", "aff_campus_unique_index": "0;1;1;0;1", "aff_campus_unique": "Berkeley;Mountain View", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "SAM1HFH6iB", "title": "Self-Evolution Learning for Mixup: Enhance Data Augmentation on Few-Shot Text Classification Tasks", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Text classification tasks often encounter few-shot scenarios with limited labeled data, and addressing data scarcity is crucial. Data augmentation with mixup merges sample pairs to generate new pseudos, which can relieve the data deficiency issue in text classification. However, the quality of pseudo-samples generated by mixup exhibits significant variations. Most of the mixup methods fail to consider the varying degree of learning difficulty in different stages of training. And mixup generates new samples with one-hot labels, which encourages the model to produce a high prediction score for the correct class that is much larger than other classes, resulting in the model's over-confidence. 
In this paper, we propose a self-evolution learning (SE) based mixup approach for data augmentation in text classification, which can generate more adaptive and model-friendly pseudo samples for the model training. SE caters to the growth of the model learning ability and adapts to the ability when generating training samples. To alleviate the model over-confidence, we introduce an instance-specific label smoothing regularization approach, which linearly interpolates the model\u2019s output and one-hot labels of the original samples to generate new soft labels for label mixing up. Through experimental analysis, experiments show that our SE brings consistent and significant improvements upon different mixup methods. In-depth analyses demonstrate that SE enhances the model's generalization ability.", "keywords": "self-evolution learning;mixup;pretrained language model;few-shot text classification", "primary_area": "", "supplementary_material": "", "author": "Haoqi Zheng;Qihuang Zhong;Liang Ding;Zhiliang Tian;Xin Niu;Changjian Wang;Dongsheng Li;Dacheng Tao", "authorids": "~Haoqi_Zheng1;~Qihuang_Zhong1;~Liang_Ding3;~Zhiliang_Tian2;~Xin_Niu1;~Changjian_Wang1;~Dongsheng_Li3;~Dacheng_Tao1", "gender": ";M;M;M;M;M;;", "homepage": "https://zhe123tc.github.io/;https://www.qihuangzhong.top/;http://liamding.cc/;https://scholar.google.com.hk/citations?hl=en&user=ClvGvccAAAAJ#;;;;", "dblp": "348/5063;272/6439.html;88/3340-6.html;203/9265;;145/0604;;", "google_scholar": ";https://scholar.google.com/citations?hl=zh-CN;lFCLvOAAAAAJ;https://scholar.google.com.hk/citations?hl=en;;;;", "or_profile": "~Haoqi_Zheng1;~Qihuang_Zhong1;~Liang_Ding3;~Zhiliang_Tian2;~Xin_Niu1;~Changjian_Wang1;~Dongsheng_Li3;~Dacheng_Tao1", "aff": "Hebei University;Wuhan University;JD Explore Academy, JD.com Inc.;National University of Defense Technology;National University of Defense Technology;National University of Defense Technology;;", "aff_domain": "hbu.edu.cn;whu.edu.cn;jd.com;nudt.edu.cn;nudt.edu.cn;nudt.edu.cn;;", "position": "Undergrad student;PhD student;Research Scientist;Assistant Professor;Associate Professor;Associate Professor;;", "bibtex": "@inproceedings{\nzheng2023selfevolution,\ntitle={Self-Evolution Learning for Mixup: Enhance Data Augmentation on Few-Shot Text Classification Tasks},\nauthor={Haoqi Zheng and Qihuang Zhong and Liang Ding and Zhiliang Tian and Xin Niu and Changjian Wang and Dongsheng Li and Dacheng Tao},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=SAM1HFH6iB}\n}", "github": "", "project": "", "reviewers": "RCEN;YbFy;g5MQ;hra8", "site": "https://openreview.net/forum?id=SAM1HFH6iB", "pdf_size": 0, "rating": "4;4;4;4", "confidence": "5;4;3;3", "excitement": "3;3;4;3", "reproducibility": "4;3;4;4", "correctness": "3;2;3;4", "rating_avg": 4.0, "confidence_avg": 3.75, "excitement_avg": 3.25, "reproducibility_avg": 3.75, "correctness_avg": 3.0, "replies_avg": 12, "authors#_avg": 8, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;0000-0002-9396-7688;0000-0002-1160-0365;;", "linkedin": ";;;;;;;", "aff_unique_index": "0;1;2;3;3;3", "aff_unique_norm": "Hebei University;Wuhan University;JD.com Inc.;National University of Defense Technology", "aff_unique_dep": ";;JD Explore Academy;", "aff_unique_url": "http://www.hbu.edu.cn/;http://www.whu.edu.cn/;https://www.jd.com;http://www.nudt.edu.cn/", "aff_unique_abbr": "HBU;WHU;JD.com;NUDT", "aff_campus_unique_index": "", "aff_campus_unique": "", 
"aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "SEFD0G4kf0", "title": "USB: A Unified Summarization Benchmark Across Tasks and Domains", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "While the NLP community has produced numerous summarization benchmarks,\nnone provide the rich annotations required \nto simultaneously address many important problems related \nto control and reliability.\nWe introduce a Wikipedia-derived benchmark,\ncomplemented by a rich set of crowd-sourced annotations,\nthat supports $8$ interrelated tasks:\n(i) extractive summarization;\n(ii) abstractive summarization;\n(iii) topic-based summarization;\n(iv) compressing selected sentences into a one-line summary;\n(v) surfacing evidence for a summary sentence;\n(vi) predicting the factual accuracy of a summary sentence;\n(vii) identifying unsubstantiated spans in a summary sentence;\n(viii) correcting factual errors in summaries.\nWe compare various methods on this benchmark and discover\nthat on multiple tasks, moderately-sized fine-tuned models \nconsistently outperform much larger few-shot prompted language models.\nFor factuality-related tasks, we also evaluate existing heuristics \nto create training data and find that training on them \nresults in worse performance than training on $20\\times$ less human-labeled data.\nOur articles draw from 6 domains,\nfacilitating cross-domain analysis.\nOn some tasks, the amount of training data \nmatters more than the domain where it comes from,\nwhile for other tasks training specifically on data from the target domain, \neven if limited, is more beneficial.", "keywords": "summarization;large language models;pretraining;benchmarks;factual correctness", "primary_area": "", "supplementary_material": "", "author": "Kundan Krishna;Prakhar Gupta;Sanjana Ramprasad;Byron C Wallace;Jeffrey P. Bigham;Zachary Chase Lipton", "authorids": "~Kundan_Krishna1;~Prakhar_Gupta1;~Sanjana_Ramprasad2;~Byron_C_Wallace1;~Jeffrey_P._Bigham1;~Zachary_Chase_Lipton1", "gender": "M;M;F;M;M;Unspecified", "homepage": "https://kkrishna.in/;https://prakharguptaz.github.io/;;http://www.byronwallace.com/;http://www.cs.cmu.edu/~jbigham/;http://zacklipton.com", "dblp": "207/7773.html;121/0747;239/6709;00/8247;83/6818;", "google_scholar": "0d59fEcAAAAJ;YuFcRF0AAAAJ;YW4uMpkAAAAJ;KTzRHmwAAAAJ;DFqp8NkAAAAJ;MN9Kfg8AAAAJ", "or_profile": "~Kundan_Krishna1;~Prakhar_Gupta1;~Sanjana_Ramprasad2;~Byron_C_Wallace1;~Jeffrey_P._Bigham1;~Zachary_Chase_Lipton1", "aff": "Carnegie Mellon University;Carnegie Mellon University;Northeastern University ;Northeastern University;Carnegie Mellon University;Carnegie Mellon University", "aff_domain": "cmu.edu;cmu.edu;northeastern.edu;northeastern.edu;cmu.edu;cmu.edu", "position": "PhD student;PhD student;PhD student;Associate Professor;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\nkrishna2023usb,\ntitle={{USB}: A Unified Summarization Benchmark Across Tasks and Domains},\nauthor={Kundan Krishna and Prakhar Gupta and Sanjana Ramprasad and Byron C Wallace and Jeffrey P. 
Bigham and Zachary Chase Lipton},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=SEFD0G4kf0}\n}", "github": "", "project": "", "reviewers": "Ur5L;T3pT;mh1S;WFLP", "site": "https://openreview.net/forum?id=SEFD0G4kf0", "pdf_size": 0, "rating": "3;3;3;3", "confidence": "4;4;4;3", "excitement": "4;3;3;3", "reproducibility": "3;5;3;4", "correctness": "3;3;3;3", "rating_avg": 3.0, "confidence_avg": 3.75, "excitement_avg": 3.25, "reproducibility_avg": 3.75, "correctness_avg": 3.0, "replies_avg": 14, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;", "linkedin": ";prakhar-gupta-100/;;;;", "aff_unique_index": "0;0;1;1;0;0", "aff_unique_norm": "Carnegie Mellon University;Northeastern University", "aff_unique_dep": ";", "aff_unique_url": "https://www.cmu.edu;https://www.northeastern.edu", "aff_unique_abbr": "CMU;NEU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "SFTvQQA4KJ", "title": "FLatS: Principled Out-of-Distribution Detection with Feature-Based Likelihood Ratio Score", "track": "main", "status": "Short Main", "tldr": "", "abstract": "Detecting out-of-distribution (OOD) instances is crucial for NLP models in practical applications. Although numerous OOD detection methods exist, most of them are empirical. Backed by theoretical analysis, this paper advocates for the measurement of the \"OOD-ness\" of a test case $\\boldsymbol{x}$ through the \\emph{likelihood ratio} between out-distribution $\\mathcal P_{\\textit{out}}$ and in-distribution $\\mathcal P_{\\textit{in}}$. We argue that the state-of-the-art (SOTA) feature-based OOD detection methods, such as Maha and KNN, are suboptimal since they only estimate in-distribution density $p_{\\textit{in}}(\\boldsymbol{x})$. To address this issue, we propose \\textbf{FLATS}, a principled solution for OOD detection based on likelihood ratio. Moreover, we demonstrate that FLATS can serve as a general framework capable of enhancing other OOD detection methods by incorporating out-distribution density $p_{\\textit{out}}(\\boldsymbol{x})$ estimation. 
Experiments show that FLATS establishes a new SOTA on popular benchmarks.", "keywords": "OOD Detection;Likelihood Ratio;Intent Classification", "primary_area": "", "supplementary_material": "", "author": "Haowei Lin;Yuntian Gu", "authorids": "~Haowei_Lin1;~Yuntian_Gu1", "gender": "M;", "homepage": "https://linhaowei1.github.io/;https://github.com/guyuntian", "dblp": "235/2798;", "google_scholar": "Ng-DmJgAAAAJ;qdyC5XsAAAAJ", "or_profile": "~Haowei_Lin1;~Yuntian_Gu1", "aff": "Peking University;Peking University", "aff_domain": "pku.edu.cn;pku.edu.cn", "position": "Undergrad student;Undergrad student", "bibtex": "@inproceedings{\nlin2023flats,\ntitle={{FL}atS: Principled Out-of-Distribution Detection with Feature-Based Likelihood Ratio Score},\nauthor={Haowei Lin and Yuntian Gu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=SFTvQQA4KJ}\n}", "github": "", "project": "", "reviewers": "4HcV;RYgf;RnjR;UcFx", "site": "https://openreview.net/forum?id=SFTvQQA4KJ", "pdf_size": 0, "rating": "4;4;4;4", "confidence": "4;3;2;3", "excitement": "4;3;4;4", "reproducibility": "4;4;3;4", "correctness": "4;3;4;4", "rating_avg": 4.0, "confidence_avg": 3.0, "excitement_avg": 3.75, "reproducibility_avg": 3.75, "correctness_avg": 3.75, "replies_avg": 13, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0009-0006-9809-4835;", "linkedin": ";", "aff_unique_index": "0;0", "aff_unique_norm": "Peking University", "aff_unique_dep": "", "aff_unique_url": "http://www.pku.edu.cn", "aff_unique_abbr": "Peking U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "id": "SHkMYY26KP", "title": "On General Language Understanding", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "Natural Language Processing prides itself to be an empirically-minded, if not outright empiricist field, and yet lately it seems to get itself into essentialist debates on issues of meaning and measurement (\"Do Large Language Models Understand Language, And If So, How Much?\"). This is not by accident: Here, as everywhere, the evidence underspecifies the understanding. As a remedy, this paper sketches the outlines of a model of understanding, which can ground questions of the adequacy of current methods of measurement of model quality. 
The paper makes three claims: A) That different language use situation types have different characteristics, B) That language understanding is a multifaceted phenomenon, bringing together individualistic and social processes, and C) That the choice of Understanding Indicator marks the limits of benchmarking, and the beginnings of considerations of the ethics of NLP use.", "keywords": "NLU;evaluation;benchmarking;semantics;dialogue;modelling;measurement", "primary_area": "", "supplementary_material": "", "author": "David Schlangen", "authorids": "~David_Schlangen1", "gender": "M", "homepage": "http://www.ling.uni-potsdam.de/~das", "dblp": "11/1189", "google_scholar": "https://scholar.google.com.tw/citations?user=QoDgwZYAAAAJ", "or_profile": "~David_Schlangen1", "aff": "University of Potsdam", "aff_domain": "uni-potsdam.de", "position": "Full Professor", "bibtex": "@inproceedings{\nschlangen2023on,\ntitle={On General Language Understanding},\nauthor={David Schlangen},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=SHkMYY26KP}\n}", "github": "", "project": "", "reviewers": "kY1L;tfyP;uYnW", "site": "https://openreview.net/forum?id=SHkMYY26KP", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;4", "excitement": "4;4;2", "reproducibility": "", "correctness": "4;4;1", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 0, "correctness_avg": 3.0, "replies_avg": 10, "authors#_avg": 1, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-2686-6887", "linkedin": "", "aff_unique_index": "0", "aff_unique_norm": "University of Potsdam", "aff_unique_dep": "", "aff_unique_url": "https://www.uni-potsdam.de", "aff_unique_abbr": "UP", "aff_country_unique_index": "0", "aff_country_unique": "Germany" }, { "id": "SI2CXa5eok", "title": "AMR Parsing with Causal Hierarchical Attention and Pointers", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Translation-based AMR parsers have recently gained popularity due to their simplicity and effectiveness. They predict linearized graphs as free texts, avoiding explicit structure modeling. However, this simplicity neglects structural locality in AMR graphs and introduces unnecessary tokens to represent coreferences. In this paper, we introduce new target forms of AMR parsing and a novel model, CHAP, which is equipped with causal hierarchical attention and the pointer mechanism, enabling the integration of structures into the Transformer decoder. We empirically explore various alternative modeling options. 
Experiments show that our model outperforms baseline models on four out of five benchmarks in the setting of no additional data.", "keywords": "semantic parsing;AMR parsing;hierarical attention;pointer mechanism", "primary_area": "", "supplementary_material": "", "author": "Chao Lou;Kewei Tu", "authorids": "~Chao_Lou1;~Kewei_Tu1", "gender": "M;M", "homepage": ";https://faculty.sist.shanghaitech.edu.cn/faculty/tukw/", "dblp": "147/6026;22/918", "google_scholar": "ii7ozEUAAAAJ;5gi3Pm0AAAAJ", "or_profile": "~Chao_Lou1;~Kewei_Tu1", "aff": "ShanghaiTech University;ShanghaiTech University", "aff_domain": "shanghaitech.edu.cn;shanghaitech.edu.cn", "position": "PhD student;Associate Professor", "bibtex": "@inproceedings{\nlou2023amr,\ntitle={{AMR} Parsing with Causal Hierarchical Attention and Pointers},\nauthor={Chao Lou and Kewei Tu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=SI2CXa5eok}\n}", "github": "", "project": "", "reviewers": "MHyS;9uSY;2oGz", "site": "https://openreview.net/forum?id=SI2CXa5eok", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;3;3", "excitement": "3;4;3", "reproducibility": "4;4;4", "correctness": "4;4;3", "rating_avg": 4.0, "confidence_avg": 3.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.0, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";", "linkedin": ";", "aff_unique_index": "0;0", "aff_unique_norm": "ShanghaiTech University", "aff_unique_dep": "", "aff_unique_url": "https://www.shanghaitech.edu.cn", "aff_unique_abbr": "ShanghaiTech", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "id": "SJ0Da0j8n7", "title": "Exploring Linguistic Probes for Morphological Inflection", "track": "main", "status": "Short Main", "tldr": "", "abstract": "Modern work on the cross-linguistic computational modeling of morphological inflection has typically employed language-independent data splitting algorithms. In this paper, we supplement that approach with language-specific probes designed to test aspects of morphological generalization. 
Testing these probes on three morphologically distinct languages, English, Spanish, and Swahili, we find evidence that three leading morphological inflection systems employ distinct generalization strategies over conjugational classes and feature sets on both orthographic and phonologically transcribed inputs.", "keywords": "morphology;inflection;linguistic probes;English;Spanish;Swahili", "primary_area": "", "supplementary_material": "", "author": "Jordan Kodner;Salam Khalifa;Sarah Ruth Brogden Payne", "authorids": "~Jordan_Kodner1;~Salam_Khalifa1;~Sarah_Ruth_Brogden_Payne1", "gender": "M;F;Non-Binary", "homepage": ";https://www.salamkhalifa.com/;https://paynesa.github.io/", "dblp": "212/4348;169/3252;", "google_scholar": ";SfVNUMMAAAAJ;DDUffvQAAAAJ", "or_profile": "~Jordan_Kodner1;~Salam_Khalifa1;~Sarah_Ruth_Brogden_Payne1", "aff": "State University of New York, Stony Brook;State University of New York, Stony Brook;State University of New York at Stony Brook", "aff_domain": "stonybrook.edu;stonybrook.edu;stonybrook.edu", "position": "Assistant Professor;PhD student;PhD student", "bibtex": "@inproceedings{\nkodner2023exploring,\ntitle={Exploring Linguistic Probes for Morphological Inflection},\nauthor={Jordan Kodner and Salam Khalifa and Sarah Ruth Brogden Payne},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=SJ0Da0j8n7}\n}", "github": "", "project": "", "reviewers": "FThw;gRQQ;1U8W", "site": "https://openreview.net/forum?id=SJ0Da0j8n7", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;3;2", "excitement": "5;4;5", "reproducibility": "5;4;4", "correctness": "5;4;4", "rating_avg": 4.0, "confidence_avg": 3.0, "excitement_avg": 4.666666666666667, "reproducibility_avg": 4.333333333333333, "correctness_avg": 4.333333333333333, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0003-0049-3637;", "linkedin": ";salamkhalifa/;", "aff_unique_index": "0;0;1", "aff_unique_norm": "State University of New York;State University of New York at Stony Brook", "aff_unique_dep": ";", "aff_unique_url": "https://www.stonybrook.edu;https://www.stonybrook.edu", "aff_unique_abbr": "SUNY Stony Brook;SUNY Stony Brook", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Stony Brook", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "SJYTfbI59J", "title": "Open-source Large Language Models are Strong Zero-shot Query Likelihood Models for Document Ranking", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "In the field of information retrieval, Query Likelihood Models (QLMs) rank documents based on the probability of generating the query given the content of a document.\nRecently, advanced large language models (LLMs) have emerged as effective QLMs, showcasing promising ranking capabilities. This paper focuses on investigating the genuine zero-shot ranking effectiveness of recent LLMs, which are solely pre-trained on unstructured text data without supervised instruction fine-tuning. Our findings reveal the robust zero-shot ranking ability of such LLMs, highlighting that additional instruction fine-tuning may hinder effectiveness unless a question generation task is present in the fine-tuning dataset. 
Furthermore, we introduce a novel state-of-the-art ranking system that integrates LLM-based QLMs with a hybrid zero-shot retriever, demonstrating exceptional effectiveness in both zero-shot and few-shot scenarios. \nWe make our codebase publicly available at https://github.com/ielab/llm-qlm.", "keywords": "large language model;query likelihood model;zero-shot ranking model", "primary_area": "", "supplementary_material": "", "author": "Shengyao Zhuang;Bing Liu;Bevan Koopman;Guido Zuccon", "authorids": "~Shengyao_Zhuang1;~Bing_Liu7;~Bevan_Koopman1;~Guido_Zuccon1", "gender": "M;M;;", "homepage": "https://arvinzhuang.github.io/;https://uqbingliu.github.io/;http://koopman.id.au;http://ielab.io/people/guido-zuccon.html", "dblp": "262/6236.html;https://dblp.uni-trier.de/pid/181/2855-25;96/9899;22/6562", "google_scholar": "-7sbXNIAAAAJ;https://scholar.google.com.au/citations?user=uYJIvCMAAAAJ;;aEVHhC8AAAAJ", "or_profile": "~Shengyao_Zhuang1;~Bing_Liu7;~Bevan_Koopman1;~Guido_Zuccon1", "aff": "University of Queensland;University of Queensland;University of Queensland;University of Queensland", "aff_domain": "uq.edu.au;uq.edu.au;uq.edu.au;uq.edu.au", "position": "PhD student;PhD student;Associate Professor;Associate Professor", "bibtex": "@inproceedings{\nzhuang2023opensource,\ntitle={Open-source Large Language Models are Strong Zero-shot Query Likelihood Models for Document Ranking},\nauthor={Shengyao Zhuang and Bing Liu and Bevan Koopman and Guido Zuccon},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=SJYTfbI59J}\n}", "github": "", "project": "", "reviewers": "ia7W;Zpce;vsPL;1y9F", "site": "https://openreview.net/forum?id=SJYTfbI59J", "pdf_size": 0, "rating": "2;2;2;2", "confidence": "3;4;4;3", "excitement": "3;3;3;4", "reproducibility": "5;5;4;5", "correctness": "4;2;2;4", "rating_avg": 2.0, "confidence_avg": 3.5, "excitement_avg": 3.25, "reproducibility_avg": 4.75, "correctness_avg": 3.0, "replies_avg": 14, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-6711-0955;0000-0002-7858-7468;;0000-0003-0271-5563", "linkedin": "shengyaozhuangit/;;;", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Queensland", "aff_unique_dep": "", "aff_unique_url": "https://www.uq.edu.au", "aff_unique_abbr": "UQ", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Australia" }, { "id": "SNB6BwY2zy", "title": "Detecting and Mitigating Hallucinations in Multilingual Summarisation", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Hallucinations pose a significant challenge to the reliability of neural models for abstractive summarisation. While automatically generated summaries may be fluent, they often lack faithfulness to the original document. This issue becomes even more pronounced in low-resource languages, where summarisation requires cross-lingual transfer. With the existing faithful metrics focusing on English, even measuring the extent of this phenomenon in cross-lingual settings is hard. To address this, we first develop a novel metric, mFACT, evaluating the faithfulness of non-English summaries, leveraging translation-based transfer from multiple English faithfulness metrics. Through extensive experiments in multiple languages, we demonstrate that mFACT is best suited to detect hallucinations compared to alternative metrics. 
With mFACT, we assess a broad range of multilingual large language models, and find that they all tend to hallucinate often in languages different from English. We then propose a simple but effective method to reduce hallucinations in cross-lingual transfer, which weighs the loss of each training example by its faithfulness score. This method drastically increases both performance and faithfulness according to both automatic and human evaluation when compared to strong baselines for cross-lingual transfer such as MAD-X. Our code and dataset are available at https://github.com/yfqiu-nlp/mfact-summ.", "keywords": "Summarisation;Multilingual NLP;Hallucination;Natural Language Generation;Faithfulness Evaluation", "primary_area": "", "supplementary_material": "", "author": "Yifu QIU;Yftah Ziser;Anna Korhonen;Edoardo Ponti;Shay B Cohen", "authorids": "~Yifu_QIU1;~Yftah_Ziser1;~Anna_Korhonen1;~Edoardo_Ponti1;~Shay_B_Cohen1", "gender": "Not Specified;M;;;M", "homepage": "https://yfqiu.netlify.app/;https://yftah89.github.io/;https://sites.google.com/site/annakorhonen/;https://ducdauge.github.io/;http://homepages.inf.ed.ac.uk/scohen", "dblp": "316/9904;188/6096.html;14/6532;178/8829;04/5629", "google_scholar": "OA6GaMwAAAAJ;https://scholar.google.co.il/citations?user=37SMCrsAAAAJ;https://scholar.google.co.uk/citations?user=SCoVoOYAAAAJ;https://scholar.google.ca/citations?user=tklL2q0AAAAJ;", "or_profile": "~Yifu_QIU1;~Yftah_Ziser1;~Anna_Korhonen1;~Edoardo_Ponti1;~Shay_B_Cohen1", "aff": "University of Edinburgh, University of Edinburgh;University of Edinburgh;University of Cambridge;University of Edinburgh;University of Edinburgh", "aff_domain": "ed.ac.uk;edinburgh.org;cam.ac.uk;ed.ac.uk;ed.ac.uk", "position": "PhD student;Postdoc;Professor;Assistant Professor;Reader", "bibtex": "@inproceedings{\nqiu2023detecting,\ntitle={Detecting and Mitigating Hallucinations in Multilingual Summarisation},\nauthor={Yifu QIU and Yftah Ziser and Anna Korhonen and Edoardo Ponti and Shay B Cohen},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=SNB6BwY2zy}\n}", "github": "", "project": "", "reviewers": "Pf1A;HygN;1ZfH", "site": "https://openreview.net/forum?id=SNB6BwY2zy", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;3", "excitement": "4;4;4", "reproducibility": "4;4;3", "correctness": "4;3;3", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 4.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0009-0002-6228-9471;;0000-0002-6308-1050;0000-0003-4753-8353", "linkedin": "yifu-qiu-turing/;;anna-korhonen-534a9b5/;edoardo-maria-ponti/;", "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "University of Edinburgh;University of Cambridge", "aff_unique_dep": ";", "aff_unique_url": "https://www.ed.ac.uk;https://www.cam.ac.uk", "aff_unique_abbr": "Edinburgh;Cambridge", "aff_campus_unique_index": "1", "aff_campus_unique": ";Cambridge", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United Kingdom" }, { "id": "SP8zIwanHD", "title": "$\\textbf{\\emph{CLMSM}}$: A Multi-Task Learning Framework for Pre-training on Procedural Text", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "In this paper, we propose ***CLMSM***, a domain-specific, continual pre-training framework, that learns from a large set of procedural recipes. 
***CLMSM*** uses a Multi-Task Learning Framework to optimize two objectives - a) Contrastive Learning using hard triplets to learn fine-grained differences across entities in the procedures, and b) a novel Mask-Step Modelling objective to learn step-wise context of a procedure. We test the performance of ***CLMSM*** on the downstream tasks of tracking entities and aligning actions between two procedures on three datasets, one of which is an open-domain dataset not conforming with the pre-training dataset. We show that ***CLMSM*** not only outperforms baselines on recipes (in-domain) but is also able to generalize to open-domain procedural NLP tasks.", "keywords": "pre-training;procedural reasoning;contrastive learning;masked language modeling;multi-task learning;nlp", "primary_area": "", "supplementary_material": "", "author": "Abhilash Nandy;Manav Nitin Kapadnis;Pawan Goyal;Niloy Ganguly", "authorids": "~Abhilash_Nandy1;~Manav_Nitin_Kapadnis1;~Pawan_Goyal1;~Niloy_Ganguly1", "gender": "M;M;M;M", "homepage": "https://sites.google.com/view/abhilashnandy;https://manavkapadnis.github.io/;http://cse.iitkgp.ac.in/~pawang/;http://www.facweb.iitkgp.ac.in/~niloy/", "dblp": "237/2393.html;304/7583;77/2307-2;https://dblp.org/pers/hd/g/Ganguly:Niloy", "google_scholar": "vJhwesAAAAAJ;L7KLra8AAAAJ;https://scholar.google.com.tw/citations?user=F14FHsIAAAAJ;hCbFmUUAAAAJ", "or_profile": "~Abhilash_Nandy1;~Manav_Nitin_Kapadnis1;~Pawan_Goyal1;~Niloy_Ganguly1", "aff": "Indian Institute of Technology Kharagpur;Indian Institute of Technology Kharagpur;IIT Kharagpur;Indian Institute of Technology Kharagpur,", "aff_domain": "iitkgp.ac.in;iitkgp.ac.in;cse.iitkgp.ac.in;iitkgp.ac.in", "position": "PhD student;Undergrad student;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nnandy2023textbfemphclmsm,\ntitle={\\${\\textbackslash}textbf\\{{\\textbackslash}emph\\{{CLMSM}\\}\\}\\$: A Multi-Task Learning Framework for Pre-training on Procedural Text},\nauthor={Abhilash Nandy and Manav Nitin Kapadnis and Pawan Goyal and Niloy Ganguly},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=SP8zIwanHD}\n}", "github": "", "project": "", "reviewers": "uD45;9spE;z7wU", "site": "https://openreview.net/forum?id=SP8zIwanHD", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;4;3", "excitement": "3;2;3", "reproducibility": "4;2;4", "correctness": "4;4;4", "rating_avg": 3.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 2.6666666666666665, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-8683-107X;0009-0003-8640-2106;;", "linkedin": "abhilash-nandy-86244111b/;manav-nitin-kapadnis/;;", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Indian Institute of Technology Kharagpur", "aff_unique_dep": "", "aff_unique_url": "https://www.iitkgp.ac.in", "aff_unique_abbr": "IIT Kharagpur", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Kharagpur", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "India" }, { "id": "SPtskxPEiV", "title": "Role of Context in Unsupervised Sentence Representation Learning: the Case of Dialog Act Modeling", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "Unsupervised learning of word representations involves capturing the contextual information surrounding word occurrences, which can be grounded in the observation that word 
form is largely disconnected from word meaning. While there are fewer reasons to believe that the same holds for sentences, learning through context has been carried over to learning representations of word sequences. However, this work pays minimal to no attention to the role of context in inferring sentence representations.\nIn this article, we present a dialog act tag probing task designed to explicitly compare content-, and context-oriented sentence representations inferred on utterances of telephone conversations (SwDA). Our results suggest that there is no clear benefit of context-based sentence representations over content-based sentence representations. However, there is a very clear benefit of increasing the dimensionality of the sentence vectors in nearly all approaches.", "keywords": "unsupervised learning;sentence representation;dialog act modeling", "primary_area": "", "supplementary_material": "", "author": "Rastislav Hronsky;Emmanuel Keuleers", "authorids": "~Rastislav_Hronsky1;~Emmanuel_Keuleers1", "gender": "M;", "homepage": "https://hrasto.github.io/;https://www.tilburguniversity.edu/staff/e-a-keuleers", "dblp": ";", "google_scholar": ";", "or_profile": "~Rastislav_Hronsky1;~Emmanuel_Keuleers1", "aff": "Eindhoven University of Technology;Tilburg University", "aff_domain": "tue.nl;tilburguniversity.edu", "position": "PhD student;Associate Professor", "bibtex": "@inproceedings{\nhronsky2023role,\ntitle={Role of Context in Unsupervised Sentence Representation Learning: the Case of Dialog Act Modeling},\nauthor={Rastislav Hronsky and Emmanuel Keuleers},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=SPtskxPEiV}\n}", "github": "", "project": "", "reviewers": "TQzd;F3K4;wEG6", "site": "https://openreview.net/forum?id=SPtskxPEiV", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;3", "excitement": "4;3;4", "reproducibility": "4;3;4", "correctness": "4;3;4", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.6666666666666665, "replies_avg": 10, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";", "linkedin": ";", "aff_unique_index": "0;1", "aff_unique_norm": "Eindhoven University of Technology;Tilburg University", "aff_unique_dep": ";", "aff_unique_url": "https://www.tue.nl;https://www.tilburguniversity.edu/", "aff_unique_abbr": "TU/e;Tilburg U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Netherlands" }, { "id": "SQodZvCM5g", "title": "Neuro-Symbolic Sentiment Analysis with Dynamic Word Sense Disambiguation", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Sentiment analysis is a task that highly depends on the understanding of word senses. Traditional neural network models are black boxes that represent word senses as vectors that are uninterpretable for humans. On the other hand, the application of Word Sense Disambiguation (WSD) systems in downstream tasks poses challenges regarding i) which words need to be disambiguated, and ii) how to model explicit word senses into easily understandable terms for a downstream model. This work proposes a neurosymbolic framework that incorporates WSD by identifying and paraphrasing ambiguous words to improve the accuracy of sentiment predictions. 
The framework allows us to understand which words are paraphrased into which semantically unequivocal words, thus enabling a downstream task model to gain both accuracy and interpretability. To better fine-tune a lexical substitution model for WSD on a downstream task without ground-truth word sense labels, we leverage dynamic rewarding to jointly train sentiment analysis and lexical substitution models. Our framework proves to effectively improve the performance of sentiment analysis on corpora from different domains.", "keywords": "sentiment analysis;neuro-symbolic AI;word sense disambiguation", "primary_area": "", "supplementary_material": "", "author": "Xulang Zhang;Rui Mao;Kai He;Erik Cambria", "authorids": "~Xulang_Zhang1;~Rui_Mao1;~Kai_He4;~Erik_Cambria1", "gender": ";M;M;M", "homepage": ";https://maorui.wixsite.com/homepage;https://kaihe-better.github.io/;https://sentic.net/erikcambria/", "dblp": ";51/5793-2;12/5913-1;80/7421", "google_scholar": "https://scholar.google.com.sg/citations?user=rH0Nn58AAAAJ;s_JzI5kAAAAJ;https://scholar.google.com.hk/citations?user=4nWk-HYAAAAJ;ilSYpW0AAAAJ", "or_profile": "~Xulang_Zhang1;~Rui_Mao1;~Kai_He4;~Erik_Cambria1", "aff": "National Technological University;Nanyang Technological University;National University of Singapore;Nanyang Technological University", "aff_domain": "ntu.edu;ntu.edu.sg;nus.edu;ntu.edu.sg", "position": "PhD student;Postdoc;Researcher;Full Professor", "bibtex": "@inproceedings{\nzhang2023neurosymbolic,\ntitle={Neuro-Symbolic Sentiment Analysis with Dynamic Word Sense Disambiguation},\nauthor={Xulang Zhang and Rui Mao and Kai He and Erik Cambria},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=SQodZvCM5g}\n}", "github": "", "project": "", "reviewers": "PBHS;PXHm;xJze", "site": "https://openreview.net/forum?id=SQodZvCM5g", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;4;2", "excitement": "2;3;3", "reproducibility": "2;3;3", "correctness": "3;3;3", "rating_avg": 3.0, "confidence_avg": 3.0, "excitement_avg": 2.6666666666666665, "reproducibility_avg": 2.6666666666666665, "correctness_avg": 3.0, "replies_avg": 10, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-1082-8755;0000-0003-2639-1532;0000-0002-3030-1280", "linkedin": ";rui-mao-nlp;;erikcambria/", "aff_unique_index": "0;1;2;1", "aff_unique_norm": "National Technological University;Nanyang Technological University;National University of Singapore", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ntu.edu;https://www.ntu.edu.sg;https://www.nus.edu.sg", "aff_unique_abbr": "NTU;NTU;NUS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1", "aff_country_unique": "United States;Singapore" }, { "id": "SS44Mrv21o", "title": "EARA: Improving Biomedical Semantic Textual Similarity with Entity-Aligned Attention and Retrieval Augmentation", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Measuring Semantic Textual Similarity (STS) is a fundamental task in biomedical text processing, which aims at quantifying the similarity between two input biomedical sentences. Unfortunately, the STS datasets in the biomedical domain are relatively smaller but more complex in semantics than common domain, often leading to overfitting issues and insufficient text representation even based on Pre-trained Language Models (PLMs) due to too many biomedical entities. 
In this paper, we propose EARA, an entity-aligned, attention-based and retrieval-augmented PLMs. Our proposed EARA first aligns the same type of fine-grained entity information in each sentence pair with an entity alignment matrix. Then, EARA regularizes the attention mechanism with an entity alignment matrix with an auxiliary loss. Finally, we add a retrieval module that retrieves similar instances to expand the scope of entity pairs and improve the model's generalization. The comprehensive experiments reflect that EARA can achieve state-of-the-art performance on both in-domain and out-of-domain datasets. Source code is available \\footnote{https://github.com/xy-always/EARA}.", "keywords": "Biomedical semantic textual similarity;entity-aligned regularization;retrival augmentation", "primary_area": "", "supplementary_material": "", "author": "Ying Xiong;Xin Yang;Linjing Liu;Ka-Chun Wong;Qingcai Chen;Yang Xiang;Buzhou Tang", "authorids": "~Ying_Xiong2;~Xin_Yang18;~Linjing_Liu1;~Ka-Chun_Wong1;~Qingcai_Chen2;~Yang_Xiang4;~Buzhou_Tang1", "gender": "F;M;F;;M;M;M", "homepage": ";https://github.com/awfulsunday;;http://www.cs.toronto.edu/~wkc/;http://faculty.hitsz.edu.cn/chenqingcai1;;", "dblp": ";;;45/7183;15/1052;50/2192-3;00/7437", "google_scholar": "nz7B_JcAAAAJ;;;nZH_Ws8AAAAJ;7aR5D4sAAAAJ;zDyL-NoAAAAJ;https://scholar.google.com/citations?hl=zh-CN", "or_profile": "~Ying_Xiong2;~Xin_Yang18;~Linjing_Liu1;~Ka-Chun_Wong1;~Qingcai_Chen2;~Yang_Xiang4;~Buzhou_Tang1", "aff": "Harbin Institute of Technology;Harbin Institute of Technology;City University of Hong Kong;City University of Hong Kong;Harbin Institute of Technology (Shenzhen);Peng Cheng Laboratory;Harbin Institute of Technology", "aff_domain": "hit.edu.cn;hit.edu.cn;cityu.edu.hk;cityu.edu.hk;hit.edu.cn;pcl.ac;hit.edu.cn", "position": "PhD student;MS student;PhD student;Associate Professor;Full Professor;Researcher;Full Professor", "bibtex": "@inproceedings{\nxiong2023eara,\ntitle={{EARA}: Improving Biomedical Semantic Textual Similarity with Entity-Aligned Attention and Retrieval Augmentation},\nauthor={Ying Xiong and Xin Yang and Linjing Liu and Ka-Chun Wong and Qingcai Chen and Yang Xiang and Buzhou Tang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=SS44Mrv21o}\n}", "github": "", "project": "", "reviewers": "zskt;pgrB;tAyj;yReV", "site": "https://openreview.net/forum?id=SS44Mrv21o", "pdf_size": 0, "rating": "3;3;3;3", "confidence": "4;4;3;2", "excitement": "4;3;3;3", "reproducibility": "5;3;4;3", "correctness": "4;3;4;2", "rating_avg": 3.0, "confidence_avg": 3.25, "excitement_avg": 3.25, "reproducibility_avg": 3.75, "correctness_avg": 3.25, "replies_avg": 14, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0001-7423-2937;;0000-0002-2366-4593;0000-0001-6062-733X;;0000-0003-1395-6805;", "linkedin": "ying-xiong-42642b2a9/;;;;;yang-xiang-7554b6195/;", "aff_unique_index": "0;0;1;1;0;2;0", "aff_unique_norm": "Harbin Institute of Technology;City University of Hong Kong;Pengcheng Laboratory", "aff_unique_dep": ";;Peng Cheng Laboratory", "aff_unique_url": "http://www.hit.edu.cn/;https://www.cityu.edu.hk;http://www.pcl.ac.cn", "aff_unique_abbr": "HIT;CityU;PCL", "aff_campus_unique_index": "0;0;1;1;2;0", "aff_campus_unique": "Harbin;Hong Kong SAR;Shenzhen;", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "ST0ejo0mnc", "title": "A Rewriting Approach for Gender Inclusivity 
in Portuguese", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "In recent years, there has been a notable rise in research interest regarding the integration of gender-inclusive and gender-neutral language in natural language processing models. A specific area of focus that has gained practical and academic significant interest is gender-neutral rewriting, which involves converting binary-gendered text to its gender-neutral counterpart. However, current approaches to gender-neutral rewriting for gendered languages tend to rely on large datasets, which may not be an option for languages with fewer resources, such as Portuguese. In this paper, we present a rule-based and a neural-based tool for gender-neutral rewriting for Portuguese, a heavily gendered Romance language whose morphology creates different challenges from the ones tackled by other gender-neutral rewriters. Our neural approach relies on fine-tuning large multilingual machine translation models on examples generated by the rule-based model. We evaluate both models on texts from different sources and contexts. We provide the first Portuguese dataset explicitly containing gender-neutral language and neopronouns, as well as a manually annotated golden collection of 500 sentences that allows for evaluation of future work.", "keywords": "nlp;portuguese;gender neutrality;gender inclusivity;machine translation", "primary_area": "", "supplementary_material": "", "author": "Leonor Veloso;Luisa Coheur;Rui Ribeiro", "authorids": "~Leonor_Veloso1;~Luisa_Coheur1;~Rui_Ribeiro1", "gender": "F;;", "homepage": "https://www.linkedin.com/in/leonor-veloso/;;", "dblp": ";;", "google_scholar": ";;zYSaXZsAAAAJ", "or_profile": "~Leonor_Veloso1;~Luisa_Coheur1;~Rui_Ribeiro1", "aff": "Instituto Superior T\u00e9cnico;;Instituto Superior T\u00e9cnico", "aff_domain": "tecnico.ulisboa.pt;;tecnico.ulisboa.pt", "position": "MS student;;PhD student", "bibtex": "@inproceedings{\nveloso2023a,\ntitle={A Rewriting Approach for Gender Inclusivity in Portuguese},\nauthor={Leonor Veloso and Luisa Coheur and Rui Ribeiro},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=ST0ejo0mnc}\n}", "github": "", "project": "", "reviewers": "KYF6;CAWR;jbYX", "site": "https://openreview.net/forum?id=ST0ejo0mnc", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;5", "excitement": "3;2;3", "reproducibility": "5;4;4", "correctness": "4;2;4", "rating_avg": 4.0, "confidence_avg": 4.333333333333333, "excitement_avg": 2.6666666666666665, "reproducibility_avg": 4.333333333333333, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": ";;", "aff_unique_index": "0;0", "aff_unique_norm": "Instituto Superior T\u00e9cnico", "aff_unique_dep": "", "aff_unique_url": "https://www.ist.utl.pt", "aff_unique_abbr": "IST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Portugal" }, { "id": "STHKApXVMH", "title": "Unnatural Error Correction: GPT-4 Can Almost Perfectly Handle Unnatural Scrambled Text", "track": "main", "status": "Short Main", "tldr": "", "abstract": "While Large Language Models (LLMs) have achieved remarkable performance in many tasks, much about their inner workings remains unclear. 
In this study, we present novel experimental insights into the resilience of LLMs, particularly GPT-4, when subjected to extensive character-level permutations. To investigate this, we first propose the Scrambled Bench, a suite designed to measure the capacity of LLMs to handle scrambled input, in terms of both recovering scrambled sentences and answering questions given scrambled context. The experimental results indicate that multiple advanced LLMs demonstrate the capability akin to typoglycemia, a phenomenon where humans can understand the meaning of words even when the letters within those words are scrambled, as long as the first and last letters remain in place. More surprisingly, we found that only GPT-4 nearly flawlessly processes inputs with unnatural errors, a task that poses significant challenges for other LLMs and often even for humans. Specifically, GPT-4 can almost perfectly reconstruct the original sentences from scrambled ones, decreasing the edit distance by 95%, even when all letters within each word are entirely scrambled. It is counter-intuitive that LLMs can exhibit such resilience despite severe disruption to input tokenization caused by scrambled text.", "keywords": "Large Language Models;Emergent Ability;Scrambled Text;GPT-4", "primary_area": "", "supplementary_material": "", "author": "Qi Cao;Takeshi Kojima;Yutaka Matsuo;Yusuke Iwasawa", "authorids": "~Qi_Cao4;~Takeshi_Kojima1;~Yutaka_Matsuo1;~Yusuke_Iwasawa1", "gender": "M;M;M;M", "homepage": ";;http://ymatsuo.com;", "dblp": ";41/1448;m/YMatsuo.html;117/7377", "google_scholar": "WO29Gw4AAAAJ;KpkgqOsAAAAJ;Dy8iau4AAAAJ;https://scholar.google.co.jp/citations?user=pvvZgj0AAAAJ", "or_profile": "~Qi_Cao4;~Takeshi_Kojima1;~Yutaka_Matsuo1;~Yusuke_Iwasawa1", "aff": "The University of Tokyo;The University of Tokyo;The University of Tokyo;The University of Tokyo, The University of Tokyo", "aff_domain": "u-tokyo.ac.jp;u-tokyo.ac.jp;u-tokyo.ac.jp;weblab.t.u-tokyo.ac.jp", "position": "PhD student;PhD student;Associate Professor;Lecturer", "bibtex": "@inproceedings{\ncao2023unnatural,\ntitle={Unnatural Error Correction: {GPT}-4 Can Almost Perfectly Handle Unnatural Scrambled Text},\nauthor={Qi Cao and Takeshi Kojima and Yutaka Matsuo and Yusuke Iwasawa},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=STHKApXVMH}\n}", "github": "", "project": "", "reviewers": "EzCc;Lbf2;tGXe;NrTg", "site": "https://openreview.net/forum?id=STHKApXVMH", "pdf_size": 0, "rating": "4;4;4;4", "confidence": "4;3;4;4", "excitement": "2;4;4;4", "reproducibility": "5;5;4;5", "correctness": "4;4;4;4", "rating_avg": 4.0, "confidence_avg": 3.75, "excitement_avg": 3.5, "reproducibility_avg": 4.75, "correctness_avg": 4.0, "replies_avg": 14, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0000-0002-1321-2622", "linkedin": ";;;", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Tokyo", "aff_unique_dep": "", "aff_unique_url": "https://www.u-tokyo.ac.jp", "aff_unique_abbr": "UTokyo", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Japan" }, { "id": "SUAeMJKg6b", "title": "\u201cMistakes Help Us Grow\u201d: Facilitating and Evaluating Growth Mindset Supportive Language in Classrooms", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Teachers\u2019 growth mindset supportive language (GMSL)\u2014rhetoric emphasizing that one's 
skills can be improved over time\u2014has been shown to significantly reduce disparities in academic achievement and enhance students' learning outcomes. Although teachers espouse growth mindset principles, most find it difficult to adopt GMSL in their practice due the lack of effective coaching in this area. We explore whether large language models (LLMs) can provide automated, personalized coaching to support teachers' use of GMSL. We establish an effective coaching tool to reframe unsupportive utterances to GMSL by developing (i) a parallel dataset containing GMSL-trained teacher reframings of unsupportive statements with an accompanying annotation guide, (ii) a GMSL prompt framework to revise teachers\u2019 unsupportive language, and (iii) an evaluation framework grounded in psychological theory for evaluating GMSL with the help of students and teachers. We conduct a large-scale evaluation involving 174 teachers and 1,006 students, finding that both teachers and students perceive GMSL-trained teacher and model reframings as more effective in fostering a growth mindset and promoting challenge-seeking behavior, among other benefits. We also find that model-generated reframings outperform those from the GMSL-trained teachers. These results show promise for harnessing LLMs to provide automated GMSL feedback for teachers and, more broadly, LLMs\u2019 potentiality for supporting students\u2019 learning in the classroom. Our findings also demonstrate the benefit of large-scale human evaluations when applying LLMs in educational domains.", "keywords": "Growth mindset;Language models;Education;Applications", "primary_area": "", "supplementary_material": "", "author": "Kunal Handa;Margarett Clapper;Jessica Boyle;Rose E Wang;Diyi Yang;David Yeager;Dorottya Demszky", "authorids": "~Kunal_Handa1;~Margarett_Clapper1;~Jessica_Boyle1;~Rose_E_Wang1;~Diyi_Yang2;~David_Yeager2;~Dorottya_Demszky1", "gender": ";;F;F;F;M;F", "homepage": "https://kunhanda.github.io/;https://www.mclapper.com/;;https://cs.stanford.edu/~rewang;https://cs.stanford.edu/~diyiy/;;https://www.dorademszky.com/", "dblp": "336/6747.html;;;259/1500;70/11145;;", "google_scholar": "scdcthMAAAAJ;;GNtq7ZcAAAAJ;V-dlwF4AAAAJ;j9jhYqQAAAAJ;https://scholar.google.com/citations?hl=en;WtVqgE8AAAAJ", "or_profile": "~Kunal_Handa1;~Margarett_Clapper1;~Jessica_Boyle1;~Rose_E_Wang1;~Diyi_Yang2;~David_Yeager2;~Dorottya_Demszky1", "aff": "Brown University;University of Texas at Austin;Vanderbilt University;Stanford University;Stanford University;University of Texas at Austin;Stanford University", "aff_domain": "brown.edu;utexas.edu;vanderbilt.edu;stanford.edu;stanford.edu;utexas.edu;stanford.edu", "position": "Undergrad student;PhD student;PhD student;PhD student;Assistant Professor;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\nhanda2023mistakes,\ntitle={{\\textquotedblleft}Mistakes Help Us Grow{\\textquotedblright}: Facilitating and Evaluating Growth Mindset Supportive Language in Classrooms},\nauthor={Kunal Handa and Margarett Clapper and Jessica Boyle and Rose E Wang and Diyi Yang and David Yeager and Dorottya Demszky},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=SUAeMJKg6b}\n}", "github": "", "project": "", "reviewers": "swnb;QY3p;jpn8", "site": "https://openreview.net/forum?id=SUAeMJKg6b", "pdf_size": 0, "rating": "4;4;4", "confidence": "5;5;4", "excitement": "3;4;4", "reproducibility": "5;3;4", "correctness": "3;4;4", "rating_avg": 
4.0, "confidence_avg": 4.666666666666667, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.0, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;;0000-0002-6759-9367", "linkedin": ";;;;;;", "aff_unique_index": "0;1;2;3;3;1;3", "aff_unique_norm": "Brown University;University of Texas at Austin;Vanderbilt University;Stanford University", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.brown.edu;https://www.utexas.edu;https://www.vanderbilt.edu;https://www.stanford.edu", "aff_unique_abbr": "Brown;UT Austin;Vanderbilt;Stanford", "aff_campus_unique_index": "1;2;2;1;2", "aff_campus_unique": ";Austin;Stanford", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "SViJgzox1z", "title": "Parameter Efficient Multi-task Fine-tuning by Learning to Transfer Token-wise Prompts", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Prompt tuning has been proven to be successful on various tasks by incorporating a small number of trainable parameters while freezing large pre-trained language models (PLMs). \nHowever, it is still unsettled how to generate more proper prompts for any individual examples and how to extend prompt tuning to multi-task learning scenarios by leveraging cross-task features. \nTo address these challenges, we propose a token-wise prompt tuning (TPT), in which a bank of finer-grained soft prompt tokens is built for multi-task learning by memory network. \nThe tokens are retrieved from the bank against an input example and assembled to an instance-dependent prompt. Extensive experimental results on $14$ datasets demonstrated that the models enhanced by our TPT performed far better than full parameter fine-tuned models and achieved state-of-the-art by tuning only $0.035\\%$ parameters.", "keywords": "multi-task learning;token-wise;memory network;instance-dependent prompt", "primary_area": "", "supplementary_material": "", "author": "Muling Wu;Wenhao Liu;Jianhan Xu;Changze Lv;Zixuan Ling;Tianlong Li;Longtao Huang;Xiaoqing Zheng;Xuanjing Huang", "authorids": "~Muling_Wu1;~Wenhao_Liu2;~Jianhan_Xu1;~Changze_Lv1;~Zixuan_Ling1;~Tianlong_Li4;~Longtao_Huang2;~Xiaoqing_Zheng2;~Xuanjing_Huang1", "gender": ";M;M;M;M;M;M;;F", "homepage": ";;;https://lvchangze.github.io;https://github.com/narcissusLZX;https://github.com/Tengyuantuohai-113;http://people.ucas.edu.cn/~huanglongtao?language=en;;https://xuanjing-huang.github.io/", "dblp": "358/8927;;278/1558.html;350/4445;;;76/10119;;05/6735-1", "google_scholar": ";;G_p-oocAAAAJ;t3-viUwAAAAJ;;https://scholar.google.com.hk/citations?hl=zh-CN;EQDfV9cAAAAJ;;RGsMgZA4H78C", "or_profile": "~Muling_Wu1;~Wenhao_Liu2;~Jianhan_Xu1;~Changze_Lv1;~Zixuan_Ling1;~Tianlong_Li4;~Longtao_Huang2;~Xiaoqing_Zheng2;~Xuanjing_Huang1", "aff": "Fudan University;Fudan University;Fudan University;Fudan University;Fudan University;Fudan University;Alibaba Group;;Fudan University", "aff_domain": "fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;alibaba-inc.com;;fudan.edu.cn", "position": "MS student;MS student;MS student;PhD student;MS student;MS student;Researcher;;Full Professor", "bibtex": "@inproceedings{\nwu2023parameter,\ntitle={Parameter Efficient Multi-task Fine-tuning by Learning to Transfer Token-wise Prompts},\nauthor={Muling Wu and Wenhao Liu and Jianhan Xu and Changze Lv and Zixuan Ling and Tianlong Li and Longtao Huang and Xiaoqing Zheng and Xuanjing 
Huang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=SViJgzox1z}\n}", "github": "", "project": "", "reviewers": "11wC;viJ7;GwUS;ZdZA", "site": "https://openreview.net/forum?id=SViJgzox1z", "pdf_size": 0, "rating": "3;3;3;3", "confidence": "3;4;3;4", "excitement": "2;1;3;2", "reproducibility": "3;4;4;4", "correctness": "3;2;3;2", "rating_avg": 3.0, "confidence_avg": 3.5, "excitement_avg": 2.0, "reproducibility_avg": 3.75, "correctness_avg": 2.5, "replies_avg": 13, "authors#_avg": 9, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0009-0003-1875-6658;0009-0008-8455-4743;;;;;;;0000-0001-9197-9426", "linkedin": ";;;;;;;;", "aff_unique_index": "0;0;0;0;0;0;1;0", "aff_unique_norm": "Fudan University;Alibaba Group", "aff_unique_dep": ";", "aff_unique_url": "https://www.fudan.edu.cn;https://www.alibaba.com", "aff_unique_abbr": "Fudan;Alibaba", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "SdpSaw26XT", "title": "Mirror: A Universal Framework for Various Information Extraction Tasks", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Sharing knowledge between information extraction tasks has always been a challenge due to the diverse data formats and task variations.\nMeanwhile, this divergence leads to information waste and increases difficulties in building complex applications in real scenarios.\nRecent studies often formulate IE tasks as a triplet extraction problem.\nHowever, such a paradigm does not support multi-span and n-ary extraction, leading to weak versatility.\nTo this end, we reorganize IE problems into unified multi-slot tuples and propose a universal framework for various IE tasks, namely Mirror.\nSpecifically, we recast existing IE tasks as a multi-span cyclic graph extraction problem and devise a non-autoregressive graph decoding algorithm to extract all spans in a single step.\nIt is worth noting that this graph structure is incredibly versatile, and it supports not only complex IE tasks, but also machine reading comprehension and classification tasks.\nWe manually construct a corpus containing 57 datasets for model pretraining, and conduct experiments on 30 datasets across 8 downstream tasks.\nThe experimental results demonstrate that our model has decent compatibility and outperforms or reaches competitive performance with SOTA systems under few-shot and zero-shot settings.\nThe code, model weights, and pretraining corpus are available at https://github.com/Spico197/Mirror .", "keywords": "Information Extraction;Non-Autoregressive Decoding;Multi-task", "primary_area": "", "supplementary_material": "", "author": "Tong Zhu;Junfei Ren;Zijian Yu;Mengsong Wu;Guoliang Zhang;Xiaoye Qu;Wenliang Chen;Zhefeng Wang;Baoxing Huai;Min Zhang", "authorids": "~Tong_Zhu2;~Junfei_Ren1;~Zijian_Yu3;~Mengsong_Wu1;~Guoliang_Zhang1;~Xiaoye_Qu1;~Wenliang_Chen1;~Zhefeng_Wang1;~Baoxing_Huai1;~Min_Zhang9", "gender": ";M;M;M;M;M;M;M;;M", "homepage": ";https://github.com/15962171082;https://github.com/NLPlearner;https://fairyshine.github.io/;;;;;;https://zhangmin-nlp-ai.github.io/", "dblp": "36/1469-2;;;;;229/8206;43/2376;147/9113;152/3689.html;83/5342-5", "google_scholar": ";;;;;rT3hqdcAAAAJ;YfYi8VMAAAAJ;t22ZUJ4AAAAJ;LSkVMHQAAAAJ;https://scholar.google.com/citations?hl=zh-CN", "or_profile": 
"~Tong_Zhu2;~Junfei_Ren1;~Zijian_Yu3;~Mengsong_Wu1;~Guoliang_Zhang1;~Xiaoye_Qu1;~Wenliang_Chen1;~Zhefeng_Wang1;~Baoxing_Huai1;~Min_Zhang9", "aff": "Soochow University, China;Suzhou University;Suzhou University;Suzhou University;Suzhou University;Shanghai Artificial Intelligence Laboratory;Soochow University, China;Huawei Technologies Ltd.;;Harbin Institute of Technology, Shenzhen", "aff_domain": "suda.edu.cn;suda.edu.cn;suda.edu.cn;suda.edu.cn;suda.edu.cn;pjlab.org.cn;suda.edu.cn;huawei.com;;hit.edu.cn", "position": "PhD student;MS student;MS student;MS student;MS student;Researcher;Full Professor;Researcher;;Full Professor", "bibtex": "@inproceedings{\nzhu2023mirror,\ntitle={Mirror: A Universal Framework for Various Information Extraction Tasks},\nauthor={Tong Zhu and Junfei Ren and Zijian Yu and Mengsong Wu and Guoliang Zhang and Xiaoye Qu and Wenliang Chen and Zhefeng Wang and Baoxing Huai and Min Zhang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=SdpSaw26XT}\n}", "github": "", "project": "", "reviewers": "59YZ;aSvu;z1TA", "site": "https://openreview.net/forum?id=SdpSaw26XT", "pdf_size": 0, "rating": "5;5;5", "confidence": "5;4;3", "excitement": "2;3;4", "reproducibility": "4;4;3", "correctness": "4;4;4", "rating_avg": 5.0, "confidence_avg": 4.0, "excitement_avg": 3.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 10, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-5433-8504;;;;0000-0002-3639-0712;;;;;", "linkedin": ";;;;;%E6%99%93%E6%99%94-xiaoye-qu-%E7%9E%BF-8b9a0a133/;;;;", "aff_unique_index": "0;1;1;1;1;2;0;3;4", "aff_unique_norm": "Soochow University;Suzhou University;Shanghai Artificial Intelligence Laboratory;Huawei;Harbin Institute of Technology", "aff_unique_dep": ";;;Huawei Technologies;", "aff_unique_url": "https://www.soochow.edu.cn;https://www.suda.edu.cn;http://www.shailab.org/;https://www.huawei.com;http://en.hhit.edu.cn/", "aff_unique_abbr": "Soochow U;Suda;Shanghai AI Lab;Huawei;HIT", "aff_campus_unique_index": "1", "aff_campus_unique": ";Shenzhen", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "SfI8GT3xdb", "title": "Chain-of-Questions Training with Latent Answers for Robust Multistep Question Answering", "track": "main", "status": "Long Main", "tldr": "", "abstract": "We propose Chain-of-Questions, a framework that trains a model to robustly answer multistep questions by generating and answering sub-questions. We obtain supervision for sub-questions from human-annotated question decomposition meaning representation (QDMR),\nbut QDMR does not include annotated answers to sub-questions. To overcome this technical challenge, we treat sub-answers as latent variables and infer them with a novel dynamic mixture of Hard-EM and MAPO. 
Chain-of-Questions is effective and robust, greatly outperforming strong neuro-symbolic methods by 9.0 F1 on a DROP contrast set and GPT-3.5 by 24.3 F1 on a HotpotQA adversarial set.", "keywords": "multistep reasoning;question answering;latent variable learning", "primary_area": "", "supplementary_material": "", "author": "Wang Zhu;Jesse Thomason;Robin Jia", "authorids": "~Wang_Zhu1;~Jesse_Thomason1;~Robin_Jia1", "gender": "M;M;M", "homepage": "https://billzhu.me;https://jessethomason.com/;https://robinjia.github.io/", "dblp": "223/4711-1;130/2863;182/2556", "google_scholar": "dMkqNF8AAAAJ;8BeTDr0AAAAJ;ajZ-_O0AAAAJ", "or_profile": "~Wang_Zhu1;~Jesse_Thomason1;~Robin_Jia1", "aff": "University of Southern California;Amazon;University of Southern California", "aff_domain": "usc.edu;amazon.com;usc.edu", "position": "PhD student;Visiting Academic;Assistant Professor", "bibtex": "@inproceedings{\nzhu2023chainofquestions,\ntitle={Chain-of-Questions Training with Latent Answers for Robust Multistep Question Answering},\nauthor={Wang Zhu and Jesse Thomason and Robin Jia},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=SfI8GT3xdb}\n}", "github": "", "project": "", "reviewers": "dfZy;UiWR;7ULL", "site": "https://openreview.net/forum?id=SfI8GT3xdb", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;4;4", "excitement": "3;4;4", "reproducibility": "3;3;4", "correctness": "4;3;3", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-6821-4115;0000-0001-9199-0633;", "linkedin": ";jesse-thomason-034746171/;", "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Southern California;Amazon", "aff_unique_dep": ";Amazon.com, Inc.", "aff_unique_url": "https://www.usc.edu;https://www.amazon.com", "aff_unique_abbr": "USC;Amazon", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Los Angeles;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "ShQoWnMu1b", "title": "Learning to Predict Task Transferability via Soft Prompt", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Fine-tuning pretrained language models on helpful intermediate tasks often greatly improves the performance of target tasks. However, how to efficiently find the source tasks that can successfully transfer still remains under-explored. In this work, we propose to learn an affinity scoring function to predict transferability between tasks. Specifically, we conduct prompt tuning and regard soft prompts as task embeddings that summarize task-specific information. Then we randomly sample task pairs to train an affinity scoring function. The goal is to predict the transfer gain (i.e., affinity) between a task pair, by conditioning on their task embeddings. Once the scoring function is trained, given a novel target task, we use it to predict the most transferable source tasks, without a brute-force search for all possible source-target pairs. 
Experimental results across 50 tasks show that our method efficiently identifies beneficial tasks for transfer learning.", "keywords": "transfer learning;prompt tuning", "primary_area": "", "supplementary_material": "", "author": "Lingyun Feng", "authorids": "~Lingyun_Feng1", "gender": "", "homepage": "", "dblp": "196/1807.html", "google_scholar": "", "or_profile": "~Lingyun_Feng1", "aff": "", "aff_domain": "", "position": "", "bibtex": "@inproceedings{\nfeng2023learning,\ntitle={Learning to Predict Task Transferability via Soft Prompt},\nauthor={Lingyun Feng},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=ShQoWnMu1b}\n}", "github": "", "project": "", "reviewers": "fsyc;23Bc;uUvq", "site": "https://openreview.net/forum?id=ShQoWnMu1b", "pdf_size": 0, "rating": "5;5;5", "confidence": "5;3;5", "excitement": "3;4;4", "reproducibility": "4;4;4", "correctness": "3;4;3", "rating_avg": 5.0, "confidence_avg": 4.333333333333333, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 1, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "", "linkedin": "" }, { "id": "SihQ9bBLWa", "title": "Annotations Are Not All You Need: A Cross-modal Knowledge Transfer Network for Unsupervised Temporal Sentence Grounding", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "This paper addresses the task of temporal sentence grounding (TSG). Although many respectable works have made decent achievements in this important topic, they severely rely on massive expensive video-query paired annotations, which require a tremendous amount of human effort to collect in real-world applications. To this end, in this paper, we target a more practical but challenging TSG setting: unsupervised temporal sentence grounding, where both paired video-query and segment boundary annotations are unavailable during the network training. Considering that some other cross-modal tasks provide many easily available yet cheap labels, we tend to collect and transfer their simple cross-modal alignment knowledge into our complex scenarios: 1) We first explore the entity-aware object-guided appearance knowledge from the paired Image-Noun task, and adapt them into each independent video frame; 2) Then, we extract the event-aware action representation from the paired Video-Verb task, and further refine the action representation into more practical but complicated real-world cases by a newly proposed copy-paste approach; 3) By modulating and transferring both appearance and action knowledge into our challenging unsupervised task, our model can directly utilize this general knowledge to correlate videos and queries, and accurately retrieve the relevant segment without training. 
Extensive experiments on two challenging datasets (ActivityNet Captions and Charades-STA) show our effectiveness, outperforming existing unsupervised methods and even competitively beating supervised works.", "keywords": "Cross-modal Knowledge Transfer;Unsupervised Temporal Sentence Grounding", "primary_area": "", "supplementary_material": "", "author": "Xiang Fang;Daizong Liu;Wanlong Fang;Pan Zhou;Yu Cheng;Keke Tang;Kai Zou", "authorids": "~Xiang_Fang1;~Daizong_Liu1;~Wanlong_Fang1;~Pan_Zhou5;~Yu_Cheng1;~Keke_Tang2;~Kai_Zou2", "gender": ";;;M;M;M;M", "homepage": ";https://liudaizong.github.io/HomePage/;;http://faculty.hust.edu.cn/pzhou/zh_CN/index.htm;https://ych133.github.io;https://tangbohu.github.io/;https://www.linkedin.com/in/kz4225/", "dblp": ";239/6021;;84/6614-1;96/3060-1.html;162/3984;135/509201", "google_scholar": ";lUw7tVIAAAAJ;;cTpFPJgAAAAJ;https://scholar.google.com/citations?hl=en;9Lk6HpQAAAAJ;https://scholar.google.com/citations?hl=en", "or_profile": "~Xiang_Fang1;~Daizong_Liu1;~Wanlong_Fang1;~Pan_Zhou5;~Yu_Cheng1;~Keke_Tang2;~Kai_Zou2", "aff": ";Peking University;;Huazhong University of Science and Technology;Microsoft Research;Guangzhou University;Protagolabs Inc", "aff_domain": ";pku.edu.cn;;hust.edu.cn;microsoft.com;gzhu.edu.cn;protagolabs.ai", "position": ";PhD student;;Professor;Principal Researcher;Associate Professor;Founder CEO", "bibtex": "@inproceedings{\nfang2023annotations,\ntitle={Annotations Are Not All You Need: A Cross-modal Knowledge Transfer Network for Unsupervised Temporal Sentence Grounding},\nauthor={Xiang Fang and Daizong Liu and Wanlong Fang and Pan Zhou and Yu Cheng and Keke Tang and Kai Zou},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=SihQ9bBLWa}\n}", "github": "", "project": "", "reviewers": "MGd2;4roi;Zmbu", "site": "https://openreview.net/forum?id=SihQ9bBLWa", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;3;4", "excitement": "2;4;4", "reproducibility": "3;3;4", "correctness": "1;4;4", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0001-8179-4508;;;;0000-0003-0377-1022;", "linkedin": ";;;;chengyu05/;;kz4225/", "aff_unique_index": "0;1;2;3;4", "aff_unique_norm": "Peking University;Huazhong University of Science and Technology;Microsoft;Guangzhou University;Protagolabs", "aff_unique_dep": ";;Microsoft Research;;", "aff_unique_url": "http://www.pku.edu.cn;http://www.hust.edu.cn;https://www.microsoft.com/en-us/research;http://www.gzhu.edu.cn;", "aff_unique_abbr": "Peking U;HUST;MSR;GU;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;1", "aff_country_unique": "China;United States" }, { "id": "SkWgL49qwI", "title": "A Language Model with Limited Memory Capacity Captures Interference in Human Sentence Processing", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Two of the central factors believed to underpin human sentence processing difficulty are expectations and retrieval from working memory. 
A recent attempt to create a unified cognitive model integrating these two factors has relied on the parallels between the self-attention mechanism of transformer language models and cue-based retrieval theories of working memory in human sentence processing (Ryu and Lewis 2021). While the authors show that attention patterns in specialized attention heads of GPT-2 are consistent with a key prediction of cue-based retrieval models, similarity-based interference effects, their method requires the identification of syntactically specialized attention heads, and makes a cognitively implausible implicit assumption that hundreds of memory retrieval operations take place in parallel. In the present work, we develop a recurrent neural language model with a single self-attention head, which more closely parallels the memory system assumed by cognitive theories. We show that our model\u2019s single attention head can capture semantic and syntactic interference effects observed in human experiments.", "keywords": "cue-based retrieval;working memory;interference;attention;agreement attraction;neural networks;cognitive modeling;surprisal;attention", "primary_area": "", "supplementary_material": "", "author": "William Timkey;Tal Linzen", "authorids": "~William_Timkey1;~Tal_Linzen1", "gender": ";M", "homepage": "https://wtimkey.github.io/;http://tallinzen.net", "dblp": ";169/3438", "google_scholar": "YbBHsJ8AAAAJ;5mJDXjoAAAAJ", "or_profile": "~William_Timkey1;~Tal_Linzen1", "aff": "New York University;New York University", "aff_domain": "nyu.edu;nyu.edu", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\ntimkey2023a,\ntitle={A Language Model with Limited Memory Capacity Captures Interference in Human Sentence Processing},\nauthor={William Timkey and Tal Linzen},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=SkWgL49qwI}\n}", "github": "", "project": "", "reviewers": "jZo2;Cvhi;mtFz", "site": "https://openreview.net/forum?id=SkWgL49qwI", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;4;4", "excitement": "3;4;4", "reproducibility": "2;4;4", "correctness": "3;3;5", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";", "linkedin": ";", "aff_unique_index": "0;0", "aff_unique_norm": "New York University", "aff_unique_dep": "", "aff_unique_url": "https://www.nyu.edu", "aff_unique_abbr": "NYU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "SlL3dr0Xa9", "title": "Show, Write, and Retrieve: Entity-aware Article Generation and Retrieval", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Article comprehension is an important challenge in natural language processing with many applications such as article generation or image-to-article retrieval. Prior work typically encodes all tokens in articles uniformly using pretrained language models. However, in many applications, such as understanding news stories, these articles are based on real-world events and may reference many named entities that are difficult to accurately recognize and predict by language models. 
To address this challenge, we propose an ENtity-aware article GeneratIoN and rEtrieval (ENGINE) framework, to explicitly incorporate named entities into language models. ENGINE has two main components: a named-entity extraction module to extract named entities from both metadata and embedded images associated with articles, and an entity-aware mechanism that enhances the model's ability to recognize and predict entity names. We conducted experiments on three public datasets: GoodNews, VisualNews, and WikiText, where our results demonstrate that our model can boost both article generation and article retrieval performance, with a 4-5 perplexity improvement in article generation and a 3-4% boost in recall@1 in article retrieval. We release our implementation at [this http URL](https://github.com/Zhongping-Zhang/ENGINE).", "keywords": "article generation;article retrieval;named entity recognition", "primary_area": "", "supplementary_material": "", "author": "Zhongping Zhang;Yiwen Gu;Bryan A. Plummer", "authorids": "~Zhongping_Zhang1;~Yiwen_Gu1;~Bryan_A._Plummer1", "gender": "M;;M", "homepage": "http://cs-people.bu.edu/zpzhang/;https://cs-people.bu.edu/yiweng/;http://bryanplummer.com/", "dblp": "132/6203;;163/2330", "google_scholar": "6C20vTwAAAAJ;;https://scholar.google.com/citations?hl=en", "or_profile": "~Zhongping_Zhang1;~Yiwen_Gu1;~Bryan_Allen_Plummer1", "aff": "Boston University;Boston University, Boston University;Boston University", "aff_domain": "bu.edu;bu.edu;bu.edu", "position": "PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nzhang2023show,\ntitle={Show, Write, and Retrieve: Entity-aware Article Generation and Retrieval},\nauthor={Zhongping Zhang and Yiwen Gu and Bryan A. Plummer},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=SlL3dr0Xa9}\n}", "github": "", "project": "", "reviewers": "KMxz;U6FG;5dtH", "site": "https://openreview.net/forum?id=SlL3dr0Xa9", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;4", "excitement": "4;2;3", "reproducibility": "4;4;1", "correctness": "3;2;4", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.0, "reproducibility_avg": 3.0, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-2437-0343;", "linkedin": ";;", "aff_unique_index": "0;0;0", "aff_unique_norm": "Boston University", "aff_unique_dep": "", "aff_unique_url": "https://www.bu.edu", "aff_unique_abbr": "BU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Boston", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "Sm3RzRKCel", "title": "Dialogizer: Context-aware Conversational-QA Dataset Generation from Textual Sources", "track": "main", "status": "Long Main", "tldr": "", "abstract": "To address the data scarcity issue in Conversational question answering (ConvQA), a dialog inpainting method, which utilizes documents to generate ConvQA datasets, has been proposed. However, the original dialog inpainting model is trained solely on the dialog reconstruction task, resulting in the generation of questions with low contextual relevance due to insufficient learning of question-answer alignment. To overcome this limitation, we propose a novel framework called Dialogizer, which has the capability to automatically generate ConvQA datasets with high contextual relevance from textual sources. 
The framework incorporates two training tasks: question-answer matching (QAM) and topic-aware dialog generation (TDG). Moreover, re-ranking is conducted during the inference phase based on the contextual relevance of the generated questions. Using our framework, we produce four ConvQA datasets by utilizing documents from multiple domains as the primary source. Through automatic evaluation using diverse metrics, as well as human evaluation, we validate that our proposed framework exhibits the ability to generate datasets of higher quality compared to the baseline dialog inpainting model.", "keywords": "Dialog System;Conversational Question Answering;Dataset Generation", "primary_area": "", "supplementary_material": "", "author": "Yerin Hwang;Yongil Kim;Hyunkyung Bae;Hwanhee Lee;Jeesoo Bang;Kyomin Jung", "authorids": "~Yerin_Hwang1;~Yongil_Kim1;~Hyunkyung_Bae1;~Hwanhee_Lee1;~Jeesoo_Bang1;~Kyomin_Jung1", "gender": "F;M;F;M;;M", "homepage": "https://yerin-hwang49.github.io/;https://yong1-kim.github.io;https://github.com/jennybae1024;https://hwanheelee1993.github.io/;;http://milab.snu.ac.kr/kjung/index.html", "dblp": ";96/4712;;218/5402;148/9768;48/3867", "google_scholar": ";https://scholar.google.com/citations?hl=en;;eRM8zHkAAAAJ;bKssw7kAAAAJ;https://scholar.google.co.kr/citations?user=u3uMl4MAAAAJ", "or_profile": "~Yerin_Hwang1;~Yongil_Kim1;~Hyunkyung_Bae1;~Hwanhee_Lee1;~Jeesoo_Bang1;~Kyomin_Jung1", "aff": "Seoul National University;Seoul National University;LG AI Research;Seoul National University;LG AI Research;Seoul National University", "aff_domain": "snu.ac.kr;snu.ac.kr;lgresearch.ai;snu.ac.kr;lgresearch.ai;snu.ac.kr", "position": "PhD student;PhD student;Researcher;Postdoc;Postdoc;Full Professor", "bibtex": "@inproceedings{\nhwang2023dialogizer,\ntitle={Dialogizer: Context-aware Conversational-{QA} Dataset Generation from Textual Sources},\nauthor={Yerin Hwang and Yongil Kim and Hyunkyung Bae and Hwanhee Lee and Jeesoo Bang and Kyomin Jung},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=Sm3RzRKCel}\n}", "github": "", "project": "", "reviewers": "ja3s;D2VE;7ih2;d7bR", "site": "https://openreview.net/forum?id=Sm3RzRKCel", "pdf_size": 0, "rating": "4;4;4;4", "confidence": "3;1;4;3", "excitement": "3;4;4;3", "reproducibility": "3;5;4;3", "correctness": "2;4;4;4", "rating_avg": 4.0, "confidence_avg": 2.75, "excitement_avg": 3.5, "reproducibility_avg": 3.75, "correctness_avg": 3.5, "replies_avg": 13, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;0000-0003-1062-7686;", "linkedin": ";;;hwanhee-lee-69a435133/?originalSubdomain=;;", "aff_unique_index": "0;0;1;0;1;0", "aff_unique_norm": "Seoul National University;LG", "aff_unique_dep": ";LG AI Research", "aff_unique_url": "https://www.snu.ac.kr;https://www.lgaires.com", "aff_unique_abbr": "SNU;LG AI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "South Korea" }, { "id": "SnFmGmKTn1", "title": "KICGPT: Large Language Model with Knowledge in Context for Knowledge Graph Completion", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Knowledge Graph Completion (KGC) is crucial for addressing knowledge graph incompleteness and supporting downstream applications. Many models have been proposed for KGC and they can be categorized into two main classes, including triple-based and text-based approaches. 
Triple-based methods struggle with long-tail entities due to limited structural information and imbalanced distributions of entities. Text-based methods alleviate this issue but require costly training for language models and specific finetuning for knowledge graphs, which limits their efficiency. To alleviate the limitations in the two approaches, in this paper, we propose KICGPT, a framework that integrates a large language model (LLM) and a triple-based KGC retriever, to alleviate the long-tail problem without incurring additional training overhead. In the proposed KICGPT model, we propose an in-context learning strategy called Knowledge Prompt, which encodes structural knowledge into demonstrations to guide LLM. Empirical results on benchmark datasets demonstrate the effectiveness of the proposed KICGPT model with lighter training overhead and no finetuning.", "keywords": "Knowledge Graph Completion;Large Language Model", "primary_area": "", "supplementary_material": "", "author": "Yanbin Wei;Qiushi Huang;Yu Zhang;James Kwok", "authorids": "~Yanbin_Wei1;~Qiushi_Huang1;~Yu_Zhang3;~James_Kwok1", "gender": "M;M;M;", "homepage": ";;http://cse.sustech.edu.cn/faculty/~zhangy/;", "dblp": "329/1767;204/2933;50/671-6;", "google_scholar": "QkcrPzIAAAAJ;F_yGB9sAAAAJ;https://scholar.google.com.hk/citations?user=jaRS5w4AAAAJ;", "or_profile": "~Yanbin_Wei1;~Qiushi_Huang1;~Yu_Zhang3;~James_Kwok1", "aff": "Hong Kong University of Science and Technology;University of Surrey;Southern University of Science and Technology;", "aff_domain": "ust.hk;surrey.ac.uk;sustc.edu.cn;", "position": "PhD student;PhD student;Associate Professor;", "bibtex": "@inproceedings{\nwei2023kicgpt,\ntitle={{KICGPT}: Large Language Model with Knowledge in Context for Knowledge Graph Completion},\nauthor={Yanbin Wei and Qiushi Huang and Yu Zhang and James Kwok},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=SnFmGmKTn1}\n}", "github": "", "project": "", "reviewers": "dRov;A54b;C1He;QYzB", "site": "https://openreview.net/forum?id=SnFmGmKTn1", "pdf_size": 0, "rating": "4;4;4;4", "confidence": "4;3;3;4", "excitement": "3;3;4;3", "reproducibility": "3;3;4;3", "correctness": "4;3;4;3", "rating_avg": 4.0, "confidence_avg": 3.5, "excitement_avg": 3.25, "reproducibility_avg": 3.25, "correctness_avg": 3.5, "replies_avg": 13, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0003-1301-2505;;;", "linkedin": ";;;", "aff_unique_index": "0;1;2", "aff_unique_norm": "Hong Kong University of Science and Technology;University of Surrey;Southern University of Science and Technology", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ust.hk;https://www.surrey.ac.uk;https://www.sustech.edu.cn", "aff_unique_abbr": "HKUST;Surrey;SUSTech", "aff_campus_unique_index": "0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;1;0", "aff_country_unique": "China;United Kingdom" }, { "id": "SoypWgmvqP", "title": "Detecting Propaganda Techniques in Code-Switched Social Media Text", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Propaganda is a form of communication intended to influence the opinions and the mindset of the public to promote a particular agenda. With the rise of social media, propaganda has spread rapidly, leading to the need for automatic propaganda detection systems. 
Most work on propaganda detection has focused on high-resource languages, such as English, and little effort has been made to detect propaganda for low-resource languages. Yet, it is common to find a mix of multiple languages in social media communication, a phenomenon known as code-switching. Code-switching combines different languages within the same text, which poses a challenge for automatic systems. Considering this premise, we propose a novel task of detecting propaganda techniques in code-switched text. To support this task, we create a corpus of 1,030 texts code-switching between English and Roman Urdu, annotated with 20 propaganda techniques at fragment-level. We perform a number of experiments contrasting different experimental setups, and we find that it is important to model the multilinguality directly rather than using translation as well as to use the right fine-tuning strategy. We plan to publicly release our code and dataset.", "keywords": "propaganda detection;code-switching;low-resource languages;multilinguality;roman-urdu;natural language processing", "primary_area": "", "supplementary_material": "", "author": "Muhammad Umar Salman;Asif Hanif;Shady Shehata;Preslav Nakov", "authorids": "~Muhammad_Umar_Salman1;~Asif_Hanif2;~Shady_Shehata1;~Preslav_Nakov2", "gender": "M;M;M;M", "homepage": "https://umar1997.github.io/;https://github.com/asif-hanif;;https://mbzuai.ac.ae/study/faculty/preslav-nakov/", "dblp": ";;92/6456;https://dblp.uni-trier.de/pid/19/1947", "google_scholar": "https://scholar.google.com/citations?view_op=list_works;6SO2wqUAAAAJ;osOiYvYAAAAJ;DfXsKZ4AAAAJ", "or_profile": "~Muhammad_Umar_Salman1;~Asif_Hanif2;~Shady_Shehata1;~Preslav_Nakov2", "aff": "Mohamed bin Zayed University of Artificial Intelligence;Mohamed bin Zayed University of Artificial Intelligence;Mohamed bin Zayed University of Artificial Intelligence;Mohamed bin Zayed University of Artificial Intelligence", "aff_domain": "mbzuai.ac.ae;mbzuai.ac.ae;mbzuai.ac.ae;mbzuai.ac.ae", "position": "MS student;PhD student;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nsalman2023detecting,\ntitle={Detecting Propaganda Techniques in Code-Switched Social Media Text},\nauthor={Muhammad Umar Salman and Asif Hanif and Shady Shehata and Preslav Nakov},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=SoypWgmvqP}\n}", "github": "", "project": "", "reviewers": "YUyd;ut2p;KqAR;s1qS;o5WW;X7cv", "site": "https://openreview.net/forum?id=SoypWgmvqP", "pdf_size": 0, "rating": "4;4;4;4;4;4", "confidence": "4;4;4;3;4;5", "excitement": "2;1;4;4;3;4", "reproducibility": "4;2;4;3;4;4", "correctness": "2;2;3;4;2;5", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.0, "reproducibility_avg": 3.5, "correctness_avg": 3.0, "replies_avg": 20, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0002-3258-6734;0000-0002-3600-1510", "linkedin": "umar-salman/;asif-hanif-;shady-shehata/;preslavnakov/", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Mohamed bin Zayed University of Artificial Intelligence", "aff_unique_dep": "", "aff_unique_url": "https://mbzuai.ac.ae", "aff_unique_abbr": "MBZUAI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United Arab Emirates" }, { "id": "Srxf1V2jPa", "title": "RobustGEC: Robust Grammatical Error Correction Against Subtle Context Perturbation", "track": "main", 
"status": "Long Main", "tldr": "", "abstract": "Grammatical Error Correction (GEC) systems play a vital role in assisting people with their daily writing tasks. However, users may sometimes come across a GEC system that initially performs well but fails to correct errors when the inputs are slightly modified. To ensure an ideal user experience, a reliable GEC system should have the ability to provide consistent and accurate suggestions when encountering irrelevant context perturbations, which we refer to as context robustness. In this paper, we introduce RobustGEC, a benchmark designed to evaluate the context robustness of GEC systems. RobustGEC comprises 5,000 GEC cases, each with one original error-correct sentence pair and five variants carefully devised by human annotators. Utilizing RobustGEC, we reveal that state-of-the-art GEC systems still lack sufficient robustness against context perturbations. Moreover, we propose a simple yet effective method for remitting this issue.", "keywords": "Grammatical Error Correction;Robustness", "primary_area": "", "supplementary_material": "", "author": "Yue Zhang;Leyang Cui;Enbo Zhao;Wei Bi;Shuming Shi", "authorids": "~Yue_Zhang12;~Leyang_Cui1;~Enbo_Zhao1;~Wei_Bi1;~Shuming_Shi1", "gender": "M;M;;F;M", "homepage": "https://hillzhang1999.github.io/;https://github.com/Nealcly;https://github.com/Whiplashzeb;https://scholar.google.com.hk/citations?hl=en&user=aSJcgQMAAAAJ&view_op=list_works&sortby=pubdate#d=gsc_md_iad&u=%2Fcitations%3Fview_op%3Dimport_lookup%26hl%3Den%26imq%3DWei%2BBi%26json%3D%26btnA%3D1;", "dblp": ";247/6181;;38/1163;s/ShumingShi", "google_scholar": "wYEAchYAAAAJ;6YVwZgkAAAAJ;;https://scholar.google.com.hk/citations?hl=en;Lg31AKMAAAAJ", "or_profile": "~Yue_Zhang12;~Leyang_Cui1;~Enbo_Zhao1;~Wei_Bi1;~Shuming_Shi1", "aff": "Suzhou University;Tencent AI Lab;;Hong Kong University of Science and Technology;Tencent AI Lab", "aff_domain": "suda.edu.cn;tencent.com;;ust.hk;tencent.com", "position": "MS student;Researcher;;PhD student;Principal Researcher", "bibtex": "@inproceedings{\nzhang2023robustgec,\ntitle={Robust{GEC}: Robust Grammatical Error Correction Against Subtle Context Perturbation},\nauthor={Yue Zhang and Leyang Cui and Enbo Zhao and Wei Bi and Shuming Shi},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=Srxf1V2jPa}\n}", "github": "", "project": "", "reviewers": "hW9g;3EQP;t6Xg", "site": "https://openreview.net/forum?id=Srxf1V2jPa", "pdf_size": 0, "rating": "4;4;4", "confidence": "5;4;4", "excitement": "4;4;4", "reproducibility": "4;4;4", "correctness": "3;4;4", "rating_avg": 4.0, "confidence_avg": 4.333333333333333, "excitement_avg": 4.0, "reproducibility_avg": 4.0, "correctness_avg": 3.6666666666666665, "replies_avg": 10, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0000-0001-8457-0630;", "linkedin": ";;;;", "aff_unique_index": "0;1;2;1", "aff_unique_norm": "Suzhou University;Tencent;Hong Kong University of Science and Technology", "aff_unique_dep": ";Tencent AI Lab;", "aff_unique_url": "https://www.suda.edu.cn;https://ai.tencent.com;https://www.ust.hk", "aff_unique_abbr": "Suda;Tencent AI Lab;HKUST", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "SvmlxXMLYr", "title": "COUNT: COntrastive UNlikelihood Text Style Transfer for Text Detoxification", "track": "main", "status": "Short 
Findings", "tldr": "", "abstract": "Offensive and toxic text on social media platforms can lead to polarization and divisiveness within online communities and hinders constructive dialogue. Text detoxification is a crucial task in natural language processing to ensure the generation of non-toxic and safe text. Text detoxification is a special case of the Text Style Transfer (TST) problem, where an input text is rephrased to an output text that preserves its content while modifying the style (in this case to a more neutral, non-toxic style). State-of-the-art methods for detoxification use supervised training of encoder-decoder models to produce gold-standard outputs with a standard likelihood-based objective. However, it can be hard for these models to deviate from their pretrained auto-encoder identity mapping. While previous methods have used unlikelihood-based losses to penalize input-to-output copying of toxic content, these methods also unfortunately penalize non-toxic content in the input that would be fine to preserve in the output. To address these issues, we introduce a novel contrastive unlikelihood objective (COUNT) that directly contrasts the gold standard rephrasing with the identity input-to-output mapping to effectively isolate and focus learning on non-toxic style transfer. We benchmark COUNT on two parallel datasets, ParaDetox and APPDIA, showing that it achieves significant improvements in jointly combined fluency, content preservation, and detoxification (i.e., the highest \u201cJ\u201d score).", "keywords": "Text Style Transfer;Detoxification;Unlikelihood Training", "primary_area": "", "supplementary_material": "", "author": "Mohammad Mahdi Abdollah Pour;Parsa Farinneya;Manasa Bharadwaj;Nikhil Verma;Ali Pesaranghader;Scott Sanner", "authorids": "~Mohammad_Mahdi_Abdollah_Pour1;~Parsa_Farinneya1;~Manasa_Bharadwaj1;~Nikhil_Verma2;~Ali_Pesaranghader1;~Scott_Sanner1", "gender": "M;M;;M;;M", "homepage": "https://mahdiabdollahpour.github.io/;;;http://lihkinverma.github.io/portfolio;;http://d3m.mie.utoronto.ca/", "dblp": ";;;230/4600.html;;88/3374", "google_scholar": "P15R_U0AAAAJ;wzdtxokAAAAJ;;;;https://scholar.google.ca/citations?user=kB8UPNIAAAAJ", "or_profile": "~Mohammad_Mahdi_Abdollah_Pour1;~Parsa_Farinneya1;~Manasa_Bharadwaj1;~Nikhil_Verma2;~Ali_Pesaranghader1;~Scott_Sanner1", "aff": "Department of Mechanical and Industrial Engineering, University of Toronto, University of Toronto;University of Toronto;;LG Toronto AI Lab;;Department of Computer Science", "aff_domain": "mie.utoronto.ca;utoronto.ca;;lge.com;;cs.toronto.edu", "position": "MS student;MS student;;Researcher;;Cross-appointed", "bibtex": "@inproceedings{\npour2023count,\ntitle={{COUNT}: {CO}ntrastive {UN}likelihood Text Style Transfer for Text Detoxification},\nauthor={Mohammad Mahdi Abdollah Pour and Parsa Farinneya and Manasa Bharadwaj and Nikhil Verma and Ali Pesaranghader and Scott Sanner},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=SvmlxXMLYr}\n}", "github": "", "project": "", "reviewers": "oxw4;7tdq;yYYJ", "site": "https://openreview.net/forum?id=SvmlxXMLYr", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;5;3", "excitement": "3;3;4", "reproducibility": "3;4;2", "correctness": "3;4;4", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.0, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, 
"corr_rating_correctness": 0.0, "orcid": ";;;;;", "linkedin": "mohammad-mahdi-abdollah-pour-a48b008b/;;;lihkinVerma/;;", "aff_unique_index": "0;0;1;2", "aff_unique_norm": "University of Toronto;LG;Unknown Institution", "aff_unique_dep": "Department of Mechanical and Industrial Engineering;AI Lab;Department of Computer Science", "aff_unique_url": "https://www.utoronto.ca;https://www.lg.com;", "aff_unique_abbr": "U of T;LG;", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Toronto;", "aff_country_unique_index": "0;0;1", "aff_country_unique": "Canada;South Korea;" }, { "id": "SwphsE7hYO", "title": "Retrieval-based Knowledge Transfer: An Effective Approach for Extreme Large Language Model Compression", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Large-scale pre-trained language models (LLMs) have demonstrated exceptional performance in various natural language processing (NLP) tasks. However, the massive size of these models poses huge challenges for their deployment in real-world applications. While numerous model compression techniques have been proposed, most of them are not well-suited for achieving extreme model compression when there is a significant gap in model scale. In this paper, we introduce a novel compression paradigm called Retrieval-based Knowledge Transfer (RetriKT), which effectively transfers the knowledge of LLMs to extremely small-scale models (e.g., 1\\%). In particular, our approach extracts knowledge from LLMs to construct a knowledge store, from which the small-scale model can retrieve relevant information and leverage it for effective inference. To improve the quality of the model, soft prompt tuning and Proximal Policy Optimization (PPO) reinforcement learning techniques are employed. Extensive experiments are conducted on low-resource tasks from SuperGLUE and GLUE benchmarks. 
The results demonstrate that the proposed approach significantly enhances the performance of small-scale models by leveraging the knowledge from LLMs.", "keywords": "Model Compression;LLM", "primary_area": "", "supplementary_material": "", "author": "Jiduan Liu;Jiahao Liu;Qifan Wang;Jingang Wang;Xunliang Cai;Dongyan Zhao;Ran Lucien Wang;Rui Yan", "authorids": "~Jiduan_Liu1;~Jiahao_Liu6;~Qifan_Wang2;~Jingang_Wang1;~Xunliang_Cai1;~Dongyan_Zhao1;~Ran_Lucien_Wang1;~Rui_Yan2", "gender": "M;M;M;M;M;M;M;M", "homepage": ";https://hit-computer.github.io/;https://wqfcr.github.io/;https://sites.google.com/site/bitwjg/;https://maimai.cn/contact/share/card?u=fudmdwckxlwi;https://space.bilibili.com/450195232?spm_id_from=333.1007.0.0;https://gsai.ruc.edu.cn/english/ruiyan;https://www.wict.pku.edu.cn/zhaodongyan/en/", "dblp": "264/0050;;33/8610;59/7807;;;19/2405-1;63/1870", "google_scholar": "-p0i9UMAAAAJ;https://scholar.google.com.hk/citations?user=IvImF70AAAAJ;LrSyLosAAAAJ;janU39IAAAAJ;;;eLw6g-UAAAAJ;lhR8-68AAAAJ", "or_profile": "~Jiduan_Liu1;~Jiahao_Liu6;~Qifan_Wang2;~Jingang_Wang1;~Xunliang_Cai1;~Ran_Lucien_Wang1;~Rui_Yan2;~Dongyan_Zhao2", "aff": "Peking University;Meituan;Meta AI;Meituan;Meituan;;Renmin University of China;Peking University", "aff_domain": "pku.edu.cn;meituan.com;fb.com;meituan.com;meituan.com;;ruc.edu.cn;pku.edu.cn", "position": "MS student;Researcher;Principal Researcher;Researcher;Principal Researcher;;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nliu2023retrievalbased,\ntitle={Retrieval-based Knowledge Transfer: An Effective Approach for Extreme Large Language Model Compression},\nauthor={Jiduan Liu and Jiahao Liu and Qifan Wang and Jingang Wang and Xunliang Cai and Dongyan Zhao and Ran Lucien Wang and Rui Yan},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=SwphsE7hYO}\n}", "github": "", "project": "", "reviewers": "6ByT;CJV8;MYsE", "site": "https://openreview.net/forum?id=SwphsE7hYO", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;3;4", "excitement": "4;3;4", "reproducibility": "3;4;3", "correctness": "4;3;4", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 8, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0002-7570-5756;;;;0000-0002-3356-6823;", "linkedin": ";;;;;;;", "aff_unique_index": "0;1;2;1;1;3;0", "aff_unique_norm": "Peking University;Meituan;Meta;Renmin University of China", "aff_unique_dep": ";;Meta AI;", "aff_unique_url": "http://www.pku.edu.cn;https://www.meituan.com;https://meta.com;http://www.ruc.edu.cn", "aff_unique_abbr": "Peking U;Meituan;Meta;RUC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0;0;0", "aff_country_unique": "China;United States" }, { "id": "SxrA1okPXY", "title": "Event-Location Tracking in Narratives: A Case Study on Holocaust Testimonies", "track": "main", "status": "Long Main", "tldr": "", "abstract": "This work focuses on the spatial dimension of narrative understanding and presents the task of event-location tracking in narrative texts. The task intends to extract the sequence of locations where the narrative is set through its progression. We present several architectures for the task that seeks to model the global structure of the sequence, with varying levels of context awareness. 
We compare these methods to several baselines, including the use of strong methods applied over narrow contexts. We also develop methods for the generation of location embeddings and show that learning to predict a sequence of continuous embeddings, rather than a string of locations, is advantageous in terms of performance. We focus on the test case of Holocaust survivor testimonies. We argue for the moral and historical importance of studying this dataset in computational means and that it provides a unique case of a large set of narratives with a relatively restricted set of location trajectories. Our results show that models that are aware of the larger context of the narrative can generate more accurate location chains. We further corroborate the effectiveness of our methods by showing similar trends from experiments on an additional domain.", "keywords": "Location tracking;Narrative understanding;Holocaust testimonies", "primary_area": "", "supplementary_material": "", "author": "Eitan Wagner;Renana Keydar;Omri Abend", "authorids": "~Eitan_Wagner1;~Renana_Keydar1;~Omri_Abend1", "gender": "M;F;M", "homepage": ";https://renanakeydar.huji.ac.il;http://www.cs.huji.ac.il/~oabend/", "dblp": ";;30/8159", "google_scholar": ";;https://scholar.google.com.tw/citations?user=BD_hRzYAAAAJ", "or_profile": "~Eitan_Wagner1;~Renana_Keydar1;~Omri_Abend1", "aff": "Hebrew University of Jerusalem;Hebrew University of Jerusalem;Hebrew University of Jerusalem", "aff_domain": "huji.ac.il;huji.ac.il;huji.ac.il", "position": "PhD student;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nwagner2023eventlocation,\ntitle={Event-Location Tracking in Narratives: A Case Study on Holocaust Testimonies},\nauthor={Eitan Wagner and Renana Keydar and Omri Abend},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=SxrA1okPXY}\n}", "github": "", "project": "", "reviewers": "vwcJ;VE7r;adQB", "site": "https://openreview.net/forum?id=SxrA1okPXY", "pdf_size": 0, "rating": "2;2;2", "confidence": "4;4;4", "excitement": "4;3;3", "reproducibility": "3;4;4", "correctness": "4;3;4", "rating_avg": 2.0, "confidence_avg": 4.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": "eitan-wagner-125446216/;;", "aff_unique_index": "0;0;0", "aff_unique_norm": "Hebrew University of Jerusalem", "aff_unique_dep": "", "aff_unique_url": "https://www.huji.ac.il", "aff_unique_abbr": "HUJI", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Jerusalem", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Israel" }, { "id": "SyEwsV52Dk", "title": "Evaluation Metrics in the Era of GPT-4: Reliably Evaluating Large Language Models on Sequence to Sequence Tasks", "track": "main", "status": "Short Main", "tldr": "", "abstract": "Large Language Models (LLMs) evaluation is a patchy and inconsistent landscape, and it is becoming clear that the quality of automatic evaluation metrics is not keeping up with the pace of development of generative models. 
We aim to improve the understanding of current models' performance by providing a preliminary and hybrid evaluation on a range of open and closed-source generative LLMs on three NLP benchmarks: text summarisation, text simplification and grammatical error correction (GEC), using both automatic and human evaluation. We also explore the potential of the recently released GPT-4 to act as an evaluator. We find that ChatGPT consistently outperforms many other popular models according to human reviewers on the majority of metrics, while scoring much more poorly when using classic automatic evaluation metrics. We also find that human reviewers rate the gold reference as much worse than the best models' outputs, indicating the poor quality of many popular benchmarks. Finally, we find that GPT-4 is capable of ranking models' outputs in a way which aligns reasonably closely to human judgement despite task-specific variations, with a lower alignment in the GEC task.", "keywords": "evaluation;human evaluation;LLM;summarization;simplification;grammatical error correction;ChatGPT;GPT-4;Sequence to sequence", "primary_area": "", "supplementary_material": "", "author": "Andrea Sottana;Bin Liang;Kai Zou;Zheng Yuan", "authorids": "~Andrea_Sottana1;~Bin_Liang8;~Kai_Zou2;~Zheng_Yuan4", "gender": ";M;M;", "homepage": ";https://github.com/BinLiang2021;https://www.linkedin.com/in/kz4225/;", "dblp": ";;135/509201;", "google_scholar": ";;https://scholar.google.com/citations?hl=en;", "or_profile": "~Andrea_Sottana1;~Bin_Liang8;~Kai_Zou2;~Zheng_Yuan4", "aff": ";Netmind.AI ;Protagolabs Inc;", "aff_domain": ";netmind.ai;protagolabs.ai;", "position": ";Researcher;Founder CEO;", "bibtex": "@inproceedings{\nsottana2023evaluation,\ntitle={Evaluation Metrics in the Era of {GPT}-4: Reliably Evaluating Large Language Models on Sequence to Sequence Tasks},\nauthor={Andrea Sottana and Bin Liang and Kai Zou and Zheng Yuan},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=SyEwsV52Dk}\n}", "github": "", "project": "", "reviewers": "JKmf;Etgw;EMpQ", "site": "https://openreview.net/forum?id=SyEwsV52Dk", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;5;4", "excitement": "4;3;3", "reproducibility": "4;2;4", "correctness": "4;3;3", "rating_avg": 4.0, "confidence_avg": 4.333333333333333, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;", "linkedin": ";;kz4225/;", "aff_unique_index": "0;1", "aff_unique_norm": "Netmind;Protagolabs", "aff_unique_dep": ";", "aff_unique_url": "https://www.netmind.ai;", "aff_unique_abbr": "Netmind;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "Spain;United States" }, { "id": "SzH7d4617q", "title": "The Intended Uses of Automated Fact-Checking Artefacts: Why, How and Who", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Automated fact-checking is often presented as an epistemic tool that fact-checkers, social media consumers, and other stakeholders can use to fight misinformation. Nevertheless, few papers thoroughly discuss \\textit{how}. We document this by analysing 100 highly-cited papers, and annotating epistemic elements related to intended use, i.e.,\\ means, ends, and stakeholders. 
We find that narratives leaving out some of these aspects are common, that many papers propose inconsistent means and ends, and that the feasibility of suggested strategies rarely has empirical backing. We argue that this vagueness actively hinders the technology from reaching its goals, as it encourages overclaiming, limits criticism, and prevents stakeholder feedback. Accordingly, we provide several recommendations for thinking and writing about the use of fact-checking artefacts.", "keywords": "fact-checking;automated fact-checking;content analysis;intended use;natural language processing;artefacts", "primary_area": "", "supplementary_material": "", "author": "Michael Sejr Schlichtkrull;Nedjma OUSIDHOUM;Andreas Vlachos", "authorids": "~Michael_Sejr_Schlichtkrull1;~Nedjma_OUSIDHOUM1;~Andreas_Vlachos1", "gender": "M;F;M", "homepage": "http://michschli.github.io/;https://nedjmaou.github.io/;http://andreasvlachos.github.io/", "dblp": "186/7091;248/2832;18/1071-1", "google_scholar": "z8YvWyEAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.es/citations?user=XjWnyM4AAAAJ", "or_profile": "~Michael_Sejr_Schlichtkrull1;~Nedjma_OUSIDHOUM1;~Andreas_Vlachos1", "aff": "University of Cambridge;University of Cambridge;University of Cambridge", "aff_domain": "cam.ac.uk;cam.ac.uk;cam.ac.uk", "position": "Postdoc;Postdoc;Full Professor", "bibtex": "@inproceedings{\nschlichtkrull2023the,\ntitle={The Intended Uses of Automated Fact-Checking Artefacts: Why, How and Who},\nauthor={Michael Sejr Schlichtkrull and Nedjma OUSIDHOUM and Andreas Vlachos},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=SzH7d4617q}\n}", "github": "", "project": "", "reviewers": "XbZ4;gXnU;21ex", "site": "https://openreview.net/forum?id=SzH7d4617q", "pdf_size": 0, "rating": "5;5;5", "confidence": "3;4;4", "excitement": "5;4;2", "reproducibility": "4;5;4", "correctness": "4;4;1", "rating_avg": 5.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.333333333333333, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0003-2123-5071", "linkedin": ";;andreas-vlachos-70ab391", "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Cambridge", "aff_unique_dep": "", "aff_unique_url": "https://www.cam.ac.uk", "aff_unique_abbr": "Cambridge", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Cambridge", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United Kingdom" }, { "id": "T3kZcQ2ivs", "title": "Are Embedded Potatoes Still Vegetables? On the Limitations of WordNet Embeddings for Lexical Semantics", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Knowledge Base Embedding (KBE) models have been widely used to encode structured information from knowledge bases, including WordNet. However, the existing literature has predominantly focused on link prediction as the evaluation task, often neglecting exploration of the models' semantic capabilities. In this paper, we investigate the potential disconnect between the performance of KBE models of WordNet on link prediction and their ability to encode semantic information, highlighting the limitations of current evaluation protocols. Our findings reveal that some top-performing KBE models on the WN18RR benchmark exhibit subpar results on two semantic tasks and two downstream tasks. 
These results demonstrate the inadequacy of link prediction benchmarks for evaluating the semantic capabilities of KBE models, suggesting the need for a more targeted assessment approach.", "keywords": "Knowledge Base Embedding;Lexical Semantics;WordNet;Link Prediction", "primary_area": "", "supplementary_material": "", "author": "Xuyou Cheng;Michael Sejr Schlichtkrull;Guy Emerson", "authorids": "~Xuyou_Cheng1;~Michael_Sejr_Schlichtkrull1;~Guy_Emerson1", "gender": "M;M;M", "homepage": "https://github.com/yoyouC;http://michschli.github.io/;https://www.languagesciences.cam.ac.uk/directory/guy-emerson", "dblp": ";186/7091;182/2001.html", "google_scholar": ";z8YvWyEAAAAJ;https://scholar.google.co.uk/citations?user=v8dvAc0AAAAJ", "or_profile": "~Xuyou_Cheng1;~Michael_Sejr_Schlichtkrull1;~Guy_Emerson1", "aff": ";University of Cambridge;University of Cambridge", "aff_domain": ";cam.ac.uk;cam.ac.uk", "position": ";Postdoc;Principal Researcher", "bibtex": "@inproceedings{\ncheng2023are,\ntitle={Are Embedded Potatoes Still Vegetables? On the Limitations of WordNet Embeddings for Lexical Semantics},\nauthor={Xuyou Cheng and Michael Sejr Schlichtkrull and Guy Emerson},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=T3kZcQ2ivs}\n}", "github": "", "project": "", "reviewers": "UJC6;P9Hk;YVuz", "site": "https://openreview.net/forum?id=T3kZcQ2ivs", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;4;3", "excitement": "4;4;4", "reproducibility": "3;4;4", "correctness": "3;4;3", "rating_avg": 5.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 4.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 10, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0002-3136-9682", "linkedin": ";;", "aff_unique_index": "0;0", "aff_unique_norm": "University of Cambridge", "aff_unique_dep": "", "aff_unique_url": "https://www.cam.ac.uk", "aff_unique_abbr": "Cambridge", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Cambridge", "aff_country_unique_index": "0;0", "aff_country_unique": "United Kingdom" }, { "id": "T3n9nbeIKc", "title": "Dataset Bias Mitigation in Multiple-Choice Visual Question Answering and Beyond", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Vision-language (VL) understanding tasks evaluate models' comprehension of complex visual scenes through multiple-choice questions. However, we have identified two dataset biases that models can exploit as shortcuts to resolve various VL tasks correctly without proper understanding. The first type of dataset bias is Unbalanced Matching bias, where the correct answer overlaps the question and image more than the incorrect answers. The second type of dataset bias is Distractor Similarity bias, where incorrect answers are overly dissimilar to the correct answer but significantly similar to other incorrect answers within the same sample. To address these dataset biases, we first propose Adversarial Data Synthesis (ADS) to generate synthetic training and debiased evaluation data. We then introduce Intra-sample Counterfactual Training (ICT) to assist models in utilizing the synthesized training data, particularly the counterfactual data, via focusing on intra-sample differentiation. 
Extensive experiments demonstrate the effectiveness of ADS and ICT in consistently improving model performance across different benchmarks, even in domain-shifted scenarios.", "keywords": "vision language;vcr;vqa;snli-ve;visual question answering;commonsense reasoning;pretraining;multimodal;robust;low-shot;zero-shot;domain-shift;debiased;shortcut", "primary_area": "", "supplementary_material": "", "author": "Zhecan Wang;Long Chen;Haoxuan You;Keyang Xu;Yicheng He;Wenhao Li;Noel C Codella;Kai-Wei Chang;Shih-Fu Chang", "authorids": "~Zhecan_Wang2;~Long_Chen8;~Haoxuan_You1;~Keyang_Xu2;~Yicheng_He1;~Wenhao_Li5;~Noel_C_Codella1;~Kai-Wei_Chang1;~Shih-Fu_Chang3", "gender": "M;M;M;M;;M;M;M;M", "homepage": "https://www.zhecanwang.com/;https://zjuchenlong.github.io/;https://hxyou.github.io/;https://rivercold.github.io/;;https://www.linkedin.com/in/benningtonli/;http://www.noelcodella.com/;http://kwchang.net;http://www.ee.columbia.edu/~sfchang/", "dblp": "167/4251;64/5725-16;210/2628;154/3241.html;;;;18/2428;c/ShihFuChang", "google_scholar": "uqHPnmgAAAAJ;https://scholar.google.com.sg/citations?user=-gtmMpIAAAAJ;BhysChMAAAAJ;byJxOsEAAAAJ;ct_AbrUAAAAJ;;8BnjC-4AAAAJ;fqDBtzYAAAAJ;OMVTRscAAAAJ", "or_profile": "~Zhecan_Wang2;~Long_Chen8;~Haoxuan_You1;~Keyang_Xu2;~Yicheng_He1;~Wenhao_Li5;~Noel_C_Codella1;~Kai-Wei_Chang1;~Shih-Fu_Chang3", "aff": "Columbia University;Columbia University;Columbia University;Columbia University;;Columbia University;Microsoft;Amazon;Columbia University", "aff_domain": "columbia.edu;columbia.edu;columbia.edu;cs.columbia.edu;;columbia.edu;microsoft.com;amazon.com;ee.columbia.edu", "position": "PhD student;Postdoc;PhD student;PhD student;;MS student;Principal Researcher;Researcher;Full Professor", "bibtex": "@inproceedings{\nwang2023dataset,\ntitle={Dataset Bias Mitigation in Multiple-Choice Visual Question Answering and Beyond},\nauthor={Zhecan Wang and Long Chen and Haoxuan You and Keyang Xu and Yicheng He and Wenhao Li and Noel C Codella and Kai-Wei Chang and Shih-Fu Chang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=T3n9nbeIKc}\n}", "github": "", "project": "", "reviewers": "32Fg;PvtF;4SCZ", "site": "https://openreview.net/forum?id=T3n9nbeIKc", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;2;4", "excitement": "3;3;4", "reproducibility": "4;3;4", "correctness": "4;3;4", "rating_avg": 3.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 9, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0009-0003-7785-4637;0000-0001-6148-9709;;;;;;0000-0001-5365-0072;", "linkedin": "jameszhecanwang/;;;;;;noel-c-f-codella-ph-d-1b1b1723/;kai-wei-chang-41239040;", "aff_unique_index": "0;0;0;0;0;1;2;0", "aff_unique_norm": "Columbia University;Microsoft;Amazon", "aff_unique_dep": ";Microsoft Corporation;Amazon.com, Inc.", "aff_unique_url": "https://www.columbia.edu;https://www.microsoft.com;https://www.amazon.com", "aff_unique_abbr": "Columbia;Microsoft;Amazon", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "T6GJ2Y0dn7", "title": "Intersectional Stereotypes in Large Language Models: Dataset and Analysis", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "Despite many stereotypes targeting intersectional 
demographic groups, prior studies on stereotypes within Large Language Models (LLMs) primarily focus on broader, individual categories. This research bridges this gap by introducing a novel dataset of intersectional stereotypes, curated with the assistance of the ChatGPT model and manually validated. Moreover, this paper offers a comprehensive analysis of intersectional stereotype propagation in three contemporary LLMs by leveraging this dataset. The findings underscore the urgency of focusing on intersectional biases in ongoing efforts to reduce stereotype prevalence in LLMs.", "keywords": "Stereotype Examination;Intersectional Stereotype;Dataset", "primary_area": "", "supplementary_material": "", "author": "Weicheng Ma;Brian Chiang;Tong Wu;Lili Wang;Soroush Vosoughi", "authorids": "~Weicheng_Ma2;~Brian_Chiang1;~Tong_Wu17;~Lili_Wang2;~Soroush_Vosoughi1", "gender": "M;M;F;;", "homepage": "https://www.linkedin.com/in/weicheng-ma-83a2b11a1/;https://www.linkedin.com/in/brianchiang8/;;;https://www.cs.dartmouth.edu/~soroush/", "dblp": "127/3100;;;;01/1709", "google_scholar": "https://scholar.google.com/citations?view_op=list_works;;;vJXfvigAAAAJ;45DAXkwAAAAJ", "or_profile": "~Weicheng_Ma2;~Brian_Chiang1;~Tong_Wu17;~Lili_Wang2;~Soroush_Vosoughi1", "aff": "Dartmouth College;Dartmouth College;Mount Holyoke College;Dartmouth College;Dartmouth College", "aff_domain": "dartmouth.edu;dartmouth.edu;mtholyoke.edu;dartmouth.edu;dartmouth.edu", "position": "PhD student;Undergrad student;Undergrad student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nma2023intersectional,\ntitle={Intersectional Stereotypes in Large Language Models: Dataset and Analysis},\nauthor={Weicheng Ma and Brian Chiang and Tong Wu and Lili Wang and Soroush Vosoughi},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=T6GJ2Y0dn7}\n}", "github": "", "project": "", "reviewers": "k5qo;PMWn;kDnu", "site": "https://openreview.net/forum?id=T6GJ2Y0dn7", "pdf_size": 0, "rating": "3;3;3", "confidence": "5;3;3", "excitement": "3;3;3", "reproducibility": "1;3;3", "correctness": "3;3;4", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.0, "reproducibility_avg": 2.3333333333333335, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0001-7494-9874;;;;0000-0002-2564-8909", "linkedin": "weicheng-ma-83a2b11a1/;;tong-wu-ab0072217/;lili-wang-752552a5/;", "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "Dartmouth College;Mount Holyoke College", "aff_unique_dep": ";", "aff_unique_url": "https://www.dartmouth.edu;https://www.mtholyoke.edu", "aff_unique_abbr": "Dartmouth;MHC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "T8ABT8q3FS", "title": "SegAugment: Maximizing the Utility of Speech Translation Data with Segmentation-based Augmentations", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "End-to-end Speech Translation is hindered by a lack of available data resources. While most of them are based on documents, a sentence-level version is available, which is however single and static, potentially impeding the usefulness of the data. We propose a new data augmentation strategy, SegAugment, to address this issue by generating multiple alternative sentence-level versions of a dataset. 
Our method utilizes an Audio Segmentation system, which re-segments the speech of each document with different length constraints, after which we obtain the target text via alignment methods. Experiments demonstrate consistent gains across eight language pairs in MuST-C, with an average increase of 2.5 BLEU points, and up to 5 BLEU for low-resource scenarios in mTEDx. Furthermore, when combined with a strong system, SegAugment obtains state-of-the-art results in MuST-C. Finally, we show that the proposed method can also successfully augment sentence-level datasets, and that it enables Speech Translation models to close the gap between the manual and automatic segmentation at inference time.", "keywords": "Speech Translation;Data Augmentation", "primary_area": "", "supplementary_material": "", "author": "Ioannis Tsiamas;Jos\u00e9 A.R. Fonollosa;Marta R. Costa-juss\u00e0", "authorids": "~Ioannis_Tsiamas1;~Jos\u00e9_A.R._Fonollosa1;~Marta_R._Costa-juss\u00e01", "gender": "M;M;F", "homepage": ";https://futur.upc.edu/178188;https://www.costa-jussa.com", "dblp": "292/4394;68/1326.html;17/2183", "google_scholar": "9XW4Md4AAAAJ;nL-pZh8AAAAJ;ESqQ7FoAAAAJ", "or_profile": "~Ioannis_Tsiamas1;~Jos\u00e9_A.R._Fonollosa1;~Marta_R._Costa-juss\u00e01", "aff": "Dolby;Universitat Polit\u00e8cnica de Catalunya;Meta", "aff_domain": "dolby.com;upc.edu;fb.com", "position": "Intern;Full Professor;Research Scientist", "bibtex": "@inproceedings{\ntsiamas2023segaugment,\ntitle={SegAugment: Maximizing the Utility of Speech Translation Data with Segmentation-based Augmentations},\nauthor={Ioannis Tsiamas and Jos{\\'e} A.R. Fonollosa and Marta R. Costa-juss{\\`a}},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=T8ABT8q3FS}\n}", "github": "", "project": "", "reviewers": "n8SA;bbzS;Li15;uvqE", "site": "https://openreview.net/forum?id=T8ABT8q3FS", "pdf_size": 0, "rating": "4;4;4;4", "confidence": "4;2;4;4", "excitement": "3;4;4;3", "reproducibility": "3;4;4;4", "correctness": "3;4;4;5", "rating_avg": 4.0, "confidence_avg": 3.5, "excitement_avg": 3.5, "reproducibility_avg": 3.75, "correctness_avg": 4.0, "replies_avg": 14, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0003-1049-2515;0000-0001-9513-7939;", "linkedin": "i-tsiamas/;jarfo;", "aff_unique_index": "0;1;2", "aff_unique_norm": "Dolby Laboratories;Universitat Polit\u00e8cnica de Catalunya;Meta", "aff_unique_dep": ";;Meta Platforms, Inc.", "aff_unique_url": "https://www.dolby.com;https://www.upc.edu;https://meta.com", "aff_unique_abbr": "Dolby;UPC;Meta", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "United States;Spain" }, { "id": "T9jJsFUGtI", "title": "Citance-Contextualized Summarization of Scientific Papers", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Current approaches to automatic summarization of scientific papers generate informative summaries in the form of abstracts. However, abstracts are not intended to show the relationship between a paper and the references cited in it. We propose a new contextualized summarization approach that can generate an informative summary conditioned on a given sentence containing the citation of a reference (a so-called ``citance''). This summary outlines content of the cited paper relevant to the citation location. 
Thus, our approach extracts and models the citances of a paper, retrieves relevant passages from cited papers, and generates abstractive summaries tailored to each citance. We evaluate our approach using **Webis-Context-SciSumm-2023**, a new dataset containing 540K computer science papers and 4.6M citances therein.", "keywords": "Summarization;Scholarly Document Processing;Scientific Papers;Large Language Models", "primary_area": "", "supplementary_material": "", "author": "Shahbaz Syed;Ahmad Dawar Hakimi;Khalid Al Khatib;Martin Potthast", "authorids": "~Shahbaz_Syed1;~Ahmad_Dawar_Hakimi1;~Khalid_Al_Khatib1;~Martin_Potthast1", "gender": ";M;M;M", "homepage": ";https://adhakimi.github.io/;https://khalid-alkhatib.github.io/;http://www.temir.org", "dblp": ";270/1995.html;31/8936;87/6573", "google_scholar": ";6Q8I1okAAAAJ;https://scholar.google.com/citations?hl=en;a0W8R-cAAAAJ", "or_profile": "~Shahbaz_Syed1;~Ahmad_Dawar_Hakimi1;~Khalid_Al_Khatib1;~Martin_Potthast1", "aff": ";Universit\u00e4t Leipzig;University of Groningen;Leipzig University and ScaDS.AI", "aff_domain": ";uni-leipzig.de;rug.nl;uni-leipzig.de", "position": ";MS student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nsyed2023citancecontextualized,\ntitle={Citance-Contextualized Summarization of Scientific Papers},\nauthor={Shahbaz Syed and Ahmad Dawar Hakimi and Khalid Al Khatib and Martin Potthast},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=T9jJsFUGtI}\n}", "github": "", "project": "", "reviewers": "mxe2;Gvqo;jGux", "site": "https://openreview.net/forum?id=T9jJsFUGtI", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;3;3", "excitement": "3;3;3", "reproducibility": "3;4;3", "correctness": "3;3;3", "rating_avg": 3.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.0, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0009-0006-7255-5349;0000-0003-2451-0665", "linkedin": ";adhakimi/;khalid-alkhatib/;potthast", "aff_unique_index": "0;1;2", "aff_unique_norm": "University of Leipzig;University of Groningen;Leipzig University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.uni-leipzig.de;https://www.rug.nl;https://www.uni-leipzig.de", "aff_unique_abbr": "Uni Leipzig;RUG;Uni Leipzig", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "Germany;Netherlands" }, { "id": "T9wuVnNa5v", "title": "SIR-ABSC: Incorporating Syntax into RoBERTa-based Sentiment Analysis Models with a Special Aggregator Token", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "We present a simple, but effective method to incorporate syntactic dependency information directly into transformer-based language models (e.g. RoBERTa) for tasks such as Aspect-Based Sentiment Classification (ABSC), where the desired output depends on specific input tokens. In contrast to prior approaches to ABSC that capture syntax by combining language models with graph neural networks over dependency trees, our model, Syntax-Integrated RoBERTa for ABSC (SIR-ABSC) incorporates syntax directly into the language model by using a novel aggregator token. 
Yet, SIR-ABSC outperforms these more complex models, yielding new state-of-the-art results on ABSC.", "keywords": "Aspect-based sentiment analysis;Pre-trained Language Models;RoBERTa", "primary_area": "", "supplementary_material": "", "author": "IKHYUN CHO;Yoonhwa Jung;Julia Hockenmaier", "authorids": "~IKHYUN_CHO1;~Yoonhwa_Jung1;~Julia_Hockenmaier1", "gender": ";F;M", "homepage": ";https://cs.illinois.edu/directory/profile/juliahmr;https://ihcho2.github.io/", "dblp": ";64/2448;", "google_scholar": ";https://scholar.google.com.tw/citations?user=iIiVrrQAAAAJ;", "or_profile": "~Yoonhwa_Jung1;~Julia_Hockenmaier1;~IKHYUN_CHO2", "aff": "University of Illinois, Urbana Champaign;University of Illinois, Urbana Champaign;Department of Computer Science", "aff_domain": "illinois.edu;illinois.edu;cs.illinois.edu", "position": "PhD student;Full Professor;PhD student", "bibtex": "@inproceedings{\ncho2023sirabsc,\ntitle={{SIR}-{ABSC}: Incorporating Syntax into Ro{BERT}a-based Sentiment Analysis Models with a Special Aggregator Token},\nauthor={IKHYUN CHO and Yoonhwa Jung and Julia Hockenmaier},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=T9wuVnNa5v}\n}", "github": "", "project": "", "reviewers": "XaBJ;hL41;HrVu", "site": "https://openreview.net/forum?id=T9wuVnNa5v", "pdf_size": 0, "rating": "3;3;3", "confidence": "5;4;4", "excitement": "3;4;2", "reproducibility": "3;4;3", "correctness": "2;4;2", "rating_avg": 3.0, "confidence_avg": 4.333333333333333, "excitement_avg": 3.0, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 2.6666666666666665, "replies_avg": 10, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-6926-4077;;", "linkedin": ";;%EC%9D%B5%ED%98%84-%EC%A1%B0-1705571b8/", "aff_unique_index": "0;0;1", "aff_unique_norm": "University of Illinois Urbana-Champaign;Unknown Institution", "aff_unique_dep": ";Department of Computer Science", "aff_unique_url": "https://illinois.edu;", "aff_unique_abbr": "UIUC;", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Urbana-Champaign;", "aff_country_unique_index": "0;0", "aff_country_unique": "United States;" }, { "id": "THr9aJ3z9k", "title": "Quick Back-Translation for Unsupervised Machine Translation", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "The field of unsupervised machine translation has seen significant advancement from the marriage of the Transformer and the back-translation algorithm. The Transformer is a powerful generative model, and back-translation leverages Transformer's high-quality translations for iterative self-improvement. However, the Transformer is encumbered by the run-time of autoregressive inference during back-translation, and back-translation is limited by a lack of synthetic data efficiency. We propose a two-for-one improvement to Transformer back-translation: Quick Back-Translation (QBT). QBT re-purposes the encoder as a generative model, and uses encoder-generated sequences to train the decoder in conjunction with the original autoregressive back-translation step, improving data throughput and utilization. 
Experiments on various WMT benchmarks demonstrate that a relatively small number of QBT refining steps improves current unsupervised machine translation models, and that QBT dramatically outperforms the standard back-translation-only method in terms of training efficiency for comparable translation quality.", "keywords": "unsupervised machine translation;back-translation;non-autoregressive generation;Transformer", "primary_area": "", "supplementary_material": "", "author": "Benjamin Lincoln Brimacombe;Jiawei Zhou", "authorids": "~Benjamin_Lincoln_Brimacombe1;~Jiawei_Zhou1", "gender": "M;M", "homepage": ";https://joezhouai.com/", "dblp": ";126/4991-1", "google_scholar": ";https://scholar.google.com/citations?hl=en", "or_profile": "~Benjamin_Lincoln_Brimacombe1;~Jiawei_Zhou1", "aff": ";Harvard University", "aff_domain": ";harvard.edu", "position": ";PhD student", "bibtex": "@inproceedings{\nbrimacombe2023quick,\ntitle={Quick Back-Translation for Unsupervised Machine Translation},\nauthor={Benjamin Lincoln Brimacombe and Jiawei Zhou},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=THr9aJ3z9k}\n}", "github": "", "project": "", "reviewers": "zNyd;w5DB;7rp5", "site": "https://openreview.net/forum?id=THr9aJ3z9k", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;3;4", "excitement": "3;4;4", "reproducibility": "4;4;4", "correctness": "3;4;4", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.0, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0001-5590-6270", "linkedin": "benjamin-brimacombe/;jiawei-zhou/", "aff_unique_index": "0", "aff_unique_norm": "Harvard University", "aff_unique_dep": "", "aff_unique_url": "https://www.harvard.edu", "aff_unique_abbr": "Harvard", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "id": "TKGgLVYRqJ", "title": "Seeing through the mess: evolutionary dynamics of lexical polysemy", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Evidently, words can have multiple senses. For example, the word mess refers to a place to have food or to a confusing situation. How exactly multiple senses emerge is less clear. In this work, we propose and analyze a mathematical model of the evolution of lexical meaning to investigate mechanisms leading to polysemy. \n\nThis model features factors that have been discussed to impact the semantic processing and transmission of words: word frequency, non-conformism, and semantic discriminability. We formally derive conditions under which a sense of a word tends to diversify itself into multiple senses that coexist stably.\n\nThe model predicts that diversification is promoted by low frequency, a strong bias for non-conformist usage, and high semantic discriminability. We statistically validate these predictions with historical language data covering semantic developments of a set of English words. 
Multiple alternative measures are used to operationalize each variable involved, and we confirm the predicted tendencies for twelve combinations of measures.", "keywords": "polysemy;language change;mathematical modeling;adaptive dynamics;senses;frequency;non-conformism;discriminability", "primary_area": "", "supplementary_material": "", "author": "Andreas Baumann;Andreas Stephan;Benjamin Roth", "authorids": "~Andreas_Baumann1;~Andreas_Stephan1;~Benjamin_Roth2", "gender": "M;M;", "homepage": "https://evsl.univie.ac.at/digital-philology/baumann/;https://andst.github.io/;https://www.benjaminroth.net", "dblp": ";91/1604;63/8171-1", "google_scholar": ";https://scholar.google.com/citations?hl=en;", "or_profile": "~Andreas_Baumann1;~Andreas_Stephan1;~Benjamin_Roth2", "aff": "Universit\u00e4t Vienna;Universit\u00e4t Vienna;Universit\u00e4t Vienna", "aff_domain": "univie.ac.at;univie.ac.at;univie.ac.at", "position": "Researcher;PhD student;Full Professor", "bibtex": "@inproceedings{\nbaumann2023seeing,\ntitle={Seeing through the mess: evolutionary dynamics of lexical polysemy},\nauthor={Andreas Baumann and Andreas Stephan and Benjamin Roth},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=TKGgLVYRqJ}\n}", "github": "", "project": "", "reviewers": "gMYd;SeWs;VVLs", "site": "https://openreview.net/forum?id=TKGgLVYRqJ", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;4", "excitement": "4;2;3", "reproducibility": "4;3;3", "correctness": "5;2;4", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 3.0, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": ";andst/;", "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Vienna", "aff_unique_dep": "", "aff_unique_url": "https://univie.ac.at", "aff_unique_abbr": "UV", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Austria" }, { "id": "TKo2JXw7vL", "title": "Large Language Models Meet Harry Potter: A Dataset for Aligning Dialogue Agents with Characters", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "In recent years, Dialogue-style Large Language Models (LLMs) such as ChatGPT and GPT4 have demonstrated immense potential in constructing open-domain dialogue agents. However, aligning these agents with specific characters or individuals remains a considerable challenge due to the complexities of character representation and the lack of comprehensive annotations. In this paper, we introduce the Harry Potter Dialogue (HPD) dataset, designed to advance the study of dialogue agents and character alignment. The dataset encompasses all dialogue sessions (in both English and Chinese) from the Harry Potter series and is annotated with vital background information, including dialogue scenes, speakers, character relationships, and attributes. These extensive annotations may empower LLMs to unlock character-driven dialogue capabilities. Furthermore, it can serve as a universal benchmark for evaluating how well an LLM can align with a specific character. We benchmark LLMs on HPD using both fine-tuning and in-context learning settings. 
Evaluation results reveal that although there is substantial room for improvement in generating high-quality, character-aligned responses, the proposed dataset is valuable in guiding models toward responses that better align with the character of Harry Potter.", "keywords": "Personalized dialogue systems;dataset", "primary_area": "", "supplementary_material": "", "author": "Nuo Chen;Yan Wang;Haiyun Jiang;Deng Cai;Yuhan Li;ziyang chen;Longyue Wang;Jia Li", "authorids": "~Nuo_Chen1;~Yan_Wang17;~Haiyun_Jiang1;~Deng_Cai1;~Yuhan_Li3;~ziyang_chen8;~Longyue_Wang3;~Jia_Li4", "gender": "M;M;M;M;M;M;M;M", "homepage": "https://jerrynchen.github.io/;https://libertywing.github.io/yanwang.github.io/;;https://jcyk.github.io/;https://scholar.google.com/citations?user=c8DzpkAAAAAJ&hl=zh-CN;https://github.com/ZiyangChan;http://longyuewang.com/;https://sites.google.com/view/lijia", "dblp": "135/5622-1;59/2227-60;;c/DCai-2;116/8661-1;;127/3421;23/6950-9", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com/citations?hl=zh-CN;fk684xEAAAAJ;KpbRLYcAAAAJ;c8DzpkAAAAAJ;;r1ctChkAAAAJ;1gSbcYoAAAAJ", "or_profile": "~Nuo_Chen1;~Yan_Wang17;~Haiyun_Jiang1;~Deng_Cai1;~Yuhan_Li3;~ziyang_chen8;~Longyue_Wang3;~Jia_Li4", "aff": "Hong Kong University of Science and Technology;miHoYo;Tencent AI Lab;Tencent AI Lab;Nankai University;Tencent;Tencent AI Lab;Hong Kong University of Science and Technology (Guangzhou)", "aff_domain": "hkust.edu;mihoyo.com;tencent.com;tencent.com;nankai.edu.cn;tencent.com;tencent.com;ust.hk", "position": "PhD student;Research Scientist;Researcher;Research Scientist;MS student;Researcher;Senior Researcher;Assistant Professor", "bibtex": "@inproceedings{\nchen2023large,\ntitle={Large Language Models Meet Harry Potter: A Dataset for Aligning Dialogue Agents with Characters},\nauthor={Nuo Chen and Yan Wang and Haiyun Jiang and Deng Cai and Yuhan Li and ziyang chen and Longyue Wang and Jia Li},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=TKo2JXw7vL}\n}", "github": "", "project": "", "reviewers": "R7XD;ZU76;Jmrm", "site": "https://openreview.net/forum?id=TKo2JXw7vL", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;4;4", "excitement": "4;3;4", "reproducibility": "3;4;3", "correctness": "3;3;3", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 8, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;0000-0003-1324-5819;;0000-0002-9062-6183;0000-0002-6362-4385", "linkedin": ";;;;;;vincentwang0229/;", "aff_unique_index": "0;1;2;2;3;2;2;0", "aff_unique_norm": "Hong Kong University of Science and Technology;miHoYo;Tencent;Nankai University", "aff_unique_dep": ";;Tencent AI Lab;", "aff_unique_url": "https://www.ust.hk;https://www.mihoyo.com;https://ai.tencent.com;http://www.nankai.edu.cn", "aff_unique_abbr": "HKUST;miHoYo;Tencent AI Lab;NKU", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "TKzERU0kq1", "title": "Guiding LLM to Fool Itself: Automatically Manipulating Machine Reading Comprehension Shortcut Triggers", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "Recent applications of LLMs in Machine Reading Comprehension (MRC) systems have shown impressive results, but the use 
of shortcuts, mechanisms triggered by features spuriously correlated to the true label, has emerged as a potential threat to their reliability. We analyze the problem from two angles: LLMs as editors, guided to edit text to mislead LLMs; and LLMs as readers, who answer questions based on the edited text. We introduce a framework that guides an editor to add potential shortcut triggers to samples. Using GPT4 as the editor, we find it can successfully edit shortcut triggers into samples that fool LLMs. Analysing LLMs as readers, we observe that even capable LLMs can be deceived using shortcut knowledge. Strikingly, we discover that GPT4 can be deceived by its own edits (15% drop in F1). Our findings highlight inherent vulnerabilities of LLMs to shortcut manipulations. We publish ShortcutQA, a curated dataset generated by our framework for future research.", "keywords": "Shortcuts;Large language models;Question answering", "primary_area": "", "supplementary_material": "", "author": "Mosh Levy;Shauli Ravfogel;Yoav Goldberg", "authorids": "~Mosh_Levy1;~Shauli_Ravfogel1;~Yoav_Goldberg1", "gender": "M;M;M", "homepage": ";https://github.com/Shaul1321;https://www.cs.biu.ac.il/~yogo", "dblp": ";227/2231;68/5296", "google_scholar": "zVTaH-YAAAAJ;;https://scholar.google.co.il/citations?user=0rskDKgAAAAJ", "or_profile": "~Mosh_Levy1;~Shauli_Ravfogel1;~Yoav_Goldberg1", "aff": "Bar-Ilan University;Bar-Ilan University;Allen Institute for Artificial Intelligence", "aff_domain": "biu.ac.il;biu.ac.il;allenai.org", "position": "PhD student;PhD student;Principal Researcher", "bibtex": "@inproceedings{\nlevy2023guiding,\ntitle={Guiding {LLM} to Fool Itself: Automatically Manipulating Machine Reading Comprehension Shortcut Triggers},\nauthor={Mosh Levy and Shauli Ravfogel and Yoav Goldberg},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=TKzERU0kq1}\n}", "github": "", "project": "", "reviewers": "fatY;GHLP;Vu8F", "site": "https://openreview.net/forum?id=TKzERU0kq1", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;4", "excitement": "4;3;4", "reproducibility": "5;4;3", "correctness": "4;4;3", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.0, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": ";;", "aff_unique_index": "0;0;1", "aff_unique_norm": "Bar-Ilan University;Allen Institute for Artificial Intelligence", "aff_unique_dep": ";", "aff_unique_url": "https://www.biu.ac.il;https://allenai.org", "aff_unique_abbr": "BIU;AI2", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1", "aff_country_unique": "Israel;United States" }, { "id": "TSdWY9GaHA", "title": "CHiLL: Zero-shot Custom Interpretable Feature Extraction from Clinical Notes with Large Language Models", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "We propose CHiLL (Crafting High-Level Latents), an approach for natural-language specification of features for linear models. CHiLL prompts LLMs with expert-crafted queries to generate interpretable features from health records. The resulting noisy labels are then used to train a simple linear classifier. 
Generating features based on queries to an LLM can empower physicians to use their domain expertise to craft features that are clinically meaningful for a downstream task of interest, without having to manually extract these from raw EHR. We are motivated by a real-world risk prediction task, but as a reproducible proxy, we use MIMIC-III and MIMIC-CXR data and standard predictive tasks (e.g., 30-day readmission) to evaluate this approach. We find that linear models using automatically extracted features are comparably performant to models using reference features, and provide greater interpretability than linear models using \u201cBag-of-Words\u201d features. We verify that learned feature weights align well with clinical expectations.", "keywords": "Interpretability;Large Language Models;Healthcare;Electronic Health Records;Feature Extraction;Zero-shot", "primary_area": "", "supplementary_material": "", "author": "Denis Jered McInerney;Geoffrey Young;Jan-Willem van de Meent;Byron C Wallace", "authorids": "~Denis_Jered_McInerney1;~Geoffrey_Young1;~Jan-Willem_van_de_Meent1;~Byron_C_Wallace1", "gender": "M;;M;M", "homepage": "https://www.khoury.northeastern.edu/people/denis-jered-mcinerney/;;https://jwvdm.github.io/;http://www.byronwallace.com/", "dblp": "262/6563;;137/3263;00/8247", "google_scholar": "Fc1UmSEAAAAJ;;CX9Lu38AAAAJ;KTzRHmwAAAAJ", "or_profile": "~Denis_Jered_McInerney1;~Geoffrey_Young1;~Jan-Willem_van_de_Meent1;~Byron_C_Wallace1", "aff": "Northeastern University;Harvard Medical School;Northeastern University;Northeastern University", "aff_domain": "neu.edu;bwh.harvard.edu;northeastern.edu;northeastern.edu", "position": "PhD student;Associate Professor;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nmcinerney2023chill,\ntitle={{CH}i{LL}: Zero-shot Custom Interpretable Feature Extraction from Clinical Notes with Large Language Models},\nauthor={Denis Jered McInerney and Geoffrey Young and Jan-Willem van de Meent and Byron C Wallace},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=TSdWY9GaHA}\n}", "github": "", "project": "", "reviewers": "J7J2;DqbU;y8JT", "site": "https://openreview.net/forum?id=TSdWY9GaHA", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;3;3", "excitement": "3;3;3", "reproducibility": "3;3;3", "correctness": "3;4;3", "rating_avg": 3.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.0, "reproducibility_avg": 3.0, "correctness_avg": 3.3333333333333335, "replies_avg": 10, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0001-5828-2379;0000-0001-8213-865x;0000-0001-9465-5398;", "linkedin": "jered-mcinerney-7b9774112/;;;", "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Northeastern University;Harvard University", "aff_unique_dep": ";Medical School", "aff_unique_url": "https://www.northeastern.edu;https://hms.harvard.edu", "aff_unique_abbr": "NEU;HMS", "aff_campus_unique_index": "1", "aff_campus_unique": ";Boston", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "TW2cBze4ZB", "title": "Contrastive Deterministic Autoencoders For Language Modeling", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Variational autoencoders (VAEs) are a popular family of generative models with wide applicability. Training VAEs, especially for text, often runs into the issue of posterior collapse, resulting in loss of representation quality. 
Deterministic autoencoders avoid this issue, and have been explored particularly well for images. It is however unclear how to best modify a deterministic model designed for images into a successful one for text. We show that with suitable adaptations, we can significantly improve on batch-normed VAEs (BN-VAEs), a strong benchmark for language modeling with VAEs, by replacing them with analogous deterministic models. We employ techniques from contrastive learning to control the entropy of the aggregate posterior of these models to make it Gaussian. The resulting models skip reparametrization steps in VAE modeling and avoid posterior collapse, while outperforming a broad range of VAE models on text generation and downstream tasks from representations. These improvements are shown to be consistent across both LSTM and Transformer-based VAE architectures. Appropriate comparisons to BERT/GPT-2 based results are also included. We also qualitatively examine the latent space through interpolation to supplement the quantitative aspects of the model.", "keywords": "Autoencoders;Contrastive;Transformers", "primary_area": "", "supplementary_material": "", "author": "Amur Ghose;Pascal Poupart", "authorids": "~Amur_Ghose1;~Pascal_Poupart2", "gender": "M;M", "homepage": ";https://cs.uwaterloo.ca/~ppoupart", "dblp": "227/6744;26/2122", "google_scholar": "bS4Q1mYAAAAJ;https://scholar.google.ca/citations?user=KhAJWroAAAAJ", "or_profile": "~Amur_Ghose1;~Pascal_Poupart2", "aff": "Huawei Technologies Ltd.;University of Waterloo", "aff_domain": "huawei.com;uwaterloo.ca", "position": "Researcher;Full Professor", "bibtex": "@inproceedings{\nghose2023contrastive,\ntitle={Contrastive Deterministic Autoencoders For Language Modeling},\nauthor={Amur Ghose and Pascal Poupart},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=TW2cBze4ZB}\n}", "github": "", "project": "", "reviewers": "1NbG;1cF2;kNe8", "site": "https://openreview.net/forum?id=TW2cBze4ZB", "pdf_size": 0, "rating": "2;2;2", "confidence": "3;3;2", "excitement": "3;3;3", "reproducibility": "3;5;4", "correctness": "2;4;2", "rating_avg": 2.0, "confidence_avg": 2.6666666666666665, "excitement_avg": 3.0, "reproducibility_avg": 4.0, "correctness_avg": 2.6666666666666665, "replies_avg": 11, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";", "linkedin": ";", "aff_unique_index": "0;1", "aff_unique_norm": "Huawei;University of Waterloo", "aff_unique_dep": "Huawei Technologies;", "aff_unique_url": "https://www.huawei.com;https://uwaterloo.ca", "aff_unique_abbr": "Huawei;UW", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "China;Canada" }, { "id": "TW831RjYQO", "title": "MedEval: A Multi-Level, Multi-Task, and Multi-Domain Medical Benchmark for Language Model Evaluation", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Curated datasets for healthcare are often limited due to the need of human annotations from experts. In this paper, we present MedEval, a multi-level, multi-task, and multi-domain medical benchmark to facilitate the development of language models for healthcare. MedEval is comprehensive and consists of data from several healthcare systems and spans 35 human body regions from 8 examination modalities. 
With 22,779 collected sentences and 21,228 reports, we provide expert annotations at multiple levels, offering a granular potential usage of the data and supporting a wide range of tasks. Moreover, we systematically evaluated 10 generic and domain-specific language models under zero-shot and finetuning settings, from domain-adapted baselines in healthcare to general-purposed state-of-the-art large language models (e.g., ChatGPT). Our evaluations reveal varying effectiveness of the two categories of language models across different tasks, from which we notice the importance of instruction tuning for few-shot usage of large language models. Our investigation paves the way toward benchmarking language models for healthcare and provides valuable insights into the strengths and limitations of adopting large language models in medical domains, informing their practical applications and future advancements.", "keywords": "Curated Datasets;NLP for Healthcare;Pre-trained Language Models", "primary_area": "", "supplementary_material": "", "author": "Zexue He;Yu Wang;An Yan;Yao Liu;Eric Y Chang;Amilcare Gentili;Julian McAuley;Chun-Nan Hsu", "authorids": "~Zexue_He1;~Yu_Wang24;~An_Yan1;~Yao_Liu11;~Eric_Y_Chang1;~Amilcare_Gentili1;~Julian_McAuley1;~Chun-Nan_Hsu1", "gender": "F;M;;M;M;M;M;M", "homepage": "https://zexuehe.github.io/;https://wangyu-ustc.github.io/;https://zzxslp.github.io;https://github.com/aglassoforange;https://scholar.google.com/citations?user=mY76b4QAAAAJ&hl=en;http://gentili.net;http://cseweb.ucsd.edu/~jmcauley/;https://profiles.ucsd.edu/chun-nan.hsu", "dblp": "215/4688;;37/10133-3;;;;29/3483;h/ChunNanHsu", "google_scholar": "-JrCM0AAAAAJ;https://scholar.google.com/citations?hl=en;7I_zqNoAAAAJ;;mY76b4QAAAAJ;LriRtQUAAAAJ;icbo4M0AAAAJ;1ZO4t_AAAAAJ", "or_profile": "~Zexue_He1;~Yu_Wang24;~An_Yan1;~Yao_Liu11;~Eric_Y_Chang1;~Amilcare_Gentili1;~Julian_McAuley1;~Chun-Nan_Hsu1", "aff": "University of California, San Diego;University of California, San Diego;University of California, San Diego;University of California, San Diego;Veterans Affairs;University of California, San Diego;University of California, San Diego, University of California, San Diego;University of California, San Diego", "aff_domain": "ucsd.edu;ucsd.edu;ucsd.edu;ucsd.edu;va.gov;ucsd.edu;eng.ucsd.edu;ucsd.edu", "position": "PhD student;PhD student;PhD student;Undergrad student;Full Professor;Full Professor;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nhe2023medeval,\ntitle={MedEval: A Multi-Level, Multi-Task, and Multi-Domain Medical Benchmark for Language Model Evaluation},\nauthor={Zexue He and Yu Wang and An Yan and Yao Liu and Eric Y Chang and Amilcare Gentili and Julian McAuley and Chun-Nan Hsu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=TW831RjYQO}\n}", "github": "", "project": "", "reviewers": "ZDUq;7LJK;yBca", "site": "https://openreview.net/forum?id=TW831RjYQO", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;5", "excitement": "3;4;4", "reproducibility": "4;4;0", "correctness": "3;4;4", "rating_avg": 4.0, "confidence_avg": 4.333333333333333, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 2.6666666666666665, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 8, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;0000-0002-5623-7512;0000-0003-0955-7588;0000-0002-5240-4707", "linkedin": ";;;;;amilcaregentili;;", "aff_unique_index": 
"0;0;0;0;1;0;0;0", "aff_unique_norm": "University of California, San Diego;Veterans Affairs", "aff_unique_dep": ";", "aff_unique_url": "https://www.ucsd.edu;https://www.va.gov", "aff_unique_abbr": "UCSD;VA", "aff_campus_unique_index": "0;0;0;0;0;0;0", "aff_campus_unique": "San Diego;", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "TZW4nzgtQ8", "title": "Locally Differentially Private Document Generation Using Zero Shot Prompting", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Numerous studies have highlighted the privacy risks associated with pretrained large language models. In contrast, our research offers a unique perspective by demonstrating that pretrained large language models can effectively contribute to privacy preservation. We propose a locally differentially private mechanism called DP-Prompt, which leverages the power of pretrained large language models and zero-shot prompting to counter author de-anonymization attacks while minimizing the impact on downstream utility. When DP-Prompt is used with a powerful language model like ChatGPT (gpt-3.5), we observe a notable reduction in the success rate of de-anonymization attacks, showing that it surpasses existing approaches by a considerable margin despite its simpler design. For instance, in the case of the IMDB dataset, DP-Prompt (with ChatGPT) perfectly recovers the clean sentiment F1 score while achieving a 46\\% reduction in author identification F1 score against static attackers and a 26\\% reduction against adaptive attackers. We conduct extensive experiments across six open-source large language models, ranging up to 7 billion parameters, to analyze various effects of the privacy-utility tradeoff. Code is avaliable at \\url{https://github.com/SaitejaUtpala/dp_prompt}", "keywords": "Language Models;Local Differential Privacy;Deanonymization Attacks;Zero Shot Prompting", "primary_area": "", "supplementary_material": "", "author": "Saiteja Utpala;Sara Hooker;Pin-Yu Chen", "authorids": "~Saiteja_Utpala1;~Sara_Hooker2;~Pin-Yu_Chen1", "gender": "M;M;", "homepage": ";http://www.pinyuchen.com;https://www.sarahooker.me/", "dblp": ";39/8969;210/2611", "google_scholar": ";jxwlCUUAAAAJ;2xy6h3sAAAAJ", "or_profile": "~Saiteja_Utpala1;~Pin-Yu_Chen1;~Sara_Hooker1", "aff": "Microsoft;International Business Machines;Cohere For AI", "aff_domain": "microsoft.com;ibm.com;cohere.com", "position": "Researcher;Principal Researcher;Principal Researcher", "bibtex": "@inproceedings{\nutpala2023locally,\ntitle={Locally Differentially Private Document Generation Using Zero Shot Prompting},\nauthor={Saiteja Utpala and Sara Hooker and Pin-Yu Chen},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=TZW4nzgtQ8}\n}", "github": "", "project": "", "reviewers": "T4EW;NaNk;ehk6;8UTc;qQmq", "site": "https://openreview.net/forum?id=TZW4nzgtQ8", "pdf_size": 0, "rating": "4;4;4;4;4", "confidence": "4;4;4;4;4", "excitement": "3;4;3;2;3", "reproducibility": "4;5;4;4;2", "correctness": "4;4;3;2;3", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.0, "reproducibility_avg": 3.8, "correctness_avg": 3.2, "replies_avg": 17, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0003-1039-8369;", "linkedin": "saiteja-utpala/;pin-yu-chen-940062a2;", "aff_unique_index": "0;1;2", "aff_unique_norm": "Microsoft;International Business Machines Corporation;Cohere", 
"aff_unique_dep": "Microsoft Corporation;;Cohere AI", "aff_unique_url": "https://www.microsoft.com;https://www.ibm.com;https://cohere.ai", "aff_unique_abbr": "Microsoft;IBM;Cohere", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "Td9LjgO91J", "title": "Unleashing the Power of Language Models in Text-Attributed Graph", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Representation learning on graph has been demonstrated to be a powerful tool for solving real-world problems. Text-attributed graph carries both semantic and structural information among different types of graphs. Existing works have paved the way for knowledge extraction of this type of data by leveraging language models or graph neural networks or combination of them. However, these works suffer from issues like underutilization of relationships between nodes or words or unaffordable memory cost. In this paper, we propose a Node Representation Update Pre-training Architecture based on Co-modeling Text and Graph (NRUP). In NRUP, we construct a hierarchical text-attributed graph that incorporates both original nodes and word nodes. Meanwhile, we apply four self-supervised tasks for different level of constructed graph. We further design the pre-training framework to update the features of nodes during training epochs. We conduct the experiment on the benchmark dataset ogbn-arxiv. Our method achieves outperformance compared to baselines, fully demonstrating its validity and generalization.", "keywords": "Hierarchical Text-attributed Graph;Pre-training;Self-supervised Tasks", "primary_area": "", "supplementary_material": "", "author": "Haoyu Kuang;Jiarong Xu;Haozhe Zhang;Zuyu Zhao;Qi Zhang;Xuanjing Huang;zhongyu wei", "authorids": "~Haoyu_Kuang1;~Jiarong_Xu2;~Haozhe_Zhang2;~Zuyu_Zhao2;~Qi_Zhang8;~Xuanjing_Huang1;~zhongyu_wei1", "gender": "M;F;M;M;M;F;M", "homepage": "https://Haoyuk.github.io;https://galina0217.github.io/;https://haozhestat.github.io/;https://ya20586665.icoc.vc/;http://qizhang.info;https://xuanjing-huang.github.io/;http://www.sdspeople.fudan.edu.cn/zywei/", "dblp": "362/8682;;;;52/323-1;05/6735-1;31/10489", "google_scholar": "X43xyOcAAAAJ;;xv0IjskAAAAJ;;XfqR3yYAAAAJ;RGsMgZA4H78C;AjLDxxgAAAAJ", "or_profile": "~Haoyu_Kuang1;~Jiarong_Xu2;~Haozhe_Zhang2;~Zuyu_Zhao2;~Qi_Zhang8;~Xuanjing_Huang1;~zhongyu_wei1", "aff": "Southwest University of Finance and Economics;Fudan University;Huawei Technologies Ltd.;;Fudan University;Fudan University;Fudan University", "aff_domain": "swufe.edu.cn;fudan.edu.cn;huawei.com;;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn", "position": "Undergrad student;Assistant Professor;Researcher;;Full Professor;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nkuang2023unleashing,\ntitle={Unleashing the Power of Language Models in Text-Attributed Graph},\nauthor={Haoyu Kuang and Jiarong Xu and Haozhe Zhang and Zuyu Zhao and Qi Zhang and Xuanjing Huang and zhongyu wei},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=Td9LjgO91J}\n}", "github": "", "project": "", "reviewers": "TszY;52xC;y7tX", "site": "https://openreview.net/forum?id=Td9LjgO91J", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;4;4", "excitement": "3;3;3", "reproducibility": "4;4;3", "correctness": "3;3;3", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.0, "reproducibility_avg": 
3.6666666666666665, "correctness_avg": 3.0, "replies_avg": 10, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0003-2973-1889;0000-0002-7771-4808;;;0000-0001-9197-9426;", "linkedin": ";;;;;;", "aff_unique_index": "0;1;2;1;1;1", "aff_unique_norm": "Southwest University of Finance and Economics;Fudan University;Huawei", "aff_unique_dep": ";;Huawei Technologies", "aff_unique_url": "https://www.swufe.edu.cn;https://www.fudan.edu.cn;https://www.huawei.com", "aff_unique_abbr": "SWUFE;Fudan;Huawei", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "TdrI4F7wS8", "title": "Regulation and NLP (RegNLP): Taming Large Language Models", "track": "main", "status": "Long Main", "tldr": "", "abstract": "The scientific innovation in Natural Language Processing (NLP) and more broadly in artificial intelligence (AI) is at its fastest pace to date. As large language models (LLMs) unleash a new era of automation, important debates emerge regarding the benefits and risks of their development, deployment and use. Currently, these debates have been dominated by often polarized narratives mainly led by the AI Safety and AI Ethics movements. This polarization, often amplified by social media, is swaying political agendas on AI regulation and governance and posing issues of regulatory capture. Capture occurs when the regulator advances the interests of the industry it is supposed to regulate, or of special interest groups rather than pursuing the general public interest. Meanwhile in NLP research, attention has been increasingly paid to the discussion of regulating risks and harms. This often happens without systematic methodologies or sufficient rooting in the disciplines that inspire an extended scope of NLP research, jeopardizing the scientific integrity of these endeavors. Regulation studies are a rich source of knowledge on how to systematically deal with risk and uncertainty, as well as with scientific evidence, to evaluate and compare regulatory options. This resource has largely remained untapped so far. In this paper, we argue how NLP research on these topics can benefit from proximity to regulatory studies and adjacent fields. We do so by discussing basic tenets of regulation, and risk and uncertainty, and by highlighting the shortcomings of current NLP discussions dealing with risk assessment. 
Finally, we advocate for the development of a new multidisciplinary research space on regulation and NLP (RegNLP), focused on connecting scientific knowledge to regulatory processes based on systematic methodologies.", "keywords": "LLM;regulation;ethics;safety;public policy;science influencers", "primary_area": "", "supplementary_material": "", "author": "Catalina Goanta;Nikolaos Aletras;Ilias Chalkidis;Sofia Ranchord\u00e1s;Gerasimos Spanakis", "authorids": "~Catalina_Goanta1;~Nikolaos_Aletras1;~Ilias_Chalkidis1;~Sofia_Ranchord\u00e1s1;~Gerasimos_Spanakis1", "gender": "F;;M;F;M", "homepage": "https://www.uu.nl/staff/ecgoanta;;https://iliaschalkidis.github.io;https://www.tilburguniversity.edu/nl/medewerkers/s-h-ranchordas;https://dke.maastrichtuniversity.nl/jerry.spanakis", "dblp": ";118/9116;199/8161;;43/7739", "google_scholar": "vf5EyZ0AAAAJ;https://scholar.google.co.uk/citations?user=uxRWFhoAAAAJ;BrtAqz8AAAAJ;b0gjDzoAAAAJ;https://scholar.google.gr/citations?user=LiUXYVgAAAAJ", "or_profile": "~Catalina_Goanta1;~Nikolaos_Aletras1;~Ilias_Chalkidis1;~Sofia_Ranchord\u00e1s1;~Gerasimos_Spanakis1", "aff": "Utrecht University;Amazon;Copenhagen University;Luiss Guido Carli University;Maastricht University", "aff_domain": "uu.nl;amazon.com;ku.dk;luiss.it;maastrichtuniversity.nl", "position": "Associate Professor;Researcher;Postdoc;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\ngoanta2023regulation,\ntitle={Regulation and {NLP} (Reg{NLP}): Taming Large Language Models},\nauthor={Catalina Goanta and Nikolaos Aletras and Ilias Chalkidis and Sofia Ranchord{\\'a}s and Gerasimos Spanakis},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=TdrI4F7wS8}\n}", "github": "", "project": "", "reviewers": "6v9w;dLf1;kJVK", "site": "https://openreview.net/forum?id=TdrI4F7wS8", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;3;3", "excitement": "3;3;4", "reproducibility": "", "correctness": "4;3;4", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 0, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-1044-9800;;0000-0002-0706-7772;;", "linkedin": ";;;;", "aff_unique_index": "0;1;2;3;4", "aff_unique_norm": "Utrecht University;Amazon;University of Copenhagen;Luiss Guido Carli University;Maastricht University", "aff_unique_dep": ";Amazon.com, Inc.;;;", "aff_unique_url": "https://www.uu.nl;https://www.amazon.com;https://www.ku.dk;https://www.luiss.edu/;https://www.maastrichtuniversity.nl", "aff_unique_abbr": "UU;Amazon;UCPH;Luiss;MU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2;3;0", "aff_country_unique": "Netherlands;United States;Denmark;Italy" }, { "id": "TemPqRDMJ8", "title": "SOUL: Towards Sentiment and Opinion Understanding of Language", "track": "main", "status": "Short Main", "tldr": "", "abstract": "Sentiment analysis is a well-established natural language processing task, with sentiment polarity classification being one of its most popular and representative tasks. However, despite the success of pre-trained language models in this area, they often fall short of capturing the broader complexities of sentiment analysis. To address this issue, we propose a new task called Sentiment and Opinion Understanding of Language (SOUL). 
SOUL aims to evaluate sentiment understanding through two subtasks: Review Comprehension (RC) and Justification Generation (JG). RC seeks to validate statements that focus on subjective information based on a review text, while JG requires models to provide explanations for their sentiment predictions. To enable comprehensive evaluation, we annotate a new dataset comprising 15,028 statements from 3,638 reviews. Experimental results indicate that SOUL is a challenging task for both small and large language models, with a performance gap of up to 27\\% when compared to human performance. Furthermore, evaluations conducted with both human experts and GPT-4 highlight the limitations of the small language model in generating reasoning-based justifications. These findings underscore the challenging nature of the SOUL task for existing models, emphasizing the need for further advancements in sentiment analysis to address its complexities. The new dataset and code are available at \\url{https://github.com/DAMO-NLP-SG/SOUL}.", "keywords": "sentiment analysis;sentiment classification;sentiment and opinion understanding", "primary_area": "", "supplementary_material": "", "author": "Yue Deng;Wenxuan Zhang;Sinno Jialin Pan;Lidong Bing", "authorids": "~Yue_Deng3;~Wenxuan_Zhang1;~Sinno_Jialin_Pan1;~Lidong_Bing2", "gender": "M;;;M", "homepage": "https://ntudy.github.io/;https://isakzhang.github.io/;https://lidongbing.github.io;http://www.cse.cuhk.edu.hk/~sinnopan/", "dblp": "35/8109-10;85/1177-1.html;53/6625;80/5412", "google_scholar": ";https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=en", "or_profile": "~Yue_Deng3;~Wenxuan_Zhang1;~Lidong_Bing3;~Sinno_Pan1", "aff": "School of Computer Science and Engineering, Nanyang Technological University;Alibaba Group;Alibaba Group;Nanyang Technological University", "aff_domain": "scse.ntu.edu.sg;alibaba-inc.com;alibaba-inc.com;ntu.edu.sg", "position": "PhD student;Researcher;Scientist;Full Professor", "bibtex": "@inproceedings{\ndeng2023soul,\ntitle={{SOUL}: Towards Sentiment and Opinion Understanding of Language},\nauthor={Yue Deng and Wenxuan Zhang and Sinno Jialin Pan and Lidong Bing},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=TemPqRDMJ8}\n}", "github": "", "project": "", "reviewers": "HN7A;TkL7;Qqt5;5t9T", "site": "https://openreview.net/forum?id=TemPqRDMJ8", "pdf_size": 0, "rating": "3;3;3;3", "confidence": "5;5;3;4", "excitement": "4;3;3;4", "reproducibility": "3;3;2;5", "correctness": "3;3;2;3", "rating_avg": 3.0, "confidence_avg": 4.25, "excitement_avg": 3.5, "reproducibility_avg": 3.25, "correctness_avg": 2.75, "replies_avg": 13, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0009-0006-3682-8047;;;", "linkedin": "yue0068/;wenxuan-zhang-608b88153/;;", "aff_unique_index": "0;1;1;0", "aff_unique_norm": "Nanyang Technological University;Alibaba Group", "aff_unique_dep": "School of Computer Science and Engineering;", "aff_unique_url": "https://www.ntu.edu.sg;https://www.alibaba.com", "aff_unique_abbr": "NTU;Alibaba", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;0", "aff_country_unique": "Singapore;China" }, { "id": "Tha4jW8er9", "title": "Machine Reading Comprehension using Case-based Reasoning", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "We present an accurate and 
interpretable method for answer extraction in machine reading comprehension that is reminiscent of case-based reasoning (CBR) from classical AI. Our method (CBR-MRC) builds upon the hypothesis that contextualized answers to similar questions share semantic similarities with each other. Given a test question, CBR-MRC first retrieves a set of similar cases from a nonparametric memory and then predicts an answer by selecting the span in the test context that is most similar to the contextualized representations of answers in the retrieved cases. The semi-parametric nature of our approach allows it to attribute a prediction to the specific set of evidence cases, making it a desirable choice for building reliable and debuggable QA systems. We show that CBR-MRC provides high accuracy comparable with large reader models and outperforms baselines by 11.5 and 8.4 EM on NaturalQuestions and NewsQA, respectively. Further, we demonstrate the ability of CBR-MRC in identifying not just the correct answer tokens but also the span with the most relevant supporting evidence. Lastly, we observe that contexts for certain question types show higher lexical diversity than others and find that CBR-MRC is robust to these variations while performance using fully-parametric methods drops.", "keywords": "question answering;case-based reasoning", "primary_area": "", "supplementary_material": "", "author": "Dung Ngoc Thai;Dhruv Agarwal;Mudit Chaudhary;Wenlong Zhao;Rajarshi Das;Jay-Yoon Lee;Hannaneh Hajishirzi;Manzil Zaheer;Andrew McCallum", "authorids": "~Dung_Ngoc_Thai1;~Dhruv_Agarwal2;~Mudit_Chaudhary1;~Wenlong_Zhao1;~Rajarshi_Das1;~Jay-Yoon_Lee1;~Hannaneh_Hajishirzi1;~Manzil_Zaheer1;~Andrew_McCallum1", "gender": "F;M;M;;;F;M;M;M", "homepage": "https://people.cs.umass.edu/~dthai;https://people.cs.umass.edu/~dagarwal/;;;http://rajarshd.github.io;https://homes.cs.washington.edu/~hannaneh/;https://www.aclweb.org/anthology/people/m/manzil-zaheer/;http://www.cs.umass.edu/~mccallum;https://www.cs.cmu.edu/~jaylee", "dblp": ";301/7894;283/5772;03/4555-1;;52/1296;40/10701;m/AndrewMcCallum;https://dblp.org/pers/l/Lee:Jay_Yoon", "google_scholar": ";7-AxhB4AAAAJ;FmZEBUoAAAAJ;i0lW2EAAAAAJ;FKoKAwIAAAAJ;LOV6_WIAAAAJ;A33FhJMAAAAJ;yILa1y0AAAAJ;_USiaqwAAAAJ", "or_profile": "~Dung_Ngoc_Thai1;~Dhruv_Agarwal2;~Mudit_Chaudhary1;~Wenlong_Zhao1;~Rajarshi_Das1;~Hannaneh_Hajishirzi1;~Manzil_Zaheer1;~Andrew_McCallum1;~Jay_Yoon_Lee1", "aff": ";Amazon;College of Information and Computer Science, University of Massachusetts at Amherst;University of Massachusetts at Amherst;University of Washington;University of Washington;Google DeepMind;University of Massachusetts Amherst;Seoul National University", "aff_domain": ";amazon.com;cics.umass.edu;cs.umass.edu;cs.washington.edu;uw.edu;deepmind.com;cs.umass.edu;snu.ac.kr", "position": ";Intern;MS student;PhD student;Postdoc;Associate Professor;Researcher;Distinguished Professor;Assistant Professor", "bibtex": "@inproceedings{\nthai2023machine,\ntitle={Machine Reading Comprehension using Case-based Reasoning},\nauthor={Dung Ngoc Thai and Dhruv Agarwal and Mudit Chaudhary and Wenlong Zhao and Rajarshi Das and Jay-Yoon Lee and Hannaneh Hajishirzi and Manzil Zaheer and Andrew McCallum},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=Tha4jW8er9}\n}", "github": "", "project": "", "reviewers": "Crp1;2R1W;paQh", "site": "https://openreview.net/forum?id=Tha4jW8er9", "pdf_size": 0, "rating": "5;5;5", "confidence": 
"4;4;3", "excitement": "3;4;4", "reproducibility": "5;5;4", "correctness": "3;4;4", "rating_avg": 5.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.666666666666667, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 9, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0001-7258-5130;;;;;;0009-0004-5487-2848;", "linkedin": ";dhdhagar/;muditchaudhary;wenlong-zhao/;;;;andrew-mccallum-a412;", "aff_unique_index": "0;1;1;2;2;3;1;4", "aff_unique_norm": "Amazon;University of Massachusetts Amherst;University of Washington;Google;Seoul National University", "aff_unique_dep": "Amazon.com, Inc.;College of Information and Computer Science;;Google DeepMind;", "aff_unique_url": "https://www.amazon.com;https://www.umass.edu;https://www.washington.edu;https://deepmind.com;https://www.snu.ac.kr", "aff_unique_abbr": "Amazon;UMass Amherst;UW;DeepMind;SNU", "aff_campus_unique_index": "1;1;1", "aff_campus_unique": ";Amherst", "aff_country_unique_index": "0;0;0;0;0;1;0;2", "aff_country_unique": "United States;United Kingdom;South Korea" }, { "id": "TioAqBt8lz", "title": "Structure-aware Knowledge Graph-to-text Generation with Planning Selection and Similarity Distinction", "track": "main", "status": "Long Main", "tldr": "", "abstract": "The knowledge graph-to-text (KG-to-text) generation task aims to synthesize coherent and engaging sentences that accurately convey the complex information derived from an input knowledge graph. One of the primary challenges in this task is bridging the gap between the diverse structures of the KG and the target text, while preserving the details of the input KG. To address this, we propose a novel approach that efficiently integrates graph structure-aware modules with pre-trained language models. Unlike conventional techniques, which only consider direct connections between first-order neighbors, our method delves deeper by incorporating Relative Distance Encoding as a bias within the graph structure-aware module. This enables our model to better capture the intricate topology information present in the KG. To further elevate the fidelity of the generated text, Planning Selection and Similarity Distinction are introduced. Our approach filters the most relevant linearized sequences by employing a planning scorer, while simultaneously distinguishing similar input KGs through contrastive learning techniques. 
Experiments on two datasets demonstrate the superiority of our model.", "keywords": "KG-to-text generation;Pre-trained language model;Planning Selection;Similarity Distinction", "primary_area": "", "supplementary_material": "", "author": "Feng Zhao;Hongzhi Zou;Cheng Yan", "authorids": "~Feng_Zhao8;~Hongzhi_Zou1;~Cheng_Yan4", "gender": "M;M;M", "homepage": "http://www.hust.edu.cn;https://github.com/wssf14;https://github.com/ExplosiveYan", "dblp": "181/2734-3.html;;", "google_scholar": ";;", "or_profile": "~Feng_Zhao8;~Hongzhi_Zou1;~Cheng_Yan4", "aff": "Huazhong University of Science and Technology;Huazhong University of Science and Technology;Huazhong University of Science and Technology", "aff_domain": "hust.edu.cn;hust.edu.cn;hust.edu.cn", "position": "Full Professor;MS student;PhD student", "bibtex": "@inproceedings{\nzhao2023structureaware,\ntitle={Structure-aware Knowledge Graph-to-text Generation with Planning Selection and Similarity Distinction},\nauthor={Feng Zhao and Hongzhi Zou and Cheng Yan},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=TioAqBt8lz}\n}", "github": "", "project": "", "reviewers": "uUx9;wGZN;5CDR", "site": "https://openreview.net/forum?id=TioAqBt8lz", "pdf_size": 0, "rating": "4;4;4", "confidence": "5;4;4", "excitement": "4;3;3", "reproducibility": "4;3;3", "correctness": "4;4;3", "rating_avg": 4.0, "confidence_avg": 4.333333333333333, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0001-7205-3302;;", "linkedin": ";;", "aff_unique_index": "0;0;0", "aff_unique_norm": "Huazhong University of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "http://www.hust.edu.cn", "aff_unique_abbr": "HUST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "Tk4tvmdKVP", "title": "Not all quantifiers are equal: Probing Transformer-based language models' understanding of generalised quantifiers", "track": "main", "status": "Long Main", "tldr": "", "abstract": "How do different generalised quantifiers affect the behaviour of transformer-based language models (TLMs)? The recent popularity of TLMs and the central role generalised quantifiers have traditionally played in linguistics and logic bring this question into particular focus. The current research investigating this subject has not utilised a task defined purely in a logical sense, and thus, has not captured the underlying logical significance of generalised quantifiers. Consequently, they have not answered the aforementioned question faithfully or adequately. Therefore, we investigate how different generalised quantifiers affect TLMs by employing a textual entailment problem defined in a purely logical sense, namely, model-checking with natural language. Our approach permits the automatic construction of datasets with respect to which we can assess the ability of TLMs to learn the meanings of generalised quantifiers. 
Our investigation reveals that TLMs generally can comprehend the logical semantics of the most common generalised quantifiers, but that distinct quantifiers influence TLMs in varying ways.", "keywords": "Natural Language Inference;Transformer-based language models", "primary_area": "", "supplementary_material": "", "author": "Tharindu Madusanka;Iqra Zahid;Hao Li;Ian Pratt-Hartmann;Riza Batista-Navarro", "authorids": "~Tharindu_Madusanka1;~Iqra_Zahid1;~Hao_Li25;~Ian_Pratt-Hartmann1;~Riza_Batista-Navarro1", "gender": "M;F;M;M;F", "homepage": ";;https://www.research.manchester.ac.uk/portal/en/researchers/hao-li(38e27350-1ed5-453b-954c-e480fc98ccbd).html;http://www.cs.man.ac.uk/~ipratt/;https://research.manchester.ac.uk/en/persons/riza.batista", "dblp": "294/8252;238/6369;17/5705-74;60/4630.html;92/11424", "google_scholar": ";;yZjmYegAAAAJ;https://scholar.google.com.tw/citations?user=M1xicaEAAAAJ;fRBJmp9gk_cC", "or_profile": "~Tharindu_Madusanka1;~Iqra_Zahid1;~Hao_Li25;~Ian_Pratt-Hartmann1;~Riza_Batista-Navarro1", "aff": "University of Manchester;The University of Manchester;University of Manchester;University of Manchester;University of Manchester", "aff_domain": "cs.manchester.ac.uk;uom.ac.uk;manchester.ac.uk;cs.manchester.ac.uk;manchester.ac.uk", "position": "PhD student;PhD student;PhD student;Lecturer;Associate Professor", "bibtex": "@inproceedings{\nmadusanka2023not,\ntitle={Not all quantifiers are equal: Probing Transformer-based language models' understanding of generalised quantifiers},\nauthor={Tharindu Madusanka and Iqra Zahid and Hao Li and Ian Pratt-Hartmann and Riza Batista-Navarro},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=Tk4tvmdKVP}\n}", "github": "", "project": "", "reviewers": "bRRx;SHqe;sJL9", "site": "https://openreview.net/forum?id=Tk4tvmdKVP", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;3;3", "excitement": "3;4;3", "reproducibility": "4;4;3", "correctness": "4;5;3", "rating_avg": 5.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0001-7024-7744;;0000-0002-9923-4346;0000-0003-0062-043X;", "linkedin": "tharindu-madusanka/;iqra-zahid-7ab978152/;hao-li-538597190/;;", "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "University of Manchester", "aff_unique_dep": "", "aff_unique_url": "https://www.manchester.ac.uk", "aff_unique_abbr": "UoM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United Kingdom" }, { "id": "TkJkSkmhUy", "title": "Injecting structural hints: Using language models to study inductive biases in language learning", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Both humans and transformer language models are able to learn language without explicit structural supervision. What cognitive inductive biases make this learning possible? Here, we examine the effect of different inductive learning biases by actively controlling the inductive biases of artificial learners: we structurally bias models by pretraining on synthetic formally-structured data, and evaluate these structural biases by fine-tuning on three typologically-distant human languages: English, Japanese, and Basque. 
We investigate the effect on downstream language perplexity of three types of inductive bias: 1) recursive, hierarchical processing 2) unrestricted token-token dependencies that can't be modeled by context-free grammars, and 3) a Zipfian power-law vocabulary distribution. We show that complex, non-context-free interactions between tokens form the best inductive biases. Our study leverages the capabilities of transformer models to run controlled language learning experiments that are not possible to run on humans, and surfaces hypotheses about the structures that facilitate language learning in both humans and machines.", "keywords": "transfer learning;pretraining;recursion;context-sensitivity", "primary_area": "", "supplementary_material": "", "author": "Isabel Papadimitriou;Dan Jurafsky", "authorids": "~Isabel_Papadimitriou1;~Dan_Jurafsky1", "gender": "F;M", "homepage": "https://www.isabelpapad.com/;http://web.stanford.edu/~jurafsky/", "dblp": "264/0034;31/985", "google_scholar": ";uZg9l58AAAAJ", "or_profile": "~Isabel_Papadimitriou1;~Dan_Jurafsky1", "aff": "Stanford University;Stanford University", "aff_domain": "stanford.edu;stanford.edu", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\npapadimitriou2023injecting,\ntitle={Injecting structural hints: Using language models to study inductive biases in language learning},\nauthor={Isabel Papadimitriou and Dan Jurafsky},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=TkJkSkmhUy}\n}", "github": "", "project": "", "reviewers": "MCzq;A6Y3;wiKh", "site": "https://openreview.net/forum?id=TkJkSkmhUy", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;3", "excitement": "3;3;3", "reproducibility": "5;4;4", "correctness": "3;4;3", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.0, "reproducibility_avg": 4.333333333333333, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0003-0214-0659;", "linkedin": ";", "aff_unique_index": "0;0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "", "aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Stanford", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "Tn5hALAaA4", "title": "Crosslingual Transfer Learning for Low-Resource Languages Based on Multilingual Colexification Graphs", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "In comparative linguistics, colexification refers to the phenomenon of a lexical form conveying two or more distinct meanings. Existing work on colexification patterns relies on annotated word lists, limiting scalability and usefulness in NLP. In contrast, we identify colexification patterns of more than 2,000 concepts across 1,335 languages directly from an unannotated parallel corpus. We then propose simple and effective methods to build multilingual graphs from the colexification patterns: \\textbf{ColexNet} and \\textbf{ColexNet+}. ColexNet's nodes are concepts and its edges are colexifications. In ColexNet+, concept nodes are additionally linked through intermediate nodes, each representing an ngram in one of 1,334 languages. We use ColexNet+ to train $\\overrightarrow{\\mbox{ColexNet+}}$, high-quality multilingual embeddings that are well-suited for transfer learning. 
In our experiments, we first show that ColexNet achieves high recall on CLICS, a dataset of crosslingual colexifications. We then evaluate $\\overrightarrow{\\mbox{ColexNet+}}$ on roundtrip translation, sentence retrieval and sentence classification and show that our embeddings surpass several transfer learning baselines. This demonstrates the benefits of using colexification as a source of information in multilingual NLP.", "keywords": "multilingual;colexification;transfer learning", "primary_area": "", "supplementary_material": "", "author": "Yihong Liu;Haotian Ye;Leonie Weissweiler;Renhao Pei;Hinrich Schuetze", "authorids": "~Yihong_Liu1;~Haotian_Ye2;~Leonie_Weissweiler1;~Renhao_Pei1;~Hinrich_Schuetze3", "gender": "M;;;;M", "homepage": "https://yihongl1u.github.io/;https://cis.lmu.de/personen/mitarbeiter/yehao/index.html;https://www.cis.lmu.de/~weissweiler/;;https://www.cis.uni-muenchen.de/schuetze/", "dblp": "86/3284;;212/0281;347/2100;s/HinrichSchutze", "google_scholar": "VjJUa5cAAAAJ;WaXN8W8AAAAJ;o4fK4n4AAAAJ;https://scholar.google.com/citations?hl=en;", "or_profile": "~Yihong_Liu1;~Haotian_Ye2;~Leonie_Weissweiler1;~Renhao_Pei1;~Hinrich_Schuetze3", "aff": "Ludwig-Maximilians-Universit\u00e4t M\u00fcnchen;Center for Information and Language Processing;LMU Munich;Ludwig-Maximilians-Universit\u00e4t M\u00fcnchen;Center for Information and Language Processing", "aff_domain": "lmu.de;cis.lmu.de;lmu.de;lmu.de;lmu.de", "position": "PhD student;PhD student;PhD student;Intern;Full Professor", "bibtex": "@inproceedings{\nliu2023crosslingual,\ntitle={Crosslingual Transfer Learning for Low-Resource Languages Based on Multilingual Colexification Graphs},\nauthor={Yihong Liu and Haotian Ye and Leonie Weissweiler and Renhao Pei and Hinrich Schuetze},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=Tn5hALAaA4}\n}", "github": "", "project": "", "reviewers": "A1sh;knrQ;7BGY;YCED", "site": "https://openreview.net/forum?id=Tn5hALAaA4", "pdf_size": 0, "rating": "4;4;4;4", "confidence": "4;3;3;4", "excitement": "3;4;3;3", "reproducibility": "4;4;5;4", "correctness": "4;4;4;4", "rating_avg": 4.0, "confidence_avg": 3.5, "excitement_avg": 3.25, "reproducibility_avg": 4.25, "correctness_avg": 4.0, "replies_avg": 14, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0003-1073-0958;;;0009-0005-1526-7466;", "linkedin": ";htyeh;;renhaopei/;", "aff_unique_index": "0;1;2;0;1", "aff_unique_norm": "Ludwig-Maximilians-Universit\u00e4t M\u00fcnchen;Center for Information and Language Processing;Ludwig Maximilian University of Munich", "aff_unique_dep": ";;", "aff_unique_url": "https://www.lmu.de;;https://www.lmu.de", "aff_unique_abbr": "LMU;;LMU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Munich", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Germany;" }, { "id": "TnpFFjHCcw", "title": "Conversational Semantic Parsing using Dynamic Context Graphs", "track": "main", "status": "Long Main", "tldr": "", "abstract": "In this paper we consider the task of conversational semantic parsing over general purpose knowledge graphs (KGs) with millions of entities, and thousands of relation-types. We focus on models which are capable of interactively mapping user utterances into executable logical forms (e.g., Sparql) in the context of the conversational history. 
Our key idea is to represent information about an utterance and its context via a subgraph which is created dynamically, i.e., the number of nodes varies per utterance. Rather than treating the subgraph as a sequence, we exploit its underlying structure and encode it with a graph neural network which further allows us to represent a large number of (unseen) nodes. Experimental results show that dynamic context modeling is superior to static approaches, delivering performance improvements across the board (i.e., for simple and complex questions). Our results further confirm that modeling the structure of context is better at processing discourse information, (i.e., at handling ellipsis and resolving coreference) and longer interactions.", "keywords": "semantic parsing;SPARQL;Knowledge Graphs;Conversational Semantic Parsing", "primary_area": "", "supplementary_material": "", "author": "Parag Jain;Mirella Lapata", "authorids": "~Parag_Jain1;~Mirella_Lapata1", "gender": ";F", "homepage": ";https://homepages.inf.ed.ac.uk/mlap/", "dblp": "98/1178;59/6701", "google_scholar": "https://scholar.google.com/citations?hl=en;j67B9Q4AAAAJ", "or_profile": "~Parag_Jain1;~Mirella_Lapata1", "aff": "University of Edinburgh;Edinburgh University, University of Edinburgh", "aff_domain": "ed.ac.uk;inf.ed.ac.uk", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\njain2023conversational,\ntitle={Conversational Semantic Parsing using Dynamic Context Graphs},\nauthor={Parag Jain and Mirella Lapata},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=TnpFFjHCcw}\n}", "github": "", "project": "", "reviewers": "4Uzo;bYQ1;Qheq", "site": "https://openreview.net/forum?id=TnpFFjHCcw", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;4;5", "excitement": "3;4;3", "reproducibility": "3;4;5", "correctness": "3;4;3", "rating_avg": 5.0, "confidence_avg": 4.333333333333333, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.0, "correctness_avg": 3.3333333333333335, "replies_avg": 10, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";", "linkedin": ";", "aff_unique_index": "0;0", "aff_unique_norm": "University of Edinburgh", "aff_unique_dep": "", "aff_unique_url": "https://www.ed.ac.uk", "aff_unique_abbr": "Edinburgh", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United Kingdom" }, { "id": "Tnx0922coo", "title": "Disentangling Transformer Language Models as Superposed Topic Models", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Topic Modelling is an established research area where the quality of a given topic is measured using coherence metrics. Often, we infer topics from Neural Topic Models (NTM) by interpreting their decoder weights, consisting of top-activated words projected from individual neurons. Transformer-based Language Models (TLM) similarly consist of decoder weights. However, due to its hypothesised superposition properties, the final logits originating from the residual path are considered uninterpretable. Therefore, we posit that we can interpret TLM as superposed NTM by proposing a novel weight-based, model-agnostic and corpus-agnostic approach to search and disentangle decoder-only TLM, potentially mapping individual neurons to multiple coherent topics. 
Our results show that it is empirically feasible to disentangle coherent topics from GPT-2 models using the Wikipedia corpus. We validate this approach for GPT-2 models using Zero-Shot Topic Modelling. Finally, we extend the proposed approach to disentangle and analyse LLaMA models.", "keywords": "Topic Modelling;Mechanistic Interpretability;Pre-trained Language Models;Transformers", "primary_area": "", "supplementary_material": "", "author": "Jia Peng Lim;Hady W. Lauw", "authorids": "~Jia_Peng_Lim1;~Hady_W._Lauw1", "gender": "M;M", "homepage": "http://www.hadylauw.com;", "dblp": "00/2494;339/2524", "google_scholar": "HTC1z2gAAAAJ;UctkM5cAAAAJ", "or_profile": "~Hady_W_Lauw1;~Lim_Jia_Peng1", "aff": "Singapore Management University;Singapore Management University", "aff_domain": "smu.edu.sg;smu.edu.sg", "position": "Associate Professor;PhD student", "bibtex": "@inproceedings{\nlim2023disentangling,\ntitle={Disentangling Transformer Language Models as Superposed Topic Models},\nauthor={Jia Peng Lim and Hady W. Lauw},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=Tnx0922coo}\n}", "github": "", "project": "", "reviewers": "K1Jc;AWCb;Y4Bt", "site": "https://openreview.net/forum?id=Tnx0922coo", "pdf_size": 0, "rating": "4;4;4", "confidence": "2;3;3", "excitement": "4;4;4", "reproducibility": "2;3;4", "correctness": "4;4;4", "rating_avg": 4.0, "confidence_avg": 2.6666666666666665, "excitement_avg": 4.0, "reproducibility_avg": 3.0, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-8245-8677;", "linkedin": "hadylauw;jia-peng-lim/", "aff_unique_index": "0;0", "aff_unique_norm": "Singapore Management University", "aff_unique_dep": "", "aff_unique_url": "https://www.smu.edu.sg", "aff_unique_abbr": "SMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Singapore" }, { "id": "ToGkF2nCNG", "title": "Measure Children's Mindreading Ability with Machine Reading", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Recently, much research in psychology has benefited from the advances in machine learning techniques. \nSome recent studies showed that it is possible to build automated scoring models for children's mindreading.\nThese models were trained on a set of manually-labeled question-response pairs, which were collected by asking children to answer one or two questions after a short story is told or a video clip is played.\nHowever, existing models did not take the features of the stories and video clips into account when scoring, which obviously will reduce the accuracy of the scoring models. Furthermore, considering that different psychological tests may contain the same questions, this approach cannot be extended to other related psychological test datasets. 
\nIn this study, we proposed a multi-modal learning framework to leverage the features extracted from the stories and videos related to the questions being asked during the children's mindreading evaluation.\nExperimental results show that the scores produced by the proposed models agree well with those graded by human experts, highlighting the potential of the proposed network architecture for practical automated children's mindreading scoring systems.", "keywords": "Natural Language Processing;Machine Reading Comprehension;Multimodal;Psychology;Mind-reading", "primary_area": "", "supplementary_material": "", "author": "Yuliang Yan;Xiaohua Wang;Xiang Zhou;Xiaoqing Zheng;Xuanjing Huang", "authorids": "~Yuliang_Yan2;~Xiaohua_Wang2;~Xiang_Zhou5;~Xiaoqing_Zheng2;~Xuanjing_Huang1", "gender": "M;;M;;F", "homepage": "https://yuliangyan0807.github.io/;;https://github.com/wurtuzi;;https://xuanjing-huang.github.io/", "dblp": ";;;;05/6735-1", "google_scholar": "ZukVBVUAAAAJ;;;;RGsMgZA4H78C", "or_profile": "~Yuliang_Yan2;~Xiaohua_Wang2;~Xiang_Zhou5;~Xiaoqing_Zheng2;~Xuanjing_Huang1", "aff": "Fudan University;Fudan University;Fudan University;;Fudan University", "aff_domain": "fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;;fudan.edu.cn", "position": "MS student;PhD student;MS student;;Full Professor", "bibtex": "@inproceedings{\nyan2023measure,\ntitle={Measure Children's Mindreading Ability with Machine Reading},\nauthor={Yuliang Yan and Xiaohua Wang and Xiang Zhou and Xiaoqing Zheng and Xuanjing Huang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=ToGkF2nCNG}\n}", "github": "", "project": "", "reviewers": "J6oG;LmF8;x51Z", "site": "https://openreview.net/forum?id=ToGkF2nCNG", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;3;3", "excitement": "2;4;2", "reproducibility": "5;5;3", "correctness": "4;4;3", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 2.6666666666666665, "reproducibility_avg": 4.333333333333333, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-1124-855X;;;0000-0001-9197-9426", "linkedin": ";;;;", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Fudan University", "aff_unique_dep": "", "aff_unique_url": "https://www.fudan.edu.cn", "aff_unique_abbr": "Fudan", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "ToMdTqVIb5", "title": "Monte Carlo Thought Search: Large Language Model Querying for Complex Scientific Reasoning in Catalyst Design", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "Discovering novel catalysts requires complex reasoning involving multiple chemical properties and resultant trade-offs, leading to a combinatorial growth in the search space. While large language models (LLM) have demonstrated novel capabilities for chemistry through complex instruction following capabilities and high quality reasoning, a goal-driven combinatorial search using LLMs has not been explored in detail. In this work, we present a Monte Carlo Tree Search-based approach that improves beyond state-of-the-art chain-of-thought prompting variants to augment scientific reasoning. 
We introduce two new reasoning datasets: 1) a curation of computational chemistry simulations, and 2) diverse questions written by catalysis researchers for reasoning about novel chemical conversion processes. We improve over the best baseline by 25.8% and find that our approach can augment scientist's reasoning and discovery process with novel insights.", "keywords": "Chain-of-Thought;Large Language Model;Reasoning;Scientific Discovery;Chemistry;Catalysis", "primary_area": "", "supplementary_material": "", "author": "Henry W. Sprueill;Carl Edwards;Mariefel V Olarte;Udishnu Sanyal;Heng Ji;Sutanay Choudhury", "authorids": "~Henry_W._Sprueill1;~Carl_Edwards1;~Mariefel_V_Olarte1;~Udishnu_Sanyal1;~Heng_Ji3;~Sutanay_Choudhury2", "gender": "M;;;F;;M", "homepage": "https://cnedwards.com/;;;http://blender.cs.illinois.edu/hengji.html;;", "dblp": "300/1001;;;;57/7437;", "google_scholar": "https://scholar.google.com/citations?hl=en;;;z7GCqT4AAAAJ;oouJk7YAAAAJ;mfhLVU0AAAAJ", "or_profile": "~Carl_Edwards1;~Mariefel_V_Olarte1;~Udishnu_Sanyal1;~Heng_Ji3;~Sutanay_Choudhury2;~Henry_William_Sprueill1", "aff": "University of Illinois, Urbana Champaign;Pacific Northwest National Laboratory;Pacific Northwest National Lab;University of Illinois, Urbana-Champaign;Pacific Northwest National Lab;Pacific Northwest National Laboratory", "aff_domain": "illinois.edu;pnnl.gov;pnl.gov;uiuc.edu;pnl.gov;pnnl.gov", "position": "PhD student;Research Engineer IV;Researcher;Full Professor;Scientist;Researcher", "bibtex": "@inproceedings{\nsprueill2023monte,\ntitle={Monte Carlo Thought Search: Large Language Model Querying for Complex Scientific Reasoning in Catalyst Design},\nauthor={Henry W. Sprueill and Carl Edwards and Mariefel V Olarte and Udishnu Sanyal and Heng Ji and Sutanay Choudhury},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=ToMdTqVIb5}\n}", "github": "", "project": "", "reviewers": "CnCD;etXD;LJ9V", "site": "https://openreview.net/forum?id=ToMdTqVIb5", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;3;4", "excitement": "4;4;3", "reproducibility": "4;4;4", "correctness": "5;4;2", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.0, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0003-2989-1110;0000-0002-7935-8691;;;0000-0002-7981-4175", "linkedin": "carl-edwards-70a90592;;;;;", "aff_unique_index": "0;1;1;2;1;1", "aff_unique_norm": "University of Illinois Urbana-Champaign;Pacific Northwest National Laboratory;University of Illinois", "aff_unique_dep": ";;", "aff_unique_url": "https://illinois.edu;https://www.pnnl.gov;https://illinois.edu", "aff_unique_abbr": "UIUC;PNNL;UIUC", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Urbana-Champaign;", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "Tpd5RuSzpq", "title": "PUNR: Pre-training with User Behavior Modeling for News Recommendation", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "News recommendation aims to predict click behaviors based on user behaviors. \nHow to effectively model the user representations is the key to recommending preferred news. \nExisting works are mostly focused on improvements in the supervised fine-tuning stage. 
\nHowever, there is still a lack of PLM-based unsupervised pre-training methods optimized for user representations.\nIn this work, we propose an unsupervised pre-training paradigm with two tasks, i.e. user behavior masking and user behavior generation, both towards effective user behavior modeling. Firstly, we introduce the user behavior masking pre-training task to recover the masked user behaviors based on their contextual behaviors. In this way, the model could capture a much stronger and more comprehensive user news reading pattern. Besides, we incorporate a novel auxiliary user behavior generation pre-training task to enhance the user representation vector derived from the user encoder.\nWe use the above pre-trained user modeling encoder to obtain news and user representations in downstream fine-tuning.\nEvaluations on the real-world news benchmark show significant performance improvements over existing baselines.", "keywords": "News Recommendation;Pre-training;User Behavior Modeling", "primary_area": "", "supplementary_material": "", "author": "Guangyuan Ma;Hongtao Liu;Xing W;Wanhui Qian;Zhepeng Lv;Qing Yang;Songlin Hu", "authorids": "~Guangyuan_Ma1;~Hongtao_Liu1;~Xing_W1;~Wanhui_Qian1;~Zhepeng_Lv1;~Qing_Yang11;~Songlin_Hu2", "gender": "M;M;M;M;F;M;M", "homepage": ";;https://scholar.google.com.hk/citations?user=ZKd3UjkAAAAJ&hl=zh-CN;;https://www.duxiaoman.com/;https://www.duxiaoman.com/index;http://people.ucas.ac.cn/~0000967?language=en", "dblp": "289/8498;;;245/3679;;47/3749;67/4108-1.html", "google_scholar": "GHBLzN0AAAAJ;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com.hk/citations?user=ZKd3UjkAAAAJ;;;;", "or_profile": "~Guangyuan_Ma1;~Hongtao_Liu1;~Xing_W1;~Wanhui_Qian1;~Zhepeng_Lv1;~Qing_Yang11;~Songiln_Hu1", "aff": "University of Chinese Academy of Sciences;Du Xiaoman Financial;University of Chinese Academy of Sciences;;Du Xiaoman Technology(BeiJing);Du Xiaoman Technology(BeiJing);Institute of Information Engineering, Chinese Academy of Sciences", "aff_domain": "ucas.ac.cn;duxiaoman.com;ucas.edu.cn;;duxiaoman.com;duxiaoman.com;iie.ac.cn", "position": "PhD student;Researcher;PhD student;;Researcher;Principal Researcher;Full Professor", "bibtex": "@inproceedings{\nma2023punr,\ntitle={{PUNR}: Pre-training with User Behavior Modeling for News Recommendation},\nauthor={Guangyuan Ma and Hongtao Liu and Xing W and Wanhui Qian and Zhepeng Lv and Qing Yang and Songlin Hu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=Tpd5RuSzpq}\n}", "github": "", "project": "", "reviewers": "ed5M;YPna;n3WE", "site": "https://openreview.net/forum?id=Tpd5RuSzpq", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;4", "excitement": "4;2;3", "reproducibility": "4;2;4", "correctness": "4;2;3", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 3.0, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0001-6916-9611;;;;;;", "linkedin": ";;;;;;", "aff_unique_index": "0;1;0;2;2;3", "aff_unique_norm": "University of Chinese Academy of Sciences;Du Xiaoman Financial;Du Xiaoman Technology;Chinese Academy of Sciences", "aff_unique_dep": ";;;Institute of Information Engineering", "aff_unique_url": "http://www.ucas.ac.cn;https://www.duxiaoman.com;;http://www.cas.cn", "aff_unique_abbr": "UCAS;DXF;;CAS", "aff_campus_unique_index": "", "aff_campus_unique": "", 
"aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "TqIDmoIzLT", "title": "CleanCoNLL: A Nearly Noise-Free Named Entity Recognition Dataset", "track": "main", "status": "Long Main", "tldr": "", "abstract": "The CoNLL-03 corpus is arguably the most well-known and utilized benchmark dataset for named entity recognition (NER). However, prior works found significant numbers of annotation errors, incompleteness, and inconsistencies in the data. This poses challenges to objectively comparing NER approaches and analyzing their errors, as current state-of-the-art models achieve F1-scores that are comparable to or even exceed the estimated noise level in CoNLL-03. To address this issue, we present a comprehensive relabeling effort assisted by automatic consistency checking that corrects 7.0% of all labels in the English CoNLL-03. Our effort adds a layer of entity linking annotation both for better explainability of NER labels and as additional safeguard of annotation quality. Our experimental evaluation finds not only that state-of-the-art approaches reach significantly higher F1-scores (97.1%) on our data, but crucially that the share of correct predictions falsely counted as errors due to annotation noise drops from 47% to 6%. This indicates that our resource is well suited to analyze the remaining errors made by state-of-the-art models, and that the theoretical upper bound even on high resource, coarse-grained NER is not yet reached.\nTo facilitate such analysis, we make CleanCoNLL publicly available to the research community.", "keywords": "Dataset Relabeling and Evaluation;Label Error Detection and Correction;Named Entity Recognition;CoNLL-03;Entity Linking", "primary_area": "", "supplementary_material": "", "author": "Susanna R\u00fccker;Alan Akbik", "authorids": "~Susanna_R\u00fccker1;~Alan_Akbik2", "gender": "F;M", "homepage": ";https://alanakbik.github.io/", "dblp": "264/9489;127/0198", "google_scholar": "https://scholar.google.de/citations?hl=de;adKmg3IAAAAJ", "or_profile": "~Susanna_R\u00fccker1;~Alan_Akbik2", "aff": "Humboldt-Universit\u00e4t zu Berlin;Humboldt Universit\u00e4t Berlin", "aff_domain": "hu-berlin.de;hu-berlin.de", "position": "PhD student;Associate Professor", "bibtex": "@inproceedings{\nr{\\\"u}cker2023cleanconll,\ntitle={CleanCo{NLL}: A Nearly Noise-Free Named Entity Recognition Dataset},\nauthor={Susanna R{\\\"u}cker and Alan Akbik},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=TqIDmoIzLT}\n}", "github": "", "project": "", "reviewers": "NcfC;xzrG;L4cT", "site": "https://openreview.net/forum?id=TqIDmoIzLT", "pdf_size": 0, "rating": "5;5;5", "confidence": "3;4;3", "excitement": "4;3;4", "reproducibility": "4;4;4", "correctness": "4;4;4", "rating_avg": 5.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.0, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0009-0005-4685-6669;", "linkedin": ";", "aff_unique_index": "0;0", "aff_unique_norm": "Humboldt University of Berlin", "aff_unique_dep": "", "aff_unique_url": "https://www.hu-berlin.de", "aff_unique_abbr": "HU Berlin", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Berlin", "aff_country_unique_index": "0;0", "aff_country_unique": "Germany" }, { "id": "TtQfZwf5s5", "title": "MT2: Towards a Multi-Task Machine Translation Model with Translation-Specific 
In-Context Learning", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Sentence-level translation, document-level translation, translation memory, and terminology constrained translation play an important role in machine translation. Most of the previous work uses separate models or methods to solve these tasks, which is not conducive to knowledge transfer of different tasks and increases the complexity of system construction. In this work, we explore the potential of pre-trained language model in machine translation tasks and propose a Multi-Task Machine Translation (MT2) model to integrate these translation tasks. We design a novel translation-specific In-Context Learning (ICL) paradigm for model training, in which all of the translation tasks can be modeled as context-learning tasks that integrate contextual information for performance improvement. Specifically, we propose a retrieval and alignment method to obtain a large scale context-enhancement training data, then we train the model in an in-context learning manner. Furthermore, we adopt two context-dependent training strategies to encourage the model to better understand and utilize contextual information for translation.\nExtensive experiments on translation memory, terminology constrained translation, document-level translation, and few-shot domain-adaptation tasks demonstrate the superior performance of our model, verifying the effectiveness of our proposed approach.", "keywords": "Machine Translation;In-Context Learning", "primary_area": "", "supplementary_material": "", "author": "Chunyou Li;Mingtong Liu;Hongxiao Zhang;Yufeng Chen;Jinan Xu;Ming Zhou", "authorids": "~Chunyou_Li1;~Mingtong_Liu1;~Hongxiao_Zhang1;~Yufeng_Chen1;~Jinan_Xu1;~Ming_Zhou5", "gender": ";M;F;F;M;M", "homepage": ";https://www.researchgate.net/profile/Mingtong-Liu;https://github.com/zzzxiaohong;;;http://faculty.bjtu.edu.cn/8300/", "dblp": ";;;64/5715;;67/3124", "google_scholar": ";;STZ6nF4AAAAJ;;a0w5c0gAAAAJ;wMuW0W4AAAAJ", "or_profile": "~Chunyou_Li1;~Mingtong_Liu1;~Hongxiao_Zhang1;~Yufeng_Chen1;~Ming_Zhou5;~Xu_Jinan1", "aff": "Beijing Jiaotong University;Sinovation Ventures;Beijing Jiaotong University;Beijing jiaotong univercity;Sinovation Ventures;Beijing Jiaotong University", "aff_domain": "bjtu.edu.cn;chuangxin.com;bjtu.edu.cn;bjtu.edu.cn;chuangxin.com;bjtu.edu.cn", "position": "MS student;Researcher;MS student;Assistant Professor;Principal Researcher;Full Professor", "bibtex": "@inproceedings{\nli2023mt,\ntitle={{MT}2: Towards a Multi-Task Machine Translation Model with Translation-Specific In-Context Learning},\nauthor={Chunyou Li and Mingtong Liu and Hongxiao Zhang and Yufeng Chen and Jinan Xu and Ming Zhou},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=TtQfZwf5s5}\n}", "github": "", "project": "", "reviewers": "qFRK;7odN;1ZMP;afzn", "site": "https://openreview.net/forum?id=TtQfZwf5s5", "pdf_size": 0, "rating": "5;5;5;5", "confidence": "3;4;4;4", "excitement": "4;3;4;4", "reproducibility": "4;3;4;4", "correctness": "4;3;4;4", "rating_avg": 5.0, "confidence_avg": 3.75, "excitement_avg": 3.75, "reproducibility_avg": 3.75, "correctness_avg": 3.75, "replies_avg": 14, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-3139-1812;;;;;", "linkedin": ";;;;;jinan-xu-3544b137/", "aff_unique_index": "0;1;0;0;1;0", "aff_unique_norm": "Beijing Jiao Tong University;Sinovation Ventures", "aff_unique_dep": 
";", "aff_unique_url": "http://www.njtu.edu.cn/en;https://www.sinovationventures.com", "aff_unique_abbr": "BJTU;Sinovation Ventures", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "TubO0kgAeL", "title": "This is not a Dataset: A Large Negation Benchmark to Challenge Large Language Models", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Although large language models (LLMs) have apparently acquired a certain level of grammatical knowledge and the ability to make generalizations, they fail to interpret negation, a crucial step in Natural Language Processing. We try to clarify the reasons for the sub-optimal performance of LLMs understanding negation. We introduce a large semi-automatically generated dataset of circa 400,000 descriptive sentences about commonsense knowledge that can be true or false in which negation is present in about 2/3 of the corpus in different forms. We have used our dataset with the largest available open LLMs in a zero-shot approach to grasp their generalization and inference capability and we have also fine-tuned some of the models to assess whether the understanding of negation can be trained. Our findings show that, while LLMs are proficient at classifying affirmative sentences, they struggle with negative sentences and lack a deep understanding of negation, often relying on superficial cues. Although fine-tuning the models on negative sentences improves their performance, the lack of generalization in handling negation is persistent, highlighting the ongoing challenges of LLMs regarding negation understanding and generalization. The dataset and code are publicly available.", "keywords": "negation;dataset;LLM;commonsense;evaluation;foundation models;WordNet;real-word knowledge;Large Language models", "primary_area": "", "supplementary_material": "", "author": "Iker Garc\u00eda-Ferrero;Bego\u00f1a Altuna;Javier Alvez;Itziar Gonzalez-Dios;German Rigau", "authorids": "~Iker_Garc\u00eda-Ferrero1;~Bego\u00f1a_Altuna1;~Javier_Alvez1;~Itziar_Gonzalez-Dios1;~German_Rigau2", "gender": "M;M;F;M;", "homepage": "https://ikergarcia1996.github.io/Iker-Garcia-Ferrero/;https://adimen.si.ehu.es/~javier/;http://www.ixa.eus/node/66?language=en;https://adimen.si.ehu.es/~rigau/;", "dblp": "305/9880;39/543.html;127/1203;66/1456.html;", "google_scholar": "https://scholar.google.es/citations?user=yoOzj1MAAAAJ;TY9BSA4AAAAJ;WVHDcW4AAAAJ;3RHckhYAAAAJ;", "or_profile": "~Iker_Garc\u00eda-Ferrero1;~Javier_Alvez1;~Itziar_Gonzalez-Dios1;~German_Rigau2;~Bego\u00f1a_Altuna2", "aff": "University of Pennsylvania;University of the Basque Country UPV/EHU;Universidad del Pa\u00eds Vasco;Universidad del Pa\u00eds Vasco;Universidad del Pa\u00eds Vasco", "aff_domain": "upenn.edu;ehu.eus;ehu.eus;ehu.eus;ehu.eus", "position": "PhD student;Associate Professor;Assistant Professor;Associate Professor;Postdoc", "bibtex": "@inproceedings{\ngarc{\\'\\i}a-ferrero2023this,\ntitle={This is not a Dataset: A Large Negation Benchmark to Challenge Large Language Models},\nauthor={Iker Garc{\\'\\i}a-Ferrero and Bego{\\~n}a Altuna and Javier Alvez and Itziar Gonzalez-Dios and German Rigau},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=TubO0kgAeL}\n}", "github": "", "project": "", "reviewers": "rrGn;jGqB;rCcw", "site": "https://openreview.net/forum?id=TubO0kgAeL", "pdf_size": 0, "rating": "5;5;5", "confidence": "5;3;5", 
"excitement": "4;4;4", "reproducibility": "5;4;5", "correctness": "5;3;4", "rating_avg": 5.0, "confidence_avg": 4.333333333333333, "excitement_avg": 4.0, "reproducibility_avg": 4.666666666666667, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0001-9612-7134;0000-0001-8413-0854;0000-0003-1048-5403;0000-0003-1119-0930;0000-0002-4027-2014", "linkedin": "iker-garc%C3%ADa-ferrero-75343b172/;;itziar-gonzalez-dios/;german-rigau-a4ba3a173/;", "aff_unique_index": "0;1;2;2;2", "aff_unique_norm": "University of Pennsylvania;University of the Basque Country;Universidad del Pa\u00eds Vasco", "aff_unique_dep": ";;", "aff_unique_url": "https://www.upenn.edu;https://www.ehu.eus/en;https://www.ehu.eus/en", "aff_unique_abbr": "UPenn;UPV/EHU;UPV/EHU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1;1", "aff_country_unique": "United States;Spain" }, { "id": "TvTwz12BZN", "title": "Segmented Recurrent Transformer: An Efficient Sequence-to-Sequence Model", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Transformers have shown dominant performance across a range of domains including language and vision. However, their computational cost grows quadratically with the sequence length, making their usage prohibitive for resource-constrained applications. To counter this, our approach is to divide the whole sequence into segments and apply attention to the individual segments. We propose a segmented recurrent transformer (SRformer) that combines segmented (local) attention with recurrent attention. The loss caused by reducing the attention window length is compensated by aggregating information across segments with recurrent attention. SRformer leverages Recurrent Accumulate-and-Fire (RAF) neurons' inherent memory to update the cumulative product of keys and values. The segmented attention and lightweight RAF neurons ensure the efficiency of the proposed transformer. Such an approach leads to models with sequential processing capability at a lower computation/memory cost. We apply the proposed method to T5 and BART transformers. The modified models are tested on summarization datasets including CNN-dailymail, XSUM, ArXiv, and MediaSUM. Notably, using segmented inputs of varied sizes, the proposed model achieves 6-22% higher ROUGE1 scores than a segmented transformer and outperforms other recurrent transformer approaches. 
Furthermore, compared to full attention, the proposed model reduces the computational complexity of cross attention by around 40%.", "keywords": "abstractive summarization;transformers;language models", "primary_area": "", "supplementary_material": "", "author": "Yinghan Long;Sayeed Shafayet Chowdhury;Kaushik Roy", "authorids": "~Yinghan_Long1;~Sayeed_Shafayet_Chowdhury3;~Kaushik_Roy1", "gender": "F;M;M", "homepage": ";;https://engineering.purdue.edu/NRL/Group", "dblp": "249/5914;;r/KaushikRoy", "google_scholar": "xTDv8W0AAAAJ;646ndV4AAAAJ;to4P8KgAAAAJ", "or_profile": "~Yinghan_Long1;~Sayeed_Shafayet_Chowdhury3;~Kaushik_Roy1", "aff": "Purdue University;Purdue University;Purdue University", "aff_domain": "purdue.edu;purdue.edu;purdue.edu", "position": "PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nlong2023segmented,\ntitle={Segmented Recurrent Transformer: An Efficient Sequence-to-Sequence Model},\nauthor={Yinghan Long and Sayeed Shafayet Chowdhury and Kaushik Roy},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=TvTwz12BZN}\n}", "github": "", "project": "", "reviewers": "qsxJ;yFAe;f7Z1", "site": "https://openreview.net/forum?id=TvTwz12BZN", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;3;4", "excitement": "3;3;3", "reproducibility": "4;3;3", "correctness": "4;3;3", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.0, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": "yinghan-long-9949b2179/;;", "aff_unique_index": "0;0;0", "aff_unique_norm": "Purdue University", "aff_unique_dep": "", "aff_unique_url": "https://www.purdue.edu", "aff_unique_abbr": "Purdue", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "TxEV8D0z0r", "title": "trlX: A Framework for Large Scale Reinforcement Learning from Human Feedback", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Reinforcement learning from human feedback (\\textbf{RLHF}) utilizes human feedback to better align large language models with human preferences via online optimization against a learned reward model. Current RLHF paradigms rely on Proximal Policy Optimization (\\textbf{PPO}), which quickly becomes a challenge to implement and scale up to large architectures. To address this difficulty we present the \\textbf{AutoRLHF} library as a feature complete open-source framework for RLHF fine-tuning of models up to and exceeding 70 billion parameters. To do so we implement support for multiple types of distributed training including distributed data parallel, model sharded, as well as tensor, sequential, and pipeline parallelism. Additionally, we implement compute and memory saving features, giving AutoRLHF the flexibility to support users with a wide range of compute resources. This includes offline RL methods like Implicit Language Q Learning (\\textbf{ILQL}) as a compute efficient alternative to PPO. We find offline fine-tuning offers competitive performance relative to online algorithms while being easier to implement, train, and scale. To evaluate our framework we train RLHF models on two separate well-known tasks using publicly available human preference data. 
Models trained with AutoRLHF achieve preference win-rates over baselines at rates comparable to the original works.", "keywords": "RLHF;LLM;Framework", "primary_area": "", "supplementary_material": "", "author": "Alexander Havrilla;Maksym Zhuravinskyi;Duy Van Phung;Aman Tiwari;Jonathan Tow;Stella Biderman;Quentin Gregory Anthony;Louis Castricato", "authorids": "~Alexander_Havrilla2;~Maksym_Zhuravinskyi1;~Duy_Van_Phung1;~Aman_Tiwari1;~Jonathan_Tow1;~Stella_Biderman1;~Quentin_Gregory_Anthony1;~Louis_Castricato3", "gender": "M;M;M;Non-Binary;M;F;M;M", "homepage": "https://dahoas.github.io/;https://morphed.space;https://phungvanduy.github.io/;https://carper.ai;;http://www.stellabiderman.com;https://quentin-anthony.github.io/;http://louiscatricato.com", "dblp": ";362/8201;278/1784.html;;;239/5641;;", "google_scholar": ";BLXPkDEAAAAJ;0dVSWTkAAAAJ;;;bO7H0DAAAAAJ;https://scholar.google.com/citations?hl=en;WrUnrz4AAAAJ", "or_profile": "~Alexander_Havrilla2;~Maksym_Zhuravinskyi1;~Duy_Van_Phung1;~Aman_Tiwari1;~Jonathan_Tow1;~Stella_Biderman1;~Quentin_Gregory_Anthony1;~Louis_Castricato3", "aff": "Georgia Institute of Technology;Stability AI;;Carper;;Booz Allen Hamilton;Ohio State University, Columbus;Brown University", "aff_domain": "gatech.edu;stability.ai;;carper.ai;;boozallen.com;osu.edu;brown.edu", "position": "PhD student;Researcher;;Researcher;;Industry researcher;PhD student;PhD student", "bibtex": "@inproceedings{\nhavrilla2023trlx,\ntitle={trlX: A Framework for Large Scale Reinforcement Learning from Human Feedback},\nauthor={Alexander Havrilla and Maksym Zhuravinskyi and Duy Van Phung and Aman Tiwari and Jonathan Tow and Stella Biderman and Quentin Gregory Anthony and Louis Castricato},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=TxEV8D0z0r}\n}", "github": "", "project": "", "reviewers": "AE8J;A6on;jdVB", "site": "https://openreview.net/forum?id=TxEV8D0z0r", "pdf_size": 0, "rating": "5;5;5", "confidence": "3;2;4", "excitement": "4;4;4", "reproducibility": "4;3;4", "correctness": "4;4;4", "rating_avg": 5.0, "confidence_avg": 3.0, "excitement_avg": 4.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 4.0, "replies_avg": 10, "authors#_avg": 8, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;0000-0001-8228-1042;0000-0002-6823-9080;", "linkedin": ";;duy-phung-ai/;;;stellabiderman;quentin-anthony;", "aff_unique_index": "0;1;2;3;4;5", "aff_unique_norm": "Georgia Institute of Technology;Stability AI;Carper;Booz Allen Hamilton;Ohio State University;Brown University", "aff_unique_dep": ";;;;;", "aff_unique_url": "https://www.gatech.edu;https://stability.ai;;https://www.boozallen.com;https://www.osu.edu;https://www.brown.edu", "aff_unique_abbr": "Georgia Tech;Stability AI;;BAH;OSU;Brown", "aff_campus_unique_index": "1", "aff_campus_unique": ";Columbus", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States;" }, { "id": "U1rj4p5aKa", "title": "Automatic Pronunciation Assessment - A Review", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Pronunciation assessment and its application in computer-aided pronunciation training (CAPT) have seen impressive progress in recent years. With the rapid growth in language processing and deep learning over the past few years, there is a need for an updated review. In this paper, we review methods employed in pronunciation assessment for both phonemic and prosodic. 
We categorize the main challenges observed in prominent research trends, and highlight existing limitations and available resources. This is followed by a discussion of the remaining challenges and possible directions for future work.", "keywords": "computer aided pronunciation training (CAPT);pronunciation assessment;second language learning;pronunciation error detection", "primary_area": "", "supplementary_material": "", "author": "Yassine El Kheir;Ahmed Ali;Shammur Absar Chowdhury", "authorids": "~Yassine_El_Kheir1;~Ahmed_Ali1;~Shammur_Absar_Chowdhury1", "gender": "M;M;F", "homepage": "https://yaselley.github.io/;https://www.hbku.edu.qa/en/staff/dr-ahmed-ali;http://shammur.one", "dblp": ";22/1217;140/2718", "google_scholar": "KATQHwgAAAAJ;t0gYEjAAAAAJ;LkSfdoAAAAAJ", "or_profile": "~Yassine_El_Kheir1;~Ahmed_Ali1;~Shammur_Absar_Chowdhury1", "aff": "KTH Royal Institute of Technology;Qatar Computing Research Institute;Qatar Computing Research Institute", "aff_domain": "kth.se;qcri.com;qcri.com", "position": "MS student;Principal Engineer;Scientist", "bibtex": "@inproceedings{\nkheir2023automatic,\ntitle={Automatic Pronunciation Assessment - A Review},\nauthor={Yassine El Kheir and Ahmed Ali and Shammur Absar Chowdhury},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=U1rj4p5aKa}\n}", "github": "", "project": "", "reviewers": "hbGh;NoL5;YAq7", "site": "https://openreview.net/forum?id=U1rj4p5aKa", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;2;3", "excitement": "3;2;4", "reproducibility": "", "correctness": "4;3;4", "rating_avg": 3.0, "confidence_avg": 3.0, "excitement_avg": 3.0, "reproducibility_avg": 0, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0002-1331-2543", "linkedin": ";;shammurchowdhury/", "aff_unique_index": "0;1;1", "aff_unique_norm": "KTH Royal Institute of Technology;Qatar Computing Research Institute", "aff_unique_dep": ";", "aff_unique_url": "https://www.kth.se;https://www.qcri.org", "aff_unique_abbr": "KTH;QCRI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1", "aff_country_unique": "Sweden;Qatar" }, { "id": "U6SEUS76IE", "title": "FedID: Federated Interactive Distillation for Large-Scale Pretraining Language Models", "track": "main", "status": "Long Main", "tldr": "", "abstract": "The growing concerns and regulations surrounding the protection of user data privacy have necessitated decentralized training paradigms. To this end, federated learning (FL) is widely studied in user-related natural language processing (NLP). However, it suffers from several critical limitations including extensive communication overhead, inability to handle heterogeneity, and vulnerability to white-box inference attacks. Federated distillation (FD) is proposed to alleviate these limitations, but its performance is degraded by confirmation bias. To tackle this issue, we propose Federated Interactive Distillation (FedID), which utilizes a small amount of labeled data retained by the server to further rectify the local models during knowledge transfer. Additionally, based on the GLUE benchmark, we develop a benchmarking framework across multiple tasks with diverse data distributions to contribute to the research of FD in the NLP community. 
Experiments show that our proposed FedID framework achieves the best results in homogeneous and heterogeneous federated scenarios. The code for this paper is available at: https://github.com/maxinge8698/FedID.", "keywords": "decentralized learning;federated learning;federated distillation;pre-trained language model", "primary_area": "", "supplementary_material": "", "author": "Xinge Ma;Jiangming Liu;Jin Wang;Xuejie Zhang", "authorids": "~Xinge_Ma1;~Jiangming_Liu1;~Jin_Wang7;~Xuejie_Zhang1", "gender": "M;M;M;M", "homepage": "https://github.com/maxinge8698;https://leoncrashcode.github.io/;http://www.ise.ynu.edu.cn/teacher/973;", "dblp": "297/8939;154/8222;92/1375-8;68/3522-2.html", "google_scholar": "d116clgAAAAJ;8kOZVRsAAAAJ;ZVBB1eAAAAAJ;", "or_profile": "~Xinge_Ma1;~Jiangming_Liu1;~Jin_Wang7;~Xuejie_Zhang1", "aff": "Yunnan University;Tencent;Yunnan University;Yunnan University", "aff_domain": "ynu.edu.cn;tencent.com;ynu.edu.cn;ynu.edu.cn", "position": "MS student;Researcher;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nma2023fedid,\ntitle={Fed{ID}: Federated Interactive Distillation for Large-Scale Pretraining Language Models},\nauthor={Xinge Ma and Jiangming Liu and Jin Wang and Xuejie Zhang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=U6SEUS76IE}\n}", "github": "", "project": "", "reviewers": "VAtG;aUk1;kBJp", "site": "https://openreview.net/forum?id=U6SEUS76IE", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;3", "excitement": "4;4;3", "reproducibility": "4;4;3", "correctness": "5;4;3", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 4.0, "replies_avg": 10, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0003-4416-3840;;;", "linkedin": ";;;", "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Yunnan University;Tencent", "aff_unique_dep": ";Tencent Holdings Limited", "aff_unique_url": "http://www.ynu.edu.cn;https://www.tencent.com", "aff_unique_abbr": "YNU;Tencent", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "U78nBY8hRi", "title": "DALE: Generative Data Augmentation for Low-Resource Legal NLP", "track": "main", "status": "Long Main", "tldr": "", "abstract": "We present DALE, a novel and effective generative Data Augmentation framework for low-resource LEgal NLP. DALE addresses the challenges existing frameworks pose in generating effective data augmentations of legal documents - legal language, with its specialized vocabulary and complex semantics, morphology, and syntax, does not benefit from data augmentations that merely rephrase the source sentence. To address this, DALE, built on an Encoder-Decoder Language Model, is pre-trained on a novel unsupervised text denoising objective based on selective masking - our masking strategy exploits the domain-specific language characteristics of templatized legal documents to mask collocated spans of text. Denoising these spans help DALE acquire broad legal knowledge and develop the ability to generate coherent and diverse augmentations with novel contexts. Finally, DALE performs conditional generation to generate synthetic augmentations for low-resource Legal NLP tasks. We demonstrate the effectiveness of DALE on 13 datasets spanning 6 tasks and 4 low-resource settings. 
DALE outperforms all our baselines, including LLMs, qualitatively and quantitatively, with absolute improvements of 1%-50%.", "keywords": "legal;low-resource;augmentation;generation;efficient", "primary_area": "", "supplementary_material": "", "author": "Sreyan Ghosh;Chandra Kiran Reddy Evuru;Sonal Kumar;Ramaneswaran S;S Sakshi;Utkarsh Tyagi;Dinesh Manocha", "authorids": "~Sreyan_Ghosh1;~Chandra_Kiran_Reddy_Evuru1;~Sonal_Kumar1;~Ramaneswaran_S1;~S_Sakshi1;~Utkarsh_Tyagi1;~Dinesh_Manocha3", "gender": "M;M;M;M;F;M;M", "homepage": "https://sreyan88.github.io/;;https://sonalkum.github.io;;https://sakshi113.github.io/;https://utkarsh4430.github.io;https://www.cs.umd.edu/people/dmanocha", "dblp": "173/5626;355/1221;;;;286/2046;m/DineshManocha", "google_scholar": "5HKZJHAAAAAJ;;jiJ2DcEAAAAJ;YIhHxbwAAAAJ;F_-YNVAAAAAJ;https://scholar.google.co.in/citations?user=RLjKaTwAAAAJ;X08l_4IAAAAJ", "or_profile": "~Sreyan_Ghosh1;~Chandra_Kiran_Reddy_Evuru1;~Sonal_Kumar1;~Ramaneswaran_S1;~S_Sakshi1;~Utkarsh_Tyagi1;~Dinesh_Manocha3", "aff": "University of Maryland, College Park;University of Maryland, College Park;University of Maryland, College Park;NVIDIA;;University of Maryland, College Park;University of Maryland, College Park", "aff_domain": "umd.edu;umd.edu;umd.edu;nvidia.com;;umd.edu;umd.edu", "position": "PhD student;MS student;PhD student;Researcher;;MS student;Professor", "bibtex": "@inproceedings{\nghosh2023dale,\ntitle={{DALE}: Generative Data Augmentation for Low-Resource Legal {NLP}},\nauthor={Sreyan Ghosh and Chandra Kiran Reddy Evuru and Sonal Kumar and Ramaneswaran S and S Sakshi and Utkarsh Tyagi and Dinesh Manocha},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=U78nBY8hRi}\n}", "github": "", "project": "", "reviewers": "DwnS;9xzW;fzBD", "site": "https://openreview.net/forum?id=U78nBY8hRi", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;5", "excitement": "4;4;4", "reproducibility": "4;4;4", "correctness": "3;4;3", "rating_avg": 3.0, "confidence_avg": 4.333333333333333, "excitement_avg": 4.0, "reproducibility_avg": 4.0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;;0000-0001-7047-9801", "linkedin": ";ckevuru/;realsonalkumar/;;sakshi113/;utkarsh4430/;dinesh-manocha-2311846", "aff_unique_index": "0;0;0;1;0;0", "aff_unique_norm": "University of Maryland;NVIDIA", "aff_unique_dep": ";NVIDIA Corporation", "aff_unique_url": "https://www/umd.edu;https://www.nvidia.com", "aff_unique_abbr": "UMD;NVIDIA", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "College Park;", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "U7mWHBoTfb", "title": "Improving Language Models\u2019 Meaning Understanding and Consistency by Learning Conceptual Roles from Dictionary", "track": "main", "status": "Long Main", "tldr": "", "abstract": "The non-humanlike behaviour of contemporary pre-trained language models (PLMs) is a leading cause undermining their trustworthiness. \nA striking phenomenon of such faulty behaviours is the generation of inconsistent predictions, which produces logically contradictory results, such as generating different predictions for texts delivering the same meaning or violating logical properties. Previous studies exploited data augmentation or implemented specialised loss functions to alleviate the issue. 
However, their usage is limited, because they consume expensive training resources for large-sized PLMs and can only handle a certain consistency type. To this end, we propose a practical approach that alleviates the inconsistent behaviour issue by fundamentally improving PLMs' meaning awareness. Based on the conceptual role theory, our method allows PLMs to capture accurate meaning by learning precise interrelationships between concepts from word-definition pairs in a dictionary. Next, we propose an efficient parameter integration technique that updates only a few additional parameters to combine the learned interrelationship with PLMs' pre-trained knowledge. Our experimental results reveal that the approach can concurrently improve multiple types of consistency, enables efficient knowledge integration, and easily applies to other languages.", "keywords": "Language Model;Consistency;Conceptual Role Theory", "primary_area": "", "supplementary_material": "", "author": "Myeongjun Erik Jang;Thomas Lukasiewicz", "authorids": "~Myeongjun_Erik_Jang1;~Thomas_Lukasiewicz2", "gender": ";M", "homepage": "https://www.cs.ox.ac.uk/people/thomas.lukasiewicz/;", "dblp": "l/ThomasLukasiewicz;215/3446", "google_scholar": "arjucpEAAAAJ;https://scholar.google.co.kr/citations?user=yqQVRNIAAAAJ", "or_profile": "~Thomas_Lukasiewicz2;~Myeongjun_Jang1", "aff": "Department of Computer Science, University of Oxford;University of Oxford", "aff_domain": "cs.ox.ac.uk;ox.ac.uk", "position": "Full Professor;PhD student", "bibtex": "@inproceedings{\njang2023improving,\ntitle={Improving Language Models{\\textquoteright} Meaning Understanding and Consistency by Learning Conceptual Roles from Dictionary},\nauthor={Myeongjun Erik Jang and Thomas Lukasiewicz},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=U7mWHBoTfb}\n}", "github": "", "project": "", "reviewers": "zTcT;DNHZ;37Br", "site": "https://openreview.net/forum?id=U7mWHBoTfb", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;3;3", "excitement": "4;4;2", "reproducibility": "3;3;5", "correctness": "4;4;3", "rating_avg": 4.0, "confidence_avg": 3.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";", "linkedin": ";mj-jang/", "aff_unique_index": "0;0", "aff_unique_norm": "University of Oxford", "aff_unique_dep": "Department of Computer Science", "aff_unique_url": "https://www.ox.ac.uk", "aff_unique_abbr": "Oxford", "aff_campus_unique_index": "0", "aff_campus_unique": "Oxford;", "aff_country_unique_index": "0;0", "aff_country_unique": "United Kingdom" }, { "id": "U8PL5FzvrV", "title": "Improving Dialogue Discourse Parsing via Reply-to Structures of Addressee Recognition", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Dialogue discourse parsing aims to reflect the relation-based structure of dialogue by establishing discourse links according to discourse relations. To alleviate data sparsity, previous studies have adopted multitasking approaches to jointly learn dialogue discourse parsing with related tasks (e.g., reading comprehension) that require additional human annotation, thus limiting their generality. In this paper, we propose a multitasking framework that integrates dialogue discourse parsing with its neighboring task addressee recognition. 
Addressee recognition reveals the reply-to structure that partially overlaps with the relation-based structure, which can be exploited to facilitate relation-based structure learning. To this end, we first proposed a reinforcement learning agent to identify training examples from addressee recognition that are most helpful for dialog discourse parsing. Then, a task-aware structure transformer is designed to capture the shared and private dialogue structure of different tasks, thereby further promoting dialogue discourse parsing. Experimental results on both the Molweni and STAC datasets show that our proposed method can outperform the SOTA baselines. The code will be available at https://github.com/yxfanSuda/RLTST.", "keywords": "Dialogue Discourse Parsing;Reinforcement Learning;Task-aware Structure Transformer", "primary_area": "", "supplementary_material": "", "author": "Yaxin FAN;Feng Jiang;PEIFENG LI;Fang Kong;Qiaoming Zhu", "authorids": "~Yaxin_FAN2;~Feng_Jiang4;~PEIFENG_LI2;~Fang_Kong1;~Qiaoming_Zhu1", "gender": "M;M;M;F;M", "homepage": "https://fanyaxin.top/;;http://web.suda.edu.cn/pfli/;;https://scst.suda.edu.cn/0f/a2/c11250a528290/page.htm", "dblp": "234/9447;75/1693-7;00/1996.html;48/7676-1.html;28/1279", "google_scholar": "N0oiLQwAAAAJ;zrxpiWYAAAAJ;NY3GrVIAAAAJ;;6BXGJK8AAAAJ", "or_profile": "~Yaxin_FAN2;~Feng_Jiang4;~PEIFENG_LI2;~Fang_Kong1;~Qiaoming_Zhu1", "aff": "Soochow University;The Chinese University of Hong Kong, Shenzhen;Soochow University, China;Soochow University;Soochow University", "aff_domain": "suda.edu.cn;cuhk.edu.cn;suda.edu.cn;suda.edu.cn;suda.edu.cn", "position": "PhD student;Postdoc;Full Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nfan2023improving,\ntitle={Improving Dialogue Discourse Parsing via Reply-to Structures of Addressee Recognition},\nauthor={Yaxin FAN and Feng Jiang and PEIFENG LI and Fang Kong and Qiaoming Zhu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=U8PL5FzvrV}\n}", "github": "", "project": "", "reviewers": "hgfu;AseC;nH36", "site": "https://openreview.net/forum?id=U8PL5FzvrV", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;3;3", "excitement": "4;4;3", "reproducibility": "4;4;4", "correctness": "4;4;3", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.0, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-3465-311X;0000-0003-4850-3128;;0000-0002-2708-8976", "linkedin": ";;;;", "aff_unique_index": "0;1;0;0;0", "aff_unique_norm": "Soochow University;Chinese University of Hong Kong", "aff_unique_dep": ";", "aff_unique_url": "https://www.soochow.edu.cn;https://www.cuhk.edu.cn", "aff_unique_abbr": "Soochow U;CUHK", "aff_campus_unique_index": "1", "aff_campus_unique": ";Shenzhen", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "UECSdvL8U7", "title": "Evaluating Bias and Fairness in Gender-Neutral Pretrained Vision-and-Language Models", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Pretrained machine learning models are known to perpetuate and even amplify existing biases in data, which can result in unfair outcomes that ultimately impact user experience. 
Therefore, it is crucial to understand the mechanisms behind those prejudicial biases to ensure that model performance does not result in discriminatory behaviour toward certain groups or populations. In this work, we define gender bias as our case study. We quantify bias amplification in pretraining and after fine-tuning on three families of vision-and-language models. We investigate the connection, if any, between the two learning stages, and evaluate how bias amplification reflects on model performance. Overall, we find that bias amplification in pretraining and after fine-tuning are independent. We then examine the effect of continued pretraining on gender-neutral data, finding that this reduces group disparities, i.e., promotes fairness, on VQAv2 and retrieval tasks without significantly compromising task performance.", "keywords": "bias;fairness;multimodal", "primary_area": "", "supplementary_material": "", "author": "Laura Cabello;Emanuele Bugliarello;Stephanie Brandl;Desmond Elliott", "authorids": "~Laura_Cabello1;~Emanuele_Bugliarello1;~Stephanie_Brandl1;~Desmond_Elliott1", "gender": ";M;F;", "homepage": ";http://e-bug.github.io/;https://stephaniebrandl.github.io;", "dblp": ";241/9497;194/9380;46/7536", "google_scholar": ";9yc1aXYAAAAJ;eCDiVTMAAAAJ;", "or_profile": "~Laura_Cabello1;~Emanuele_Bugliarello1;~Stephanie_Brandl1;~Desmond_Elliott1", "aff": ";University of Copenhagen;K\u00f8benhavns Universitet;University of Copenhagen", "aff_domain": ";ku.dk;di.ku.dk;ku.dk", "position": ";PhD student;Postdoc;Assistant Professor", "bibtex": "@inproceedings{\ncabello2023evaluating,\ntitle={Evaluating Bias and Fairness in Gender-Neutral Pretrained Vision-and-Language Models},\nauthor={Laura Cabello and Emanuele Bugliarello and Stephanie Brandl and Desmond Elliott},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=UECSdvL8U7}\n}", "github": "", "project": "", "reviewers": "cgUh;xmLY;HKVB;mmdc", "site": "https://openreview.net/forum?id=UECSdvL8U7", "pdf_size": 0, "rating": "5;5;5;5", "confidence": "3;4;3;4", "excitement": "3;3;4;3", "reproducibility": "4;3;4;4", "correctness": "4;3;4;4", "rating_avg": 5.0, "confidence_avg": 3.5, "excitement_avg": 3.25, "reproducibility_avg": 3.75, "correctness_avg": 3.75, "replies_avg": 14, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-2999-7081;;", "linkedin": ";emanuelebugliarello/;;", "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Copenhagen", "aff_unique_dep": "", "aff_unique_url": "https://www.ku.dk", "aff_unique_abbr": "UCPH", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Denmark" }, { "id": "UEx5dZqXvr", "title": "Scaling Law for Document Neural Machine Translation", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "The scaling laws of language models have played a significant role in advancing large language models. In order to promote the development of document translation, we systematically examine the scaling laws in this field. In this paper, we carry out an in-depth analysis of the influence of three factors on translation quality: model scale, data scale, and sequence length. Our findings reveal that increasing sequence length effectively enhances model performance when model size is limited. 
However, sequence length cannot be infinitely extended; it must be suitably aligned with the model scale and corpus volume. Further research shows that providing adequate context can effectively enhance the translation quality of a document's initial portion. Nonetheless, exposure bias remains the primary factor hindering further improvement in translation quality for the latter half of the document.", "keywords": "Document Machine Translation;NMT;Scaling", "primary_area": "", "supplementary_material": "", "author": "Zhang Zhuocheng;Shuhao Gu;Min zhang;Yang Feng", "authorids": "~Zhang_Zhuocheng1;~Shuhao_Gu1;~Min_zhang14;~Yang_Feng4", "gender": "M;M;M;", "homepage": "https://github.com/salvation-z;;https://zhangmin-nlp-ai.github.io/;http://people.ucas.edu.cn/~yangfeng?language=en", "dblp": ";239/5079;83/5342-?;07/6095-4.html", "google_scholar": ";PED7pDIAAAAJ;https://scholar.google.com/citations?;https://scholar.google.com/citations?hl=en", "or_profile": "~Zhang_Zhuocheng1;~Shuhao_Gu1;~Min_zhang14;~Yang_Feng4", "aff": "Institute of Computing Technology, Chinese Academy of Sciences;, Chinese Academy of Sciences;Harbin Institute of Technology;Institute of Computing Technology, Chinese Academy of Sciences", "aff_domain": "ict.ac.cn;ict.ac.cn;hit.edu.cn;ict.ac.cn", "position": "PhD student;PhD student;Full Professor;Full Professor", "bibtex": "@inproceedings{\nzhuocheng2023scaling,\ntitle={Scaling Law for Document Neural Machine Translation},\nauthor={Zhang Zhuocheng and Shuhao Gu and Min zhang and Yang Feng},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=UEx5dZqXvr}\n}", "github": "", "project": "", "reviewers": "zccM;4ECh;38v3", "site": "https://openreview.net/forum?id=UEx5dZqXvr", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;4;3", "excitement": "3;3;3", "reproducibility": "5;4;3", "correctness": "4;4;3", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.0, "reproducibility_avg": 4.0, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0002-3895-5510;", "linkedin": ";;;", "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Chinese Academy of Sciences;Harbin Institute of Technology", "aff_unique_dep": "Institute of Computing Technology;", "aff_unique_url": "http://www.ict.ac.cn;http://www.hit.edu.cn/", "aff_unique_abbr": "CAS;HIT", "aff_campus_unique_index": "1", "aff_campus_unique": ";Harbin", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "UEzKGW4U39", "title": "Isotropy-Enhanced Conditional Masked Language Models", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Non-autoregressive models have been widely used for various text generation tasks to accelerate the inference process but at the cost of generation quality to some extent. To achieve a good balance between inference speedup and generation quality, iterative NAR models like CMLM and Disco are proposed. Researchers have made much follow-up progress based on them, and some recent iterative models can achieve very promising performance while maintaining significant speedup. In this paper, we give more insights into iterative NAR models by exploring the anisotropic problem, i.e., the representations of distinct predicted target tokens are similar and indiscriminative. 
Upon the confirmation of the anisotropic problem in iterative NAR models, we first analyze the effectiveness of the contrastive learning method and further propose the Look Neighbors strategy to enhance the learning of token representations during training. Experiments on 4 WMT datasets show that our methods consistently improve the performance as well as alleviate the anisotropic problem of the conditional masked language model, even outperforming the current SoTA result on WMT14 EN $\\rightarrow$ DE.", "keywords": "Non-autoregressive;Anisotropic Problem;Neural Machine Translation;Natural Language Processing", "primary_area": "", "supplementary_material": "", "author": "Pei Guo;yisheng xiao;Juntao Li;Yixin Ji;Min Zhang", "authorids": "~Pei_Guo3;~yisheng_xiao1;~Juntao_Li2;~Yixin_Ji2;~Min_Zhang9", "gender": ";M;M;M;M", "homepage": "https://github.com/AllForward;https://github.com/xysnlp;https://lijuntaopku.github.io/;https://github.com/Dereck0602;https://zhangmin-nlp-ai.github.io/", "dblp": "180/8507;318/9066;;;83/5342-5", "google_scholar": "https://scholar.google.com.hk/citations?user=d_B96V0AAAAJ;https://scholar.google.com.hk/citations?user=WS2_XwoAAAAJ;sZSygsYAAAAJ;I3UQhtIAAAAJ;https://scholar.google.com/citations?hl=zh-CN", "or_profile": "~Pei_Guo3;~yisheng_xiao1;~Juntao_Li2;~Yixin_Ji2;~Min_Zhang9", "aff": "Soochow University;Suzhou University;Soochow University, China;Soochow University;Harbin Institute of Technology, Shenzhen", "aff_domain": "suda.edu.cn;suda.edu.cn;suda.edu.cn;suda.edu.cn;hit.edu.cn", "position": "MS student;PhD student;Associate Professor;PhD student;Full Professor", "bibtex": "@inproceedings{\nguo2023isotropyenhanced,\ntitle={Isotropy-Enhanced Conditional Masked Language Models},\nauthor={Pei Guo and yisheng xiao and Juntao Li and Yixin Ji and Min Zhang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=UEzKGW4U39}\n}", "github": "", "project": "", "reviewers": "Nvxc;JroP;azMj;ptyC", "site": "https://openreview.net/forum?id=UEzKGW4U39", "pdf_size": 0, "rating": "3;3;3;3", "confidence": "5;4;5;4", "excitement": "3;3;4;3", "reproducibility": "4;3;2;4", "correctness": "3;3;3;3", "rating_avg": 3.0, "confidence_avg": 4.5, "excitement_avg": 3.25, "reproducibility_avg": 3.25, "correctness_avg": 3.0, "replies_avg": 14, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0002-6286-7529;;", "linkedin": ";;;;", "aff_unique_index": "0;1;0;0;2", "aff_unique_norm": "Soochow University;Suzhou University;Harbin Institute of Technology", "aff_unique_dep": ";;", "aff_unique_url": "https://www.soochow.edu.cn;https://www.suda.edu.cn;http://en.hhit.edu.cn/", "aff_unique_abbr": "Soochow U;Suda;HIT", "aff_campus_unique_index": "1", "aff_campus_unique": ";Shenzhen", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "UGd9eSwsvn", "title": "Zero-Shot Data Maps. Efficient Dataset Cartography Without Model Training", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Data Maps (Swayamdipta, et al. 2020) have emerged as a powerful tool for diagnosing large annotated datasets. Given a model fitted on a dataset, these maps show each data instance from the dataset in a 2-dimensional space defined by a) the model's confidence in the true class and b) the variability of this confidence. 
In previous work, confidence and variability are usually computed using training dynamics, which requires the fitting of a strong model to the dataset. In this paper, we introduce a novel approach: Zero-Shot Data Maps based on fast bi-encoder networks. For each data point, confidence on the true label and variability are computed over the members of an ensemble of zero-shot models constructed with different --- but semantically equivalent --- label descriptions, i.e., textual representations of each class in a given label space.\n We conduct a comparative analysis of maps compiled using traditional training dynamics and our proposed zero-shot models across various datasets. Our findings reveal that Zero-Shot Data Maps generally match those produced by the traditional method while delivering up to a 14x speedup. The code is available [here](https://github.com/symanto-research/zeroshot-cartography).", "keywords": "data maps;bi-encoders;zero-shot;training dynamics", "primary_area": "", "supplementary_material": "", "author": "Angelo Basile;Marc Franco-Salvador;Paolo Rosso", "authorids": "~Angelo_Basile1;~Marc_Franco-Salvador1;~Paolo_Rosso1", "gender": ";M;M", "homepage": ";;http://personales.upv.es/prosso/", "dblp": ";127/1231;05/3463", "google_scholar": ";tjhy5T8AAAAJ;https://scholar.google.es/citations?user=HFKXPH8AAAAJ", "or_profile": "~Angelo_Basile1;~Marc_Franco-Salvador1;~Paolo_Rosso1", "aff": ";Symanto;Universitat Polit\u00e8cnica de Val\u00e8ncia", "aff_domain": ";symanto.com;upv.es", "position": ";Chief Scientific Offier;Full Professor", "bibtex": "@inproceedings{\nbasile2023zeroshot,\ntitle={Zero-Shot Data Maps. Efficient Dataset Cartography Without Model Training},\nauthor={Angelo Basile and Marc Franco-Salvador and Paolo Rosso},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=UGd9eSwsvn}\n}", "github": "", "project": "", "reviewers": "b4Jj;t41N;FBi8;CCvW", "site": "https://openreview.net/forum?id=UGd9eSwsvn", "pdf_size": 0, "rating": "3;3;3;3", "confidence": "4;3;3;3", "excitement": "4;2;2;4", "reproducibility": "4;3;4;5", "correctness": "5;2;2;4", "rating_avg": 3.0, "confidence_avg": 3.25, "excitement_avg": 3.0, "reproducibility_avg": 4.0, "correctness_avg": 3.25, "replies_avg": 14, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0002-8922-1242", "linkedin": ";marfrasa/?locale=en_US;paolo-rosso-753b1016/?originalSubdomain=es", "aff_unique_index": "0;1", "aff_unique_norm": "Symanto;Universitat Polit\u00e8cnica de Val\u00e8ncia", "aff_unique_dep": ";", "aff_unique_url": "https://www.symanto.com;https://www.upv.es", "aff_unique_abbr": ";UPV", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "Germany;Spain" }, { "id": "UIIi9hBNW8", "title": "\"You Are An Expert Linguistic Annotator\": Limits of LLMs as Analyzers of Abstract Meaning Representation", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "Large language models (LLMs) demonstrate an amazing proficiency and fluency in the $\\textit{use}$ of language. Does that mean that they have also acquired insightful linguistic knowledge $\\textit{about}$ the language, to an extent that they can serve as an \"expert linguistic annotator\"? 
In this paper, we examine the successes and limitations of the GPT-3, ChatGPT, and GPT-4 models, focusing on the Abstract Meaning Representation (AMR) parsing formalism (Banarescu et al., 2013), which provides rich graphical representations of sentence meaning structure while abstracting away from surface forms. We compare models' analysis of this semantic structure across two settings: 1) direct production of AMR parses based on zero- and few-shot examples, and 2) indirect partial reconstruction of AMR via metalinguistic natural language queries (e.g., ``Identify the primary event of this sentence, and the predicate corresponding to that event.'').\nAcross these settings, we find that models can reliably reproduce the basic format of AMR, as well as some core event, argument, and modifier structure$-$however, model outputs are prone to frequent and major errors, and holistic analysis of parse acceptability shows that even with few-shot demonstrations, models have virtually 0% success in producing fully accurate parses. Eliciting responses in natural language produces similar patterns of errors. Overall, our findings indicate that these models out-of-the-box can accurately identify some core aspects of semantic structure, but there remain key limitations in their ability to support fully accurate semantic analyses or parses.", "keywords": "semantic structure;AMR;linguistic annotation;LLMs;few-shot;zero-shot", "primary_area": "", "supplementary_material": "", "author": "Allyson Ettinger;Jena D. Hwang;Valentina Pyatkin;Chandra Bhagavatula;Yejin Choi", "authorids": "~Allyson_Ettinger1;~Jena_D._Hwang1;~Valentina_Pyatkin1;~Chandra_Bhagavatula1;~Yejin_Choi1", "gender": "F;F;;M;F", "homepage": "https://aetting.github.io;https://jenahwang.github.io/;;https://www.chandrab.page;https://yejinc.github.io/", "dblp": "165/0758;83/10905;;151/3093;89/579-1", "google_scholar": ";9QuMhLgAAAAJ;;AsgHp14AAAAJ;vhP-tlcAAAAJ", "or_profile": "~Allyson_Ettinger1;~Jena_D._Hwang1;~Valentina_Pyatkin1;~Chandra_Bhagavatula1;~Yejin_Choi1", "aff": "University of Chicago;Allen Institute for Artificial Intelligence;;Allen Institute for Artificial Intelligence;Department of Computer Science, University of Washington", "aff_domain": "uchicago.edu;allenai.org;;allenai.org;cs.washington.edu", "position": "Assistant Professor;Researcher;;Researcher;Full Professor", "bibtex": "@inproceedings{\nettinger2023you,\ntitle={''You Are An Expert Linguistic Annotator'': Limits of {LLM}s as Analyzers of Abstract Meaning Representation},\nauthor={Allyson Ettinger and Jena D. 
Hwang and Valentina Pyatkin and Chandra Bhagavatula and Yejin Choi},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=UIIi9hBNW8}\n}", "github": "", "project": "", "reviewers": "4W3E;xh4S;ZiDs", "site": "https://openreview.net/forum?id=UIIi9hBNW8", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;3;3", "excitement": "3;3;3", "reproducibility": "0;3;3", "correctness": "3;3;3", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.0, "reproducibility_avg": 2.0, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;", "linkedin": ";;;;", "aff_unique_index": "0;1;1;2", "aff_unique_norm": "University of Chicago;Allen Institute for Artificial Intelligence;University of Washington", "aff_unique_dep": ";;Department of Computer Science", "aff_unique_url": "https://www.uchicago.edu;https://allenai.org;https://www.washington.edu", "aff_unique_abbr": "UChicago;AI2;UW", "aff_campus_unique_index": "1", "aff_campus_unique": ";Seattle", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "UMywlqrW3n", "title": "Getting MoRE out of Mixture of Language Model Reasoning Experts", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "While recent large language models (LLMs) improve on various question answering (QA) datasets, it remains difficult for a single model to generalize across question types that require distinct reasoning abilities. We provide empirical evidence that state-of-the-art LLMs suffer from poor generalizability on reasoning types beyond those seen in the prompt. To remedy this, we propose a Mixture-of-Reasoning-Experts (MORE) framework that ensembles diverse specialized language models. We specialize the backbone language model with prompts optimized for different reasoning categories, including factual, multihop, mathematical, and commonsense reasoning. Our key insight is to leverage agreement among the specialized experts to select the best answer for each question, or to abstain from answering. This gives MORE higher accuracy than any single specialized model on a collection of 12 QA datasets from four reasoning types. Beyond generalizability, the interpretable design of MORE improves selective question answering results compared to baselines without incorporating inter-expert agreement. This framework is also more interpretable and useful to human consumers of QA outputs. Our human study confirms that presenting expert predictions and the answer selection process helps annotators more accurately calibrate when to trust the system\u2019s output. 
We release all code and data to facilitate future work.", "keywords": "Large Language Models;Reasoning;Prompting;Generalization;Calibration;Interpretability", "primary_area": "", "supplementary_material": "", "author": "Chenglei Si;Weijia Shi;Chen Zhao;Luke Zettlemoyer;Jordan Lee Boyd-Graber", "authorids": "~Chenglei_Si1;~Weijia_Shi1;~Chen_Zhao2;~Luke_Zettlemoyer1;~Jordan_Lee_Boyd-Graber1", "gender": "M;;M;M;M", "homepage": "https://noviscl.github.io/;https://weijiashi.notion.site/;http://umiacs.umd.edu/~chenz/;https://www.cs.washington.edu/people/faculty/lsz/;http://boydgraber.org", "dblp": "251/8778;132/80601;81/3-9;21/6793;57/5950", "google_scholar": "https://scholar.google.com.sg/citations?user=CyKr1q8AAAAJ;https://scholar.google.com/citations?hl=en;zehsvT8AAAAJ;https://scholar.google.com.tw/citations?user=UjpbO6IAAAAJ;BT4XTP4AAAAJ", "or_profile": "~Chenglei_Si1;~Weijia_Shi1;~Chen_Zhao2;~Luke_Zettlemoyer1;~Jordan_Lee_Boyd-Graber1", "aff": "Stanford University;University of Washington, Seattle;New York University;Meta;University of Maryland, College Park", "aff_domain": "stanford.edu;uw.edu;nyu.edu;meta.com;umd.edu", "position": "PhD student;PhD student;Postdoc;Researcher;Associate Professor", "bibtex": "@inproceedings{\nsi2023getting,\ntitle={Getting Mo{RE} out of Mixture of Language Model Reasoning Experts},\nauthor={Chenglei Si and Weijia Shi and Chen Zhao and Luke Zettlemoyer and Jordan Lee Boyd-Graber},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=UMywlqrW3n}\n}", "github": "", "project": "", "reviewers": "MmuJ;G4bF;azTT", "site": "https://openreview.net/forum?id=UMywlqrW3n", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;4", "excitement": "3;3;3", "reproducibility": "4;2;3", "correctness": "4;3;4", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 3.0, "reproducibility_avg": 3.0, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-3200-0000-0011;;;0000-0002-7770-4431", "linkedin": ";weijia-shi-773768112;;luke-zettlemoyer-a0109b226/;jordan-boyd-graber-99a83994", "aff_unique_index": "0;1;2;3;4", "aff_unique_norm": "Stanford University;University of Washington;New York University;Meta;University of Maryland", "aff_unique_dep": ";;;Meta Platforms, Inc.;", "aff_unique_url": "https://www.stanford.edu;https://www.washington.edu;https://www.nyu.edu;https://meta.com;https://www/umd.edu", "aff_unique_abbr": "Stanford;UW;NYU;Meta;UMD", "aff_campus_unique_index": "0;1;3", "aff_campus_unique": "Stanford;Seattle;;College Park", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "UNFR2Y6Xx0", "title": "IDTraffickers: An Authorship Attribution Dataset to link and connect Potential Human-Trafficking Operations on Text Escort Advertisements", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Human trafficking (HT) is a pervasive global issue affecting vulnerable individuals, violating their fundamental human rights. Investigations reveal that a significant number of HT cases are associated with online advertisements (ads), particularly in escort markets. Consequently, identifying and connecting HT vendors has become increasingly challenging for Law Enforcement Agencies (LEAs). 
To address this issue, we introduce IDTraffickers, an extensive dataset consisting of 87,595 text ads and 5,244 vendor labels to enable the verification and identification of potential HT vendors on online escort markets. To establish a benchmark for authorship identification, we train a DeCLUTR-small model, achieving a macro-F1 score of 0.8656 in a closed-set classification environment. Next, we leverage the style representations extracted from the trained classifier to conduct authorship verification, resulting in a mean r-precision score of 0.8852 in an open-set ranking environment. Finally, to encourage further research and ensure responsible data sharing, we plan to release IDTraffickers for the authorship attribution task to researchers under specific conditions, considering the sensitive nature of the data. We believe that the availability of our dataset and benchmarks will empower future researchers to utilize our findings, thereby facilitating the effective linkage of escort ads and the development of more robust approaches for identifying HT indicators.", "keywords": "Human Trafficking;Authorship Attribution;Natural Language Processing;Dataset;Benchmarks", "primary_area": "", "supplementary_material": "", "author": "Vageesh Kumar Saxena;Benjamin Ashpole;Gijs van Dijck;Gerasimos Spanakis", "authorids": "~Vageesh_Kumar_Saxena1;~Benjamin_Ashpole1;~Gijs_van_Dijck1;~Gerasimos_Spanakis1", "gender": ";M;;M", "homepage": ";;https://www.maastrichtuniversity.nl/gijs.vandijck;https://dke.maastrichtuniversity.nl/jerry.spanakis", "dblp": ";;210/4978;43/7739", "google_scholar": "fVLj_hsAAAAJ;;U0IUUBMAAAAJ;https://scholar.google.gr/citations?user=LiUXYVgAAAAJ", "or_profile": "~Vageesh_Kumar_Saxena1;~Benjamin_Ashpole1;~Gijs_van_Dijck1;~Gerasimos_Spanakis1", "aff": "Maastricht University ;;Maastricht University;Maastricht University", "aff_domain": "maastrichtuniversity.nl;;maastrichtuniversity.nl;maastrichtuniversity.nl", "position": "PhD student;;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nsaxena2023idtraffickers,\ntitle={{IDT}raffickers: An Authorship Attribution Dataset to link and connect Potential Human-Trafficking Operations on Text Escort Advertisements},\nauthor={Vageesh Kumar Saxena and Benjamin Ashpole and Gijs van Dijck and Gerasimos Spanakis},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=UNFR2Y6Xx0}\n}", "github": "", "project": "", "reviewers": "iA59;buUC;fd99", "site": "https://openreview.net/forum?id=UNFR2Y6Xx0", "pdf_size": 0, "rating": "5;5;5", "confidence": "3;3;4", "excitement": "4;4;4", "reproducibility": "4;3;4", "correctness": "3;4;4", "rating_avg": 5.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 4.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0003-4102-4415;", "linkedin": ";https://linkedin.com/in/bashpole;gijs-van-dijck-b4245138/;", "aff_unique_index": "0;0;0", "aff_unique_norm": "Maastricht University", "aff_unique_dep": "", "aff_unique_url": "https://www.maastrichtuniversity.nl", "aff_unique_abbr": "MU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Netherlands" }, { "id": "UNvLur0th4", "title": "Finding Common Ground: Annotating and Predicting Common Ground in Spoken Conversations", "track": "main", "status": "Long 
Findings", "tldr": "", "abstract": "When we communicate with other humans, we do not simply generate a sequence of words. Rather, we use our cognitive state (beliefs, desires, intentions) and our model of the audience\u2019s cognitive state to create utterances that affect the audience\u2019s cognitive state in the intended manner. An important part of cognitive state is the common ground, which is the content the speaker believes, and the speaker believes the audience believes, and so on. While much attention has been paid to common ground in cognitive science, there has not been much work in natural language processing. In this paper, we introduce a new annotation and corpus to capture common ground. We then describe some initial experiments extracting propositions from dialog and tracking their status in the common ground from the perspective of each speaker.", "keywords": "Common Ground;Belief Extraction;Corpus Construction;Cognitive state;T5 Language Model", "primary_area": "", "supplementary_material": "", "author": "Magdalena Markowska;Mohammad Taghizadeh;Adil Soubki;Seyed Abolghasem Mirroshandel;Owen Rambow", "authorids": "~Magdalena_Markowska1;~Mohammad_Taghizadeh1;~Adil_Soubki1;~Seyed_Abolghasem_Mirroshandel2;~Owen_Rambow3", "gender": "F;M;;M;M", "homepage": "https://m-markowska.github.io;https://www.youtube.com/@MohammadTaghizadeh;https://nlp.rip;http://owenrambow.com;https://nlp.guilan.ac.ir/mirroshandel/index.html", "dblp": "257/1537;;;55/1330;43/3889", "google_scholar": "ydyFN3EAAAAJ;;;https://scholar.google.com/scholar?hl=en;WGH3eIsAAAAJ", "or_profile": "~Magdalena_Markowska1;~Mohammad_Taghizadeh1;~Adil_Soubki1;~Owen_Rambow3;~seyedabolghasem_mirroshandel1", "aff": "State University of New York at Stony Brook;Daneshjooyar (Persian online educational academy);State University of New York at Stony Brook;Stony Brook University;State University of New York at Stony Brook", "aff_domain": "stonybrook.edu;daneshjooyar.com;cs.stonybrook.edu;stonybrook.edu;stonybrook.edu", "position": "PhD student;Instructor;PhD student;Full Professor;Researcher", "bibtex": "@inproceedings{\nmarkowska2023finding,\ntitle={Finding Common Ground: Annotating and Predicting Common Ground in Spoken Conversations},\nauthor={Magdalena Markowska and Mohammad Taghizadeh and Adil Soubki and Seyed Abolghasem Mirroshandel and Owen Rambow},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=UNvLur0th4}\n}", "github": "", "project": "", "reviewers": "vhRH;R4T2;Vosj;tfoe", "site": "https://openreview.net/forum?id=UNvLur0th4", "pdf_size": 0, "rating": "4;4;4;4", "confidence": "2;4;3;4", "excitement": "3;3;4;3", "reproducibility": "4;3;3;4", "correctness": "4;3;3;3", "rating_avg": 4.0, "confidence_avg": 3.25, "excitement_avg": 3.25, "reproducibility_avg": 3.5, "correctness_avg": 3.25, "replies_avg": 14, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;0000-0001-8853-9112", "linkedin": "magdalena-markowska-497a8b252/;mtaghizadeh/;;;seyed-abolghasem-mirroshandel-1a3a5950", "aff_unique_index": "0;1;0;2;0", "aff_unique_norm": "State University of New York at Stony Brook;Daneshjooyar;Stony Brook University", "aff_unique_dep": ";Persian online educational academy;", "aff_unique_url": "https://www.stonybrook.edu;;https://www.stonybrook.edu", "aff_unique_abbr": "SUNY Stony Brook;;SBU", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Stony Brook;", "aff_country_unique_index": 
"0;1;0;0;0", "aff_country_unique": "United States;Iran" }, { "id": "UQpbq4v8Xi", "title": "Generating Data for Symbolic Language with Large Language Models", "track": "main", "status": "Long Main", "tldr": "", "abstract": "While large language models (LLMs) bring not only performance but also complexity, recent work has started to turn LLMs into data generators rather than task inferencers, where another affordable task model is trained for efficient deployment and inference.\nHowever, such an approach has primarily been applied to natural language tasks, and has not yet been explored for symbolic language tasks with complex structured outputs (e.g., semantic parsing and code generation).\nIn this paper, we propose SymGen which utilizes LLMs\nfor generating various annotation-expensive symbolic language data. \nSymGen consists of an informative prompt to steer generation and an agreement-based verifier to improve data correctness. \nWe conduct extensive experiments on six symbolic language tasks across various settings.\nCompared with the LLMs, we demonstrate the 1%-sized task model can achieve comparable or better performance, largely cutting inference and deployment costs. \nWe also show that generated data with only a few human demonstrations can be as effective as over 10 times the amount of human-annotated data when training the task model, saving a considerable amount of annotation effort.\nSymGen takes a step toward data generation for annotation-expensive complex tasks, and we release the code at URL.", "keywords": "Large Language Model;Data Generation;Symbolic Language;Code Generation", "primary_area": "", "supplementary_material": "", "author": "Jiacheng Ye;Chengzu Li;Lingpeng Kong;Tao Yu", "authorids": "~Jiacheng_Ye2;~Chengzu_Li1;~Lingpeng_Kong1;~Tao_Yu5", "gender": "M;;M;M", "homepage": "https://jiacheng-ye.github.io/;;https://ikekonglp.github.io/;https://taoyds.github.io/", "dblp": ";311/5577;144/7656;67/1014-9", "google_scholar": "gh0CyxgAAAAJ;https://scholar.google.com/citations?hl=en;f1hBi5wAAAAJ;5_Fn5CIAAAAJ", "or_profile": "~Jiacheng_Ye2;~Chengzu_Li1;~Lingpeng_Kong1;~Tao_Yu5", "aff": "University of Hong Kong;University of Cambridge;Department of Computer Science, The University of Hong Kong;The University of Hong Kong", "aff_domain": "hku.hk;cam.ac.uk;cs.hku.hk;hku.hk", "position": "PhD student;MS student;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nye2023generating,\ntitle={Generating Data for Symbolic Language with Large Language Models},\nauthor={Jiacheng Ye and Chengzu Li and Lingpeng Kong and Tao Yu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=UQpbq4v8Xi}\n}", "github": "", "project": "", "reviewers": "9jXq;BFwA;GgSV", "site": "https://openreview.net/forum?id=UQpbq4v8Xi", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;4", "excitement": "3;4;4", "reproducibility": "4;4;4", "correctness": "4;4;4", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.0, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;", "linkedin": ";;;tao-yu-b9b551a5/", "aff_unique_index": "0;1;0;0", "aff_unique_norm": "University of Hong Kong;University of Cambridge", "aff_unique_dep": ";", "aff_unique_url": "https://www.hku.hk;https://www.cam.ac.uk", "aff_unique_abbr": "HKU;Cambridge", "aff_campus_unique_index": "0;1;0;0", 
"aff_campus_unique": "Hong Kong SAR;Cambridge", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "China;United Kingdom" }, { "id": "UTMwcLMhso", "title": "Increasing Probability Mass on Answer Choices Does Not Always Improve Accuracy", "track": "main", "status": "Long Main", "tldr": "", "abstract": "When pretrained language models (LMs) are applied to discriminative tasks such as multiple-choice questions, they place probability mass on vocabulary tokens that aren't among the given answer choices. Spreading probability mass across multiple surface forms with identical meaning (such as \"bath\" and \"bathtub\") is thought to cause an underestimation of a model's true performance, referred to as the \"surface form competition\" (SFC) hypothesis. This has motivated the introduction of various probability normalization methods. However, many core questions remain unanswered. How do we measure SFC? Are there direct ways of reducing it, and does doing so improve task performance?\n\nWe propose a mathematical formalism for SFC which allows us to quantify and bound its impact for the first time. We identify a simple method for reducing it---namely, increasing probability mass on the given answer choices by a) including them in the prompt and b) using in-context learning with even just one example. We show this method eliminates the impact of SFC in the majority of instances. Our experiments on three diverse datasets and six LMs reveal several additional surprising findings. For example, both normalization and prompting methods for reducing SFC can be ineffective or even detrimental to task performance for some LMs. We conclude with practical insights for effectively prompting LMs for multiple-choice tasks.", "keywords": "analysis of language models; probability; surface form competition; multiple-choice tasks; text generation; few-shot prompting", "primary_area": "", "supplementary_material": "", "author": "Sarah Wiegreffe;Matthew Finlayson;Oyvind Tafjord;Peter Clark;Ashish Sabharwal", "authorids": "~Sarah_Wiegreffe1;~Matthew_Finlayson1;~Oyvind_Tafjord2;~Peter_Clark1;~Ashish_Sabharwal1", "gender": ";M;M;M;M", "homepage": ";https://mattf1n.github.io;;https://allenai.org/team/peterc;", "dblp": ";55/3614;178/8640;34/1184;13/154", "google_scholar": ";_ODwk4EAAAAJ;https://scholar.google.com/citations?hl=en;o-5vyEsAAAAJ;7VspfeAAAAAJ", "or_profile": "~Sarah_Wiegreffe1;~Matthew_Finlayson1;~Oyvind_Tafjord2;~Peter_Clark1;~Ashish_Sabharwal1", "aff": ";Allen Institute for Artificial Intelligence;Allen Institute for Artificial Intelligence;Allen Institute for Artificial Intelligence;Allen Institute for AI", "aff_domain": ";allenai.org;allenai.org;allenai.org;allenai.org", "position": ";Researcher;Researcher;Senior Research Manager;Principal Researcher", "bibtex": "@inproceedings{\nwiegreffe2023increasing,\ntitle={Increasing Probability Mass on Answer Choices Does Not Always Improve Accuracy},\nauthor={Sarah Wiegreffe and Matthew Finlayson and Oyvind Tafjord and Peter Clark and Ashish Sabharwal},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=UTMwcLMhso}\n}", "github": "", "project": "", "reviewers": "YTXm;owkT;3J8R;oE9K", "site": "https://openreview.net/forum?id=UTMwcLMhso", "pdf_size": 0, "rating": "4;4;4;4", "confidence": "3;4;3;2", "excitement": "4;3;4;3", "reproducibility": "4;4;4;4", "correctness": "4;4;4;4", "rating_avg": 4.0, "confidence_avg": 3.0, "excitement_avg": 3.5, "reproducibility_avg": 4.0, 
"correctness_avg": 4.0, "replies_avg": 14, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0003-4190-5618;;", "linkedin": ";;;peter-clark-a8b556/;ashish-sabharwal-82a2b661", "aff_unique_index": "0;0;0;1", "aff_unique_norm": "Allen Institute for Artificial Intelligence;Allen Institute for AI", "aff_unique_dep": ";", "aff_unique_url": "https://allenai.org;https://allenai.org", "aff_unique_abbr": "AI2;AI2", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "UVoA0rALMC", "title": "Few-shot Unified Question Answering: Tuning Models or Prompts?", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Question-answering (QA) tasks often investigate specific question types, knowledge domains, or reasoning skills, leading to specialized models catering to specific categories of QA tasks. While recent research has explored the idea of unified QA models, such models are usually explored for high-resource scenarios and require re-training to extend their capabilities. To overcome these drawbacks, the paper explores the potential of two paradigms of tuning, model, and prompts, for unified QA under a low-resource setting. The paper provides an exhaustive analysis of their applicability using 16 QA datasets, revealing that prompt tuning can perform as well as model tuning in a few-shot setting with a good initialization. The study also shows that parameter-sharing results in superior few-shot performance, simple knowledge transfer techniques for prompt initialization can be effective, and prompt tuning achieves a significant performance boost from pre-training in a low-resource regime. The research offers insights into the advantages and limitations of prompt tuning for unified QA in a few-shot setting, contributing to the development of effective and efficient systems in low-resource scenarios.", "keywords": "unified question answering;QA;universal QA;paramter efficient QA;model tuning . 
prompt tuning", "primary_area": "", "supplementary_material": "", "author": "Srijan Bansal;Semih Yavuz;Bo Pang;Meghana Moorthy Bhat;Yingbo Zhou", "authorids": "~Srijan_Bansal1;~Semih_Yavuz1;~Bo_Pang4;~Meghana_Moorthy_Bhat1;~Yingbo_Zhou1", "gender": "M;F;;M;M", "homepage": ";https://github.com/meghu2791;;;", "dblp": ";234/8670;72/8614;16/6344;", "google_scholar": "q-r7dUAAAAAJ;xX1fYq0AAAAJ;H_6RQ7oAAAAJ;s9fNEVEAAAAJ;krh3p8AAAAAJ", "or_profile": "~Srijan_Bansal1;~Meghana_Moorthy_Bhat1;~Yingbo_Zhou1;~Bo_Pang1;~Semih_Yavuz2", "aff": "Carnegie Mellon University;Salesforce Research;Salesforce Research;University of California, Los Angeles;SalesForce.com", "aff_domain": "cmu.edu;salesforce.com;salesforce.com;ucla.edu;salesforce.com", "position": "MS student;Researcher;Research Scientist;PhD student;Research Scientist", "bibtex": "@inproceedings{\nbansal2023fewshot,\ntitle={Few-shot Unified Question Answering: Tuning Models or Prompts?},\nauthor={Srijan Bansal and Semih Yavuz and Bo Pang and Meghana Moorthy Bhat and Yingbo Zhou},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=UVoA0rALMC}\n}", "github": "", "project": "", "reviewers": "Y5aV;6o8a;WoB3", "site": "https://openreview.net/forum?id=UVoA0rALMC", "pdf_size": 0, "rating": "2;2;2", "confidence": "3;3;4", "excitement": "3;4;2", "reproducibility": "4;4;3", "correctness": "3;5;2", "rating_avg": 2.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;", "linkedin": ";;yingbozhou/;;semih-yavuz-4303518b", "aff_unique_index": "0;1;1;2;1", "aff_unique_norm": "Carnegie Mellon University;Salesforce;University of California, Los Angeles", "aff_unique_dep": ";Salesforce Research;", "aff_unique_url": "https://www.cmu.edu;https://research.salesforce.com;https://www.ucla.edu", "aff_unique_abbr": "CMU;Salesforce;UCLA", "aff_campus_unique_index": "1", "aff_campus_unique": ";Los Angeles", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "UXSqUOMwbE", "title": "QA-NatVer: Question Answering for Natural Logic-based Fact Verification", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Fact verification systems assess a claim's veracity based on evidence. An important consideration in designing them is faithfulness, i.e. generating explanations that accurately reflect the reasoning of the model. Recent works have focused on natural logic, which operates directly on natural language by capturing the semantic relation of spans between an aligned claim with its evidence via set-theoretic operators. However, these approaches rely on substantial resources for training, which are only available for high-resource languages. To this end, we propose to use question answering to predict natural logic operators, taking advantage of the generalization capabilities of instruction-tuned language models. Thus, we obviate the need for annotated training data while still relying on a deterministic inference system. In a few-shot setting on FEVER, our approach outperforms the best baseline by 4.3 accuracy points, including a state-of-the-art pre-trained seq2seq natural logic system, as well as a state-of-the-art prompt-based classifier. 
Our system demonstrates its robustness and portability, achieving competitive performance on a counterfactual dataset and surpassing all approaches without further annotation on a Danish verification dataset. A human evaluation indicates that our approach produces more plausible proofs with fewer erroneous natural logic operators than previous natural logic-based systems.", "keywords": "fact-checking;fact extraction and verification;claim verification;natural logic;natural language inference;faithfulness", "primary_area": "", "supplementary_material": "", "author": "Rami Aly;Marek Strong;Andreas Vlachos", "authorids": "~Rami_Aly1;~Marek_Strong1;~Andreas_Vlachos1", "gender": "M;M;M", "homepage": ";https://marekstrong.github.io/;http://andreasvlachos.github.io/", "dblp": "242/8351.html;359/3900.html;18/1071-1", "google_scholar": "dbzGY5YAAAAJ;3TCIX-UAAAAJ;https://scholar.google.es/citations?user=XjWnyM4AAAAJ", "or_profile": "~Rami_Aly1;~Marek_Strong1;~Andreas_Vlachos1", "aff": "Amazon (AWS);University of Cambridge;University of Cambridge", "aff_domain": "amazon.com;cam.ac.uk;cam.ac.uk", "position": "Intern;PhD student;Full Professor", "bibtex": "@inproceedings{\naly2023qanatver,\ntitle={{QA}-NatVer: Question Answering for Natural Logic-based Fact Verification},\nauthor={Rami Aly and Marek Strong and Andreas Vlachos},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=UXSqUOMwbE}\n}", "github": "", "project": "", "reviewers": "5v7q;Esj4;5F8N", "site": "https://openreview.net/forum?id=UXSqUOMwbE", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;3;4", "excitement": "3;4;3", "reproducibility": "3;3;4", "correctness": "4;4;3", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0009-0008-0277-6175;0000-0003-2123-5071", "linkedin": ";marekstrong/;andreas-vlachos-70ab391", "aff_unique_index": "0;1;1", "aff_unique_norm": "Amazon;University of Cambridge", "aff_unique_dep": "Amazon Web Services;", "aff_unique_url": "https://aws.amazon.com;https://www.cam.ac.uk", "aff_unique_abbr": "AWS;Cambridge", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Cambridge", "aff_country_unique_index": "0;1;1", "aff_country_unique": "United States;United Kingdom" }, { "id": "UaZe4SwQF2", "title": "Gender Biases in Automatic Evaluation Metrics for Image Captioning", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Model-based evaluation metrics (e.g., CLIPScore and GPTScore) have demonstrated decent correlations with human judgments in various language generation tasks. However, their impact on fairness remains largely unexplored. It is widely recognized that pretrained models can inadvertently encode societal biases, thus employing these models for evaluation purposes may inadvertently perpetuate and amplify biases. For example, an evaluation metric may favor the caption \u201ca woman is calculating an account book\u201d over \u201ca man is calculating an account book,\u201d even if the image only shows male accountants. In this paper, we conduct a systematic study of gender biases in model-based automatic evaluation metrics for image captioning tasks. 
We start by curating a dataset comprising profession, activity, and object concepts associated with stereotypical gender associations. Then, we demonstrate the negative consequences of using these biased metrics, including the inability to differentiate between biased and unbiased generations, as well as the propagation of biases to generation models through reinforcement learning. Finally, we present a simple and effective way to mitigate the metric bias without hurting the correlations with human judgments. Our dataset and framework lay the foundation for understanding the potential harm of model-based evaluation metrics, and facilitate future works to develop more inclusive evaluation metrics.", "keywords": "Automatic Evaluation Metrics;Fairness", "primary_area": "", "supplementary_material": "", "author": "Haoyi Qiu;Zi-Yi Dou;Tianlu Wang;Asli Celikyilmaz;Nanyun Peng", "authorids": "~Haoyi_Qiu1;~Zi-Yi_Dou1;~Tianlu_Wang1;~Asli_Celikyilmaz1;~Nanyun_Peng1", "gender": "F;;F;F;F", "homepage": "https://haoyiq114.github.io/;https://zdou0830.github.io/;https://tianlu-wang.github.io/;https://asli.us;https://violetpeng.github.io/", "dblp": "348/5711;205/8985;185/5529;15/3724;117/4036", "google_scholar": "https://scholar.google.com/citations?hl=en;RWogNsEAAAAJ;inzQqX8AAAAJ;https://scholar.google.com/citations?hl=en;XxRXvX0AAAAJ", "or_profile": "~Haoyi_Qiu1;~Zi-Yi_Dou1;~Tianlu_Wang1;~Asli_Celikyilmaz1;~Nanyun_Peng1", "aff": "UCLA Computer Science Department, University of California, Los Angeles;University of California, Los Angeles;Meta;FAIR ;University of California, Los Angeles", "aff_domain": "cs.ucla.edu;ucla.edu;meta.com;meta.com;ucla.edu", "position": "MS student;PhD student;Researcher;Principal Researcher;Assistant Professor", "bibtex": "@inproceedings{\nqiu2023gender,\ntitle={Gender Biases in Automatic Evaluation Metrics for Image Captioning},\nauthor={Haoyi Qiu and Zi-Yi Dou and Tianlu Wang and Asli Celikyilmaz and Nanyun Peng},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=UaZe4SwQF2}\n}", "github": "", "project": "", "reviewers": "FLYJ;KXSc;g9rh", "site": "https://openreview.net/forum?id=UaZe4SwQF2", "pdf_size": 0, "rating": "5;5;5", "confidence": "3;2;4", "excitement": "4;3;3", "reproducibility": "4;3;4", "correctness": "4;3;4", "rating_avg": 5.0, "confidence_avg": 3.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;", "linkedin": ";;;aslicelikyilmaz/;", "aff_unique_index": "0;0;1;1;0", "aff_unique_norm": "University of California, Los Angeles;Meta", "aff_unique_dep": "Computer Science Department;Meta Platforms, Inc.", "aff_unique_url": "https://www.ucla.edu;https://meta.com", "aff_unique_abbr": "UCLA;Meta", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Los Angeles;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "Ud2UQ9ZCep", "title": "Open Domain Multi-document Summarization: A Comprehensive Study of Model Brittleness under Retrieval", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Multi-document summarization (MDS) assumes a set of topic-related documents are provided as input. In practice, this document set is not always available; it would need to be retrieved given an information need, i.e. 
a question or topic statement, a setting we dub \"open-domain\" MDS. We study this more challenging setting by formalizing the task and bootstrapping it using existing datasets, retrievers and summarizers. Via extensive automatic and human evaluation, we determine: (1) state-of-the-art summarizers suffer large reductions in performance when applied to open-domain MDS, (2) additional training in the open-domain setting can reduce this sensitivity to imperfect retrieval, and (3) summarizers are insensitive to the retrieval of duplicate documents and the order of retrieved documents, but highly sensitive to other errors, like the retrieval of irrelevant documents. Based on our results, we provide practical guidelines to enable future work on open-domain MDS, e.g. how to choose the number of retrieved documents to summarize. Our results suggest that new retrieval and summarization methods and annotated resources for training and evaluation are necessary for further progress in the open-domain setting.", "keywords": "summarization;multi-document summarization;retrieval;open-domain", "primary_area": "", "supplementary_material": "", "author": "John Michael Giorgi;Luca Soldaini;BO WANG;Gary D. Bader;Kyle Lo;Lucy Lu Wang;Arman Cohan", "authorids": "~John_Michael_Giorgi1;~Luca_Soldaini1;~BO_WANG11;~Gary_D._Bader1;~Kyle_Lo1;~Lucy_Lu_Wang1;~Arman_Cohan1", "gender": "M;Non-Binary;M;;F;M;M", "homepage": "https://github.com/JohnGiorgi;https://soldaini.net;https://wanglab.ai/;https://kyleclo.github.io/;https://llwang.net/;http://www.armancohan.com;https://baderlab.org/", "dblp": ";160/1741;;220/2020;220/2575;160/1727;35/2796", "google_scholar": "TNFEhK4AAAAJ;3KPvwcgAAAAJ;37FDILIAAAAJ;VJS12uMAAAAJ;REBtJOYAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.ca/citations?user=22M9eisAAAAJ", "or_profile": "~John_Michael_Giorgi1;~Luca_Soldaini1;~BO_WANG11;~Kyle_Lo1;~Lucy_Lu_Wang1;~Arman_Cohan1;~Gary_Bader1", "aff": "Toronto University;Allen Institute for Artificial Intelligence;Vector Institute;Allen Institute for Artificial Intelligence;Allen Institute for Artificial Intelligence;Allen Institute for Artificial Intelligence;University of Toronto", "aff_domain": "utoronto.ca;allenai.org;vectorinstitute.ai;allenai.org;allenai.org;allenai.org;utoronto.ca", "position": "PhD student;Researcher;Assistant Professor;Researcher;Researcher;Research Scientist;Full Professor", "bibtex": "@inproceedings{\ngiorgi2023open,\ntitle={Open Domain Multi-document Summarization: A Comprehensive Study of Model Brittleness under Retrieval},\nauthor={John Michael Giorgi and Luca Soldaini and BO WANG and Gary D. 
Bader and Kyle Lo and Lucy Lu Wang and Arman Cohan},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=Ud2UQ9ZCep}\n}", "github": "", "project": "", "reviewers": "ABNP;GCwU;7Smv", "site": "https://openreview.net/forum?id=Ud2UQ9ZCep", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;4", "excitement": "4;2;4", "reproducibility": "3;4;4", "correctness": "3;3;4", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0001-9621-5046;0000-0001-6998-9863;;;0000-0001-8752-6635;;0000-0003-0185-8861", "linkedin": "john-giorgi/;soldni/;;kylelo/;lucylw/;;gary-bader-a08673/", "aff_unique_index": "0;1;2;1;1;1;0", "aff_unique_norm": "University of Toronto;Allen Institute for Artificial Intelligence;Vector Institute", "aff_unique_dep": ";;", "aff_unique_url": "https://www.utoronto.ca;https://allenai.org;https://vectorinstitute.ai/", "aff_unique_abbr": "U of T;AI2;Vector Institute", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;1;1;1;0", "aff_country_unique": "Canada;United States" }, { "id": "Ue9i6qgiCw", "title": "DecipherPref: Analyzing Influential Factors in Human Preference Judgments via GPT-4", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Human preference judgments are pivotal in guiding large language models (LLMs) to produce outputs that align with human values. Human evaluations are also used in summarization tasks to compare outputs from various systems, complementing existing automatic metrics. Despite their significance, however, there has been limited research probing these pairwise or $k$-wise comparisons. The collective impact and relative importance of factors such as output length, informativeness, fluency, and factual consistency are still not well understood. It is also unclear if there are other hidden factors influencing human judgments. In this paper, we conduct an in-depth examination of a collection of pairwise human judgments released by OpenAI. Utilizing the Bradley-Terry-Luce (BTL) model, we reveal the inherent preferences embedded in these human judgments. We find that the most favored factors vary across tasks and genres, whereas the least favored factors tend to be consistent, e.g., outputs are too brief, contain excessive off-focus content or hallucinated facts. 
Our findings have implications on the construction of balanced datasets in human preference evaluations, which is a crucial step in shaping the behaviors of future LLMs.", "keywords": "Large Language Models;Summarization Evaluation;Human Preference Judgments;GPT-4", "primary_area": "", "supplementary_material": "", "author": "Yebowen Hu;Kaiqiang Song;Sangwoo Cho;Xiaoyang Wang;Hassan Foroosh;Fei Liu", "authorids": "~Yebowen_Hu1;~Kaiqiang_Song2;~Sangwoo_Cho1;~Xiaoyang_Wang1;~Hassan_Foroosh3;~Fei_Liu4", "gender": "M;M;;;F;M", "homepage": "https://yebowenhu.github.io/;http://i2u.world/kqsong/;https://sangwoo3.github.io;https://cil.cs.ucf.edu/;https://www.cs.emory.edu/~fliu40/;https://xyang0.github.io/", "dblp": "348/6096;;75/1848;55/1119;64/1350-4.html;81/1832-1", "google_scholar": "AHHMtKAAAAAJ;PHoJwakAAAAJ;T8mGzuoAAAAJ;vNHN42cAAAAJ;22ohn6AAAAAJ;EeppWmkAAAAJ", "or_profile": "~Yebowen_Hu1;~Kaiqiang_Song2;~Sangwoo_Cho1;~Hassan_Foroosh3;~Fei_Liu4;~Xiaoyang_Wang2", "aff": "University of Central Florida;Tencent AI Lab;Tencent AI Lab;University of Central Florida;Emory University;Tencent AI Lab", "aff_domain": "ucf.edu;tencent.com;tencent.com;ucf.edu;emory.edu;tencent.com", "position": "PhD student;Senior Researcher;Researcher;Full Professor;Associate Professor;Senior Researcher", "bibtex": "@inproceedings{\nhu2023decipherpref,\ntitle={DecipherPref: Analyzing Influential Factors in Human Preference Judgments via {GPT}-4},\nauthor={Yebowen Hu and Kaiqiang Song and Sangwoo Cho and Xiaoyang Wang and Hassan Foroosh and Fei Liu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=Ue9i6qgiCw}\n}", "github": "", "project": "", "reviewers": "ZwsG;gQAc;u9Ct", "site": "https://openreview.net/forum?id=Ue9i6qgiCw", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;4;4", "excitement": "4;4;4", "reproducibility": "4;4;5", "correctness": "4;4;4", "rating_avg": 5.0, "confidence_avg": 4.0, "excitement_avg": 4.0, "reproducibility_avg": 4.333333333333333, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-3798-2437;;0000-0002-4875-2565;;;0000-0002-0746-1059", "linkedin": "huye/;;chosangwoo/;;feiliu01;xiaoyang-wang-001", "aff_unique_index": "0;1;1;0;2;1", "aff_unique_norm": "University of Central Florida;Tencent;Emory University", "aff_unique_dep": ";Tencent AI Lab;", "aff_unique_url": "https://www.ucf.edu;https://ai.tencent.com;https://www.emory.edu", "aff_unique_abbr": "UCF;Tencent AI Lab;Emory", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;0;0;1", "aff_country_unique": "United States;China" }, { "id": "UhuizFH1Hx", "title": "GSAP-NER: A Novel Task, Corpus, and Baseline for Scholarly Entity Extraction Focused on Machine Learning Models and Datasets", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Named Entity Recognition (NER) models play a crucial role in various NLP tasks, including information extraction (IE) and text understanding. In academic writing, references to machine learning models and datasets are fundamental components of various computer science publications and necessitate accurate models for identification. Despite the advancements in NER, existing ground truth datasets do not treat fine-grained types like ML model and model architecture as separate entity types, and consequently, baseline models cannot recognize them as such. 
In this paper, we release a corpus of 100 manually annotated full-text scientific publications and a first baseline model for 10 entity types centered around ML models and datasets. In order to provide a nuanced understanding of how ML models and datasets are mentioned and utilized, our dataset also contains annotations for informal mentions like \"our BERT-based model\" or \"an image CNN\". You can find the ground truth dataset and code to replicate model training at https://data.gesis.org/gsap/gsap-ner.", "keywords": "Information Extraction (IE);ML models;datasets;Named Entity Recognition (NER)", "primary_area": "", "supplementary_material": "", "author": "Wolfgang Otto;Matth\u00e4us Zloch;Lu Gan;Saurav Karmakar;Stefan Dietze", "authorids": "~Wolfgang_Otto1;~Matth\u00e4us_Zloch1;~Lu_Gan1;~Saurav_Karmakar1;~Stefan_Dietze1", "gender": ";M;F;M;", "homepage": "https://www.gesis.org/en/institute/about-us/staff/person/Wolfgang.Otto;https://www.researchgate.net/profile/Matthaeus-Zloch;https://www.gesis.org/institut/mitarbeitendenverzeichnis/person/Lu.Gan;;", "dblp": "09/1722-2;;45/3353-4.html;53/8965;", "google_scholar": "FHLdUPgAAAAJ;;;https://scholar.google.co.in/citations?user=5THErgsAAAAJ;", "or_profile": "~Wolfgang_Otto1;~Matth\u00e4us_Zloch1;~Lu_Gan1;~Saurav_Karmakar1;~Stefan_Dietze1", "aff": "gesis;GESIS - Leibniz-Institute for the Social Sciences;GESIS \u2013 Leibniz Institute for the Social Sciences;GESIS;", "aff_domain": "gesis.org;gesis.org;gesis.org;gesis.org;", "position": "PhD student;Senior Researcher;Postdoc;Postdoc;", "bibtex": "@inproceedings{\notto2023gsapner,\ntitle={{GSAP}-{NER}: A Novel Task, Corpus, and Baseline for Scholarly Entity Extraction Focused on Machine Learning Models and Datasets},\nauthor={Wolfgang Otto and Matth{\\\"a}us Zloch and Lu Gan and Saurav Karmakar and Stefan Dietze},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=UhuizFH1Hx}\n}", "github": "", "project": "", "reviewers": "CSpT;Z9Pn;RRze;AUcQ", "site": "https://openreview.net/forum?id=UhuizFH1Hx", "pdf_size": 0, "rating": "3;3;3;3", "confidence": "3;4;4;4", "excitement": "2;3;2;3", "reproducibility": "3;4;3;4", "correctness": "3;4;4;3", "rating_avg": 3.0, "confidence_avg": 3.75, "excitement_avg": 2.5, "reproducibility_avg": 3.5, "correctness_avg": 3.5, "replies_avg": 14, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-9530-3631;;;0009-0007-0124-5316;", "linkedin": "wolfgang-otto-134020a7/;;;;", "aff_unique_index": "0;1;2;3", "aff_unique_norm": "GESIS - Leibniz-Institut f\u00fcr Sozialwissenschaften;GESIS - Leibniz-Institute for the Social Sciences;Leibniz Institute for the Social Sciences;GESIS - Leibniz-Institut f\u00fcr Sozialwissenschaften e.V.", "aff_unique_dep": ";Social Sciences;Social Sciences;", "aff_unique_url": "https://www.gesis.org/;https://www.gesis.org;https://www.gesis.org;https://www.gesis.org/", "aff_unique_abbr": "GESIS;GESIS;GESIS;GESIS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Germany" }, { "id": "UixzK8evk5", "title": "DistillCSE: Distilled Contrastive Learning for Sentence Embeddings", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "This paper proposes the DistillCSE framework, which performs contrastive learning under the self-training paradigm with knowledge distillation. 
The potential advantage of DistillCSE is its self-enhancing feature: using a base model to provide additional supervision signals, a stronger model may be learned through knowledge distillation. However, the vanilla DistillCSE, implemented with standard knowledge distillation, achieves only marginal improvements. Quantitative analyses reveal the reason: standard knowledge distillation exhibits a relatively large variance in the teacher model's logits due to the nature of contrastive learning. To mitigate the issue induced by this high variance, this paper accordingly proposes two simple yet effective solutions for knowledge distillation: a Group-P shuffling strategy as an implicit regularization and averaging the logits from multiple teacher components. Experiments on standard benchmarks demonstrate that the proposed DistillCSE outperforms many strong baseline methods and yields new state-of-the-art performance.", "keywords": "sentence embedding;self-training;contrastive learning", "primary_area": "", "supplementary_material": "", "author": "Jiahao Xu;Wei Shao;Lihui Chen;Lemao Liu", "authorids": "~Jiahao_Xu1;~Wei_Shao5;~Lihui_Chen1;~Lemao_Liu3", "gender": ";M;Unspecified;M", "homepage": "http://jiahao004.github.io/;;;https://lemaoliu.github.io/homepage/", "dblp": "205/4200-1;;56/1277;41/10887.html", "google_scholar": "FlsBVrIAAAAJ;4o57IEAAAAAJ;;", "or_profile": "~Jiahao_Xu1;~Wei_Shao5;~Lihui_Chen1;~lemao_liu1", "aff": "Nanyang Technological University;City University of Hong Kong;Nanyang Technological University;Tencent", "aff_domain": "ntu.edu.sg;cityu.edu.hk;ntu.edu.sg;tencent.com", "position": "PhD student;PhD student;Associate Professor;Researcher", "bibtex": "@inproceedings{\nxu2023distillcse,\ntitle={Distill{CSE}: Distilled Contrastive Learning for Sentence Embeddings},\nauthor={Jiahao Xu and Wei Shao and Lihui Chen and Lemao Liu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=UixzK8evk5}\n}", "github": "", "project": "", "reviewers": "f9XB;JpHQ;jiJV", "site": "https://openreview.net/forum?id=UixzK8evk5", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;3", "excitement": "4;3;3", "reproducibility": "3;1;0", "correctness": "4;3;3", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 1.3333333333333333, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0001-6776-8215;;;", "linkedin": ";;;", "aff_unique_index": "0;1;0;2", "aff_unique_norm": "Nanyang Technological University;City University of Hong Kong;Tencent", "aff_unique_dep": ";;Tencent Holdings Limited", "aff_unique_url": "https://www.ntu.edu.sg;https://www.cityu.edu.hk;https://www.tencent.com", "aff_unique_abbr": "NTU;CityU;Tencent", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;1;0;1", "aff_country_unique": "Singapore;China" }, { "id": "UjOPUHPoTM", "title": "Blackbird language matrices (BLM), a new task for rule-like generalization in neural networks: Can Large Language Models pass the test?", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "How do we evaluate Large Language Models (LLMs) and determine the aspects and limits of their intelligent behaviour?\n\nIt is currently conjectured that shortcomings of LLMs in multi-linguality and reasoning are due to a lack of 
ability to generalize.\nIt has been argued that, instead, humans are better at generalization because they have a tendency to extract rules from complex data. \nWe propose a method to evaluate the ability of LLMs to perform rule-based generalization.\n\nWhen exposed to tests of analytic intelligence, for example the visual RAVEN IQ test, human problem-solvers identify the relevant objects in the picture and their relevant attributes and reason based on rules applied to them. Based on the induced rules, they are able to provide a generalisation and a solution to the test.\n\nAn analogous language task (called BLM) has recently been proposed for LLMs. \nIn this paper, we argue that we can use this task to investigate what linguistic reasoning LLMs develop, by asking them to solve some simple variants of the BLM task.\n \nWe find that current state-of-the-art generative models, such as ChatGPT, can handle the task in the sense that they easily understand the instructions and can provide step-by-step reasoning that shows they can solve two of the main cognitive hurdles: correspondence finding (object and attribute identification) and item novelty. However, overall they cannot find the correct answer, even with considerable help. In particular, they never identify the structure of the problem, exhibiting, we hypothesize, a lack of goal and subgoal management abilities, an ability that has been argued to measure differential abilities in humans. \n\nWe argue that this finding supports the usefulness of the task as a method to test the limits and specific properties of generalisation ability in Large Language Models, providing an intrinsic evaluation method inspired by tests of human intelligence.", "keywords": "Rule-based learning;generalisation;intrinsic evaluation;cognitive modelling;benchmarking", "primary_area": "", "supplementary_material": "", "author": "Paola Merlo", "authorids": "~Paola_Merlo1", "gender": "", "homepage": "", "dblp": "", "google_scholar": "", "or_profile": "", "aff": "", "aff_domain": "", "position": "", "bibtex": "@inproceedings{\nmerlo2023blackbird,\ntitle={Blackbird language matrices ({BLM}), a new task for rule-like generalization in neural networks: Can Large Language Models pass the test?},\nauthor={Paola Merlo},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=UjOPUHPoTM}\n}", "github": "", "project": "", "reviewers": "djfD;jZnL;6RnV", "site": "https://openreview.net/forum?id=UjOPUHPoTM", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;2;4", "excitement": "3;3;3", "reproducibility": "4;5;0", "correctness": "2;3;4", "rating_avg": 3.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.0, "reproducibility_avg": 3.0, "correctness_avg": 3.0, "replies_avg": 10, "authors#_avg": 1, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0 }, { "id": "UlewKJFkUV", "title": "Memorisation Cartography: Mapping out the Memorisation-Generalisation Continuum in Neural Machine Translation", "track": "main", "status": "Long Main", "tldr": "", "abstract": "When training a neural network, it will quickly memorise some source-target mappings from your dataset but never learn some others.\nYet, memorisation is not easily expressed as a binary feature that is good or bad: individual datapoints lie on a memorisation-generalisation continuum.\nWhat determines a datapoint's position on that spectrum, and how does that spectrum influence neural models' performance?\nWe address these two questions 
for neural machine translation (NMT) models. We use the counterfactual memorisation metric to\n(1) build a resource that places 5M NMT datapoints on a memorisation-generalisation map,\n(2) illustrate how the datapoints' surface-level characteristics and a model's per-datum training signals are predictive of memorisation in NMT,\nand (3) describe the influence that subsets of that map have on NMT systems' performance.", "keywords": "interpretability;neural machine translation;memorization", "primary_area": "", "supplementary_material": "", "author": "Verna Dankers;Ivan Titov;Dieuwke Hupkes", "authorids": "~Verna_Dankers1;~Ivan_Titov1;~Dieuwke_Hupkes1", "gender": "F;;", "homepage": "https://vernadankers.com;http://ivan-titov.org;https://github.com/google/BIG-bench", "dblp": "242/7711;08/5391;184/8838", "google_scholar": "https://scholar.google.nl/citations?hl=en;https://scholar.google.nl/citations?user=FKUc3vsAAAAJ;https://scholar.google.nl/citations?user=tAtSMTcAAAAJ", "or_profile": "~Verna_Dankers1;~Ivan_Titov1;~Dieuwke_Hupkes1", "aff": "University of Edinburgh;University of Amsterdam;Meta Facebook", "aff_domain": "ed.ac.uk;uva.nl;facebook.com", "position": "PhD student;Associate Professor;Research Scientist", "bibtex": "@inproceedings{\ndankers2023memorisation,\ntitle={Memorisation Cartography: Mapping out the Memorisation-Generalisation Continuum in Neural Machine Translation},\nauthor={Verna Dankers and Ivan Titov and Dieuwke Hupkes},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=UlewKJFkUV}\n}", "github": "", "project": "", "reviewers": "5nce;Jciq;pPXn;cDFT", "site": "https://openreview.net/forum?id=UlewKJFkUV", "pdf_size": 0, "rating": "5;5;5;5", "confidence": "4;4;3;3", "excitement": "4;4;3;4", "reproducibility": "3;4;3;3", "correctness": "4;4;4;4", "rating_avg": 5.0, "confidence_avg": 3.5, "excitement_avg": 3.75, "reproducibility_avg": 3.25, "correctness_avg": 4.0, "replies_avg": 13, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": "verna-dankers-27396511b/;;", "aff_unique_index": "0;1;2", "aff_unique_norm": "University of Edinburgh;University of Amsterdam;Meta", "aff_unique_dep": ";;Meta Platforms, Inc.", "aff_unique_url": "https://www.ed.ac.uk;https://www.uva.nl;https://meta.com", "aff_unique_abbr": "Edinburgh;UvA;Meta", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2", "aff_country_unique": "United Kingdom;Netherlands;United States" }, { "id": "UlgNWOzMz2", "title": "Isotropic Representation Can Improve Zero-Shot Cross-Lingual Transfer on Multilingual Language Models", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "With the development of multilingual pre-trained language models (mPLMs), zero-shot cross-lingual transfer shows great potential.\nTo further improve the performance of cross-lingual transfer, many studies have explored representation misalignment caused by morphological differences but neglected the misalignment caused by the anisotropic distribution of contextual representations.\nIn this work, we propose enhanced isotropy and constrained code-switching for zero-shot cross-lingual transfer to alleviate the problem of misalignment caused by the anisotropic representations and maintain syntactic structural knowledge.\nExtensive experiments on three zero-shot cross-lingual transfer tasks demonstrate that our method gains significant improvements over strong 
mPLM backbones and further improves the state-of-the-art methods.\\footnote{Our code will be available at \\url{https://github.com/Dereck0602/IsoZCL}.}", "keywords": "isotropic representation;cross-lingual;multilingual", "primary_area": "", "supplementary_material": "", "author": "Yixin Ji;Jikai Wang;Juntao Li;Hai Ye;Min Zhang", "authorids": "~Yixin_Ji2;~Jikai_Wang1;~Juntao_Li2;~Hai_Ye2;~Min_Zhang9", "gender": "M;M;M;M;M", "homepage": "https://github.com/Dereck0602;;https://lijuntaopku.github.io/;;https://zhangmin-nlp-ai.github.io/", "dblp": ";;;190/;83/5342-5", "google_scholar": "I3UQhtIAAAAJ;https://scholar.google.com.hk/citations?view_op=list_works;sZSygsYAAAAJ;_dQWEzEAAAAJ;https://scholar.google.com/citations?hl=zh-CN", "or_profile": "~Yixin_Ji2;~Jikai_Wang1;~Juntao_Li2;~Hai_Ye2;~Min_Zhang9", "aff": "Soochow University;Soochow University;Soochow University, China;National University of Singapore;Harbin Institute of Technology, Shenzhen", "aff_domain": "suda.edu.cn;suda.edu.cn;suda.edu.cn;nus.edu.sg;hit.edu.cn", "position": "PhD student;Undergrad student;Associate Professor;PhD student;Full Professor", "bibtex": "@inproceedings{\nji2023isotropic,\ntitle={Isotropic Representation Can Improve Zero-Shot Cross-Lingual Transfer on Multilingual Language Models},\nauthor={Yixin Ji and Jikai Wang and Juntao Li and Hai Ye and Min Zhang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=UlgNWOzMz2}\n}", "github": "", "project": "", "reviewers": "AcYD;7ZXD;u81d", "site": "https://openreview.net/forum?id=UlgNWOzMz2", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;4;4", "excitement": "3;4;4", "reproducibility": "4;5;4", "correctness": "3;4;3", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.333333333333333, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0003-0729-5115;0000-0002-6286-7529;;", "linkedin": ";;;;", "aff_unique_index": "0;0;0;1;2", "aff_unique_norm": "Soochow University;National University of Singapore;Harbin Institute of Technology", "aff_unique_dep": ";;", "aff_unique_url": "https://www.soochow.edu.cn;https://www.nus.edu.sg;http://en.hhit.edu.cn/", "aff_unique_abbr": "Soochow U;NUS;HIT", "aff_campus_unique_index": "1", "aff_campus_unique": ";Shenzhen", "aff_country_unique_index": "0;0;0;1;0", "aff_country_unique": "China;Singapore" }, { "id": "UmKaHvjkiu", "title": "PreWoMe: Exploiting Presuppositions as Working Memory for Long Form Question Answering", "track": "main", "status": "Short Main", "tldr": "", "abstract": "Information-seeking questions in long-form question answering (LFQA) often prove misleading due to ambiguity or false presupposition in the question. While many existing approaches handle misleading questions, they are tailored to limited questions, which are insufficient in a real-world setting with unpredictable input characteristics. In this work, we propose PreWoMe, a unified approach capable of handling any type of information-seeking question. The key idea of PreWoMe involves extracting presuppositions in the question and exploiting them as working memory to generate feedback and action about the question. 
Our experiment shows that PreWoMe is effective not only in tackling misleading questions but also in handling normal ones, thereby demonstrating the effectiveness of leveraging presuppositions, feedback, and action for real-world QA settings.", "keywords": "Long-Form QA;Large Language Models;Presuppositions", "primary_area": "", "supplementary_material": "", "author": "Wookje Han;Jinsol Park;Kyungjae Lee", "authorids": "~Wookje_Han1;~Jinsol_Park1;~Kyungjae_Lee2", "gender": "M;F;M", "homepage": "https://wookjehan.github.io/;;https://lkj0509.github.io/", "dblp": "319/4219;;13/7265-2", "google_scholar": "NNS8zUsAAAAJ;22AYRbkAAAAJ;https://scholar.google.co.kr/citations?user=bGeInhoAAAAJ", "or_profile": "~Wookje_Han1;~Jinsol_Park1;~Kyungjae_Lee2", "aff": "Seoul National University;Seoul National University;", "aff_domain": "snu.ac.kr;snu.ac.kr;", "position": "Undergrad student;Undergrad student;", "bibtex": "@inproceedings{\nhan2023prewome,\ntitle={PreWoMe: Exploiting Presuppositions as Working Memory for Long Form Question Answering},\nauthor={Wookje Han and Jinsol Park and Kyungjae Lee},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=UmKaHvjkiu}\n}", "github": "", "project": "", "reviewers": "QSHM;4fGt;M53a", "site": "https://openreview.net/forum?id=UmKaHvjkiu", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;5;4", "excitement": "3;4;3", "reproducibility": "4;5;4", "correctness": "4;4;3", "rating_avg": 5.0, "confidence_avg": 4.333333333333333, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.333333333333333, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0003-0586-3748", "linkedin": "wookje-han-2052691a6;;", "aff_unique_index": "0;0", "aff_unique_norm": "Seoul National University", "aff_unique_dep": "", "aff_unique_url": "https://www.snu.ac.kr", "aff_unique_abbr": "SNU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "South Korea" }, { "id": "Upk6WrdJYM", "title": "Efficient k-NN Search with Cross-Encoders using Adaptive Multi-Round CUR Decomposition", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "Cross-encoder models, which jointly encode and score a query-item pair, are prohibitively expensive for direct k-nearest neighbor (k-NN) search. Consequently, k-NN search typically employs a fast approximate retrieval (e.g. using BM25 or dual-encoder vectors), followed by reranking with a cross-encoder; however, the retrieval approximation often has detrimental recall regret. This problem is tackled by ANNCUR (Yadav et al., 2022), a recent work that employs a cross-encoder only, making search efficient using a relatively small number of anchor items, and a CUR matrix factorization. While ANNCUR\u2019s one-time selection of anchors tends to approximate the cross-encoder distances on average, doing so forfeits the capacity to accurately estimate distances to items near the query, leading to regret in the crucial end-task: recall of top-k items. In this paper, we propose ADACUR, a method that adaptively, iteratively, and efficiently minimizes the approximation error for the practically important top-k neighbors. It does so by iteratively performing k-NN search using the anchors available so far, then adding these retrieved nearest neighbors to the anchor set for the next round. 
Empirically, on multiple datasets, in comparison to previous traditional and state-of-the-art methods such as ANNCUR and dual-encoder-based retrieve-and-rerank, our proposed approach ADACUR consistently reduces recall error\u2014by up to 70% on the important k = 1 setting\u2014while using no more compute than its competitors.", "keywords": "cross-encoder;nearest neighbor search;k-NN;retrieval", "primary_area": "", "supplementary_material": "", "author": "Nishant Yadav;Nicholas Monath;Manzil Zaheer;Andrew McCallum", "authorids": "~Nishant_Yadav1;~Nicholas_Monath1;~Manzil_Zaheer1;~Andrew_McCallum1", "gender": "M;M;M;M", "homepage": "https://people.cs.umass.edu/~nishantyadav/;https://nmonath.github.io/;https://www.aclweb.org/anthology/people/m/manzil-zaheer/;http://www.cs.umass.edu/~mccallum", "dblp": "230/4155;131/4309;40/10701;m/AndrewMcCallum", "google_scholar": "Korn2JAAAAAJ;PTfhfCQAAAAJ;A33FhJMAAAAJ;yILa1y0AAAAJ", "or_profile": "~Nishant_Yadav1;~Nicholas_Monath1;~Manzil_Zaheer1;~Andrew_McCallum1", "aff": "Department of Computer Science, University of Massachusetts, Amherst;Google;Google DeepMind;University of Massachusetts Amherst", "aff_domain": "cs.umass.edu;google.com;deepmind.com;cs.umass.edu", "position": "PhD student;Researcher;Researcher;Distinguished Professor", "bibtex": "@inproceedings{\nyadav2023efficient,\ntitle={Efficient k-{NN} Search with Cross-Encoders using Adaptive Multi-Round {CUR} Decomposition},\nauthor={Nishant Yadav and Nicholas Monath and Manzil Zaheer and Andrew McCallum},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=Upk6WrdJYM}\n}", "github": "", "project": "", "reviewers": "sVVR;6YK8;zs87", "site": "https://openreview.net/forum?id=Upk6WrdJYM", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;4", "excitement": "4;3;3", "reproducibility": "4;3;4", "correctness": "3;2;4", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-5135-2423;;0009-0004-5487-2848", "linkedin": ";nicholas-monath-8627581aa/;;andrew-mccallum-a412", "aff_unique_index": "0;1;1;0", "aff_unique_norm": "University of Massachusetts Amherst;Google", "aff_unique_dep": "Department of Computer Science;Google", "aff_unique_url": "https://www.umass.edu;https://www.google.com", "aff_unique_abbr": "UMass Amherst;Google", "aff_campus_unique_index": "0;1;0", "aff_campus_unique": "Amherst;Mountain View;", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "United States;United Kingdom" }, { "id": "Uuqv7iSNif", "title": "New Datasets and Controllable Iterative Data Augmentation Method for Code-switching ASR Error Correction", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "With the wide use of automatic speech recognition(ASR) systems, researchers pay more attention to the ASR error correction task to improve the quality of recognition results. In particular, ASR in bilingual or multilingual settings, namely code-switching ASR, has greater challenges and research value. In this paper, we first present code-switching ASR correction datasets obtained from solid ASR systems and automatic annotators. The datasets contain Chinese-English code-switching dialogues of bilingual speakers in Singapore, Malaysia, and Hong Kong. 
Based on this task, we propose a controllable iterative (CI) data augmentation method for improving the performance of mainstream ASR error correction systems. With a small amount of training data, our proposed method can iteratively produce abundant pseudo parallel data from the monolingual corpus for Chinese-English code-switching ASR correction. Experimental results show that our method achieves the best performance compared with rule-based and back-translation-based data augmentation methods and the large language model ChatGPT.", "keywords": "ASR Error Correction;Code Switching;Data Augmentation", "primary_area": "", "supplementary_material": "", "author": "Zhaohong Wan;Xiaojun Wan;Wei Peng;Rongjun Li", "authorids": "~Zhaohong_Wan1;~Xiaojun_Wan1;~Wei_Peng6;~Rongjun_Li1", "gender": "M;M;M;M", "homepage": "https://wanxiaojun.github.io;https://www.rmit.edu.au/profiles/p/wei-peng3;;", "dblp": "07/1521;;51/4069;", "google_scholar": "lTTeBdkAAAAJ;;;https://scholar.google.com/citations?hl=en", "or_profile": "~Xiaojun_Wan1;~Wei_Peng6;~Rongjun_Li1;~Wan_Zhaohong1", "aff": "Peking University;Huawei Technologies Ltd.;Huawei Technologies Ltd.;Peking University", "aff_domain": "pku.edu.cn;huawei.com;huawei.com;pku.edu.cn", "position": "Full Professor;Principal Researcher;Researcher;PhD student", "bibtex": "@inproceedings{\nwan2023new,\ntitle={New Datasets and Controllable Iterative Data Augmentation Method for Code-switching {ASR} Error Correction},\nauthor={Zhaohong Wan and Xiaojun Wan and Wei Peng and Rongjun Li},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=Uuqv7iSNif}\n}", "github": "", "project": "", "reviewers": "XFMs;BZT9;6CD8", "site": "https://openreview.net/forum?id=Uuqv7iSNif", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;3;3", "excitement": "3;3;3", "reproducibility": "3;3;3", "correctness": "4;3;3", "rating_avg": 3.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.0, "reproducibility_avg": 3.0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;", "linkedin": ";wei-peng-phd-in-ai-4515ba22/?originalSubdomain=au;rongjun-li-98b37363/;", "aff_unique_index": "0;1;1;0", "aff_unique_norm": "Peking University;Huawei", "aff_unique_dep": ";Huawei Technologies", "aff_unique_url": "http://www.pku.edu.cn;https://www.huawei.com", "aff_unique_abbr": "Peking U;Huawei", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "UxdVVhWVq2", "title": "Knowledge-Selective Pretraining for Attribute Value Extraction", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Attribute Value Extraction (AVE) aims to retrieve the values of attributes from product profiles. The state-of-the-art methods tackle the AVE task through a question-answering (QA) paradigm, where the value is predicted from the context (i.e. product profile) given a query (i.e. attributes). Despite the substantial advancements that have been made, the performance of existing methods on rare attributes is still far from satisfactory, and they cannot be easily extended to unseen attributes due to their poor generalization ability. In this work, we propose to leverage pretraining and transfer learning to address the aforementioned weaknesses. 
We first collect the product information from various E-commerce stores and retrieve a large number of (profile, attribute, value) triples, which will be used as the pretraining corpus. To more effectively utilize the retrieved corpus, we further design a Knowledge-Selective Framework (KSelF) based on query expansion that can be closely combined with the pretraining corpus to boost the performance. Meanwhile, considering the public AE-pub dataset contains considerable noise, we construct and contribute a larger benchmark EC-AVE collected from E-commerce websites. We conduct evaluation on both of these datasets. The experimental results demonstrate that our proposed KSelF achieves new state-of-the-art performance without pretraining. When incorporated with the pretraining corpus, the performance of KSelF can be further improved, particularly on the attributes with limited training resources.", "keywords": "attribute value extraction;pretraining", "primary_area": "", "supplementary_material": "", "author": "Hui Liu;Qingyu Yin;Zhengyang Wang;Chenwei Zhang;Haoming Jiang;Yifan Gao;Zheng Li;Xian Li;Chao Zhang;Bing Yin;William Yang Wang;Xiaodan Zhu", "authorids": "~Hui_Liu3;~Qingyu_Yin2;~Zhengyang_Wang1;~Chenwei_Zhang1;~Haoming_Jiang1;~Yifan_Gao1;~Zheng_Li9;~Xian_Li3;~Chao_Zhang15;~Bing_Yin1;~William_Yang_Wang2;~Xiaodan_Zhu1", "gender": ";M;M;M;M;Not Specified;F;;M;M;M;M", "homepage": "https://layneins.github.io;;;https://www.cwzhang.com;https://hmjianggatech.github.io;http://yifan-gao.github.io;https://scholar.google.com/citations?user=6-Xx0IoAAAAJ&hl=en;http://chaozhang.org/;;http://www.xiaodanzhu.com;https://www.cs.ucsb.edu/~william/;https://hsqmlzno1.github.io/", "dblp": "93/4010-33.html;179/2542;;133/3207;230/3684;79/3190-1;;94/3019-14;;93/310.html;08/9282;10/1143-18", "google_scholar": "brfcskMAAAAJ;P-mBKNYAAAAJ;A4fNBtEAAAAJ;u_bIiBQAAAAJ;XaFhuG8AAAAJ;https://scholar.google.com.hk/citations?user=erdMFJwAAAAJ;6-Xx0IoAAAAJ;https://scholar.google.com/citations?hl=en;qSOxydEAAAAJ;https://scholar.google.ca/citations?user=a6MYnuUAAAAJ;gf8Ms_8AAAAJ;https://scholar.google.com.hk/citations?user=P6fwn4AAAAAJ", "or_profile": "~Hui_Liu3;~Qingyu_Yin2;~Zhengyang_Wang1;~Chenwei_Zhang1;~Haoming_Jiang1;~Yifan_Gao1;~Xian_Li3;~Chao_Zhang15;~Bing_Yin1;~Xiaodan_Zhu1;~William_Wang1;~zheng_li4", "aff": "Queens University;Amazon;Amazon;Amazon;Amazon;Amazon;Amazon;Georgia Institute of Technology;Amazon;Queen's University;UC Santa Barbara;Amazon", "aff_domain": "queensu.ca;amazon.com;amazon.com;amazon.com;amazon.com;amazon.com;amazon.com;gatech.edu;amazon.com;queensu.ca;ucsb.edu;amazon.com", "position": "PhD student;Researcher;Researcher;Researcher;Principal Researcher;Researcher;Applied Scientist;Assistant Professor;Senior Science Manager;Associate Professor;Full Professor;Researcher", "bibtex": "@inproceedings{\nliu2023knowledgeselective,\ntitle={Knowledge-Selective Pretraining for Attribute Value Extraction},\nauthor={Hui Liu and Qingyu Yin and Zhengyang Wang and Chenwei Zhang and Haoming Jiang and Yifan Gao and Zheng Li and Xian Li and Chao Zhang and Bing Yin and William Yang Wang and Xiaodan Zhu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=UxdVVhWVq2}\n}", "github": "", "project": "", "reviewers": "Z4E8;a4W9;YCV3", "site": "https://openreview.net/forum?id=UxdVVhWVq2", "pdf_size": 0, "rating": "2;2;2", "confidence": "4;5;4", "excitement": "4;3;2", "reproducibility": "4;5;3", "correctness": "4;4;4", "rating_avg": 2.0, 
"confidence_avg": 4.333333333333333, "excitement_avg": 3.0, "reproducibility_avg": 4.0, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 12, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0002-5146-2884;;;;;0000-0003-3009-598X;0000-0002-5890-0031;0000-0003-3856-3696;;", "linkedin": ";;;;;yi-fan-gao/;xianl/;;bingyin;xiaodan-zhu-066833101/?originalSubdomain=ca;;", "aff_unique_index": "0;1;1;1;1;1;1;2;1;3;4;1", "aff_unique_norm": "Queens University;Amazon;Georgia Institute of Technology;Queen's University;University of California, Santa Barbara", "aff_unique_dep": ";Amazon.com, Inc.;;;", "aff_unique_url": "https://www.queensu.ca;https://www.amazon.com;https://www.gatech.edu;https://www.queensu.ca;https://www.ucsb.edu", "aff_unique_abbr": "Queen's U;Amazon;Georgia Tech;Queen's;UCSB", "aff_campus_unique_index": "1", "aff_campus_unique": ";Santa Barbara", "aff_country_unique_index": "0;1;1;1;1;1;1;1;1;0;1;1", "aff_country_unique": "Canada;United States" }, { "id": "UyLaqZ6PHA", "title": "How Do Large Language Models Capture the Ever-changing World Knowledge? A Review of Recent Advances", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Although large language models (LLMs) are impressive in solving various tasks, they can quickly be outdated after deployment. Maintaining their up-to-date status is a pressing concern in the current era. This paper provides a comprehensive review of recent advances in aligning deployed LLMs with the ever-changing world knowledge. We categorize research works systemically and provide in-depth comparisons and discussions. We also discuss existing challenges and highlight future directions to facilitate research in this field.", "keywords": "large language models;survey;knowledge", "primary_area": "", "supplementary_material": "", "author": "Zihan Zhang;Meng Fang;Ling Chen;Mohammad Reza Namazi Rad;Jun Wang", "authorids": "~Zihan_Zhang3;~Meng_Fang1;~Ling_Chen5;~Mohammad_Reza_Namazi_Rad2;~Jun_Wang2", "gender": "M;M;F;M;M", "homepage": "https://zhangzihangit.github.io/;;https://profiles.uts.edu.au/Ling.Chen;https://www.linkedin.com/in/mo-namazi/;http://www0.cs.ucl.ac.uk/staff/jun.wang/", "dblp": ";67/463;17/1237-6;;w/JunWang12", "google_scholar": "https://scholar.google.com.au/citations?hl=en;IcNYP1oAAAAJ;https://scholar.google.com.au/citations?user=L5aYWQcAAAAJ;https://scholar.google.com.au/citations?user=uoGBVTYAAAAJ;https://scholar.google.co.uk/citations?user=wIE1tY4AAAAJ", "or_profile": "~Zihan_Zhang3;~Meng_Fang1;~Ling_Chen5;~Mohammad_Reza_Namazi_Rad2;~Jun_Wang2", "aff": "University of Technology Sydney;Eindhoven University of Technology;University of Technology Sydney;;University College London", "aff_domain": "uts.edu.au;tue.nl;uts.edu.au;;ucl.ac.uk", "position": "PhD student;Assistant Professor;Full Professor;;Professor", "bibtex": "@inproceedings{\nzhang2023how,\ntitle={How Do Large Language Models Capture the Ever-changing World Knowledge? 
A Review of Recent Advances},\nauthor={Zihan Zhang and Meng Fang and Ling Chen and Mohammad Reza Namazi Rad and Jun Wang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=UyLaqZ6PHA}\n}", "github": "", "project": "", "reviewers": "CDPY;PyMv;j2Rw", "site": "https://openreview.net/forum?id=UyLaqZ6PHA", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;4;1", "excitement": "4;4;4", "reproducibility": "", "correctness": "3;4;3", "rating_avg": 5.0, "confidence_avg": 3.0, "excitement_avg": 4.0, "reproducibility_avg": 0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0002-6468-5729;;", "linkedin": "zihan-zhang-a40855172/;;;;", "aff_unique_index": "0;1;0;2", "aff_unique_norm": "University of Technology Sydney;Eindhoven University of Technology;University College London", "aff_unique_dep": ";;", "aff_unique_url": "https://www.uts.edu.au;https://www.tue.nl;https://www.ucl.ac.uk", "aff_unique_abbr": "UTS;TU/e;UCL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;2", "aff_country_unique": "Australia;Netherlands;United Kingdom" }, { "id": "V3O0NNaPNW", "title": "Speaking Style Conversion in the Waveform Domain Using Discrete Self-Supervised Units", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "We introduce DISSC, a novel, lightweight method that converts the rhythm, pitch contour and timbre of a recording to a target speaker in a textless manner. Unlike DISSC, most voice conversion (VC) methods focus primarily on timbre, and ignore people's unique speaking style (prosody). The proposed approach uses a pretrained, self-supervised model for encoding speech to discrete units, which makes it simple, effective, and fast to train. All conversion modules are only trained on reconstruction like tasks, thus suitable for any-to-many VC with no paired data. We introduce a suite of quantitative and qualitative evaluation metrics for this setup, and empirically demonstrate that DISSC significantly outperforms the evaluated baselines. 
Code and samples are available at https://pages.cs.huji.ac.il/adiyoss-lab/dissc/.", "keywords": "Voice Conversion;Audio Processing;Self-Supervised Learning", "primary_area": "", "supplementary_material": "", "author": "Gallil Maimon;Yossi Adi", "authorids": "~Gallil_Maimon1;~Yossi_Adi1", "gender": ";M", "homepage": "https://pages.cs.huji.ac.il/gallilmaimon/;http://adiyoss.github.io/", "dblp": "322/9010;171/0957.html", "google_scholar": "x9iloggAAAAJ;https://scholar.google.co.il/citations?user=4W-HuYYAAAAJ", "or_profile": "~Gallil_Maimon1;~Yossi_Adi1", "aff": "Hebrew University of Jerusalem;Meta", "aff_domain": "huji.ac.il;meta.com", "position": "PhD student;Research Scientist", "bibtex": "@inproceedings{\nmaimon2023speaking,\ntitle={Speaking Style Conversion in the Waveform Domain Using Discrete Self-Supervised Units},\nauthor={Gallil Maimon and Yossi Adi},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=V3O0NNaPNW}\n}", "github": "", "project": "", "reviewers": "GxPi;RPWj;rc2g", "site": "https://openreview.net/forum?id=V3O0NNaPNW", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;4;5", "excitement": "4;4;1", "reproducibility": "4;4;0", "correctness": "4;4;1", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.0, "reproducibility_avg": 2.6666666666666665, "correctness_avg": 3.0, "replies_avg": 10, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0001-6400-3954;0000-0003-2237-3898", "linkedin": "gallil-maimon-969143255;yossi-adi-31a32858?trk=nav_responsive_tab_profile_pic", "aff_unique_index": "0;1", "aff_unique_norm": "Hebrew University of Jerusalem;Meta", "aff_unique_dep": ";Meta Platforms, Inc.", "aff_unique_url": "https://www.huji.ac.il;https://meta.com", "aff_unique_abbr": "HUJI;Meta", "aff_campus_unique_index": "0", "aff_campus_unique": "Jerusalem;", "aff_country_unique_index": "0;1", "aff_country_unique": "Israel;United States" }, { "id": "V49Jx2Lj04", "title": "IfQA: A Dataset for Open-domain Question Answering under Counterfactual Presuppositions", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Although counterfactual reasoning is a fundamental aspect of intelligence, the lack of large-scale counterfactual open-domain question-answering (QA) benchmarks makes it difficult to evaluate and improve models on this ability. To address this void, we introduce the first such dataset, named IfQA, where each question is based on a counterfactual presupposition via an ``if'' clause. Such questions require models to go beyond retrieving direct factual knowledge from the Web: they must identify the right information to retrieve and reason about an imagined situation that may even go against the facts built into their parameters. The IfQA dataset contains 3,800 questions that were annotated by crowdworkers on relevant Wikipedia passages. Empirical analysis reveals that the IfQA dataset is highly challenging for existing open-domain QA methods, including supervised retrieve-then-read pipeline methods (F1 score 44.5), as well as recent few-shot approaches such as chain-of-thought prompting with ChatGPT (F1 score 57.2). 
We hope the unique challenges posed by IfQA will push open-domain QA research on both retrieval and reasoning fronts, while also helping endow counterfactual reasoning abilities to today's language understanding models.", "keywords": "open-domain question answering;counterfactual reasoning", "primary_area": "", "supplementary_material": "", "author": "Wenhao Yu;Meng Jiang;Peter Clark;Ashish Sabharwal", "authorids": "~Wenhao_Yu2;~Meng_Jiang3;~Peter_Clark1;~Ashish_Sabharwal1", "gender": "M;M;M;M", "homepage": "https://wyu97.github.io/;http://www.meng-jiang.com/;https://allenai.org/team/peterc;", "dblp": "159/8117-2.html;69/339-1;34/1184;13/154", "google_scholar": "z4qSdX8AAAAJ;LZIPfCkAAAAJ;o-5vyEsAAAAJ;7VspfeAAAAAJ", "or_profile": "~Wenhao_Yu2;~Meng_Jiang3;~Peter_Clark1;~Ashish_Sabharwal1", "aff": "University of Notre Dame;University of Notre Dame;Allen Institute for Artificial Intelligence;Allen Institute for AI", "aff_domain": "nd.edu;nd.edu;allenai.org;allenai.org", "position": "PhD student;Assistant Professor;Senior Research Manager;Principal Researcher", "bibtex": "@inproceedings{\nyu2023ifqa,\ntitle={If{QA}: A Dataset for Open-domain Question Answering under Counterfactual Presuppositions},\nauthor={Wenhao Yu and Meng Jiang and Peter Clark and Ashish Sabharwal},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=V49Jx2Lj04}\n}", "github": "", "project": "", "reviewers": "sGbp;hauT;8qae", "site": "https://openreview.net/forum?id=V49Jx2Lj04", "pdf_size": 0, "rating": "5;5;5", "confidence": "3;4;3", "excitement": "3;4;4", "reproducibility": "3;0;4", "correctness": "3;4;4", "rating_avg": 5.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 2.3333333333333335, "correctness_avg": 3.6666666666666665, "replies_avg": 10, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-4075-5980;0000-0002-3009-519X;;", "linkedin": ";meng-jiang-94b10916/;peter-clark-a8b556/;ashish-sabharwal-82a2b661", "aff_unique_index": "0;0;1;2", "aff_unique_norm": "University of Notre Dame;Allen Institute for Artificial Intelligence;Allen Institute for AI", "aff_unique_dep": ";;", "aff_unique_url": "https://www.nd.edu;https://allenai.org;https://allenai.org", "aff_unique_abbr": "Notre Dame;AI2;AI2", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "V76kMIJI37", "title": "Retrieval-Generation Alignment for End-to-End Task-Oriented Dialogue System", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Developing an efficient retriever to retrieve knowledge from a large-scale knowledge base (KB) is critical for task-oriented dialogue systems to effectively handle localized and specialized tasks. However, widely used generative models such as T5 and ChatGPT often struggle to differentiate subtle differences among the retrieved KB records when generating responses, resulting in suboptimal quality of generated responses. In this paper, we propose the application of maximal marginal likelihood to train a perceptive retriever by utilizing signals from response generation for supervision. In addition, our approach goes beyond considering solely retrieved entities and incorporates various meta knowledge to guide the generator, thus improving the utilization of knowledge. 
We evaluate our approach on three task-oriented dialogue datasets using T5 and ChatGPT as the backbone models. The results demonstrate that when combined with meta knowledge, the response generator can effectively leverage high-quality knowledge records from the retriever and enhance the quality of generated responses. The code of this work is available at https://github.com/shenwzh3/MK-TOD.", "keywords": "Knowledge Retrieval;End-to-End Task-Oriented Dialogue System", "primary_area": "", "supplementary_material": "", "author": "Weizhou Shen;Yingqi Gao;Canbin Huang;Fanqi Wan;Xiaojun Quan;Wei Bi", "authorids": "~Weizhou_Shen1;~Yingqi_Gao2;~Canbin_Huang1;~Fanqi_Wan1;~Xiaojun_Quan1;~Wei_Bi1", "gender": "M;M;M;M;M;F", "homepage": ";https://github.com/G17-Ki;https://github.com/OnewayLab;https://fanqiwan.github.io/;https://sites.google.com/site/xiaojunquan/;https://scholar.google.com.hk/citations?hl=en&user=aSJcgQMAAAAJ&view_op=list_works&sortby=pubdate#d=gsc_md_iad&u=%2Fcitations%3Fview_op%3Dimport_lookup%26hl%3Den%26imq%3DWei%2BBi%26json%3D%26btnA%3D1", "dblp": "245/3622;;359/1198;347/8267;90/5936;38/1163", "google_scholar": "387Sg1wAAAAJ;;;AeS1tmEAAAAJ;dRpg4t8AAAAJ;https://scholar.google.com.hk/citations?hl=en", "or_profile": "~Weizhou_Shen1;~Yingqi_Gao2;~Canbin_Huang1;~Fanqi_Wan1;~Xiaojun_Quan1;~Wei_Bi1", "aff": "SUN YAT-SEN UNIVERSITY;SUN YAT-SEN UNIVERSITY;SUN YAT-SEN UNIVERSITY;SUN YAT-SEN UNIVERSITY;SUN YAT-SEN UNIVERSITY;Hong Kong University of Science and Technology", "aff_domain": "sysu.edu.cn;sysu.edu.cn;sysu.edu.cn;sysu.edu.cn;sysu.edu.cn;ust.hk", "position": "PhD student;Undergrad student;Undergrad student;MS student;Full Professor;PhD student", "bibtex": "@inproceedings{\nshen2023retrievalgeneration,\ntitle={Retrieval-Generation Alignment for End-to-End Task-Oriented Dialogue System},\nauthor={Weizhou Shen and Yingqi Gao and Canbin Huang and Fanqi Wan and Xiaojun Quan and Wei Bi},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=V76kMIJI37}\n}", "github": "", "project": "", "reviewers": "ybw6;YzME;b5KD;myc6", "site": "https://openreview.net/forum?id=V76kMIJI37", "pdf_size": 0, "rating": "4;4;4;4", "confidence": "4;4;3;4", "excitement": "3;3;3;2", "reproducibility": "4;4;3;4", "correctness": "4;4;4;3", "rating_avg": 4.0, "confidence_avg": 3.75, "excitement_avg": 2.75, "reproducibility_avg": 3.75, "correctness_avg": 3.75, "replies_avg": 14, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;0000-0001-8457-0630", "linkedin": ";;;fanqiwan/;;", "aff_unique_index": "0;0;0;0;0;1", "aff_unique_norm": "Sun Yat-sen University;Hong Kong University of Science and Technology", "aff_unique_dep": ";", "aff_unique_url": "http://www.sysu.edu.cn;https://www.ust.hk", "aff_unique_abbr": "SYSU;HKUST", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "V9xsOja2oC", "title": "Homophone Disambiguation Reveals Patterns of Context Mixing in Speech Transformers", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Transformers have become a key architecture in speech processing, but our understanding of how they build up representations of acoustic and linguistic structure is limited. In this study, we address this gap by investigating how measures of 'context-mixing' developed for text models can be adapted and applied to models of spoken language. 
We identify a linguistic phenomenon that is ideal for such a case study: homophony in French (e.g. livre vs livres), where a speech recognition model has to attend to syntactic cues such as determiners and pronouns in order to disambiguate spoken words with identical pronunciations and transcribe them while respecting grammatical agreement. We perform a series of controlled experiments and probing analyses on Transformer-based speech models. Our findings reveal that representations in encoder-only models effectively incorporate these cues to identify the correct transcription, whereas encoders in encoder-decoder models mainly relegate the task of capturing contextual dependencies to decoder modules.", "keywords": "speech transformers;context mixing;model interpretability for spoken language", "primary_area": "", "supplementary_material": "", "author": "Hosein Mohebbi;Grzegorz Chrupa\u0142a;Willem Zuidema;Afra Alishahi", "authorids": "~Hosein_Mohebbi1;~Grzegorz_Chrupa\u0142a1;~Willem_Zuidema1;~Afra_Alishahi2", "gender": "M;M;M;F", "homepage": "https://hmohebbi.github.io/;http://grzegorz.chrupala.me;https://staff.fnwi.uva.nl/w.zuidema/;http://afra.alishahi.name/", "dblp": "289/6362;19/1379;67/1016;https://dblp.uni-trier.de/pid/69/8699.html", "google_scholar": "tWOBwigAAAAJ;https://scholar.google.nl/citations?user=p6m63xoAAAAJ;MBkG_FYAAAAJ;https://scholar.google.com/citations?hl=en", "or_profile": "~Hosein_Mohebbi1;~Grzegorz_Chrupa\u0142a1;~Willem_Zuidema1;~Afra_Alishahi2", "aff": "Tilburg University;Tilburg University;University of Amsterdam;Tilburg University", "aff_domain": "tilburguniversity.edu;tilburguniversity.edu;uva.nl;tilburguniversity.edu", "position": "PhD student;Associate Professor;Associate Professor;Associate Professor", "bibtex": "@inproceedings{\nmohebbi2023homophone,\ntitle={Homophone Disambiguation Reveals Patterns of Context Mixing in Speech Transformers},\nauthor={Hosein Mohebbi and Grzegorz Chrupa{\\l}a and Willem Zuidema and Afra Alishahi},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=V9xsOja2oC}\n}", "github": "", "project": "", "reviewers": "EiPu;RLmt;AWuy;vPyo", "site": "https://openreview.net/forum?id=V9xsOja2oC", "pdf_size": 0, "rating": "5;5;5;5", "confidence": "3;4;4;4", "excitement": "4;4;4;4", "reproducibility": "4;4;5;4", "correctness": "3;5;5;5", "rating_avg": 5.0, "confidence_avg": 3.75, "excitement_avg": 4.0, "reproducibility_avg": 4.25, "correctness_avg": 4.5, "replies_avg": 13, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0001-8184-7825;0000-0001-9498-6912;0000-0002-2362-5447;", "linkedin": "hosein-mohebbi/;;;", "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Tilburg University;University of Amsterdam", "aff_unique_dep": ";", "aff_unique_url": "https://www.tilburguniversity.edu/;https://www.uva.nl", "aff_unique_abbr": "Tilburg U;UvA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Netherlands" }, { "id": "VC2vPPetCU", "title": "Open-ended Commonsense Reasoning with Unrestricted Answer Candidates", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Open-ended Commonsense Reasoning is defined as solving a commonsense question without providing 1) a short list of answer candidates and 2) a pre-defined answer scope. 
Conventional ways of formulating the commonsense question into a question-answering form or utilizing external knowledge to learn retrieval-based methods are less applicable in the open-ended setting due to an inherent challenge. Without pre-defining an answer scope or a few candidates, open-ended commonsense reasoning entails predicting answers by searching over an extremely large searching space. Moreover, most questions require implicit multi-hop reasoning, which presents even more challenges to our problem. In this work, we leverage pre-trained language models to iteratively retrieve reasoning paths on the external knowledge base, which does not require task-specific supervision. The reasoning paths can help to identify the most precise answer to the commonsense question. We conduct experiments on two commonsense benchmark datasets. Compared to other approaches, our proposed method achieves better performance both quantitatively and qualitatively.", "keywords": "Commonsense Reasoning;Question Answering", "primary_area": "", "supplementary_material": "", "author": "Chen Ling;Xuchao Zhang;Xujiang Zhao;Yanchi Liu;Wei Cheng;Mika Oishi;Takao Osaki;Katsushi Matsuda;Haifeng Chen;Liang Zhao", "authorids": "~Chen_Ling3;~Xuchao_Zhang1;~Xujiang_Zhao1;~Yanchi_Liu1;~Wei_Cheng1;~Mika_Oishi1;~Takao_Osaki1;~Katsushi_Matsuda1;~Haifeng_Chen1;~Liang_Zhao6", "gender": ";M;M;M;F;M;M;;M;M", "homepage": "https://xuczhang.github.io/;https://zxj32.github.io/;;https://chengw07.github.io/;https://www.instagram.com/2mika4/?hl=en;https://www.researchgate.net/scientific-contributions/Takao-Osaki-2171202477;https://ieeexplore.ieee.org/author/37088216732;https://haifengchen.gitlab.io/intro/;https://cs.emory.edu/~lzhao41/;https://lingchen0331.github.io/", "dblp": ";221/5767;62/8146;89/2506-2.html;;;;08/57-1.html;63/5422-2;", "google_scholar": ";k2-JcFAAAAAJ;faLmr-YAAAAJ;PRrGVmoAAAAJ;;;;QzakB68AAAAJ;qnvyqtwAAAAJ;275NKcEAAAAJ", "or_profile": "~Xuchao_Zhang1;~Xujiang_Zhao1;~Yanchi_Liu1;~Wei_Cheng1;~Mika_Oishi1;~Takao_Osaki1;~Katsushi_Matsuda1;~Haifeng_Chen1;~Liang_Zhao6;~Chen_LING2", "aff": "Microsoft;NEC Labs America;NEC-Labs;NEC-Labs;NEC;NEC;NEC;NEC-Labs;Emory University;Emory University", "aff_domain": "microsoft.com;nec-labs.com;nec-labs.com;nec-labs.com;nec.com;nec.com;nec.com;nec-labs.com;emory.edu;emory.edu", "position": "Researcher;Researcher;Researcher;Principal Researcher;Researcher;Researcher;Researcher;Researcher;Associate Professor;PhD student", "bibtex": "@inproceedings{\nling2023openended,\ntitle={Open-ended Commonsense Reasoning with Unrestricted Answer Candidates},\nauthor={Chen Ling and Xuchao Zhang and Xujiang Zhao and Yanchi Liu and Wei Cheng and Mika Oishi and Takao Osaki and Katsushi Matsuda and Haifeng Chen and Liang Zhao},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=VC2vPPetCU}\n}", "github": "", "project": "", "reviewers": "UBfG;keAw;R9n2", "site": "https://openreview.net/forum?id=VC2vPPetCU", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;3;3", "excitement": "3;4;3", "reproducibility": "4;4;3", "correctness": "4;3;4", "rating_avg": 3.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 10, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;;;;0000-0002-2648-9989;0000-0001-8044-6026", "linkedin": ";zxj32/;;wei-cheng-ml/;;;;;;", 
"aff_unique_index": "0;1;2;2;3;3;3;2;4;4", "aff_unique_norm": "Microsoft;NEC Labs America;NEC Laboratories;NEC Corporation;Emory University", "aff_unique_dep": "Microsoft Corporation;;;;", "aff_unique_url": "https://www.microsoft.com;https://www.nec-labs.com;https://www.nec-labs.com;https://www.nec.com;https://www.emory.edu", "aff_unique_abbr": "Microsoft;NEC LA;NEC-Labs;NEC;Emory", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;1;1;1;0;0;0", "aff_country_unique": "United States;Japan" }, { "id": "VCyOXC8RfQ", "title": "Measuring bias in Instruction-Following models with P-AT", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Instruction-Following Language Models (IFLMs) are promising and versatile tools for solving many downstream, information-seeking tasks. Given their success, there is an urgent need to have a shared resource to determine whether existing and new IFLMs are prone to produce biased language interactions. In this paper, we propose Prompt Association Test (P-AT): a new resource for testing the presence of social biases in IFLMs. \nP-AT stems from WEAT (Caliskan et al., 2017) and generalizes the notion of measuring social biases to IFLMs. Basically, we cast WEAT word tests in promptized classification tasks, and we associate a metric - the bias score. Our resource consists of 2310 prompts. We then experimented with several families of IFLMs discovering gender and race biases in all the analyzed models.\nWe expect P-AT to be an important tool for quantifying bias across different dimensions and, therefore, for encouraging the creation of fairer IFLMs before their distortions have consequences in the real world.", "keywords": "Large Language Models;Instruction-Following;Bias", "primary_area": "", "supplementary_material": "", "author": "Dario Onorati;Elena Sofia Ruzzetti;Davide Venditti;Leonardo Ranaldi;Fabio Massimo Zanzotto", "authorids": "~Dario_Onorati1;~Elena_Sofia_Ruzzetti1;~Davide_Venditti1;~Leonardo_Ranaldi1;~Fabio_Massimo_Zanzotto1", "gender": "M;F;M;M;M", "homepage": "https://phd.uniroma1.it/web/DARIO-ONORATI_nP2000368_EN.aspx;;;;http://art.uniroma2.it/zanzotto", "dblp": ";302/4055;;278/7831;32/797", "google_scholar": ";XRi2_woAAAAJ;;https://scholar.google.com/citations?hl=ien;https://scholar.google.it/citations?user=azv7Qr4AAAAJ", "or_profile": "~Dario_Onorati1;~Elena_Sofia_Ruzzetti1;~Davide_Venditti1;~Leonardo_Ranaldi1;~Fabio_Massimo_Zanzotto1", "aff": "\"La Sapienza\" University of Rome;Universit\u00e0 degli Studi di Roma Tor Vergata;Universit\u00e0 degli studi Roma Tor Vergata;Universit\u00e0 degli studi Roma Tor Vergata;University of Rome Tor Vergata", "aff_domain": "uniroma1.it;uniroma2.it;uniroma2.it;uniroma2.it;uniroma2.it", "position": "PhD student;PhD student;PhD student;Postdoc;Associate Professor", "bibtex": "@inproceedings{\nonorati2023measuring,\ntitle={Measuring bias in Instruction-Following models with P-{AT}},\nauthor={Dario Onorati and Elena Sofia Ruzzetti and Davide Venditti and Leonardo Ranaldi and Fabio Massimo Zanzotto},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=VCyOXC8RfQ}\n}", "github": "", "project": "", "reviewers": "Ckmh;egsY;UNmW", "site": "https://openreview.net/forum?id=VCyOXC8RfQ", "pdf_size": 0, "rating": "4;4;4", "confidence": "5;4;4", "excitement": "3;2;4", "reproducibility": "3;4;4", "correctness": "4;3;4", "rating_avg": 4.0, "confidence_avg": 4.333333333333333, 
"excitement_avg": 3.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0000-0001-8488-4146;0000-0002-7301-3596", "linkedin": "https://it.linkedin.com/in/dario-onorati-63b52bb2;;davide-venditti-b163a9201/;;fabio-massimo-zanzotto-b027831/", "aff_unique_index": "0;1;1;1;2", "aff_unique_norm": "La Sapienza University of Rome;Universit\u00e0 degli Studi di Roma Tor Vergata;University of Rome Tor Vergata", "aff_unique_dep": ";;", "aff_unique_url": "https://www.uniroma1.it;https://www.uniroma2.it;https://www.uniroma2.it", "aff_unique_abbr": "Sapienza;Uniroma2;UniRoma2", "aff_campus_unique_index": "0;1;1;1;1", "aff_campus_unique": "Rome;Tor Vergata", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "Italy" }, { "id": "VGb2RhMFAI", "title": "Air-Decoding: Attribute Distribution Reconstruction for Decoding-Time Controllable Text Generation", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Controllable text generation (CTG) aims to generate text with desired attributes, and decoding-time-based methods have shown promising performance on this task. However, in this paper, we identify the phenomenon of Attribute Collapse for the first time. It causes the fluency of generated text to rapidly decrease when the control strength exceeds a critical value, rendering the text completely unusable. This limitation hinders the effectiveness of decoding methods in achieving high levels of controllability. To address this problem, we propose a novel lightweight decoding framework named Air-Decoding. Its main idea is reconstructing the attribute distributions to balance the weights between attribute words and non-attribute words to generate more fluent text. Specifically, we train prefixes by prefix-tuning to obtain attribute distributions. Then we design a novel attribute distribution reconstruction method to balance the obtained distributions and use the reconstructed distributions to guide language models for generation, effectively avoiding the issue of Attribute Collapse. 
Experiments on multiple CTG tasks prove that our method achieves a new state-of-the-art control performance.", "keywords": "controllable;text generation;decoding-time", "primary_area": "", "supplementary_material": "", "author": "Tianqi Zhong;Quan Wang;Jingxuan Han;Yongdong Zhang;Zhendong Mao", "authorids": "~Tianqi_Zhong2;~Quan_Wang7;~Jingxuan_Han1;~Yongdong_Zhang2;~Zhendong_Mao1", "gender": "M;F;M;M;", "homepage": "https://github.com/R1047;;https://github.com/hjx999222;https://imcc.ustc.edu.cn/_upload/tpl/0d/13/3347/template3347/zhangyongdong.html;", "dblp": ";;;z/YongdongZhang;", "google_scholar": ";l2yEbhAAAAAJ;;https://scholar.google.com.hk/citations?user=hxGs4ukAAAAJ;", "or_profile": "~Tianqi_Zhong2;~Quan_Wang7;~Jingxuan_Han1;~Yongdong_Zhang2;~Zhendong_Mao1", "aff": "University of Science and Technology of China;Beijing University of Posts and Telecommunications;University of Science and Technology of China;University of Science and Technology of China;", "aff_domain": "ustc.edu.cn;bupt.edu.cn;ustc.edu.cn;ustc.edu.cn;", "position": "MS student;Associate Professor;PhD student;Full Professor;", "bibtex": "@inproceedings{\nzhong2023airdecoding,\ntitle={Air-Decoding: Attribute Distribution Reconstruction for Decoding-Time Controllable Text Generation},\nauthor={Tianqi Zhong and Quan Wang and Jingxuan Han and Yongdong Zhang and Zhendong Mao},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=VGb2RhMFAI}\n}", "github": "", "project": "", "reviewers": "pdMZ;Afd2;cf4V;yX2p", "site": "https://openreview.net/forum?id=VGb2RhMFAI", "pdf_size": 0, "rating": "4;4;4;4", "confidence": "4;5;4;3", "excitement": "4;3;4;3", "reproducibility": "4;5;4;5", "correctness": "4;4;4;3", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.5, "reproducibility_avg": 4.5, "correctness_avg": 3.75, "replies_avg": 13, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0000-0003-0066-3448;", "linkedin": ";;;;", "aff_unique_index": "0;1;0;0", "aff_unique_norm": "University of Science and Technology of China;Beijing University of Posts and Telecommunications", "aff_unique_dep": ";", "aff_unique_url": "http://www.ustc.edu.cn;http://www.bupt.edu.cn/", "aff_unique_abbr": "USTC;BUPT", "aff_campus_unique_index": "1", "aff_campus_unique": ";Beijing", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "VIDDZO2f0A", "title": "Editing Common Sense in Transformers", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Editing model parameters directly in Transformers makes updating open-source transformer-based models possible without re-training. However, these editing methods have only been evaluated on statements about encyclopedic knowledge with a single correct answer. \nCommonsense knowledge with multiple correct answers, e.g., an apple can be green or red but not transparent, has not been studied but is as essential for enhancing transformers' reliability and usefulness. In this paper, we investigate whether commonsense judgments are causally associated with localized, editable parameters in Transformers, and we provide an affirmative answer. We find that directly applying the MEMIT editing algorithm results in sub-par performance and improve it for the commonsense domain by varying edit tokens and improving the layer selection strategy, i.e., $MEMIT_{CSK}$. 
GPT-2 Large and XL models edited using $MEMIT_{CSK}$ outperform best-fine-tuned baselines by 10.97% and 10.73% F1 scores on PEP3k and 20Q datasets. In addition, we propose a novel evaluation dataset, $PROBE\\ SET$, that contains unaffected and affected neighborhoods, affected paraphrases, and affected reasoning challenges. $MEMIT_{CSK}$ performs well across the metrics while fine-tuning baselines show significant trade-offs between unaffected and affected metrics. These results suggest a compelling future direction for incorporating feedback about common sense into Transformers through direct model editing.", "keywords": "Commonsense;Transformers;Language Models;Model Editing;GPT;Plausibility Judgements", "primary_area": "", "supplementary_material": "", "author": "Anshita Gupta;Debanjan Mondal;Akshay Krishna Sheshadri;Wenlong Zhao;Xiang Lorraine Li;Sarah Wiegreffe;Niket Tandon", "authorids": "~Anshita_Gupta1;~Debanjan_Mondal2;~Akshay_Krishna_Sheshadri1;~Wenlong_Zhao1;~Xiang_Lorraine_Li1;~Sarah_Wiegreffe1;~Niket_Tandon2", "gender": "F;M;M;;;;M", "homepage": ";;;;;;https://niket.tandon.info", "dblp": "239/7181;;283/5432;03/4555-1;;;29/9923", "google_scholar": ";F7j71ZsAAAAJ;4T5db2gAAAAJ;i0lW2EAAAAAJ;;;9uWuZkUAAAAJ", "or_profile": "~Anshita_Gupta1;~Debanjan_Mondal2;~Akshay_Krishna_Sheshadri1;~Wenlong_Zhao1;~Xiang_Lorraine_Li1;~Sarah_Wiegreffe1;~Niket_Tandon2", "aff": "Department of Computer Science, University of Massachusetts at Amherst;Department of Computer Science, University of Massachusetts at Amherst;Department of Computer Science, University of Massachusetts at Amherst;University of Massachusetts at Amherst;;;Allen Institute for Artificial Intelligence", "aff_domain": "cs.umass.edu;cs.umass.edu;cs.umass.edu;cs.umass.edu;;;allenai.org", "position": "MS student;MS student;MS student;PhD student;;;Researcher", "bibtex": "@inproceedings{\ngupta2023editing,\ntitle={Editing Common Sense in Transformers},\nauthor={Anshita Gupta and Debanjan Mondal and Akshay Krishna Sheshadri and Wenlong Zhao and Xiang Lorraine Li and Sarah Wiegreffe and Niket Tandon},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=VIDDZO2f0A}\n}", "github": "", "project": "", "reviewers": "4FU4;bRLr;wc3N", "site": "https://openreview.net/forum?id=VIDDZO2f0A", "pdf_size": 0, "rating": "5;5;5", "confidence": "2;4;5", "excitement": "4;3;2", "reproducibility": "4;4;4", "correctness": "4;4;4", "rating_avg": 5.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.0, "reproducibility_avg": 4.0, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-3253-7705;;;;;;", "linkedin": ";demon702/;akshay-krishna-sheshadri-23b405181/;wenlong-zhao/;;;", "aff_unique_index": "0;0;0;0;1", "aff_unique_norm": "University of Massachusetts Amherst;Allen Institute for Artificial Intelligence", "aff_unique_dep": "Department of Computer Science;", "aff_unique_url": "https://www.umass.edu;https://allenai.org", "aff_unique_abbr": "UMass Amherst;AI2", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Amherst;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "VJenYElbmY", "title": "Addressing Linguistic Bias through a Contrastive Analysis of Academic Writing in the NLP Domain", "track": "main", "status": "Long Main", "tldr": "", "abstract": "It has been well documented that a reviewer\u2019s opinion of the nativeness 
of expression in an academic paper affects the likelihood of it being accepted for publication. Previous works have also shone a light on the stress and anxiety authors who are non-native English speakers experience when attempting to publish in international venues. We explore how this might be a concern in the field of Natural Language Processing (NLP) through conducting a comprehensive statistical analysis of NLP paper abstracts, identifying how authors of different linguistic backgrounds differ in the lexical, morphological, syntactic and cohesive aspects of their writing. Through our analysis, we identify that there are a number of characteristics that are highly variable across the different corpora examined in this paper. This indicates potential for the presence of linguistic bias. Therefore, we outline a set of recommendations to publishers of academic journals and conferences regarding their guidelines and resources for prospective authors in order to help enhance inclusivity and fairness.", "keywords": "contrastive analysis;linguistic bias;lexis;morphology;syntax;cohesion", "primary_area": "", "supplementary_material": "", "author": "Robert Ridley;Zhen Wu;Jianbing Zhang;Shujian Huang;Xinyu Dai", "authorids": "~Robert_Ridley1;~Zhen_Wu2;~Jianbing_Zhang1;~Shujian_Huang1;~Xinyu_Dai1", "gender": "M;M;M;M;M", "homepage": ";https://wuzhen247.github.io/;https://cs.nju.edu.cn/zhangjb/;http://nlp.nju.edu.cn/huangsj/;http://cs.nju.edu.cn/daixinyu", "dblp": ";16/4485-2;11/6084;57/8451;39/5815", "google_scholar": "-0Rh_sgAAAAJ;IoGlgtoAAAAJ;;HF3-E9kAAAAJ;https://scholar.google.com/citations?hl=en", "or_profile": "~Robert_Ridley1;~Zhen_Wu2;~Jianbing_Zhang1;~Shujian_Huang1;~Xinyu_Dai1", "aff": "Nanjing University;Nanjing University;Nanjing University;Nanjing University;Nanjing University", "aff_domain": "nju.edu.cn;nju.edu.cn;nju.edu.cn;nju.edu.cn;nju.edu.cn", "position": "PhD student;Researcher;Associate Professor;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nridley2023addressing,\ntitle={Addressing Linguistic Bias through a Contrastive Analysis of Academic Writing in the {NLP} Domain},\nauthor={Robert Ridley and Zhen Wu and Jianbing Zhang and Shujian Huang and Xinyu Dai},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=VJenYElbmY}\n}", "github": "", "project": "", "reviewers": "5giR;QejG;Nr5L", "site": "https://openreview.net/forum?id=VJenYElbmY", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;4;4", "excitement": "4;4;4", "reproducibility": "4;5;3", "correctness": "2;5;3", "rating_avg": 5.0, "confidence_avg": 4.0, "excitement_avg": 4.0, "reproducibility_avg": 4.0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-7678-103X;;;", "linkedin": ";;;;", "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Nanjing University", "aff_unique_dep": "", "aff_unique_url": "https://www.nju.edu.cn", "aff_unique_abbr": "Nanjing U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "VKHWtusV6H", "title": "DSI++: Updating Transformer Memory with New Documents", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Differentiable Search Indices (DSIs) encode a corpus of documents in the parameters of a model and use the same model to map queries directly to relevant document identifiers. 
Despite the solid performance of DSI models, successfully deploying them in scenarios where document corpora change with time is an open problem. In this work, we introduce DSI++, a continual learning challenge for DSI with the goal of continuously indexing new documents while being able to answer queries related to both previously and newly indexed documents. Across different model scales and document identifier representations, we show that continual indexing of new documents leads to considerable forgetting of previously indexed documents. We also hypothesize and verify that the model experiences forgetting events during training, leading to unstable learning. To mitigate these issues, we investigate two approaches. The first focuses on modifying the training dynamics. Flatter minima implicitly alleviate forgetting, so we explicitly optimize for flatter loss basins and show that the model stably memorizes more documents (+12\\%).\nNext, we introduce a parametric memory to generate pseudo-queries for documents and supplement them during incremental indexing to prevent forgetting for the retrieval task. Extensive experiments on a novel continual indexing benchmark based on Natural Questions demonstrate that our proposed solution mitigates the forgetting in DSI++ by a significant margin and improves the average Hits@10 by +21.1\\% over competitive baselines.", "keywords": "Differentiable Search Index;Transformer Memory;Catastrophic Forgetting;Continual Learning;Lifelong Learning;Semi-Supervised Learning", "primary_area": "", "supplementary_material": "", "author": "Sanket Vaibhav Mehta;Jai Gupta;Yi Tay;Mostafa Dehghani;Vinh Q. Tran;Jinfeng Rao;Marc Najork;Emma Strubell;Donald Metzler", "authorids": "~Sanket_Vaibhav_Mehta2;~Jai_Gupta1;~Yi_Tay1;~Mostafa_Dehghani1;~Vinh_Q._Tran1;~Jinfeng_Rao2;~Marc_Najork1;~Emma_Strubell1;~Donald_Metzler1", "gender": "M;M;M;M;M;;M;Non-Binary;M", "homepage": "https://sanketvmehta.github.io;;http://yitay.net;http://mostafadehghani.com/;https://vqtran.github.io;;http://marc.najork.org/;http://strubell.github.io;https://research.google/people/DonaldMetzler/", "dblp": "225/7804;154/6787-1;;125/4062;77/2885-2.html;;n/MarcNajork;153/2253;95/2272", "google_scholar": "H4pn-ogAAAAJ;;VBclY_cAAAAJ;https://scholar.google.nl/citations?user=MiHOX3QAAAAJ;ot3WsOwAAAAJ;;7HeAnjwAAAAJ;UCDMtM0AAAAJ;bmXpOd8AAAAJ", "or_profile": "~Sanket_Vaibhav_Mehta2;~Jai_Gupta1;~Yi_Tay1;~Mostafa_Dehghani1;~Vinh_Q._Tran1;~Jinfeng_Rao2;~Marc_Najork1;~Emma_Strubell1;~Donald_Metzler1", "aff": "Carnegie Mellon University;Google Inc;Google;Google DeepMind;Google;;Google Research;Allen Institute for Artificial Intelligence;Google", "aff_domain": "cmu.edu;google.com;google.com;google.com;google.com;;google.com;allenai.org;google.com", "position": "PhD student;Researcher;Research Scientist;Research Scientist;Researcher;;Director, Research Engineering;Visiting Researcher;Research Scientist", "bibtex": "@inproceedings{\nmehta2023dsi,\ntitle={{DSI}++: Updating Transformer Memory with New Documents},\nauthor={Sanket Vaibhav Mehta and Jai Gupta and Yi Tay and Mostafa Dehghani and Vinh Q. 
Tran and Jinfeng Rao and Marc Najork and Emma Strubell and Donald Metzler},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=VKHWtusV6H}\n}", "github": "", "project": "", "reviewers": "Gobp;mZs9;fVVm", "site": "https://openreview.net/forum?id=VKHWtusV6H", "pdf_size": 0, "rating": "5;5;5", "confidence": "3;3;4", "excitement": "4;3;3", "reproducibility": "3;4;3", "correctness": "3;3;3", "rating_avg": 5.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 9, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0003-1809-4685;;;;;;0000-0003-1423-0854;;0000-0003-4276-6269", "linkedin": "sanketvmehta/;;;;vinh-tran-32597468/;;najork/;;donmetzler/", "aff_unique_index": "0;1;1;1;1;1;2;1", "aff_unique_norm": "Carnegie Mellon University;Google;Allen Institute for Artificial Intelligence", "aff_unique_dep": ";Google;", "aff_unique_url": "https://www.cmu.edu;https://www.google.com;https://allenai.org", "aff_unique_abbr": "CMU;Google;AI2", "aff_campus_unique_index": "1;1;1;1;1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0;1;0;0;0;0", "aff_country_unique": "United States;United Kingdom" }, { "id": "VLrtaSXOWP", "title": "Continual Named Entity Recognition without Catastrophic Forgetting", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Continual Named Entity Recognition (CNER) is a burgeoning area, which involves updating an existing model by incorporating new entity types sequentially. Nevertheless, continual learning approaches are often severely afflicted by catastrophic forgetting. This issue is intensified in CNER due to the consolidation of old entity types from previous steps into the non-entity type at each step, leading to what is known as the semantic shift problem of the non-entity type. In this paper, we introduce a pooled feature distillation loss that skillfully navigates the trade-off between retaining knowledge of old entity types and acquiring new ones, thereby more effectively mitigating the problem of catastrophic forgetting. Additionally, we develop a confidence-based pseudo-labeling for the non-entity type, i.e., predicting entity types using the old model to handle the semantic shift of the non-entity type. Following the pseudo-labeling process, we suggest an adaptive re-weighting type-balanced learning strategy to handle the issue of biased type distribution. We carried out comprehensive experiments on ten CNER settings using three different datasets. 
The results illustrate that our method significantly outperforms prior state-of-the-art approaches, registering an average improvement of 6.3% and 8.0% in Micro and Macro F1 scores, respectively.", "keywords": "Continual Named Entity Recognition without Catastrophic Forgetting", "primary_area": "", "supplementary_material": "", "author": "Duzhen Zhang;Wei Cong;Jiahua Dong;Yahan Yu;Xiuyi Chen;Yonggang Zhang;Zhen Fang", "authorids": "~Duzhen_Zhang1;~Wei_Cong1;~Jiahua_Dong1;~Yahan_Yu1;~Xiuyi_Chen1;~Yonggang_Zhang1;~Zhen_Fang2", "gender": "M;F;;;M;M;M", "homepage": "https://bladedancer957.github.io/;;;;;https://yonggangzhangben.github.io/index.html;https://fang-zhen.github.io/index.html", "dblp": "235/0398.html;125/1768;;;218/7190;27/6859-3;", "google_scholar": "o0jlAfwAAAAJ;9ldB__MAAAAJ;;QPcH1boAAAAJ;https://scholar.google.com/citations?hl=en;XSbEr98AAAAJ;OzD6WJcAAAAJ", "or_profile": "~Duzhen_Zhang1;~Wei_Cong1;~Jiahua_Dong1;~Yahan_Yu1;~Xiuyi_Chen1;~Yonggang_Zhang1;~Zhen_Fang2", "aff": "Institute of Automation, Chinese Academy of Sciences;Chinese Academy of Sciences, Shenyang Institute of Automation;;Institute of Automation, Chinese Academy of Sciences;Baidu;Hong Kong Baptist University;University of Technology Sydney", "aff_domain": "ia.ac.cn;sia.cn;;ia.ac.cn;baidu.com;hkbu.edu.hk;uts.edu.au", "position": "PhD student;PhD student;;MS student;Researcher;Postdoc;Postdoc", "bibtex": "@inproceedings{\nzhang2023continual,\ntitle={Continual Named Entity Recognition without Catastrophic Forgetting},\nauthor={Duzhen Zhang and Wei Cong and Jiahua Dong and Yahan Yu and Xiuyi Chen and Yonggang Zhang and Zhen Fang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=VLrtaSXOWP}\n}", "github": "", "project": "", "reviewers": "xCyC;nTza;CtRi", "site": "https://openreview.net/forum?id=VLrtaSXOWP", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;4", "excitement": "4;3;3", "reproducibility": "4;4;4", "correctness": "4;4;4", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.0, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-4280-431X;;;;;0000-0002-4080-7592;0000-0003-0602-6255", "linkedin": ";;;;;;", "aff_unique_index": "0;0;0;1;2;3", "aff_unique_norm": "Chinese Academy of Sciences;Baidu;Hong Kong Baptist University;University of Technology Sydney", "aff_unique_dep": "Institute of Automation;Baidu, Inc.;;", "aff_unique_url": "http://www.ia.cas.cn;https://www.baidu.com;https://www.hkbu.edu.hk;https://www.uts.edu.au", "aff_unique_abbr": "CAS;Baidu;HKBU;UTS", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Shenyang;Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0;1", "aff_country_unique": "China;Australia" }, { "id": "VN298kRz91", "title": "Romanization-based Large-scale Adaptation of Multilingual Language Models", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "Large multilingual pretrained language models (mPLMs) have become the de facto state of the art for cross-lingual transfer in NLP. However, their large-scale deployment to many languages, besides pretraining data scarcity, is also hindered by the increase in vocabulary size and limitations in their parameter budget. In order to boost the capacity of mPLMs to deal with low-resource and unseen languages, we explore the potential of leveraging transliteration on a massive scale. 
In particular, we explore the UROMAN transliteration tool, which provides mappings from UTF-8 to Latin characters for all the writing systems, enabling inexpensive romanization for virtually any language. We first focus on establishing how UROMAN compares against other language-specific and manually curated transliterators for adapting multilingual PLMs. We then study and compare a plethora of data- and parameter-efficient strategies for adapting the mPLMs to romanized and non-romanized corpora of 14 diverse low-resource languages. Our results reveal that UROMAN-based transliteration can offer strong performance for many languages, with particular gains achieved in the most challenging setups: on languages with unseen scripts and with limited training data without any vocabulary augmentation. Further analyses reveal that an improved tokenizer based on romanized data can even outperform non-transliteration-based methods in the majority of languages.", "keywords": "Multilinguality;Parameter Efficiency;Transliteration", "primary_area": "", "supplementary_material": "", "author": "Sukannya Purkayastha;Sebastian Ruder;Jonas Pfeiffer;Iryna Gurevych;Ivan Vuli\u0107", "authorids": "~Sukannya_Purkayastha1;~Sebastian_Ruder2;~Jonas_Pfeiffer1;~Iryna_Gurevych1;~Ivan_Vuli\u01071", "gender": "F;;M;;M", "homepage": ";;https://pfeiffer.ai;;https://sites.google.com/site/ivanvulic/", "dblp": "255/8545;;222/9866.html;;77/9768", "google_scholar": "SAhTZJIAAAAJ;;https://scholar.google.com/citations?hl=en;;ZX8js60AAAAJ", "or_profile": "~Sukannya_Purkayastha1;~Sebastian_Ruder2;~Jonas_Pfeiffer1;~Iryna_Gurevych1;~Ivan_Vuli\u01071", "aff": "Technische Universit\u00e4t Darmstadt;;Google DeepMind;;PolyAI Limited", "aff_domain": "tu-darmstadt.de;;google.com;;poly-ai.com", "position": "PhD student;;Researcher;;Senior Scientist", "bibtex": "@inproceedings{\npurkayastha2023romanizationbased,\ntitle={Romanization-based Large-scale Adaptation of Multilingual Language Models},\nauthor={Sukannya Purkayastha and Sebastian Ruder and Jonas Pfeiffer and Iryna Gurevych and Ivan Vuli{\\'c}},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=VN298kRz91}\n}", "github": "", "project": "", "reviewers": "PTYq;oGZa;SUm1", "site": "https://openreview.net/forum?id=VN298kRz91", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;3;5", "excitement": "3;3;3", "reproducibility": "3;4;5", "correctness": "3;3;3", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 3.0, "reproducibility_avg": 4.0, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;", "linkedin": "sukannya-purkayastha-5144a3118/;;jonas-pfeiffer/;;ivan-vuli%C4%87-286b4a81/", "aff_unique_index": "0;1;2", "aff_unique_norm": "Technische Universit\u00e4t Darmstadt;Google;PolyAI Limited", "aff_unique_dep": ";Google DeepMind;", "aff_unique_url": "https://www.tu-darmstadt.de;https://deepmind.com;https://www.poly.ai", "aff_unique_abbr": "TUD;DeepMind;PolyAI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1", "aff_country_unique": "Germany;United Kingdom" }, { "id": "VQQeyiAqtv", "title": "Data Selection Curriculum for Abstractive Text Summarization", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "Abstractive Text Summarization (ATS) models are commonly trained using large-scale data that is randomly shuffled. 
However, the impact of data selection and data ordering on ATS models remains a relatively unexplored research area, where a significant challenge lies in accurately assessing the learning difficulty of each training instance. This study introduces a Data Selection Curriculum (DSC) scoring system that incorporates both the difficulty of improving the ATS model via an instance and the expected performance on this instance. By selectively excluding excessively simple and overly complex instances, the training efficiency can be optimized. Furthermore, curriculum learning is integrated to accelerate convergence and improve performance by gradually increasing the learning difficulty, inspired by human learners. Experimental results on the CNN/DailyMail dataset demonstrate that our approach surpasses potent baselines, utilizing a mere 20\\% of the available instances.", "keywords": "Data Selection;Curriculum Learning;Abstractive Text Summarization", "primary_area": "", "supplementary_material": "", "author": "Shichao Sun;Ruifeng Yuan;Jianfei He;Ziqiang Cao;Wenjie Li;Xiaohua Jia", "authorids": "~Shichao_Sun1;~Ruifeng_Yuan1;~Jianfei_He1;~Ziqiang_Cao2;~Wenjie_Li1;~Xiaohua_Jia1", "gender": "M;M;M;M;F;M", "homepage": "https://shichaosun.github.io;http://www4.comp.polyu.edu.hk/~csryuan/#;https://scholars.cityu.edu.hk/en/persons/jianfei-he(a2c18133-82d1-40b7-999c-195791800882).html;;https://web.comp.polyu.edu.hk/cswjli/;http://www.cs.cityu.edu.hk/~jia", "dblp": ";;;148/4447;33/3999-2.html;j/XiaohuaJia.html", "google_scholar": "https://scholar.google.com/citations?hl=en;;;https://scholar.google.com/citations?hl=zh-CN;Rx5swD4AAAAJ;", "or_profile": "~Shichao_Sun1;~Ruifeng_Yuan1;~Jianfei_He1;~Ziqiang_Cao2;~Wenjie_Li1;~Xiaohua_Jia1", "aff": "The Hong Kong Polytechnic University;Hong Kong Polytechnic University;City University of Hong Kong;Soochow University, China;The Hong Kong Polytechnic University, The Hong Kong Polytechnic University;City University of Hong Kong", "aff_domain": "polyu.edu.hk;polyu.edu.hk;cityu.edu.hk;suda.edu.cn;comp.polyu.edu.hk;cityu.edu.hk", "position": "PhD student;PhD student;PhD student;Associate Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nsun2023data,\ntitle={Data Selection Curriculum for Abstractive Text Summarization},\nauthor={Shichao Sun and Ruifeng Yuan and Jianfei He and Ziqiang Cao and Wenjie Li and Xiaohua Jia},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=VQQeyiAqtv}\n}", "github": "", "project": "", "reviewers": "pCuk;r3Wo;qyJh;HkHS", "site": "https://openreview.net/forum?id=VQQeyiAqtv", "pdf_size": 0, "rating": "3;3;3;3", "confidence": "4;4;4;5", "excitement": "3;3;3;3", "reproducibility": "4;4;5;2", "correctness": "3;3;2;4", "rating_avg": 3.0, "confidence_avg": 4.25, "excitement_avg": 3.0, "reproducibility_avg": 3.75, "correctness_avg": 3.0, "replies_avg": 14, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0000-0002-1077-9033;0000-0002-7360-8864;0000-0001-8702-8302", "linkedin": ";;;;;", "aff_unique_index": "0;0;1;2;0;1", "aff_unique_norm": "Hong Kong Polytechnic University;City University of Hong Kong;Soochow University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.polyu.edu.hk;https://www.cityu.edu.hk;https://www.soochow.edu.cn", "aff_unique_abbr": "PolyU;CityU;Soochow U", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;0;0;0;0", 
"aff_country_unique": "China" }, { "id": "VSBBOEUcmD", "title": "LLM-enhanced Self-training for Cross-domain Constituency Parsing", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Self-training has proven to be an effective approach for cross-domain tasks, and in this study, we explore its application to cross-domain constituency parsing. Traditional self-training methods rely on limited and potentially low-quality raw corpora. To overcome this limitation, we propose enhancing self-training with the large language model (LLM) to generate domain-specific raw corpora iteratively. For the constituency parsing, we introduce grammar rules that guide the LLM in generating raw corpora and establish criteria for selecting pseudo instances. Our experimental results demonstrate that self-training for constituency parsing, equipped with an LLM, outperforms traditional methods regardless of the LLM's performance. Moreover, the combination of grammar rules and confidence criteria for pseudo-data selection yields the highest performance in the cross-domain constituency parsing.", "keywords": "constituency parsing", "primary_area": "", "supplementary_material": "", "author": "jianling li;Meishan Zhang;Peiming Guo;Min Zhang;Yue Zhang", "authorids": "~jianling_li1;~Meishan_Zhang1;~Peiming_Guo1;~Min_Zhang9;~Yue_Zhang7", "gender": "F;M;M;M;M", "homepage": ";https://zhangmeishan.github.io/;https://zhangmin-nlp-ai.github.io/;http://frcchang.github.io;https://github.com/guopeiming", "dblp": "90/2428;127/0273;83/5342-5;47/722-4;335/4743", "google_scholar": ";https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com/citations?hl=zh-CN;;", "or_profile": "~jianling_li1;~Meishan_Zhang1;~Min_Zhang9;~Yue_Zhang7;~Guo_Peiming1", "aff": "Tianjin University;Tianjin University, China;Harbin Institute of Technology, Shenzhen;Westlake University;Tianjin University", "aff_domain": "tju.edu.cn;tju.edu.cn;hit.edu.cn;westlake.edu.cn;tju.edu.cn", "position": "PhD student;Associate Professor;Full Professor;Full Professor;MS student", "bibtex": "@inproceedings{\nli2023llmenhanced,\ntitle={{LLM}-enhanced Self-training for Cross-domain Constituency Parsing},\nauthor={jianling li and Meishan Zhang and Peiming Guo and Min Zhang and Yue Zhang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=VSBBOEUcmD}\n}", "github": "", "project": "", "reviewers": "imit;wYfy;8ZMA", "site": "https://openreview.net/forum?id=VSBBOEUcmD", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;2;4", "excitement": "4;3;3", "reproducibility": "3;4;3", "correctness": "3;4;3", "rating_avg": 5.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0000-0002-5214-2268;0000-0002-0890-2290", "linkedin": ";;;;", "aff_unique_index": "0;0;1;2;0", "aff_unique_norm": "Tianjin University;Harbin Institute of Technology;Westlake University", "aff_unique_dep": ";;", "aff_unique_url": "http://www.tju.edu.cn;http://en.hhit.edu.cn/;https://www.westlake.edu.cn", "aff_unique_abbr": "TJU;HIT;WU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Shenzhen", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "VTWWvYtF1R", "title": "Reasoning with Language Model is Planning with World Model", "track": "main", 
"status": "Long Main", "tldr": "", "abstract": "Large language models (LLMs) have shown remarkable reasoning capabilities, particularly with Chain-of-Thought-style prompts. However, LLMs can still struggle with problems that are easy for humans, such as generating action plans for executing tasks or performing complex math or logical reasoning. This is due to LLMs' absence of an internal world model for predicting world states (e.g., environment status, variable values) and simulating long-term action outcomes of actions. This prevents LLMs from performing deliberate planning akin to human brains, which involves exploring alternative reasoning paths, anticipating future states and rewards, and iteratively refining existing reasoning steps. To overcome the limitations, we propose a new LLM reasoning framework, Reasoning via Planning (RAP). RAP repurposes the LLM as both a world model and a reasoning agent, and incorporates a principled planning algorithm (based on Monte Carlo Tree Search) for strategic exploration in the vast reasoning space. During reasoning, the LLM (as agent) incrementally builds a reasoning tree under the guidance of the LLM (as world model) and task-specific rewards, properly balancing exploration v.s. exploitation to achieve a high-reward reasoning path efficiently. We apply RAP to a variety of challenging reasoning problems, such as plan generation, math reasoning, and logical inference. Empirical results demonstrate the superiority of RAP over various strong baselines, including CoT and least-to-most prompting with self-consistency, e.g., RAP on LLaMA-33B surpasses CoT on GPT-4 with 33\\% relative improvement in plan generation.", "keywords": "Large Language Model;Reasoning", "primary_area": "", "supplementary_material": "", "author": "Shibo Hao;Yi Gu;Haodi Ma;Joshua Jiahua Hong;Zhen Wang;Daisy Zhe Wang;Zhiting Hu", "authorids": "~Shibo_Hao1;~Yi_Gu4;~Haodi_Ma1;~Joshua_Jiahua_Hong1;~Zhen_Wang6;~Daisy_Zhe_Wang1;~Zhiting_Hu3", "gender": "M;M;M;M;M;F;M", "homepage": "https://ber666.github.io/;https://wu-qing-157.github.io/;https://5fc46e77daf69.site123.me/;;https://zhenwang9102.github.io;https://dsr.cise.ufl.edu/daisyw/index.html;http://zhiting.ucsd.edu", "dblp": "302/1341;;https://dblp.org/search?q=Haodi+Ma;;78/6727;;134/4031", "google_scholar": "xwbHbUQAAAAJ;https://scholar.google.com/citations?hl=en;Cc94HYkAAAAJ;;asBaytUAAAAJ;;N7_xhHoAAAAJ", "or_profile": "~Shibo_Hao1;~Yi_Gu4;~Haodi_Ma1;~Joshua_Jiahua_Hong1;~Zhen_Wang6;~Daisy_Zhe_Wang1;~Zhiting_Hu3", "aff": "University of California, San Diego;Mohamed bin Zayed University of Artificial Intelligence;University of Florida;University of California, San Diego;University of California, San Diego;University of Florida;Amazon", "aff_domain": "ucsd.edu;mbzuai.ac.ae;ufl.edu;ucsd.edu;ucsd.edu;ufl.edu;amazon.com", "position": "PhD student;Researcher;PhD student;Undergrad student;Postdoc;Associate Professor;Researcher", "bibtex": "@inproceedings{\nhao2023reasoning,\ntitle={Reasoning with Language Model is Planning with World Model},\nauthor={Shibo Hao and Yi Gu and Haodi Ma and Joshua Jiahua Hong and Zhen Wang and Daisy Zhe Wang and Zhiting Hu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=VTWWvYtF1R}\n}", "github": "", "project": "", "reviewers": "rY4T;eX3j;p5Yn", "site": "https://openreview.net/forum?id=VTWWvYtF1R", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;4;4", "excitement": "3;4;4", "reproducibility": "3;4;5", 
"correctness": "3;4;4", "rating_avg": 5.0, "confidence_avg": 4.0, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.0, "correctness_avg": 3.6666666666666665, "replies_avg": 10, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0003-3539-0259;;;0000-0001-7407-5118;;", "linkedin": ";;;joshua--hong/;zhenwang9102/;;", "aff_unique_index": "0;1;2;0;0;2;3", "aff_unique_norm": "University of California, San Diego;Mohamed bin Zayed University of Artificial Intelligence;University of Florida;Amazon", "aff_unique_dep": ";;;Amazon.com, Inc.", "aff_unique_url": "https://www.ucsd.edu;https://mbzuai.ac.ae;https://www.ufl.edu;https://www.amazon.com", "aff_unique_abbr": "UCSD;MBZUAI;UF;Amazon", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "San Diego;", "aff_country_unique_index": "0;1;0;0;0;0;0", "aff_country_unique": "United States;United Arab Emirates" }, { "id": "VWFKRxsgt3", "title": "ZeroSCROLLS: A Zero-Shot Benchmark for Long Text Understanding", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "We introduce ZeroSCROLLS, a zero-shot benchmark for natural language understanding over long texts, which contains only test and small validation sets, without training data. We adapt six tasks from the SCROLLS benchmark, and add four new datasets, including two novel information fusing tasks, such as aggregating the percentage of positive reviews. Using ZeroSCROLLS, we conduct a comprehensive evaluation of both open-source and closed large language models, finding that Claude outperforms ChatGPT, and that GPT-4 achieves the\nhighest average score. However, there is still room for improvement on multiple open challenges in ZeroSCROLLS, such as aggregation\ntasks, where models struggle to pass the naive baseline. 
As the state of the art is a moving target, we invite researchers to evaluate their\nideas on the live ZeroSCROLLS leaderboard.", "keywords": "Long lexts;summarization;question answering;benchmark;zero-shot", "primary_area": "", "supplementary_material": "", "author": "Uri Shaham;Maor Ivgi;Avia Efrat;Jonathan Berant;Omer Levy", "authorids": "~Uri_Shaham2;~Maor_Ivgi2;~Avia_Efrat1;~Jonathan_Berant1;~Omer_Levy1", "gender": ";M;F;M;M", "homepage": ";https://mivg.github.io/;;http://www.cs.tau.ac.il/~joberant/;", "dblp": ";275/3578;;31/8178;117/4866", "google_scholar": ";https://scholar.google.com/citations?hl=en;4QZmEqsAAAAJ;https://scholar.google.co.il/citations?user=xCYHonIAAAAJ;PZVd2h8AAAAJ", "or_profile": "~Uri_Shaham2;~Maor_Ivgi2;~Avia_Efrat1;~Jonathan_Berant1;~Omer_Levy1", "aff": ";Tel Aviv University;Tel Aviv University;Tel Aviv University;Tel Aviv University", "aff_domain": ";tau.ac.il;tau.ac.il;tau.ac.il;tau.ac.il", "position": ";PhD student;PhD student;Associate Professor;Senior Lecturer", "bibtex": "@inproceedings{\nshaham2023zeroscrolls,\ntitle={Zero{SCROLLS}: A Zero-Shot Benchmark for Long Text Understanding},\nauthor={Uri Shaham and Maor Ivgi and Avia Efrat and Jonathan Berant and Omer Levy},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=VWFKRxsgt3}\n}", "github": "", "project": "", "reviewers": "vUFG;PkAz;Pt2m", "site": "https://openreview.net/forum?id=VWFKRxsgt3", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;4;4", "excitement": "2;2;2", "reproducibility": "4;4;4", "correctness": "4;3;3", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 2.0, "reproducibility_avg": 4.0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0003-4114-3836;;0000-0001-7300-8191", "linkedin": ";maor-ivgi-a3314111b;;;", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Tel Aviv University", "aff_unique_dep": "", "aff_unique_url": "https://www.tau.ac.il", "aff_unique_abbr": "TAU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Israel" }, { "id": "VacjehPkIU", "title": "Superlim: A Swedish Language Understanding Evaluation Benchmark", "track": "main", "status": "Long Main", "tldr": "", "abstract": "We present Superlim, a multi-task NLP benchmark and analysis platform for evaluating Swedish language models, a counterpart to the English-language (Super)GLUE suite. We describe the dataset, the tasks, the leaderboard and report the baseline results yielded by a reference implementation. The tested models do not approach ceiling performance on any of the tasks, which suggests that Superlim is truly difficult, a desirable quality for a benchmark. We address methodological challenges, such as mitigating the Anglocentric bias when creating datasets for a less-resourced language; choosing the most appropriate measures; documenting the datasets and making the leaderboard convenient and transparent. 
We also highlight other potential usages of the dataset, such as, for instance, the evaluation of cross-lingual transfer learning.", "keywords": "Swedish;benchmark;large language models;natural language understanding;transfer learning;evaluation", "primary_area": "", "supplementary_material": "", "author": "Aleksandrs Berdicevskis;Gerlof Bouma;Robin Kurtz;Felix Morger;Joey \u00d6hman;Yvonne Adesam;Lars Borin;Dana Dann\u00e9lls;Markus Forsberg;Tim Isbister;Anna Lindahl;Martin Malmsten;Faton Rekathati;Magnus Sahlgren;Elena Volodina;Love B\u00f6rjeson;Simon Hengchen;Nina Tahmasebi", "authorids": "~Aleksandrs_Berdicevskis1;~Gerlof_Bouma1;~Robin_Kurtz1;~Felix_Morger1;~Joey_\u00d6hman1;~Yvonne_Adesam1;~Lars_Borin1;~Dana_Dann\u00e9lls1;~Markus_Forsberg2;~Tim_Isbister1;~Anna_Lindahl1;~Martin_Malmsten1;~Faton_Rekathati1;~Magnus_Sahlgren1;~Elena_Volodina1;~Love_B\u00f6rjeson1;~Simon_Hengchen1;~Nina_Tahmasebi1", "gender": ";M;;M;M;F;;F;;M;F;M;M;M;F;;Not Specified;F", "homepage": "https://sites.google.com/view/sasha-berdicevskis/home;;https://kb-labb.github.io/about.html#robin-kurtz-data-scientist;https://spraakbanken.gu.se/om/personal/felixmorger;;https://spraakbanken.gu.se/en/about/staff/yvonne;https://spraakbanken.gu.se/om/personal/lars;https://spraakbanken.gu.se/om/personal/dana;https://spraakbanken.gu.se/en/about/staff/markus;;https://spraakbanken.gu.se/om/personal/annalindahl;;https://github.com/Lauler;;https://spraakbanken.gu.se/en/about/staff/elena;https://kb-labb.github.io/about.html;https://hengchen.net;https://www.tahmasebi.se", "dblp": "211/9895;04/8157;135/0970;;;146/3989.html;;61/2039;;;;;;76/3617;126/8843;;182/1916;81/7513", "google_scholar": "5gBa2RQAAAAJ;dFT7ONkAAAAJ;https://scholar.google.de/citations?user=dAZj0B4AAAAJ;;;eRY5mnsAAAAJ;;zC9U76UAAAAJ;;https://scholar.google.se/citations?user=9a7Qgg8AAAAJ;;QEi1fgwAAAAJ;;Nf2NNVwAAAAJ;YcpEL2YAAAAJ;https://scholar.google.se/citations?user=Fakm1UQAAAAJ;https://scholar.google.fi/citations?user=rCT26bwAAAAJ;https://scholar.google.se/citations?user=nCH9mlUAAAAJ", "or_profile": "~Aleksandrs_Berdicevskis1;~Gerlof_Bouma1;~Robin_Kurtz1;~Felix_Morger1;~Joey_\u00d6hman1;~Yvonne_Adesam1;~Lars_Borin1;~Dana_Dann\u00e9lls1;~Markus_Forsberg2;~Tim_Isbister1;~Anna_Lindahl1;~Martin_Malmsten1;~Faton_Rekathati1;~Magnus_Sahlgren1;~Elena_Volodina1;~Love_B\u00f6rjeson1;~Simon_Hengchen1;~Nina_Tahmasebi1", "aff": "Gothenburg University;University of Gothenburg;National Libray of Sweden;G\u00f6teborg University;AI Sweden;G\u00f6teborg University;University of Gothenburg;G\u00f6teborg University ;G\u00f6teborg University;Ai Sweden;G\u00f6teborg University;;;AI Sweden;University of Gothenburg;;University of Geneva;Dept of Swedish, Spr\u00e5kbanken (The swedish language bank, an NLP Lab)", "aff_domain": "gu.se;gu.se;kb.se;gu.se;ai.se;gu.se;gu.se;gu.se;gu.se;ai.se;gu.se;;;ai.se;gu.se;;unige.ch;gu.se", "position": "Researcher;Researcher;Researcher;PhD student;Researcher;Researcher;Full Professor;Researcher;Associate Professor;ML Engineer;PhD student;;;Principal Researcher;Principal Researcher;;Lecturer;Associate Professor", "bibtex": "@inproceedings{\nberdicevskis2023superlim,\ntitle={Superlim: A Swedish Language Understanding Evaluation Benchmark},\nauthor={Aleksandrs Berdicevskis and Gerlof Bouma and Robin Kurtz and Felix Morger and Joey {\\\"O}hman and Yvonne Adesam and Lars Borin and Dana Dann{\\'e}lls and Markus Forsberg and Tim Isbister and Anna Lindahl and Martin Malmsten and Faton Rekathati and Magnus Sahlgren and Elena Volodina and Love B{\\\"o}rjeson and Simon 
Hengchen and Nina Tahmasebi},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=VacjehPkIU}\n}", "github": "", "project": "", "reviewers": "kbFp;kEqu;4JdJ", "site": "https://openreview.net/forum?id=VacjehPkIU", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;5;4", "excitement": "4;3;4", "reproducibility": "4;5;5", "correctness": "5;4;5", "rating_avg": 5.0, "confidence_avg": 4.333333333333333, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.666666666666667, "correctness_avg": 4.666666666666667, "replies_avg": 11, "authors#_avg": 18, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0003-3290-7179;;;;;0000-0002-9413-512X;;0000-0002-3338-2979;;;;0000-0001-9827-1367;;0000-0001-5100-0535;0000-0003-1935-1321;0000-0003-1328-4164;;0000-0003-1688-1845", "linkedin": ";;;;joeyohman/;;;dana-dannells-baa47693/;;tim-isbister-3581a052/;;;;magnus-sahlgren-0a12b2/;https://se.linkedin.com/in/elena-volodina-117bb442;;;https://linkedin.com/in/nina-tahmasebi-6694054", "aff_unique_index": "0;1;2;1;3;1;1;1;1;3;1;3;1;4;5", "aff_unique_norm": "Gothenburg University;University of Gothenburg;National Library of Sweden;AI Sweden;University of Geneva;Spr\u00e5kbanken", "aff_unique_dep": ";;;;;Dept of Swedish", "aff_unique_url": "https://www.gu.se;https://www.gu.se;https://www.kb.se;https://www.aisweden.org;https://www.unige.ch;https://www.spraakbanken.gu.se", "aff_unique_abbr": "GU;GU;KB;AI Sweden;UNIGE;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0;0;0;0;1;0", "aff_country_unique": "Sweden;Switzerland" }, { "id": "VdvdRfwTtk", "title": "Background Summarization of Event Timelines", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Generating concise summaries of news events is a challenging natural language processing task. While journalists often curate timelines to highlight key sub-events, newcomers to a news event face challenges in catching up on its historical context. In this paper, we address this need by introducing the task of background news summarization, which complements each timeline update with a background summary of relevant preceding events. We construct a dataset by merging existing timeline datasets and asking human annotators to write a background summary for each timestep of each news event. We establish strong baseline performance using state-of-the-art summarization systems and propose a query-focused variant to generate background summaries. To evaluate background summary quality, we present a question-answering-based evaluation metric, Background Utility Score (BUS), which measures the percentage of questions about a current event timestep that a background summary answers. 
Our experiments show the effectiveness of instruction fine-tuned systems such as Flan-T5, in addition to strong zero-shot performance using GPT-3.5.", "keywords": "text summarization;events;timelines;dataset;evaluation metrics", "primary_area": "", "supplementary_material": "", "author": "Adithya Pratapa;Kevin Small;Markus Dreyer", "authorids": "~Adithya_Pratapa1;~Kevin_Small1;~Markus_Dreyer1", "gender": "M;M;", "homepage": "https://adithya7.github.io/;http://www.kevinsmall.org/;https://markusdreyer.org/", "dblp": "222/9370;82/6573.html;37/4227", "google_scholar": "BAT6abIAAAAJ;https://scholar.google.com/citations?hl=en;0a1AxxQAAAAJ", "or_profile": "~Adithya_Pratapa1;~Kevin_Small1;~Markus_Dreyer1", "aff": "Carnegie Mellon University;Amazon;Amazon", "aff_domain": "cmu.edu;amazon.com;amazon.com", "position": "PhD student;Principal Researcher;Principal Researcher", "bibtex": "@inproceedings{\npratapa2023background,\ntitle={Background Summarization of Event Timelines},\nauthor={Adithya Pratapa and Kevin Small and Markus Dreyer},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=VdvdRfwTtk}\n}", "github": "", "project": "", "reviewers": "EUZX;o6pZ;Lm5f", "site": "https://openreview.net/forum?id=VdvdRfwTtk", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;4", "excitement": "3;3;4", "reproducibility": "4;4;5", "correctness": "4;4;4", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.333333333333333, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": ";;mdreyer/", "aff_unique_index": "0;1;1", "aff_unique_norm": "Carnegie Mellon University;Amazon", "aff_unique_dep": ";Amazon.com, Inc.", "aff_unique_url": "https://www.cmu.edu;https://www.amazon.com", "aff_unique_abbr": "CMU;Amazon", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "VeBoHwiA7g", "title": "SmartSpanNER: Making SpanNER Robust in Low Resource Scenarios", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Named Entity Recognition (NER) is one of the most fundamental tasks in natural language processing. Span-level prediction (SpanNER) is more naturally suitable for nested NER than sequence labeling (SeqLab). However, according to our experiments, the SpanNER method is more sensitive to the amount of training data, i.e., the F1 score of SpanNER drops much more than that of SeqLab when the amount of training data drops. In order to improve the robustness of SpanNER in low resource scenarios, we propose a simple and effective method SmartSpanNER, which introduces a Named Entity Head (NEH) prediction task to SpanNER and performs multi-task learning together with the task of span classification. 
Experimental results demonstrate that the robustness of SpanNER could be greatly improved by SmartSpanNER in low resource scenarios constructed on the CoNLL03, Few-NERD, GENIA and ACE05 standard benchmark datasets.", "keywords": "SpanNER;Named Entity Head;SmartSpanNER;Multi-task Learning", "primary_area": "", "supplementary_material": "", "author": "Min Zhang;Xiaosong Qiao;Yanqing Zhao;shimin tao;Hao Yang", "authorids": "~Min_Zhang10;~Xiaosong_Qiao1;~Yanqing_Zhao1;~shimin_tao1;~Hao_Yang7", "gender": "M;M;F;M;M", "homepage": ";;;;https://github.com/yanghaocsg", "dblp": "83/5342-10.html;;;;54/4089-7", "google_scholar": "oVvAyCUAAAAJ;;;Q5T8jbgAAAAJ;lOsjM5sAAAAJ", "or_profile": "~Min_Zhang10;~Xiaosong_Qiao1;~Yanqing_Zhao1;~shimin_tao1;~Hao_Yang7", "aff": "Huawei Technologies Ltd.;Huawei Technologies Ltd.;Huawei Technologies Ltd.;Huawei Technologies Ltd.;Huawei Technologies Ltd.", "aff_domain": "huawei.com;huawei.com;huawei.com;huawei.com;huawei.com", "position": "Researcher;Researcher;Researcher;Researcher;Principal Researcher", "bibtex": "@inproceedings{\nzhang2023smartspanner,\ntitle={SmartSpan{NER}: Making Span{NER} Robust in Low Resource Scenarios},\nauthor={Min Zhang and Xiaosong Qiao and Yanqing Zhao and shimin tao and Hao Yang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=VeBoHwiA7g}\n}", "github": "", "project": "", "reviewers": "o3MJ;f3si;Y6rd", "site": "https://openreview.net/forum?id=VeBoHwiA7g", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;4;4", "excitement": "4;3;4", "reproducibility": "4;3;4", "correctness": "3;3;4", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-9624-6851;my-orcid?orcid=0000-0003-2494-579X;0009-0005-5738-7490;;0000-0001-8861-7010", "linkedin": ";;;;", "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Huawei", "aff_unique_dep": "Huawei Technologies", "aff_unique_url": "https://www.huawei.com", "aff_unique_abbr": "Huawei", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "VecgMidd4I", "title": "Find-2-Find: Multitask Learning for Anaphora Resolution and Object Localization", "track": "main", "status": "Long Main", "tldr": "", "abstract": "In multimodal understanding tasks, visual and linguistic ambiguities can arise. Visual ambiguity can occur when visual objects require a model to ground a referring expression in a video without strong supervision, while linguistic ambiguity can occur from changes in entities in action flows. As an example from the cooking domain, \"oil\" mixed with \"salt\" and \"pepper\" could later be referred to as a \"mixture\". Without a clear visual-linguistic alignment, we cannot know which among several objects shown is referred to by the language expression \u201cmixture\u201d, and without resolved antecedents, we cannot pinpoint what the mixture is. We define this chicken-and-egg problem as Visual-linguistic Ambiguity. In this paper, we present Find2Find, a joint anaphora resolution and object localization dataset targeting the problem of \\emph{visual-linguistic ambiguity}, consisting of 500 anaphora-annotated recipes with corresponding videos. 
We present experimental results of a novel end-to-end joint multitask learning framework for Find2Find that fuses visual and textual information and shows improvements both for anaphora resolution and object localization with one joint model in multitask learning, as compared to a strong single-task baseline.", "keywords": "Anaphora Resolution;Object Localization", "primary_area": "", "supplementary_material": "", "author": "Cennet Oguz;Pascal Denis;Emmanuel Vincent;Simon Ostermann;Josef van Genabith", "authorids": "~Cennet_Oguz1;~Pascal_Denis1;~Emmanuel_Vincent1;~Simon_Ostermann1;~Josef_van_Genabith1", "gender": "F;M;M;M;M", "homepage": ";http://researchers.lille.inria.fr/~pdenis/;https://members.loria.fr/EVincent/;https://simonost.github.io/home/;", "dblp": ";18/4078;55/3279;33/7684-2;82/3447", "google_scholar": "xMnsNJoAAAAJ;Y1nQ6eUAAAAJ;https://scholar.google.fr/citations?user=RpYDOJQAAAAJ;kOHpHZsAAAAJ;rl8S6a8AAAAJ", "or_profile": "~Cennet_Oguz1;~Pascal_Denis1;~Emmanuel_Vincent1;~Simon_Ostermann1;~Josef_van_Genabith1", "aff": "German Research Center for AI;INRIA;INRIA;German Research Center for AI;Universit\u00e4t des Saarlandes", "aff_domain": "dfki.de;inria.fr;inria.fr;dfki.de;uni-saarland.de", "position": "PhD student;Researcher;Principal Researcher;Senior Researcher;Full Professor", "bibtex": "@inproceedings{\noguz2023findfind,\ntitle={Find-2-Find: Multitask Learning for Anaphora Resolution and Object Localization},\nauthor={Cennet Oguz and Pascal Denis and Emmanuel Vincent and Simon Ostermann and Josef van Genabith},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=VecgMidd4I}\n}", "github": "", "project": "", "reviewers": "pwhj;b6CE;RhkM", "site": "https://openreview.net/forum?id=VecgMidd4I", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;4;3", "excitement": "4;4;4", "reproducibility": "4;4;4", "correctness": "4;4;3", "rating_avg": 5.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 4.0, "reproducibility_avg": 4.0, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-6817-4133;0000-0003-4121-6337;0000-0002-0183-7289;0000-0002-0899-0657;", "linkedin": ";;;;", "aff_unique_index": "0;1;1;0;2", "aff_unique_norm": "German Research Center for Artificial Intelligence;INRIA;Universit\u00e4t des Saarlandes", "aff_unique_dep": ";;", "aff_unique_url": "https://www.dfki.de/;https://www.inria.fr;https://www.uni-saarland.de", "aff_unique_abbr": "DFKI;INRIA;UDS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;0;0", "aff_country_unique": "Germany;France" }, { "id": "VgLBPLvHuK", "title": "Revisiting Source Context in Nearest Neighbor Machine Translation", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Nearest neighbor machine translation ($k$NN-MT), which interpolates target token probabilities with estimates derived from additional examples, has achieved significant improvements and attracted extensive interest in recent years. However, existing research does not explicitly consider the source context when retrieving similar examples, potentially leading to suboptimal performance. 
To address this, we comprehensively revisit the role of source context and propose a simple and effective method for improving neural machine translation via source context enhancement, demonstrating its crucial role in both retrieving superior examples and determining more suitable interpolation coefficients. Furthermore, we reveal that the probability estimation can be further optimized by incorporating a source-aware distance calibration module. Comprehensive experiments show that our proposed approach can be seamlessly integrated with representative $k$NN-MT baselines, resulting in substantial improvements over these strong baselines across a number of settings and domains. Remarkably, these improvements can reach up to 1.6 BLEU points.", "keywords": "nearest neighbor machine translation; source context; retrieval-augmented machine translation", "primary_area": "", "supplementary_material": "", "author": "Xuanhong Li;Peng Li;Po Hu", "authorids": "~Xuanhong_Li1;~Peng_Li2;~Po_Hu1", "gender": "M;M;", "homepage": ";http://www.lpeng.net/;https://dblp.org/pid/62/1925.html", "dblp": "362/7615;83/6353-30;", "google_scholar": "UYnaTnsAAAAJ;hgYzkOQAAAAJ;", "or_profile": "~Xuanhong_Li1;~Peng_Li2;~Po_Hu1", "aff": "Central China Normal University;Tsinghua University;Central China Normal University", "aff_domain": "ccnu.edu.cn;tsinghua.edu.cn;ccnu.edu.cn", "position": "MS student;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nli2023revisiting,\ntitle={Revisiting Source Context in Nearest Neighbor Machine Translation},\nauthor={Xuanhong Li and Peng Li and Po Hu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=VgLBPLvHuK}\n}", "github": "", "project": "", "reviewers": "4RbX;qmbQ;aAZW", "site": "https://openreview.net/forum?id=VgLBPLvHuK", "pdf_size": 0, "rating": "3;3;3", "confidence": "5;4;4", "excitement": "3;4;2", "reproducibility": "3;3;3", "correctness": "3;4;3", "rating_avg": 3.0, "confidence_avg": 4.333333333333333, "excitement_avg": 3.0, "reproducibility_avg": 3.0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-7503-6783;0000-0003-1374-5979;0000-0002-7968-2838", "linkedin": ";;", "aff_unique_index": "0;1;0", "aff_unique_norm": "Central China Normal University;Tsinghua University", "aff_unique_dep": ";", "aff_unique_url": "http://www.ccnu.edu.cn;https://www.tsinghua.edu.cn", "aff_unique_abbr": "CCNU;THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "VhL4lZXY1U", "title": "Beyond Candidates : Adaptive Dialogue Agent Utilizing Persona and Knowledge", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "To build ultimate dialogue agents, previous studies suggest models that ground both persona and knowledge. However, applying the dialogue system directly to the usual conversation is still limited because the system requires a complete sentence-formed persona and knowledge candidate sets from the given dataset. In contrast to the dialogue setting in the dataset, humans utilize semantic concepts in their minds rather than a set of pre-defined candidate sentences. Following this manner of human dialogue, we suggest an adaptive dialogue system that is applicable to situations where complete sentence-formed candidates are not given. 
Our model generates consistent and relevant persona descriptions and identifies relevant knowledge for engaging and knowledgeable responses, even with fragmentary information. We show that our model outperforms previous baselines that utilize persona and knowledge candidate sentences and conduct the human evaluation on the machine-generated responses. In addition, we conduct ablation studies to demonstrate the effectiveness of each component of our model. Furthermore, we apply our model to other dialogue datasets that only ground knowledge or persona to showcase its adaptability. Our code is available at https://github.com/dlawjddn803/BeCand.", "keywords": "Dialogue System;Adaptive;Candidate-agnostic;Persona;Knowledge", "primary_area": "", "supplementary_material": "", "author": "Jungwoo Lim;Myunghoon Kang;Jinsung Kim;Jeongwook Kim;Yuna Hur;Heuiseok Lim", "authorids": "~Jungwoo_Lim1;~Myunghoon_Kang1;~Jinsung_Kim2;~Jeongwook_Kim1;~Yuna_Hur1;~Heuiseok_Lim1", "gender": "F;M;M;M;F;M", "homepage": "https://dlawjddn803.github.io/;;https://jin62304.github.io;https://uponthesky.tistory.com/;https://scholar.google.com/citations?user=A0zJLEMAAAAJ&hl=en;http://nlp.korea.ac.kr", "dblp": "277/9191;183/4671;;;291/4254;127/4881", "google_scholar": "ubIxtk8AAAAJ;sNu9_kUAAAAJ;au6e9uUAAAAJ;;A0zJLEMAAAAJ;HMTkz7oAAAAJ", "or_profile": "~Jungwoo_Lim1;~Myunghoon_Kang1;~Jinsung_Kim2;~Jeongwook_Kim1;~Yuna_Hur1;~Heuiseok_Lim1", "aff": "Korea University;Korea University;Korea University;Korea University;Korea University;Korea University", "aff_domain": "korea.ac.kr;korea.ac.kr;korea.ac.kr;korea.ac.kr;korea.ac.kr;korea.ac.kr", "position": "PhD student;PhD student;PhD student;MS student;Postdoc;Full Professor", "bibtex": "@inproceedings{\nlim2023beyond,\ntitle={Beyond Candidates : Adaptive Dialogue Agent Utilizing Persona and Knowledge},\nauthor={Jungwoo Lim and Myunghoon Kang and Jinsung Kim and Jeongwook Kim and Yuna Hur and Heuiseok Lim},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=VhL4lZXY1U}\n}", "github": "", "project": "", "reviewers": "wnZx;n8H1;StNn", "site": "https://openreview.net/forum?id=VhL4lZXY1U", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;4", "excitement": "4;2;3", "reproducibility": "4;4;3", "correctness": "4;3;3", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 3.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0001-8988-2270;0000-0003-3257-3885;0000-0002-1587-0389;;;", "linkedin": "jungwoo-lim-3a5124202/;;jinsung-kim-703195178/;;;", "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "Korea University", "aff_unique_dep": "", "aff_unique_url": "https://www.korea.ac.kr", "aff_unique_abbr": "KU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "South Korea" }, { "id": "VjSxQNhdKs", "title": "MCLF: A Multi-grained Contrastive Learning Framework for ASR-robust Spoken Language Understanding", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Enhancing the robustness towards Automatic Speech Recognition (ASR) errors is of great importance for Spoken Language Understanding (SLU). Trending ASR-robust SLU systems have witnessed impressive improvements through global contrastive learning. 
However, although most ASR errors occur only at local positions of utterances, they can easily lead to severe semantic changes, and utterance-level classification or comparison struggles to distinguish such differences. To address the problem, we propose a two-stage multi-grained contrastive learning framework dubbed MCLF. Technically, we first adapt the pre-trained language models to downstream SLU datasets via the proposed multi-grained contrastive learning objective and then fine-tune them on the corresponding dataset. In addition, to facilitate contrastive learning in the pre-training stage, we explore several data augmentation methods to expand the training data. Experimental results and detailed analyses on four datasets and four BERT-like backbone models demonstrate the effectiveness of our approach.", "keywords": "Spoken Language Understanding;ASR Robustness;Multi-grained Contrastive Learning;Data Augmentation", "primary_area": "", "supplementary_material": "", "author": "Zhiqi Huang;Dongsheng Chen;Zhihong Zhu;Xuxin Cheng", "authorids": "~Zhiqi_Huang2;~Dongsheng_Chen1;~Zhihong_Zhu1;~Xuxin_Cheng3", "gender": "M;M;;", "homepage": "https://zhiqi-huang.github.io/;;;", "dblp": ";;;", "google_scholar": "5JGMGCsAAAAJ;https://scholar.google.com.hk/citations?user=2sI1wsoAAAAJ;;", "or_profile": "~Zhiqi_Huang2;~Dongsheng_Chen1;~Zhihong_Zhu1;~Xuxin_Cheng3", "aff": "Tencent Game;Peking University;;", "aff_domain": "tencent.com;pku.edu.cn;;", "position": "Researcher;MS student;;", "bibtex": "@inproceedings{\nhuang2023mclf,\ntitle={{MCLF}: A Multi-grained Contrastive Learning Framework for {ASR}-robust Spoken Language Understanding},\nauthor={Zhiqi Huang and Dongsheng Chen and Zhihong Zhu and Xuxin Cheng},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=VjSxQNhdKs}\n}", "github": "", "project": "", "reviewers": "u9Ji;dEWS;7euW", "site": "https://openreview.net/forum?id=VjSxQNhdKs", "pdf_size": 0, "rating": "3;3;3", "confidence": "5;3;3", "excitement": "3;4;3", "reproducibility": "5;3;3", "correctness": "4;4;3", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;", "linkedin": "zhiqi-huang-133499142/;;;", "aff_unique_index": "0;1", "aff_unique_norm": "Tencent;Peking University", "aff_unique_dep": "Tencent Game;", "aff_unique_url": "https://www.tencent.com;http://www.pku.edu.cn", "aff_unique_abbr": "Tencent;Peking U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "id": "VmoWVc04KY", "title": "Linguistic Compression in Single-Sentence Human-Written Summaries", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Summarizing texts involves significant cognitive effort to compress information. While advances in automatic summarization systems have drawn attention from the NLP and linguistics communities to this topic, there is a lack of computational studies of linguistic patterns in human-written summaries. This work presents a large-scale corpus study of human-written single-sentence summaries. 
We analyzed the linguistic compression patterns from source documents to summaries at different granularities, and we found that summaries are generally written with morphological expansion, increased lexical diversity, and similar positional arrangements of specific words compared to the source across different genres. We also studied how linguistic compressions of different factors affect reader judgments of quality through a human study, with the results showing that the use of morphological and syntactic changes by summary writers matches reader preferences while lexical diversity and word specificity preferences are not aligned between summary writers and readers.", "keywords": "summary writing;linguistic compression;text summarization", "primary_area": "", "supplementary_material": "", "author": "Fangcong Yin;Marten Van Schijndel", "authorids": "~Fangcong_Yin1;~Marten_Van_Schijndel1", "gender": "M;M", "homepage": "https://fangcong-yin-2.github.io/;https://vansky.github.io/", "dblp": "362/8553;127/0199", "google_scholar": "u_-1TRIAAAAJ;AtmDLewAAAAJ", "or_profile": "~Fangcong_Yin1;~Marten_Van_Schijndel1", "aff": "Cornell University;Cornell University", "aff_domain": "cornell.edu;cornell.edu", "position": "Undergrad student;Assistant Professor", "bibtex": "@inproceedings{\nyin2023linguistic,\ntitle={Linguistic Compression in Single-Sentence Human-Written Summaries},\nauthor={Fangcong Yin and Marten Van Schijndel},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=VmoWVc04KY}\n}", "github": "", "project": "", "reviewers": "fk6U;xvMe;XKkb", "site": "https://openreview.net/forum?id=VmoWVc04KY", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;4;3", "excitement": "4;3;3", "reproducibility": "4;5;4", "correctness": "4;3;3", "rating_avg": 3.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.333333333333333, "correctness_avg": 3.3333333333333335, "replies_avg": 10, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-9858-5881", "linkedin": ";", "aff_unique_index": "0;0", "aff_unique_norm": "Cornell University", "aff_unique_dep": "", "aff_unique_url": "https://www.cornell.edu", "aff_unique_abbr": "Cornell", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "VnMfQuDSgG", "title": "Analysis of Style-Shifting on Social Media: Using Neural Language Model Conditioned by Social Meanings", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "In this paper, we propose a novel framework for evaluating style-shifting in social media conversations. Our proposed framework captures changes in an individual's conversational style based on surprisals predicted by a personalized neural language model for individuals. Our personalized language model integrates not only the linguistic contents of conversations but also non-linguistic factors, such as social meanings, including group membership, personal attributes, and individual beliefs. We incorporate these factors directly or implicitly into our model, leveraging large, pre-trained language models and feature vectors derived from a relationship graph on social media. Compared to existing models, our personalized language model demonstrated superior performance in predicting an individual's language in a test set. 
Furthermore, an analysis of style-shifting utilizing our proposed metric based on our personalized neural language model reveals a correlation between our metric and various conversation factors as well as human evaluation of style-shifting.", "keywords": "style-shifting;speech accommodation theory;neural language model", "primary_area": "", "supplementary_material": "", "author": "Seiya Kawano;Shota Kanezaki;Angel Fernando Garcia Contreras;Akishige Yuguchi;Marie Katsurai;Koichiro Yoshino", "authorids": "~Seiya_Kawano1;~Shota_Kanezaki1;~Angel_Fernando_Garcia_Contreras1;~Akishige_Yuguchi1;~Marie_Katsurai1;~Koichiro_Yoshino2", "gender": "M;M;;;F;", "homepage": "https://kwnsiy.github.io/publications-e.html;https://zakio10.github.io/;;;https://researchmap.jp/katsurai?lang=en;http://pomdp.net/", "dblp": "249/4687;;;;;", "google_scholar": "CZ2nUoQAAAAJ;;;https://scholar.google.com/citations?view_op=list_works;;zo90gZkAAAAJ", "or_profile": "~Seiya_Kawano1;~Shota_Kanezaki1;~Angel_Fernando_Garcia_Contreras1;~Akishige_Yuguchi1;~Marie_Katsurai1;~Koichiro_Yoshino2", "aff": "RIKEN;Doshisha University;;RIKEN;Doshisha University;RIKEN", "aff_domain": "riken.jp;doshisha.ac.jp;;riken.jp;doshisha.ac.jp;riken.jp", "position": "Researcher;MS student;;Postdoc;Associate Professor;Team Leader (PI)", "bibtex": "@inproceedings{\nkawano2023analysis,\ntitle={Analysis of Style-Shifting on Social Media: Using Neural Language Model Conditioned by Social Meanings},\nauthor={Seiya Kawano and Shota Kanezaki and Angel Fernando Garcia Contreras and Akishige Yuguchi and Marie Katsurai and Koichiro Yoshino},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=VnMfQuDSgG}\n}", "github": "", "project": "", "reviewers": "UhTx;3agR;gqZW", "site": "https://openreview.net/forum?id=VnMfQuDSgG", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;4", "excitement": "3;3;3", "reproducibility": "2;4;3", "correctness": "3;4;3", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.0, "reproducibility_avg": 3.0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;", "linkedin": ";;;;;", "aff_unique_index": "0;1;0;1;0", "aff_unique_norm": "RIKEN;Doshisha University", "aff_unique_dep": ";", "aff_unique_url": "https://www.riken.jp;https://www.doshisha.ac.jp", "aff_unique_abbr": "RIKEN;Doshisha", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "Japan" }, { "id": "Vp8WwRMWfv", "title": "Recurrent Neural Language Models as Probabilistic Finite-state Automata", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Studying language models (LMs) in terms of well-understood formalisms allows us to precisely characterize their abilities and limitations.\nPrevious work has investigated the expressive power of recurrent neural network (RNN) LMs in terms of their capacity to recognize unweighted formal languages.\nHowever, LMs do not describe unweighted formal languages---rather, they define probability distributions over strings.\nIn this work, we study what classes of such probability distributions RNN LMs can represent,\nwhich allows us to make more direct statements about their capabilities.\nWe show that simple RNNs are equivalent to a subclass of probabilistic finite-state automata, and can thus model a strict subset of probability distributions expressible by 
finite-state models.\nFurthermore, we study the space complexity of representing finite-state LMs with RNNs.\nWe show that, to represent an arbitrary deterministic finite-state LM with $N$ states over an alphabet $\\Sigma$, an RNN requires $\\Omega\\left(N |\\Sigma|\\right)$ neurons.\nThese results present a first step towards characterizing the classes of distributions RNN LMs can represent and thus help us understand their capabilities and limitations.", "keywords": "Language Models;Formal Language Theory;Recurrent Neural Networks;Finite-state Automata;Minsky", "primary_area": "", "supplementary_material": "", "author": "Anej Svete;Ryan Cotterell", "authorids": "~Anej_Svete1;~Ryan_Cotterell1", "gender": "M;Not Specified", "homepage": "https://anejsvete.github.io/;https://rycolab.io/", "dblp": "259/1164;146/4361.html", "google_scholar": "https://scholar.google.com/citations?hl=en;DexOqtoAAAAJ", "or_profile": "~Anej_Svete1;~Ryan_D_Cotterell1", "aff": "Department of Computer Science, ETHZ - ETH Zurich;Swiss Federal Institute of Technology", "aff_domain": "inf.ethz.ch;ethz.ch", "position": "MS student;Assistant Professor", "bibtex": "@inproceedings{\nsvete2023recurrent,\ntitle={Recurrent Neural Language Models as Probabilistic Finite-state Automata},\nauthor={Anej Svete and Ryan Cotterell},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=Vp8WwRMWfv}\n}", "github": "", "project": "", "reviewers": "bmNn;uUpK;R8qp", "site": "https://openreview.net/forum?id=Vp8WwRMWfv", "pdf_size": 0, "rating": "5;5;5", "confidence": "3;2;4", "excitement": "4;4;4", "reproducibility": "0;0;5", "correctness": "5;5;5", "rating_avg": 5.0, "confidence_avg": 3.0, "excitement_avg": 4.0, "reproducibility_avg": 1.6666666666666667, "correctness_avg": 5.0, "replies_avg": 11, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";", "linkedin": "anej-svete-95a68616a;", "aff_unique_index": "0;1", "aff_unique_norm": "ETH Zurich;Swiss Federal Institute of Technology", "aff_unique_dep": "Department of Computer Science;", "aff_unique_url": "https://www.ethz.ch;https://www.ethz.ch", "aff_unique_abbr": "ETHZ;ETH Zurich", "aff_campus_unique_index": "0", "aff_campus_unique": "Zurich;", "aff_country_unique_index": "0;0", "aff_country_unique": "Switzerland" }, { "id": "VqGX02f2lS", "title": "On the Transferability of Visually Grounded PCFGs", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "There has been a significant surge of interest in visually grounded grammar induction in recent times. While a variety of models have been developed for the task and have demonstrated impressive performance, they have not been evaluated on text domains that are different from the training domain, so it is unclear if the improvements brought by visual groundings are transferable. Our study aims to fill this gap and assess the degree of transferability. We start by extending VC-PCFG (short for Visually-grounded Compound PCFG [[Zhao and Titov, 2020](https://aclanthology.org/2020.emnlp-main.354/)]) in such a way that it can transfer across text domains. We consider a zero-shot transfer learning setting where a model is trained on the source domain and is directly applied to target domains, without any further training. Our experimental results suggest that: the benefits from using visual groundings transfer to text in a domain similar to the training domain but fail to transfer to remote domains. 
Further, we conduct data and result analysis; we find that the lexicon overlap between the source domain and the target domain is the most important factor in the transferability of VC-PCFG.", "keywords": "unsupervised grammar induction;grounded language learning;syntactic parsing;transfer learning", "primary_area": "", "supplementary_material": "", "author": "Yanpeng Zhao;Ivan Titov", "authorids": "~Yanpeng_Zhao1;~Ivan_Titov1", "gender": "Not Specified;", "homepage": ";http://ivan-titov.org", "dblp": "182/5860;08/5391", "google_scholar": "-T9FigIAAAAJ;https://scholar.google.nl/citations?user=FKUc3vsAAAAJ", "or_profile": "~Yanpeng_Zhao1;~Ivan_Titov1", "aff": "University of Edinburgh;University of Amsterdam", "aff_domain": "ed.ac.uk;uva.nl", "position": "PhD student;Associate Professor", "bibtex": "@inproceedings{\nzhao2023on,\ntitle={On the Transferability of Visually Grounded {PCFG}s},\nauthor={Yanpeng Zhao and Ivan Titov},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=VqGX02f2lS}\n}", "github": "", "project": "", "reviewers": "qPWB;jC7f;ZVfX", "site": "https://openreview.net/forum?id=VqGX02f2lS", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;3;2", "excitement": "3;3;3", "reproducibility": "4;4;3", "correctness": "3;4;4", "rating_avg": 4.0, "confidence_avg": 2.6666666666666665, "excitement_avg": 3.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";", "linkedin": ";", "aff_unique_index": "0;1", "aff_unique_norm": "University of Edinburgh;University of Amsterdam", "aff_unique_dep": ";", "aff_unique_url": "https://www.ed.ac.uk;https://www.uva.nl", "aff_unique_abbr": "Edinburgh;UvA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "United Kingdom;Netherlands" }, { "id": "VyIe1iVHZ4", "title": "TR-Rules: Rule-based Model for Link Forecasting on Temporal Knowledge Graph Considering Temporal Redundancy", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Temporal knowledge graph (TKG) has been proved to be an effective way for modeling dynamic facts in real world. Many efforts have been devoted into predicting future events i.e. extrapolation, on TKGs. Recently, rule-based knowledge graph completion methods which are considered to be more interpretable than embedding-based methods, have been transferred to temporal knowledge graph extrapolation. However, rule-based models suffer from temporal redundancy when leveraged under dynamic settings, which results in inaccurate rule confidence calculation. In this paper, we define the problem of temporal redundancy and propose TR-Rules which solves the temporal redundancy issues through a simple but effective strategy. Besides, to capture more information lurking in TKGs, apart from cyclic rules, TR-Rules also mines and properly leverages acyclic rules, which has not been explored by existing models. Experimental results on three benchmarks show that TR-Rules achieves state-of-the-art performance. 
An ablation study shows the impact of temporal redundancy and demonstrates that the performance of acyclic rules is much more promising due to their higher sensitivity to the number of sampled walks during the learning stage.", "keywords": "Knowledge Graph;Temporal Knowledge Graph;Link forecasting;Temporal Rules", "primary_area": "", "supplementary_material": "", "author": "Ningyuan Li;Haihong E;Shi Li;Mingzhi Sun;Tianyu Yao;Meina Song;Yong Wang;Haoran Luo", "authorids": "~Ningyuan_Li2;~Haihong_E1;~Shi_Li3;~Mingzhi_Sun1;~Tianyu_Yao1;~Meina_Song1;~Yong_Wang14;~Haoran_Luo1", "gender": "M;F;M;M;M;F;;M", "homepage": ";https://teacher.bupt.edu.cn/ehaihong/zh_CN/index.htm;https://www.linkedin.com/in/li-shi-422203139/;http://none;https://github.com/yao12315;http://teacher.bupt.edu.cn/songmeina/;;https://lhrlab.github.io/", "dblp": "183/6738;43/10222.html;;299/7222;324/5213;95/4440;;227/5902-1.html", "google_scholar": "D5Oz9T8AAAAJ;https://scholar.google.com.hk/citations?user=J4akh64AAAAJ;;;M3wrJAwAAAAJ;https://scholar.google.com/citations?hl=zh-CN;;https://scholar.google.com.hk/citations?user=Q9Nv9mcAAAAJ", "or_profile": "~Ningyuan_Li2;~Haihong_E1;~Shi_Li3;~Mingzhi_Sun1;~Tianyu_Yao1;~Meina_Song1;~Yong_Wang14;~Haoran_Luo1", "aff": "Beijing University of Posts and Telecommunications;Beijing University of Post and Telecommunication;;Beijing University of Posts and Telecommunications;Beijing University of Posts and Telecommunications;Beijing University of Posts and Telecommunications;;Beijing University of Posts and Telecommunications", "aff_domain": "bupt.edu.cn;bupt.edu.cn;;bupt.edu.cn;bupt.edu.cn;bupt.edu.cn;;bupt.edu.cn", "position": "MS student;Full Professor;;PhD student;MS student;Full Professor;;PhD student", "bibtex": "@inproceedings{\nli2023trrules,\ntitle={{TR}-Rules: Rule-based Model for Link Forecasting on Temporal Knowledge Graph Considering Temporal Redundancy},\nauthor={Ningyuan Li and Haihong E and Shi Li and Mingzhi Sun and Tianyu Yao and Meina Song and Yong Wang and Haoran Luo},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=VyIe1iVHZ4}\n}", "github": "", "project": "", "reviewers": "FUMh;cXsZ;uQVS", "site": "https://openreview.net/forum?id=VyIe1iVHZ4", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;4;4", "excitement": "3;2;3", "reproducibility": "5;3;3", "correctness": "4;3;3", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 2.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 8, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0009-0003-4981-8268;;;;;0000-0001-6626-9932;;0000-0003-2727-0361", "linkedin": "https://www.linkedin.cn/incareer/in/%E6%B3%9E%E5%8E%9F-%E6%9D%8E-18950823a;;li-shi-422203139/;;;;https://www.linkedin.cn/incareer/in/ACoAAEQ6N-ABxbGj0JwHfH_CSA7ik4M3g8KsXt8;haoran-luo-88a96b255/", "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "Beijing University of Posts and Telecommunications", "aff_unique_dep": "", "aff_unique_url": "http://www.bupt.edu.cn/", "aff_unique_abbr": "BUPT", "aff_campus_unique_index": "0;0;0;0;0;0", "aff_campus_unique": "Beijing", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "VyjNXY2wgi", "title": "DialCoT Meets PPO: Decomposing and Exploring Reasoning Paths in Smaller Language Models", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Chain-of-Thought (CoT) prompting has 
successfully enhanced the reasoning capabilities of Large Language Models~(LLMs) with at least 100 billion parameters. However, it is ineffective, or even detrimental, to the performance on reasoning tasks in Smaller Language Models (SLMs) with less than 10 billion parameters. In this paper, we propose Dialogue-guided Chain-of-Thought (DialCoT) to improve the reasoning capabilities of SLMs, with the aim of generating intermediate reasoning steps in a dialogue format to guide the model to the final answer. Furthermore, we optimize the model to choose the optimal reasoning path through the Proximal Policy Optimization (PPO) algorithm, further enhancing its reasoning capabilities. Compared to previous methods, our advantages lie in: 1) We transform the process of solving complex reasoning problems into decomposing problems and solving a series of simpler sub-questions, significantly reducing task difficulty and making it more suitable for SLMs. 2) We optimize the model to choose the optimal reasoning path through the PPO algorithm. Comprehensive experiments on four arithmetic reasoning datasets show that our method can achieve significant performance gains over state-of-the-art competitors.", "keywords": "Chain-of-Thought;PPO;Reasoning", "primary_area": "", "supplementary_material": "", "author": "Chengcheng Han;Xiaowei Du;Che Zhang;Yixin Lian;Xiang Li;Ming Gao;Baoyuan Wang", "authorids": "~Chengcheng_Han1;~Xiaowei_Du1;~Che_Zhang1;~Yixin_Lian1;~Xiang_Li24;~Ming_Gao1;~Baoyuan_Wang3", "gender": "M;;M;M;M;M;M", "homepage": ";https://www.researchgate.net/profile/Du-Xiaowei;https://github.com/bammt;;https://lixiang3776.github.io;http://dase.ecnu.edu.cn/mgao/;", "dblp": "195/3644-4;;;;40/1491-67.html;71/4173-1;41/8869", "google_scholar": "kGlQ56YAAAAJ;;;QmAZSkYAAAAJ;JnxxNtsAAAAJ;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.co.jp/citations?user=OWa5rOEAAAAJ", "or_profile": "~Chengcheng_Han1;~Xiaowei_Du1;~Che_Zhang1;~Yixin_Lian1;~Xiang_Li24;~Ming_Gao1;~Baoyuan_Wang3", "aff": "East China Normal University;xiaobing technology company;Xiaobing;;East China Normal University;East China Normal University;Xiaobing.ai", "aff_domain": "ecnu.edu.cn;xiaoice.com;xiaobing.ai;;ecnu.edu.cn;ecnu.edu.cn;xiaobing.ai", "position": "PhD student;Researcher;Intern;;Full Professor;Full Professor;Researcher", "bibtex": "@inproceedings{\nhan2023dialcot,\ntitle={DialCoT Meets {PPO}: Decomposing and Exploring Reasoning Paths in Smaller Language Models},\nauthor={Chengcheng Han and Xiaowei Du and Che Zhang and Yixin Lian and Xiang Li and Ming Gao and Baoyuan Wang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=VyjNXY2wgi}\n}", "github": "", "project": "", "reviewers": "v8cq;orRw;Rgam", "site": "https://openreview.net/forum?id=VyjNXY2wgi", "pdf_size": 0, "rating": "5;5;5", "confidence": "5;4;4", "excitement": "3;4;4", "reproducibility": "5;4;5", "correctness": "4;3;4", "rating_avg": 5.0, "confidence_avg": 4.333333333333333, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.666666666666667, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0001-8138-9799;;;;0009-0003-0142-2483;0000-0002-5603-2680;", "linkedin": ";;;;;;", "aff_unique_index": "0;1;2;0;0;3", "aff_unique_norm": "East China Normal University;Xiaobing Technology Company;Xiaobing;Xiaobing.AI", "aff_unique_dep": ";;;", "aff_unique_url": 
"http://www.ecnu.edu.cn;;;https://www.xiaobing.ai", "aff_unique_abbr": "ECNU;;;Xiaobing.ai", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China;" }, { "id": "W0WeKrnfbX", "title": "A Self-enhancement Multitask Framework for Unsupervised Aspect Category Detection", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Our work addresses the problem of unsupervised Aspect Category Detection using a small set of seed words. Recent works have focused on learning embedding spaces for seed words and sentences to establish similarities between sentences and aspects. However, aspect representations are limited by the quality of initial seed words, and model performances are compromised by noise. To mitigate this limitation, we propose a simple framework that automatically enhances the quality of initial seed words and selects high-quality sentences for training instead of using the entire dataset. Our main concepts are to add a number of seed words to the initial set and to treat the task of noise resolution as a task of augmenting data for a low-resource task. In addition, we jointly train Aspect Category Detection with Aspect Term Extraction and Aspect Term Polarity to further enhance performance. This approach facilitates shared representation learning, allowing Aspect Category Detection to benefit from the additional guidance offered by other tasks. Extensive experiments demonstrate that our framework surpasses strong baselines on standard datasets.", "keywords": "Sentiment Analysis;Aspect Category Detection;unsupervised learning;weakly supervised learning;Aspect-based Sentiment Analysis", "primary_area": "", "supplementary_material": "", "author": "Thi-Nhung Nguyen;Hoang Ngo;Kiem-Hieu Nguyen;Tuan-Dung Cao", "authorids": "~Thi-Nhung_Nguyen1;~Hoang_Ngo1;~Kiem-Hieu_Nguyen1;~Tuan-Dung_Cao1", "gender": "F;;M;", "homepage": "https://nhungnt7.github.io/;;https://users.soict.hust.edu.vn/hieunk/;", "dblp": "305/9765;;93/8371;49/1711.html", "google_scholar": "LvHj1fAAAAAJ;;HvpiYrcAAAAJ;", "or_profile": "~Thi-Nhung_Nguyen1;~Hoang_Ngo1;~Kiem-Hieu_Nguyen1;~Tuan-Dung_Cao1", "aff": "VinAI Research;;Hanoi University of Science and Technology;Hanoi University of Science and Technology", "aff_domain": "vinai.io;;hust.edu.vn;hust.edu.vn", "position": "Researcher;;Lecturer;Associate Professor", "bibtex": "@inproceedings{\nnguyen2023a,\ntitle={A Self-enhancement Multitask Framework for Unsupervised Aspect Category Detection},\nauthor={Thi-Nhung Nguyen and Hoang Ngo and Kiem-Hieu Nguyen and Tuan-Dung Cao},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=W0WeKrnfbX}\n}", "github": "", "project": "", "reviewers": "UyNx;9v2c;3mKJ", "site": "https://openreview.net/forum?id=W0WeKrnfbX", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;4;4", "excitement": "3;3;3", "reproducibility": "3;4;4", "correctness": "3;3;4", "rating_avg": 5.0, "confidence_avg": 4.0, "excitement_avg": 3.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;", "linkedin": ";;;", "aff_unique_index": "0;1;1", "aff_unique_norm": "VinAI Research;Hanoi University of Science and Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.vinai.io/;https://www.hust.edu.vn", "aff_unique_abbr": "VinAI;HUST", 
"aff_campus_unique_index": "1;1", "aff_campus_unique": ";Hanoi", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Vietnam" }, { "id": "W1w2eovejY", "title": "Uncertainty-aware Parameter-Efficient Self-training for Semi-supervised Language Understanding", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "The recent success of large pre-trained language models (PLMs) heavily hinges on massive labeled data, which typically produces inferior performance in low-resource scenarios. To remedy this dilemma, we study self-training as one of the predominant semi-supervised learning (SSL) approaches, which utilizes large-scale unlabeled data to generate synthetic examples. However, too many noisy labels will hurt the model performance, and the self-training procedure requires multiple training iterations making it more expensive if all the model parameters of the PLM are updated. This paper presents UPET, a novel Uncertainty-aware Parameter-Efficient self-Training framework to effectively and efficiently address the labeled data scarcity issue. Specifically, we incorporate Monte Carlo (MC) dropout in Bayesian neural network (BNN) to perform uncertainty estimation for the teacher model and then judiciously select reliable pseudo-labeled examples based on confidence and certainty. During the student training, we introduce multiple parameter-efficient learning (PEL) paradigms that allow optimizes only a small percentage of parameters. We also propose a novel Easy-Hard Contrastive Tuning to enhance the robustness and generalization. Extensive experiments over multiple downstream tasks demonstrate that UPET achieves a substantial improvement in terms of performance and efficiency. Our codes and data are released at https: //github.com/wjn1996/UPET.", "keywords": "Self-Training;Uncertainty Estimation;Pre-trained Language Models;Parameter-Efficient Learning", "primary_area": "", "supplementary_material": "", "author": "Jianing Wang;Qiushi Sun;Nuo Chen;Chengyu Wang;Jun Huang;Ming Gao;Xiang Li", "authorids": "~Jianing_Wang4;~Qiushi_Sun1;~Nuo_Chen4;~Chengyu_Wang1;~Jun_Huang4;~Ming_Gao1;~Xiang_Li24", "gender": "M;;M;M;M;M;M", "homepage": "https://qiushisun.github.io/;https://nuojohnchen.github.io/;https://chywang.github.io/;;http://dase.ecnu.edu.cn/mgao/;https://lixiang3776.github.io;https://github.com/wjn1996", "dblp": "247/8469;135/5622-2.html;135/5147-1;51/5022-7;71/4173-1;40/1491-67.html;", "google_scholar": "QgMkYFAAAAAJ;https://scholar.google.com/citations?hl=zh-CN;_AVfRnQAAAAJ;;https://scholar.google.com/citations?hl=zh-CN;JnxxNtsAAAAJ;ccaimI8AAAAJ", "or_profile": "~Qiushi_Sun1;~Nuo_Chen4;~Chengyu_Wang1;~Jun_Huang4;~Ming_Gao1;~Xiang_Li24;~Jia-ning_Wang1", "aff": "Institute of infocomm research, A*STAR;The Chinese University of Hong Kong, Shenzhen;Alibaba Group;Alibaba Group;East China Normal University;East China Normal University;East China Normal University", "aff_domain": "i2r.a-star.edu.sg;cuhk.edu.cn;alibaba-inc.com;alibaba.com;ecnu.edu.cn;ecnu.edu.cn;ecnu.edu.cn", "position": "Intern;Researcher;Researcher;Researcher;Full Professor;Full Professor;PhD student", "bibtex": "@inproceedings{\nwang2023uncertaintyaware,\ntitle={Uncertainty-aware Parameter-Efficient Self-training for Semi-supervised Language Understanding},\nauthor={Jianing Wang and Qiushi Sun and Nuo Chen and Chengyu Wang and Jun Huang and Ming Gao and Xiang Li},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language 
Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=W1w2eovejY}\n}", "github": "", "project": "", "reviewers": "rnmD;exqi;a1vW", "site": "https://openreview.net/forum?id=W1w2eovejY", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;3", "excitement": "3;4;4", "reproducibility": "3;2;4", "correctness": "2;4;4", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.0, "correctness_avg": 3.3333333333333335, "replies_avg": 10, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-5207-818X;0000-0001-6563-1215;;;0000-0002-5603-2680;0009-0003-0142-2483;0000-0001-6006-053X", "linkedin": "qiushi-sun/;;;https://www.linkedin.cn/injobs/in/%E4%BF%8A-%E9%BB%84-b28b6612b;;;", "aff_unique_index": "0;1;2;2;3;3;3", "aff_unique_norm": "Institute of Infocomm Research;Chinese University of Hong Kong;Alibaba Group;East China Normal University", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.i2r.a-star.edu.sg;https://www.cuhk.edu.cn;https://www.alibaba.com;http://www.ecnu.edu.cn", "aff_unique_abbr": "I2R;CUHK;Alibaba;ECNU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Shenzhen", "aff_country_unique_index": "0;1;1;1;1;1;1", "aff_country_unique": "Singapore;China" }, { "id": "W2ka7qsx1j", "title": "A Diffusion Weighted Graph Framework for New Intent Discovery", "track": "main", "status": "Long Main", "tldr": "", "abstract": "New Intent Discovery (NID) aims to recognize both new and known intents from unlabeled data with the aid of limited labeled data containing only known intents. Without considering structure relationships between samples, previous methods generate noisy supervisory signals which cannot strike a balance between quantity and quality, hindering the formation of new intent clusters and effective transfer of the pre-training knowledge. To mitigate this limitation, we propose a novel $\\textit{Diffusion Weighted Graph Framework}$ (DWGF) to capture both semantic similarities and structure relationships inherent in data, enabling more sufficient and reliable supervisory signals. \nSpecifically, for each sample, we diffuse neighborhood relationships along semantic paths guided by the nearest neighbors for multiple hops to characterize its local structure discriminately. Then, we sample its positive keys and weigh them based on semantic similarities and local structures for contrastive learning. During inference, we further propose $\\textit{Graph Smoothing Filter}$ (GSF) to explicitly utilize the structure relationships to filter high-frequency noise embodied in semantically ambiguous samples on the cluster boundary. Extensive experiments show that our method outperforms state-of-the-art models on all evaluation metrics across multiple benchmark datasets. 
Code and data will be made public.", "keywords": "New Intent Discovery;Contrastive Learning;Graph Structure Learning", "primary_area": "", "supplementary_material": "", "author": "Wenkai Shi;Wenbin An;Feng Tian;Qinghua Zheng;QianYing Wang;Ping Chen", "authorids": "~Wenkai_Shi1;~Wenbin_An1;~Feng_Tian4;~Qinghua_Zheng1;~QianYing_Wang1;~Ping_Chen1", "gender": "M;M;;;F;", "homepage": "https://github.com/yibai-shi;;;http://gr.xjtu.edu.cn/web/qhzheng;https://research.lenovo.com/webapp/view/home.html;http://www.cs.umb.edu/~pchen", "dblp": ";331/2394;;32/1858;86/11012;", "google_scholar": ";https://scholar.google.com.hk/citations?user=BpkQZGgAAAAJ;;;gXgWhfEAAAAJ;", "or_profile": "~Wenkai_Shi1;~Wenbin_An1;~Feng_Tian4;~Qinghua_Zheng1;~QianYing_Wang1;~Ping_Chen1", "aff": "Xi'an Jiaotong University;Xi'an Jiaotong University;;Xi'an Jiaotong University;lenovo group;University of Massachusetts, Boston", "aff_domain": "xjtu.edu.cn;xjtu.edu.cn;;xjtu.edu.cn;lenovo.com;umb.edu", "position": "MS student;PhD student;;Full Professor;Principal Researcher;Full Professor", "bibtex": "@inproceedings{\nshi2023a,\ntitle={A Diffusion Weighted Graph Framework for New Intent Discovery},\nauthor={Wenkai Shi and Wenbin An and Feng Tian and Qinghua Zheng and QianYing Wang and Ping Chen},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=W2ka7qsx1j}\n}", "github": "", "project": "", "reviewers": "pSNe;DFjL;CQvg;zbJ8", "site": "https://openreview.net/forum?id=W2ka7qsx1j", "pdf_size": 0, "rating": "4;4;4;4", "confidence": "4;3;2;3", "excitement": "2;3;4;4", "reproducibility": "4;4;4;4", "correctness": "2;3;3;4", "rating_avg": 4.0, "confidence_avg": 3.0, "excitement_avg": 3.25, "reproducibility_avg": 4.0, "correctness_avg": 3.0, "replies_avg": 13, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;", "linkedin": ";;;;qianying-jane-wang-0255231/;", "aff_unique_index": "0;0;0;1;2", "aff_unique_norm": "Xi'an Jiao Tong University;Lenovo Group;University of Massachusetts Boston", "aff_unique_dep": ";;", "aff_unique_url": "https://www.xjtu.edu.cn;https://www.lenovo.com;https://www.umb.edu", "aff_unique_abbr": "XJTU;Lenovo;UMass Boston", "aff_campus_unique_index": "1", "aff_campus_unique": ";Boston", "aff_country_unique_index": "0;0;0;0;1", "aff_country_unique": "China;United States" }, { "id": "W4GlqAnXqv", "title": "Frequency Balanced Datasets Lead to Better Language Models", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "This paper reports on the experiments aimed to improve our understanding of the role of the amount of data required for training attention-based transformer language models. Specifically, we investigate the impact of reducing the immense amounts of required pre-training data through sampling strategies that identify and reduce high-frequency tokens as different studies have indicated that the existence of very high-frequency tokens in pre-training data might bias learning, causing undesired effects. In this light, we describe our sampling algorithm that iteratively assesses token frequencies and removes sentences that contain still high-frequency tokens, eventually delivering a balanced, linguistically correct dataset. We evaluate the results in terms of model perplexity and fine-tuning linguistic probing tasks, NLP downstream tasks as well as more semantic SuperGlue tasks. 
The results show that pre-training with the resulting balanced dataset allows the amount of pre-training data to be reduced by up to a factor of three.", "keywords": "Language Models;word frequency;pre-training corpus;low-resource languages", "primary_area": "", "supplementary_material": "", "author": "Rodolfo Joel Zevallos;Mireia Farr\u00fas;N\u00faria Bel", "authorids": "~Rodolfo_Joel_Zevallos1;~Mireia_Farr\u00fas1;~N\u00faria_Bel1", "gender": "M;;F", "homepage": ";http://clic.ub.edu/ca/users/mireia-farr%C3%BAs-cabeceran;https://www.upf.edu/web/nuria-bel", "dblp": ";;01/4474", "google_scholar": "https://scholar.google.com/citations?hl=es;rCvSHQ0AAAAJ;https://scholar.google.es/citations?user=AfK2EpIAAAAJ", "or_profile": "~Rodolfo_Joel_Zevallos1;~Mireia_Farr\u00fas1;~N\u00faria_Bel1", "aff": "Universitat Pompeu Fabra;Universitat de Barcelona;Universitat Pompeu Fabra", "aff_domain": "upf.edu;ub.edu;upf.edu", "position": "PhD student;Researcher;Full Professor", "bibtex": "@inproceedings{\nzevallos2023frequency,\ntitle={Frequency Balanced Datasets Lead to Better Language Models},\nauthor={Rodolfo Joel Zevallos and Mireia Farr{\\'u}s and N{\\'u}ria Bel},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=W4GlqAnXqv}\n}", "github": "", "project": "", "reviewers": "zg87;VqbC;oSA3;gnCA", "site": "https://openreview.net/forum?id=W4GlqAnXqv", "pdf_size": 0, "rating": "4;4;4;4", "confidence": "3;4;4;2", "excitement": "3;3;4;3", "reproducibility": "4;5;4;5", "correctness": "3;3;4;4", "rating_avg": 4.0, "confidence_avg": 3.25, "excitement_avg": 3.25, "reproducibility_avg": 4.5, "correctness_avg": 3.5, "replies_avg": 14, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0003-0192-7740;0000-0002-7160-9513;0000-0001-9346-7803", "linkedin": "rodolfo-zevallos/;mireia-farr%C3%BAs-9b767b7/;", "aff_unique_index": "0;1;0", "aff_unique_norm": "Universitat Pompeu Fabra;University of Barcelona", "aff_unique_dep": ";", "aff_unique_url": "https://www.upf.edu/;https://www.ub.edu", "aff_unique_abbr": "UPF;UB", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Spain" }, { "id": "W4Vk1ufh7l", "title": "TRIP: Accelerating Document-level Multilingual Pre-training via Triangular Document-level Pre-training on Parallel Data Triplets", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Despite the success of multilingual sequence-to-sequence pre-training, most existing approaches rely on document-level monolingual corpora in many different languages, sentence-level bilingual corpora,\\footnote{In this paper, we use bilingual corpora to denote parallel corpora with bilingual translation pairs in many different language pairs, each consisting of two sentences/documents with the same meaning written in different languages. We use trilingual corpora to denote parallel corpora with trilingual translation pairs in many different language combinations, each consisting of three sentences/documents.} and sometimes synthetic document-level bilingual corpora. This hampers performance on cross-lingual document-level tasks such as document-level translation. Hence, we propose to mine and leverage document-level trilingual parallel corpora to improve sequence-to-sequence multilingual pre-training. 
We present \\textbf{Tri}angular Document-level \\textbf{P}re-training (\\textbf{TRIP}) as the first in the field to accelerate the conventional monolingual and bilingual objectives into a trilingual objective with a novel method called Grafting. Experiments show that TRIP achieves several strong state-of-the-art (SOTA) scores on three multilingual document-level machine translation benchmarks and one cross-lingual abstractive summarization benchmark, including consistent improvements by up to 3.11 d-BLEU points and 8.9 ROUGE-L points.", "keywords": "multilingual pre-training; machine translation; cross-lingual summarization", "primary_area": "", "supplementary_material": "", "author": "Hongyuan Lu;Haoyang Huang;Shuming Ma;Dongdong Zhang;Wai Lam;Zhaochuan Gao;Anthony Aue;Arul Menezes;Furu Wei", "authorids": "~Hongyuan_Lu2;~Haoyang_Huang1;~Shuming_Ma1;~Dongdong_Zhang4;~Wai_Lam1;~Zhaochuan_Gao1;~Anthony_Aue1;~Arul_Menezes1;~Furu_Wei1", "gender": "M;M;;M;M;M;M;M;M", "homepage": "https://dblp1.uni-trier.de/pid/139/4326.html;;https://www.microsoft.com/en-us/research/people/shumma/;https://www.microsoft.com/en-us/research/people/dozhang/;http://www.se.cuhk.edu.hk/~textmine;https://www.linkedin.com/in/zhaochuan-gao/;;https://www.linkedin.com/in/arulmenezes;https://www.microsoft.com/en-us/research/people/fuwei/", "dblp": "139/4326;;;02/621-1.html;48/1707;;a/AnthonyAue;89/2869;72/5870", "google_scholar": ";;;w2qu71oAAAAJ;ewA4NAcAAAAJ;;;DnhOg3YAAAAJ;G-V1VpwAAAAJ", "or_profile": "~Hongyuan_Lu2;~Haoyang_Huang1;~Shuming_Ma1;~Dongdong_Zhang4;~Wai_Lam1;~Zhaochuan_Gao1;~Anthony_Aue1;~Arul_Menezes1;~Furu_Wei1", "aff": "Microsoft Corporation;Microsoft Research Asia;Microsoft;Microsoft Research Asia;The Chinese University of Hong Kong;;Microsoft;Microsoft Research;Microsoft Research", "aff_domain": "microsoft.com;microsoft.com;microsoft.com;microsoft.com;cuhk.edu.hk;;microsoft.com;research.microsoft.com;microsoft.com", "position": "Intern;FTE;Researcher;Researcher;Professor;;Researcher;Distinguished Engineer;Distinguished Scientist", "bibtex": "@inproceedings{\nlu2023trip,\ntitle={{TRIP}: Accelerating Document-level Multilingual Pre-training via Triangular Document-level Pre-training on Parallel Data Triplets},\nauthor={Hongyuan Lu and Haoyang Huang and Shuming Ma and Dongdong Zhang and Wai Lam and Zhaochuan Gao and Anthony Aue and Arul Menezes and Furu Wei},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=W4Vk1ufh7l}\n}", "github": "", "project": "", "reviewers": "fb6A;4LSK;WyK4", "site": "https://openreview.net/forum?id=W4Vk1ufh7l", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;4;4", "excitement": "3;3;4", "reproducibility": "4;3;3", "correctness": "3;3;4", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.3333333333333335, "replies_avg": 10, "authors#_avg": 9, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;;;;", "linkedin": "luke-lu-595b68136;%E6%B5%A9%E6%B4%8B-%E9%BB%84-77a59016a/;;;;zhaochuan-gao/;;arulmenezes;", "aff_unique_index": "0;0;0;0;1;0;0;0", "aff_unique_norm": "Microsoft;Chinese University of Hong Kong", "aff_unique_dep": "Microsoft Corporation;", "aff_unique_url": "https://www.microsoft.com;https://www.cuhk.edu.hk", "aff_unique_abbr": "Microsoft;CUHK", "aff_campus_unique_index": "1;1;2", "aff_campus_unique": ";Asia;Hong Kong SAR", "aff_country_unique_index": 
"0;1;0;1;1;0;0;0", "aff_country_unique": "United States;China" }, { "id": "W6ijeWfHFU", "title": "Improving Factual Consistency for Knowledge-Grounded Dialogue Systems via Knowledge Enhancement and Alignment", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Pretrained language models (PLMs) based knowledge-grounded dialogue systems are prone to generate responses that are factually inconsistent with the provided knowledge source. In such inconsistent responses, the dialogue models fail to accurately express the external factual knowledge they rely upon. Inspired by previous work which identified that feedforward networks (FFNs) within Transformers are responsible for factual knowledge expressions, we investigate two methods to efficiently improve the factual expression capability of FFNs by knowledge enhancement and alignment respectively. We first propose K-Dial, which explicitly introduces extended FFNs in Transformers to enhance factual knowledge expressions given the specific patterns of knowledge-grounded dialogue inputs. Additionally, we apply the reinforcement learning for factual consistency (RLFC) method to implicitly adjust FFNs\u2019 expressions in responses by aligning with gold knowledge for the factual consistency preference. To comprehensively assess the factual consistency and dialogue quality of responses, we employ extensive automatic measures and human evaluations including sophisticated fine-grained NLI-based metrics. Experimental results on WoW and CMU\\_DoG datasets demonstrate that our methods efficiently enhance the ability of the FFN module to convey factual knowledge, validating the efficacy of improving factual consistency for knowledge-grounded dialogue systems.", "keywords": "knowledge-grounded dialogue system;factual consistency;knowledge enhancement", "primary_area": "", "supplementary_material": "", "author": "Boyang XUE;Weichao Wang;Hongru WANG;Fei Mi;Rui Wang;Yasheng Wang;Lifeng Shang;Xin Jiang;Qun Liu;Kam-Fai Wong", "authorids": "~Boyang_XUE1;~Weichao_Wang3;~Hongru_WANG1;~Fei_Mi1;~Rui_Wang30;~Yasheng_Wang1;~Lifeng_Shang1;~Xin_Jiang1;~Qun_Liu1;~Kam-Fai_Wong2", "gender": "M;M;M;M;M;M;M;M;M;M", "homepage": "https://amourwaltz.github.io/;;https://rulegreen.github.io/;https://mifei.github.io/;;;;;http://liuquncn.github.io/;http://www.se.cuhk.edu.hk/~kfwong", "dblp": "285/5446.html;;72/1462-3;161/0068;06/2293-92;57/8493;70/4288;42/4142-2;75/4402-1;w/KamFaiWong", "google_scholar": "S0BbF6wAAAAJ;HZnZBdcAAAAJ;s6UtVYUAAAAJ;gX3493QAAAAJ;https://scholar.google.com/citations?view_op=list_works;x-UYeJ4AAAAJ;https://scholar.google.com.hk/citations?user=jMQIjYoAAAAJ;DUfcez0AAAAJ;2HhiGzcAAAAJ;", "or_profile": "~Boyang_XUE1;~Weichao_Wang3;~Hongru_WANG1;~Fei_Mi1;~Rui_Wang30;~Yasheng_Wang1;~Lifeng_Shang1;~Xin_Jiang1;~Qun_Liu1;~Kam-Fai_Wong2", "aff": "Chinese University of Hong Kong, The Chinese University of Hong Kong;Huawei Technologies Ltd.;University of Edinburgh;;Harbin Institute of Technology;;Huawei Technologies Ltd.;Noah\u2019s Ark Lab, Huawei Technologies;Huawei Noah's Ark Lab;The Chinese University of Hong Kong", "aff_domain": "se.cuhk.edu.hk;huawei.com;ed.ac.uk;;hit.edu.cn;;huawei.com;huawei.com;huawei.com;cuhk.edu.hk", "position": "PhD student;Researcher;Visiting Student;;MS student;;Researcher;Principal Researcher;Chief Scientist of Speech and Language Computing;Full Professor", "bibtex": "@inproceedings{\nxue2023improving,\ntitle={Improving Factual Consistency for Knowledge-Grounded Dialogue Systems via Knowledge Enhancement and 
Alignment},\nauthor={Boyang XUE and Weichao Wang and Hongru WANG and Fei Mi and Rui Wang and Yasheng Wang and Lifeng Shang and Xin Jiang and Qun Liu and Kam-Fai Wong},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=W6ijeWfHFU}\n}", "github": "", "project": "", "reviewers": "RZpp;j1xH;BZCd;waXu", "site": "https://openreview.net/forum?id=W6ijeWfHFU", "pdf_size": 0, "rating": "3;3;3;3", "confidence": "4;4;3;3", "excitement": "2;4;2;2", "reproducibility": "4;4;3;2", "correctness": "3;4;2;3", "rating_avg": 3.0, "confidence_avg": 3.5, "excitement_avg": 2.5, "reproducibility_avg": 3.25, "correctness_avg": 3.0, "replies_avg": 14, "authors#_avg": 10, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0003-0731-3977;;0000-0001-5027-0138;;;;;0000-0002-9117-8247;0000-0002-7000-1792;0000-0002-9427-5659", "linkedin": "boyang-xue-506561208/;;;;;;;xin-jiang-9577b76/;qunliu/;", "aff_unique_index": "0;1;2;3;1;1;1;0", "aff_unique_norm": "Chinese University of Hong Kong;Huawei;University of Edinburgh;Harbin Institute of Technology", "aff_unique_dep": ";Huawei Technologies;;", "aff_unique_url": "https://www.cuhk.edu.hk;https://www.huawei.com;https://www.ed.ac.uk;http://www.hit.edu.cn/", "aff_unique_abbr": "CUHK;Huawei;Edinburgh;HIT", "aff_campus_unique_index": "0;2;0", "aff_campus_unique": "Hong Kong SAR;;Harbin", "aff_country_unique_index": "0;0;1;0;0;0;0;0", "aff_country_unique": "China;United Kingdom" }, { "id": "W76aMA1x9l", "title": "Detecting Erroneously Recognized Handwritten Byzantine Text", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Handwritten text recognition (HTR) yields textual output that comprises errors, which are considerably more compared to that of recognised printed (OCRed) text. Post-correcting methods can eliminate such errors but may also introduce errors. In this study, we investigate the issues arising from this reality in Byzantine Greek. We investigate the properties of the texts that lead post-correction systems to this adversarial behaviour and we experiment with text classification systems that learn to detect incorrect recognition output. A large masked language model, pre-trained in modern and fine-tuned in Byzantine Greek, achieves an Average Precision score of 95%. The score improves to 97% when using a model that is pre-trained in modern and then in ancient Greek, the two language forms Byzantine Greek combines elements from. A century-based analysis shows that the advantage of the classifier that is further-pre-trained in ancient Greek concerns texts of older centuries. 
The application of this classifier before a neural post-corrector on HTRed text reduced significantly the post-correction mistakes.", "keywords": "text classification;error detection;handwritten text recognition", "primary_area": "", "supplementary_material": "", "author": "John Pavlopoulos;Vasiliki Kougia;Paraskevi Platanou;Holger Essler", "authorids": "~John_Pavlopoulos1;~Vasiliki_Kougia1;~Paraskevi_Platanou1;~Holger_Essler1", "gender": "M;F;;", "homepage": "https://ipavlopoulos.github.io/;;;", "dblp": "09/269;241/9532;;", "google_scholar": "niKjjdEAAAAJ;rhl4uksAAAAJ;;", "or_profile": "~John_Pavlopoulos1;~Vasiliki_Kougia1;~Paraskevi_Platanou1;~Holger_Essler1", "aff": "Athens University of Economics and Business;Universit\u00e4t Vienna;University of Athens;University of Venice", "aff_domain": "aueb.gr;univie.ac.at;uoa.gr;unive.it", "position": "Researcher;PhD student;MS student;Associate Professor", "bibtex": "@inproceedings{\npavlopoulos2023detecting,\ntitle={Detecting Erroneously Recognized Handwritten Byzantine Text},\nauthor={John Pavlopoulos and Vasiliki Kougia and Paraskevi Platanou and Holger Essler},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=W76aMA1x9l}\n}", "github": "", "project": "", "reviewers": "nm3Z;PgK8;ubKi", "site": "https://openreview.net/forum?id=W76aMA1x9l", "pdf_size": 0, "rating": "4;4;4", "confidence": "5;4;2", "excitement": "3;3;4", "reproducibility": "5;4;3", "correctness": "3;3;4", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0001-9188-7425;0000-0002-0172-6917;;0000-0003-4759-7716", "linkedin": "itpavlopoulos/;;;", "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Athens University of Economics and Business;University of Vienna;University of Athens;University of Venice", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.aueb.gr;https://univie.ac.at;https://www.uoa.gr;https://www.unive.it", "aff_unique_abbr": "AUEB;UV;UoA;Unive", "aff_campus_unique_index": "0", "aff_campus_unique": "Athens;", "aff_country_unique_index": "0;1;0;2", "aff_country_unique": "Greece;Austria;Italy" }, { "id": "WAhhZcaA3R", "title": "Enhancing Biomedical Lay Summarisation with External Knowledge Graphs", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Previous approaches for automatic lay summarisation are exclusively reliant on the source article that, given it is written for a technical audience (e.g., researchers), is unlikely to explicitly define all technical concepts or state all of the background information that is relevant for a lay audience. We address this issue by augmenting eLife, an existing biomedical lay summarisation dataset, with article-specific knowledge graphs, each containing detailed information on relevant biomedical concepts. Using both automatic and human evaluations, we systematically investigate the effectiveness of three different approaches for incorporating knowledge graphs within lay summarisation models, with each method targeting a distinct area of the encoder-decoder model architecture. 
\nOur results confirm that integrating graph-based domain knowledge can significantly benefit lay summarisation by substantially increasing the readability of generated text and improving the explanation of technical concepts.", "keywords": "Summarisation;Knowledge Graphs", "primary_area": "", "supplementary_material": "", "author": "Tomas Goldsack;Zhihao Zhang;Chen Tang;Carolina Scarton;Chenghua Lin", "authorids": "~Tomas_Goldsack1;~Zhihao_Zhang3;~Chen_Tang5;~Carolina_Scarton1;~Chenghua_Lin1", "gender": "M;M;;;", "homepage": "https://tgoldsack1.github.io/;;;https://carolscarton.github.io;", "dblp": ";;;23/8672;", "google_scholar": "SpGQaT0AAAAJ;;;e6YOuiQAAAAJ;", "or_profile": "~Tomas_Goldsack1;~Zhihao_Zhang3;~Chen_Tang5;~Carolina_Scarton1;~Chenghua_Lin1", "aff": "University of Sheffield;Beihang University;;University of Sheffield;", "aff_domain": "sheffield.ac.uk;buaa.edu.cn;;sheffield.ac.uk;", "position": "PhD student;PhD student;;Lecturer;", "bibtex": "@inproceedings{\ngoldsack2023enhancing,\ntitle={Enhancing Biomedical Lay Summarisation with External Knowledge Graphs},\nauthor={Tomas Goldsack and Zhihao Zhang and Chen Tang and Carolina Scarton and Chenghua Lin},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=WAhhZcaA3R}\n}", "github": "", "project": "", "reviewers": "cPxM;6F5o;sfG5", "site": "https://openreview.net/forum?id=WAhhZcaA3R", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;4;4", "excitement": "4;4;4", "reproducibility": "4;4;4", "correctness": "4;3;4", "rating_avg": 5.0, "confidence_avg": 4.0, "excitement_avg": 4.0, "reproducibility_avg": 4.0, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0003-2205-8193;0000-0002-8860-0881;;0000-0002-0103-4072;", "linkedin": "tomas-goldsack-729190152/;;;carolina-scarton/;", "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Sheffield;Beihang University", "aff_unique_dep": ";", "aff_unique_url": "https://www.sheffield.ac.uk;http://www.buaa.edu.cn/", "aff_unique_abbr": "Sheffield;BUAA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "United Kingdom;China" }, { "id": "WC1jbtEwRS", "title": "Flatness-Aware Prompt Selection Improves Accuracy and Sample Efficiency", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "With growing capabilities of large language models, prompting them has become the dominant way to access them. \nThis has motivated the development of strategies for automatically selecting effective language prompts. \nIn this paper, we introduce **pFlat** (prompt flatness), a new metric to quantify the expected utility of a language prompt. This metric is \ninspired by *flatness* regularization in statistical learning that quantifies the robustness of the model towards its parameter perturbations. We provide theoretical foundations for this metric and its relationship with other prompt selection metrics, providing a comprehensive understanding of existing methods. \nEmpirically, we show that\ncombining **pFlat** with existing metrics\nimproves\nboth performance and sample efficiency. 
\nOur metric outperforms the previous prompt selection metrics with an average increase of 10% in Pearson correlation across 6 classification benchmarks, and the prompt selected by our metric gains 5% higher accuracy than previous metrics across the benchmarks.", "keywords": "prompt selection;flatness of prompt", "primary_area": "", "supplementary_material": "", "author": "Lingfeng Shen;Weiting Tan;Boyuan Zheng;Daniel Khashabi", "authorids": "~Lingfeng_Shen1;~Weiting_Tan1;~Boyuan_Zheng1;~Daniel_Khashabi2", "gender": "M;M;M;M", "homepage": ";https://steventan0110.github.io/;https://boyuanzheng010.github.io/;http://danielkhashabi.com/", "dblp": "240/5490.html;208/0745;;71/10515", "google_scholar": "PoSTdLAAAAAJ;hD8E4gYAAAAJ;amEL4n8AAAAJ;pK2kQvgAAAAJ", "or_profile": "~Lingfeng_Shen1;~Weiting_Tan1;~Boyuan_Zheng1;~Daniel_Khashabi2", "aff": "Johns Hopkins University;Johns Hopkins University;Johns Hopkins University;Johns Hopkins University", "aff_domain": "jh.edu;jhu.edu;jhu.edu;jhu.edu", "position": "MS student;MS student;MS student;Assistant Professor", "bibtex": "@inproceedings{\nshen2023flatnessaware,\ntitle={Flatness-Aware Prompt Selection Improves Accuracy and Sample Efficiency},\nauthor={Lingfeng Shen and Weiting Tan and Boyuan Zheng and Daniel Khashabi},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=WC1jbtEwRS}\n}", "github": "", "project": "", "reviewers": "TSnL;CMk4;wwPW", "site": "https://openreview.net/forum?id=WC1jbtEwRS", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;3;3", "excitement": "2;4;4", "reproducibility": "3;4;5", "correctness": "3;4;4", "rating_avg": 3.0, "confidence_avg": 3.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.0, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;", "linkedin": ";weiting-steven-tan-30bb4a175/;boyuan-zheng-602238183/;", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Johns Hopkins University", "aff_unique_dep": "", "aff_unique_url": "https://www.jhu.edu", "aff_unique_abbr": "JHU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "WC9yjSosSA", "title": "ESPVR: Entity Spans Position Visual Regions for Multimodal Named Entity Recognition", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Multimodal Named Entity Recognition (MNER) uses visual information to improve the performance of text-only Named Entity Recognition (NER). However, existing methods for acquiring local visual information suffer from certain limitations: (1) using an attention-based method to extract visual regions related to the text from visual regions obtained through convolutional architectures (e.g., ResNet), attention is distracted by the entire image, rather than being fully focused on the visual regions most relevant to the text; (2) using an object detection-based (e.g., Mask R-CNN) method to detect visual object regions related to the text, object detection has a limited range of recognition categories. Moreover, the visual regions obtained by object detection may not correspond to the entities in the text. In summary, the goal of these methods is not to extract the most relevant visual regions for the entities in the text. The visual regions obtained by these methods may be redundant or insufficient for the entities in the text. 
In this paper, we propose an Entity Spans Position Visual Regions (ESPVR) module to obtain the most relevant visual regions corresponding to the entities in the text. Experiments show that our proposed approach can achieve the SOTA on Twitter-2017 and competitive results on Twitter-2015.", "keywords": "Multimodal named entity recognition;Local visual information;Global visual information", "primary_area": "", "supplementary_material": "", "author": "Xiujiao Li;Guanglu Sun;Xinyu Liu", "authorids": "~Xiujiao_Li1;~Guanglu_Sun1;~Xinyu_Liu11", "gender": "F;;F", "homepage": "https://blog.csdn.net/qq_52371455?spm=1018.2226.3001.5343;http://graduate.hrbust.edu.cn/2023/0327/c4155a85091/page.htm;https://blog.csdn.net/qq_43752906?type=blog", "dblp": ";36/8938;", "google_scholar": ";https://scholar.google.com/citations?hl=zh-CN;", "or_profile": "~Xiujiao_Li1;~Guanglu_Sun1;~Xinyu_Liu11", "aff": "Harbin University of Science and Technology;Harbin University of Science and Technology;Harbin University of Science and Technology", "aff_domain": "hrbust.edu.cn;hrbust.edu.cn;hrbust.edu.cn", "position": "MS student;Full Professor;PhD student", "bibtex": "@inproceedings{\nli2023espvr,\ntitle={{ESPVR}: Entity Spans Position Visual Regions for Multimodal Named Entity Recognition},\nauthor={Xiujiao Li and Guanglu Sun and Xinyu Liu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=WC9yjSosSA}\n}", "github": "", "project": "", "reviewers": "Xr7Z;mY78;b5zS;YD3t", "site": "https://openreview.net/forum?id=WC9yjSosSA", "pdf_size": 0, "rating": "3;3;3;3", "confidence": "4;3;3;2", "excitement": "3;3;4;4", "reproducibility": "3;4;4;4", "correctness": "2;2;3;3", "rating_avg": 3.0, "confidence_avg": 3.0, "excitement_avg": 3.5, "reproducibility_avg": 3.75, "correctness_avg": 2.5, "replies_avg": 14, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0003-2589-1164;", "linkedin": ";;", "aff_unique_index": "0;0;0", "aff_unique_norm": "Harbin University of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "http://www.hust.edu.cn", "aff_unique_abbr": "HUST", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Harbin", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "WCxfj3PsWb", "title": "Multi-level Adaptive Contrastive Learning for Knowledge Internalization in Dialogue Generation", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Knowledge-grounded dialogue generation aims to mitigate the issue of text degeneration by incorporating external knowledge to supplement the context. However, the model often fails to internalize this information into responses in a human-like manner. Instead, it simply inserts segments of the provided knowledge into generic responses. As a result, the generated responses tend to be tedious, incoherent, and in lack of interactivity which means the degeneration problem is still unsolved. In this work, we first find that such copying-style degeneration is primarily due to the weak likelihood objective, which allows the model to \"cheat\" the objective by merely duplicating knowledge segments in a superficial pattern matching based on overlap. To overcome this challenge, we then propose a Multi-level Adaptive Contrastive Learning (MACL) framework that dynamically samples negative examples and subsequently penalizes degeneration behaviors at both the token-level and sequence-level. 
Extensive experiments on the WoW dataset demonstrate the effectiveness of our approach across various pre-trained models and decoding strategies.", "keywords": "knowledge-grounded dialogue generation;contrastive learning;text degeneration;pre-trained language model", "primary_area": "", "supplementary_material": "", "author": "Chenxu Yang;Zheng Lin;Lanrui Wang;Chong Tian;Liang Pang;Jiangnan Li;Qirong Ho;Yanan Cao;Weiping Wang", "authorids": "~Chenxu_Yang1;~Zheng_Lin5;~Lanrui_Wang1;~Chong_Tian2;~Liang_Pang1;~Jiangnan_Li2;~Qirong_Ho1;~Yanan_Cao1;~Weiping_Wang4", "gender": "M;M;M;M;M;;F;M;F", "homepage": "https://iie-ycx.github.io/;https://github.com/wanglanrui737;https://github.com/RefrainTC;https://pl8787.github.io/;;;;https://teacher.ucas.ac.cn/~0012246;http://people.ucas.edu.cn/~linzheng", "dblp": "316/8012.html;331/0849;;37/11078;;13/7590;97/5152-1;72/4134-5.html;51/3740-1.html", "google_scholar": "oPaIw-gAAAAJ;btMITekAAAAJ;;1dgQHBkAAAAJ;https://scholar.google.com/citations?hl=zh-CN;tR3AZbwAAAAJ;;zH_wmdwAAAAJ;", "or_profile": "~Chenxu_Yang1;~Lanrui_Wang1;~Chong_Tian2;~Liang_Pang1;~Jiangnan_Li2;~Qirong_Ho1;~Yanan_Cao1;~Weiping_Wang4;~zheng_Lin4", "aff": "Institute of Information Enginering, Chinese Academy of Sciences;\tInstitute of Information Engineering, Chinese Academy of Sciences;Mohamed bin Zayed University of Artificial Intelligence;Institute of Computing Technology, Chinese Academy of Sciences;Institute of Information Engineering, Chinese Academy of Sciences;Petuum, Inc.;Institute of Information Engineering, Chinese Academy of Sciences;IIE;Institute of Information Engineering, Chinese Academy of Sciences", "aff_domain": "iie.ac.cn;iie.ac.cn;mbzuai.ac.ae;ict.ac.cn;iie.ac.cn;petuum.com;iie.ac.cn;iie.ac.cn;iie.ac.cn", "position": "PhD student;MS student;MS student;Associate Professor;PhD student;CTO;Full Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nyang2023multilevel,\ntitle={Multi-level Adaptive Contrastive Learning for Knowledge Internalization in Dialogue Generation},\nauthor={Chenxu Yang and Zheng Lin and Lanrui Wang and Chong Tian and Liang Pang and Jiangnan Li and Qirong Ho and Yanan Cao and Weiping Wang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=WCxfj3PsWb}\n}", "github": "", "project": "", "reviewers": "pQyx;XHDH;Q1NT", "site": "https://openreview.net/forum?id=WCxfj3PsWb", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;3;4", "excitement": "3;4;3", "reproducibility": "4;3;4", "correctness": "4;3;3", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 9, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0000-0003-1161-8546;0000-0001-7943-8942;;0000-0003-3534-1094;0000-0002-8618-4992;0000-0002-8432-1658", "linkedin": ";;;;;;;;", "aff_unique_index": "0;0;1;0;0;2;0;3;0", "aff_unique_norm": "Chinese Academy of Sciences;Mohamed bin Zayed University of Artificial Intelligence;Petuum, Inc.;Institute of Industrial Engineers", "aff_unique_dep": "Institute of Information Engineering;;;", "aff_unique_url": "http://www.cas.cn;https://mbzuai.ac.ae;https://www.petuum.com;https://www.iie.org", "aff_unique_abbr": "CAS;MBZUAI;Petuum;IIE", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0;2;0;2;0", "aff_country_unique": "China;United Arab 
Emirates;United States" }, { "id": "WEHwc4hSQR", "title": "GD-COMET: A Geo-Diverse Commonsense Inference Model", "track": "main", "status": "Short Main", "tldr": "", "abstract": "With the increasing integration of AI into everyday life, it's becoming crucial to design AI systems to serve users from diverse backgrounds by making them culturally aware. In this paper, we present GD-COMET, a geo-diverse version of the COMET commonsense inference model. GD-COMET goes beyond Western commonsense knowledge and is capable of generating inferences pertaining to a broad range of cultures. We demonstrate the effectiveness of GD-COMET through a comprehensive human evaluation across 5 diverse cultures, as well as extrinsic evaluation on a geo-diverse task. The evaluation shows that GD-COMET captures and generates culturally nuanced commonsense knowledge, demonstrating its potential to benefit NLP applications across the board and contribute to making NLP more inclusive.", "keywords": "commonsense reasoning;culture-aware NLP;geo-diverse applications", "primary_area": "", "supplementary_material": "", "author": "Mehar Bhatia;Vered Shwartz", "authorids": "~Mehar_Bhatia2;~Vered_Shwartz1", "gender": "F;F", "homepage": "https://meharbhatia.github.io/;https://www.cs.ubc.ca/~vshwartz/", "dblp": ";166/2038", "google_scholar": "F1efoLgAAAAJ;bbe4ResAAAAJ", "or_profile": "~Mehar_Bhatia2;~Vered_Shwartz1", "aff": "University of British Columbia;University of British Columbia", "aff_domain": "ubc.ca;ubc.ca", "position": "MS student;Assistant Professor", "bibtex": "@inproceedings{\nbhatia2023gdcomet,\ntitle={{GD}-{COMET}: A Geo-Diverse Commonsense Inference Model},\nauthor={Mehar Bhatia and Vered Shwartz},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=WEHwc4hSQR}\n}", "github": "", "project": "", "reviewers": "3GPm;J9XN;La5x", "site": "https://openreview.net/forum?id=WEHwc4hSQR", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;4;5", "excitement": "3;4;4", "reproducibility": "4;5;4", "correctness": "3;4;4", "rating_avg": 5.0, "confidence_avg": 4.333333333333333, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.333333333333333, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";", "linkedin": "meharbhatia23/;vered-shwartz-99548633/", "aff_unique_index": "0;0", "aff_unique_norm": "University of British Columbia", "aff_unique_dep": "", "aff_unique_url": "https://www.ubc.ca", "aff_unique_abbr": "UBC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Canada" }, { "id": "WLIFsPSq3t", "title": "Beyond Layout Embedding: Layout Attention with Gaussian Biases for Structured Document Understanding", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Effectively encoding layout information is a central problem in structured document understanding. Most existing methods rely heavily on millions of trainable parameters to learn the layout features of each word from Cartesian coordinates. However, two unresolved questions remain: (1) Is the Cartesian coordinate system the optimal choice for layout modeling? (2) Are massive learnable parameters truly necessary for layout representation? 
In this paper, we address these questions by proposing Layout Attention with Gaussian Biases (LAGaBi): Firstly, we find that polar coordinates provide a superior choice over Cartesian coordinates as they offer a measurement of both distance and angle between word pairs, capturing relative positions more effectively. Furthermore, by feeding the distances and angles into 2-D Gaussian kernels, we model intuitive inductive layout biases, i.e., the words closer within a document should receive more attention, which will act as the attention biases to revise the textual attention distribution. LAGaBi is model-agnostic and language-independent, which can be applied to a range of transformer-based models, such as the text pre-training models from the BERT series and the LayoutLM series that incorporate visual features. Experimental results on three widely used benchmarks demonstrate that, despite reducing the number of layout parameters from millions to 48, LAGaBi achieves competitive or even superior performance.", "keywords": "Structured Document Understanding;Layout Attention;Spatial Relationships;Polar Coordinates", "primary_area": "", "supplementary_material": "", "author": "Xi Zhu;Xue Han;Shuyuan Peng;Shuo Lei;Chao Deng;Junlan Feng", "authorids": "~Xi_Zhu3;~Xue_Han3;~Shuyuan_Peng1;~Shuo_Lei2;~Chao_Deng4;~Junlan_Feng3", "gender": "F;F;M;M;M;F", "homepage": "https://scholar.google.com/citations?user=C4E0NZkAAAAJ&hl=zh-CN;;;https://scholar.google.com/;;", "dblp": "15/6877.html;;https://dblp.uni-trier.de/pid/300/8424;11/1111.html;;36/3948", "google_scholar": "C4E0NZkAAAAJ;Rg4xqCgAAAAJ;;https://scholar.google.com/citations;https://scholar.google.com/citations?hl=en;https://scholar.google.es/citations?user=rBjPtmQAAAAJ", "or_profile": "~Xi_Zhu3;~Xue_Han3;~Shuyuan_Peng1;~Shuo_Lei2;~Chao_Deng4;~Junlan_Feng3", "aff": "China Mobile Research Institute;China Mobile Communications Company Limited Research Institute;China Mobile Research Institute;China Mobile Research Institute;China Mobile Research Institute;China Mobile", "aff_domain": "chinamobile.com;chinamobile.com;chinamobile.com;chinamobile.com;jiutian.10086.cn;ioa.ac.cn", "position": "Researcher;Researcher;Researcher;Researcher;Researcher;Principal Researcher", "bibtex": "@inproceedings{\nzhu2023beyond,\ntitle={Beyond Layout Embedding: Layout Attention with Gaussian Biases for Structured Document Understanding},\nauthor={Xi Zhu and Xue Han and Shuyuan Peng and Shuo Lei and Chao Deng and Junlan Feng},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=WLIFsPSq3t}\n}", "github": "", "project": "", "reviewers": "NEpE;zPEC;MynQ", "site": "https://openreview.net/forum?id=WLIFsPSq3t", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;5", "excitement": "3;3;3", "reproducibility": "3;4;2", "correctness": "4;3;4", "rating_avg": 4.0, "confidence_avg": 4.333333333333333, "excitement_avg": 3.0, "reproducibility_avg": 3.0, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0009-0001-0222-066X;;;0000-0000-0000-0000;0000-0003-4449-5247;0000-0001-5292-2945", "linkedin": "https://www.linkedin.com/;;;https://www.linkedin.com/;https://www.linkedin.cn/incareer/in/ACoAAB5sppAB_Da2tlvgSyM7NFTWl6d1DhZZe1o;junlan-feng-8968ba11/", "aff_unique_index": "0;1;0;0;0;0", "aff_unique_norm": "China Mobile;China Mobile Communications Group Co., Ltd.", "aff_unique_dep": "Research 
Institute;Research Institute", "aff_unique_url": "https://www.chinamobile.com/;http://www.chinamobileltd.com/", "aff_unique_abbr": "CMRI;CMCC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "WLV8cm80DB", "title": "$\\textit{Swap and Predict}$ -- Predicting the Semantic Changes in Words across Corpora by Context Swapping", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Meanings of words change over time and across domains.\nDetecting the semantic changes of words is an important task for various NLP applications that must make time-sensitive predictions.\nWe consider the problem of predicting whether a given target word, $w$, changes its meaning between two different text corpora, $\\mathcal{C}_1$ and $\\mathcal{C}_2$.\nFor this purpose, we propose $\\textit{Swapping-based Semantic Change Detection}$ (SSCD), an unsupervised method that randomly swaps contexts between $\\mathcal{C}_1$ and $\\mathcal{C}_2$ where $w$ occurs.\nWe then look at the distribution of contextualised word embeddings of $w$, obtained from a pretrained masked language model (MLM), representing the meaning of $w$ in its occurrence contexts in $\\mathcal{C}_1$ and $\\mathcal{C}_2$.\nIntuitively, if the meaning of $w$ does not change between $\\mathcal{C}_1$ and $\\mathcal{C}_2$, we would expect the distributions of contextualised word embeddings of $w$ to remain the same before and after this random swapping process.\nDespite its simplicity, we demonstrate that even by using pretrained MLMs without any fine-tuning, our proposed context swapping method accurately predicts the semantic changes of words in four languages (English, German, Swedish, and Latin) and across different time spans (over 50 years and about five years).\nMoreover, our method achieves significant performance improvements compared to strong baselines for the English semantic change prediction task. 
Source code is available at https://github.com/a1da4/svp-swap .", "keywords": "Computational Semantics;Contextualised Word Embeddings;Semantic Change Detection", "primary_area": "", "supplementary_material": "", "author": "Taichi Aida;Danushka Bollegala", "authorids": "~Taichi_Aida1;~Danushka_Bollegala1", "gender": "M;M", "homepage": "https://sites.google.com/view/a1da;https://danushka.net", "dblp": "268/1886.html;https://dblp.uni-trier.de/pers/hd/b/Bollegala:Danushka", "google_scholar": "https://scholar.google.co.jp/citations?user=YumEhloAAAAJ;https://scholar.google.co.uk/citations?user=kLqCYLMAAAAJ", "or_profile": "~Taichi_Aida1;~Danushka_Bollegala1", "aff": "Tokyo Metropolitan University;University of Liverpool", "aff_domain": "tmu.ac.jp;liverpool.ac.uk", "position": "PhD student;Professor", "bibtex": "@inproceedings{\naida2023textitswap,\ntitle={\\${\\textbackslash}textit\\{Swap and Predict\\}\\$ -- Predicting the Semantic Changes in Words across Corpora by Context Swapping},\nauthor={Taichi Aida and Danushka Bollegala},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=WLV8cm80DB}\n}", "github": "", "project": "", "reviewers": "SSFX;Wzo4;UWZX;8L1o", "site": "https://openreview.net/forum?id=WLV8cm80DB", "pdf_size": 0, "rating": "3;3;3;3", "confidence": "3;3;5;4", "excitement": "2;3;2;3", "reproducibility": "4;4;4;4", "correctness": "3;4;3;2", "rating_avg": 3.0, "confidence_avg": 3.75, "excitement_avg": 2.5, "reproducibility_avg": 4.0, "correctness_avg": 3.0, "replies_avg": 13, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0003-4476-7003", "linkedin": ";danushka-bollegala-6a636516/?originalSubdomain=uk", "aff_unique_index": "0;1", "aff_unique_norm": "Tokyo Metropolitan University;University of Liverpool", "aff_unique_dep": ";", "aff_unique_url": "https://www.tmuc.ac.jp;https://www.liverpool.ac.uk", "aff_unique_abbr": "TMU;Liv Uni", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "Japan;United Kingdom" }, { "id": "WLZX3et7VT", "title": "Active Retrieval Augmented Generation", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Despite the remarkable ability of large language models (LMs) to comprehend and generate language, they have a tendency to hallucinate and create factually inaccurate output. Augmenting LMs by retrieving information from external knowledge resources is one promising solution. Most existing retrieval augmented LMs employ a retrieve-and-generate setup that only retrieves information once based on the input. This is limiting, however, in more general scenarios involving generation of long texts, where continually gathering information throughout generation is essential. In this work, we provide a generalized view of active retrieval augmented generation, methods that actively decide when and what to retrieve across the course of the generation. We propose Forward-Looking Active REtrieval augmented generation (FLARE), a generic method which iteratively uses a prediction of the upcoming sentence to anticipate future content, which is then utilized as a query to retrieve relevant documents to regenerate the sentence if it contains low-confidence tokens. We test FLARE along with baselines comprehensively over 4 long-form knowledge-intensive generation tasks/datasets. 
FLARE achieves superior or competitive performance on all tasks, demonstrating the effectiveness of our method.", "keywords": "Retrieval Augmented Language Model;Long-form Generation;Active Retrieval", "primary_area": "", "supplementary_material": "", "author": "Zhengbao Jiang;Frank F. Xu;Luyu Gao;Zhiqing Sun;Qian Liu;Jane Dwivedi-Yu;Yiming Yang;Jamie Callan;Graham Neubig", "authorids": "~Zhengbao_Jiang2;~Frank_F._Xu1;~Luyu_Gao1;~Zhiqing_Sun1;~Qian_Liu2;~Jane_Dwivedi-Yu1;~Yiming_Yang1;~Jamie_Callan1;~Graham_Neubig1", "gender": "M;M;M;M;M;F;;M;F", "homepage": ";https://frankxfz.me/;https://luyug.github.io/;https://www.cs.cmu.edu/~zhiqings/;http://siviltaram.github.io/;http://www.cs.cmu.edu/~yiming/;http://www.cs.cmu.edu/~callan/;http://phontron.com;https://janedwivedi.github.io/", "dblp": ";190/4519;;211/7692;;25/1666;c/JamesPCallan;03/8155;215/3352", "google_scholar": ";1hXyfIkAAAAJ;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com/citations?hl=en;bcbeUo0AAAAJ;MlZq4XwAAAAJ;https://scholar.google.com/citations?hl=en;wlosgkoAAAAJ;ev8Ilx0AAAAJ", "or_profile": "~Zhengbao_Jiang2;~Frank_F._Xu1;~Luyu_Gao1;~Zhiqing_Sun1;~Qian_Liu2;~Yiming_Yang1;~Jamie_Callan1;~Graham_Neubig1;~Jane_Yu1", "aff": "School of Computer Science, Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University;Sea AI Lab;School of Computer Science, Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University;Meta AI ", "aff_domain": "cs.cmu.edu;cmu.edu;cmu.edu;cs.cmu.edu;sea.com;cs.cmu.edu;cmu.edu;cmu.edu;meta.com", "position": "PhD student;PhD student;PhD student;PhD student;Researcher;Full Professor;Full Professor;Associate Professor;Researcher", "bibtex": "@inproceedings{\njiang2023active,\ntitle={Active Retrieval Augmented Generation},\nauthor={Zhengbao Jiang and Frank F. Xu and Luyu Gao and Zhiqing Sun and Qian Liu and Jane Dwivedi-Yu and Yiming Yang and Jamie Callan and Graham Neubig},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=WLZX3et7VT}\n}", "github": "", "project": "", "reviewers": "6yUm;uvL6;PjRU", "site": "https://openreview.net/forum?id=WLZX3et7VT", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;4;4", "excitement": "3;4;4", "reproducibility": "4;4;4", "correctness": "3;3;4", "rating_avg": 5.0, "confidence_avg": 4.0, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 9, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;0000-0001-8322-607X;;;", "linkedin": ";;;zhiqing-sun-5781b3100/;;yiming-yang-24100924/;;;janeaisleyyu/", "aff_unique_index": "0;0;0;0;1;0;0;0;2", "aff_unique_norm": "Carnegie Mellon University;Sea AI Lab;Meta", "aff_unique_dep": "School of Computer Science;;Meta AI", "aff_unique_url": "https://www.cmu.edu;;https://meta.com", "aff_unique_abbr": "CMU;;Meta", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Pittsburgh;", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "United States;" }, { "id": "WQR3xpEJRJ", "title": "CTQScorer: Combining Multiple Features for In-context Example Selection for Machine Translation", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Large language models have demonstrated the capability to perform on machine translation when the input is prompted with a few examples (in-context learning). 
\nTranslation quality depends on various features of the selected examples, such as their quality and relevance, but previous work has predominantly focused on individual features in isolation. In this paper, we propose a general framework for combining different features influencing example selection. We learn a regression model, CTQ Scorer (Contextual Translation Quality), that selects examples based on multiple features in order to maximize the translation quality. On multiple language pairs and language models, we show that CTQ Scorer helps significantly outperform random selection as well as strong single-factor baselines reported in the literature. We also see an improvement of over 2.5 COMET points on average with respect to a strong BM25 retrieval-based baseline.", "keywords": "few-shot prompting;machine translation;example selection", "primary_area": "", "supplementary_material": "", "author": "Aswanth Kumar M;Ratish Puduppully;Raj Dabre;Anoop Kunchukuttan", "authorids": "~Aswanth_Kumar_M1;~Ratish_Puduppully1;~Raj_Dabre1;~Anoop_Kunchukuttan1", "gender": "M;M;M;", "homepage": ";https://ratishsp.github.io/;;http://anoopk.in/", "dblp": ";165/0748;127/0168;126/8631", "google_scholar": ";https://scholar.google.co.uk/citations?user=FrB_UMIAAAAJ;https://scholar.google.co.jp/citations?user=x91u618AAAAJ;jnoUuGcAAAAJ", "or_profile": "~Aswanth_Kumar_M1;~Ratish_Puduppully1;~Raj_Dabre1;~Anoop_Kunchukuttan1", "aff": "Indian Institute of Technology, Madras;A*STAR;National Institute of Information and Communications Technology (NICT), National Institute of Advanced Industrial Science and Technology;Microsoft", "aff_domain": "iiitm.ac.in;a-star.edu.sg;nict.go.jp;microsoft.com", "position": "MS student;Researcher;Postdoc;Senior Applied Researcher", "bibtex": "@inproceedings{\nm2023ctqscorer,\ntitle={{CTQS}corer: Combining Multiple Features for In-context Example Selection for Machine Translation},\nauthor={Aswanth Kumar M and Ratish Puduppully and Raj Dabre and Anoop Kunchukuttan},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=WQR3xpEJRJ}\n}", "github": "", "project": "", "reviewers": "Tf25;STxx;a97U", "site": "https://openreview.net/forum?id=WQR3xpEJRJ", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;3;3", "excitement": "4;4;3", "reproducibility": "3;3;4", "correctness": "3;4;4", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;", "linkedin": "aswanth-kumar/;ratishsp/;;anoopkunchukuttan/", "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Indian Institute of Technology Madras;Agency for Science, Technology and Research;National Institute of Information and Communications Technology;Microsoft", "aff_unique_dep": ";;;Microsoft Corporation", "aff_unique_url": "https://www.iitm.ac.in;https://www.a-star.edu.sg;https://www.nict.go.jp/;https://www.microsoft.com", "aff_unique_abbr": "IIT Madras;A*STAR;NICT;Microsoft", "aff_campus_unique_index": "0", "aff_campus_unique": "Madras;", "aff_country_unique_index": "0;1;2;3", "aff_country_unique": "India;Singapore;Japan;United States" }, { "id": "WQamRhhbsf", "title": "Impact of Co-occurrence on Factual Knowledge of Large Language Models", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Large language models 
(LLMs) often make factually incorrect responses despite their success in various applications. In this paper, we hypothesize that relying heavily on simple co-occurrence statistics of the pre-training corpora is one of the main factors that cause factual errors. Our results reveal that LLMs are vulnerable to the co-occurrence bias, defined as preferring frequently co-occurred words over the correct answer. Consequently, LLMs struggle to recall facts whose subject and object rarely co-occur in the pre-training dataset although they are seen during finetuning. We show that co-occurrence bias remains despite scaling up model sizes or finetuning. Therefore, we suggest finetuning on a debiased dataset to mitigate the bias by filtering out biased samples whose subject-object co-occurrence count is high. Although debiased finetuning allows LLMs to memorize rare facts in the training set, it is not effective in recalling rare facts unseen during finetuning. Further research in mitigation will help build reliable language models by preventing potential errors. The code is available at [https://github.com/CheongWoong/impact\\_of\\_cooccurrence](https://github.com/CheongWoong/impact\\_of\\_cooccurrence).", "keywords": "Factual Knowledge;Large Language Models;Co-occurrence;Term Frequency;Data Statistics", "primary_area": "", "supplementary_material": "", "author": "Cheongwoong Kang;Jaesik Choi", "authorids": "~Cheongwoong_Kang1;~Jaesik_Choi1", "gender": "M;M", "homepage": "https://cheongwoong.github.io/;https://sailab.kaist.ac.kr/jaesik", "dblp": "252/4978;13/1402", "google_scholar": "MYiAPWYAAAAJ;RqMLVzUAAAAJ", "or_profile": "~Cheongwoong_Kang1;~Jaesik_Choi1", "aff": "Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology", "aff_domain": "kaist.ac.kr;kaist.ac.kr", "position": "PhD student;Associate Professor", "bibtex": "@inproceedings{\nkang2023impact,\ntitle={Impact of Co-occurrence on Factual Knowledge of Large Language Models},\nauthor={Cheongwoong Kang and Jaesik Choi},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=WQamRhhbsf}\n}", "github": "", "project": "", "reviewers": "2dAt;B1hy;M9gY", "site": "https://openreview.net/forum?id=WQamRhhbsf", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;4", "excitement": "3;2;3", "reproducibility": "5;3;4", "correctness": "3;3;4", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 2.6666666666666665, "reproducibility_avg": 4.0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";", "linkedin": "cheongwoong-kang/;", "aff_unique_index": "0;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.kaist.ac.kr", "aff_unique_abbr": "KAIST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "South Korea" }, { "id": "WRYhaSrThy", "title": "Automatic Prompt Optimization with \"Gradient Descent\" and Beam Search", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Large Language Models (LLMs) have shown impressive performance as general purpose agents, but their abilities remain highly dependent on prompts which are hand written with onerous trial-and-error effort. 
We propose a simple and nonparametric solution to this problem, Prompt Optimization with Textual Gradients (ProTeGi), which is inspired by numerical gradient descent to automatically improve prompts, assuming access to training data and an LLM API. The algorithm uses minibatches of data to form natural language \"gradients\" that criticize the current prompt, much like how numerical gradients point in the direction of error ascent. The natural language gradients are then \"propagated\" into the prompt by editing the prompt in the opposite semantic direction of the gradient. These gradient descent steps are guided by a beam search and bandit selection procedure which significantly improves algorithmic efficiency. Preliminary results across three benchmark NLP tasks and the novel problem of LLM jailbreak detection suggest that Automatic Prompt Optimization can outperform prior prompt editing techniques and improve an initial prompt's performance by up to 31%, by using data to rewrite vague task descriptions into more precise annotation instructions.", "keywords": "LLM;Prompt;Prompt Optimization;Prompt Engineering;Optimization;Gradient Descent", "primary_area": "", "supplementary_material": "", "author": "Reid Pryzant;Dan Iter;Jerry Li;Yin Tat Lee;Chenguang Zhu;Michael Zeng", "authorids": "~Reid_Pryzant1;~Dan_Iter1;~Jerry_Li1;~Yin_Tat_Lee1;~Chenguang_Zhu1;~Michael_Zeng1", "gender": ";Not Specified;M;;M;M", "homepage": ";https://daniter-cu.github.io/;https://jerryzli.github.io/;;;https://www.microsoft.com/en-us/research/people/nzeng/", "dblp": "205/3986;63/10689.html;;;48/7536-1.html;232/1866-1.html", "google_scholar": "FkufKDgAAAAJ;bg8RrSkAAAAJ;4zybTq4AAAAJ;;1b2kKWoAAAAJ;", "or_profile": "~Reid_Pryzant1;~Dan_Iter1;~Jerry_Li1;~Yin_Tat_Lee1;~Chenguang_Zhu1;~Michael_Zeng1", "aff": "Microsoft Research;Microsoft;Microsoft;;Zoom;Microsoft", "aff_domain": "research.microsoft.com;microsoft.com;microsoft.com;;zoom.us;microsoft.com", "position": "Researcher;Researcher;Senior Researcher;;Principal Researcher;Vice President Research Manager", "bibtex": "@inproceedings{\npryzant2023automatic,\ntitle={Automatic Prompt Optimization with ''Gradient Descent'' and Beam Search},\nauthor={Reid Pryzant and Dan Iter and Jerry Li and Yin Tat Lee and Chenguang Zhu and Michael Zeng},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=WRYhaSrThy}\n}", "github": "", "project": "", "reviewers": "cYMM;jdT4;2zD3", "site": "https://openreview.net/forum?id=WRYhaSrThy", "pdf_size": 0, "rating": "4;4;4", "confidence": "2;4;4", "excitement": "4;4;3", "reproducibility": "3;4;3", "correctness": "4;3;4", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;", "linkedin": ";daniter;;;;michaelnanshanzeng/", "aff_unique_index": "0;0;0;1;0", "aff_unique_norm": "Microsoft;Zoom Video Communications Inc.", "aff_unique_dep": "Microsoft Research;", "aff_unique_url": "https://www.microsoft.com/en-us/research;https://zoom.us", "aff_unique_abbr": "MSR;Zoom", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "WVs1qhIUms", "title": "Empirical Study of Zero-Shot NER with ChatGPT", "track": "main", "status": "Long Main", "tldr": 
"", "abstract": "Large language models (LLMs) exhibited powerful capability in various natural language processing tasks. This work focuses on exploring LLM performance on zero-shot information extraction, with a focus on the ChatGPT and named entity recognition (NER) task. Inspired by the remarkable reasoning capability of LLM on symbolic and arithmetic reasoning, we adapt the prevalent reasoning methods to NER and propose reasoning strategies tailored for NER. First, we explore a decomposed question-answering paradigm by breaking down the NER task into simpler subproblems by labels. Second, we propose syntactic augmentation to stimulate the model's intermediate thinking in two ways: syntactic prompting, which encourages the model to analyze the syntactic structure itself, and tool augmentation, which provides the model with the syntactic information generated by a parsing tool. Besides, we adapt self-consistency to NER by proposing a two-stage majority voting strategy, which first votes for the most consistent mentions, then the most consistent types. The proposed methods achieve remarkable improvements for zero-shot NER across seven benchmarks, including Chinese and English datasets, and on both domain-specific and general-domain scenarios. In addition, we present a comprehensive analysis of the error types with suggestions for optimization directions. We also verify the effectiveness of the proposed methods on the few-shot setting and other LLMs.", "keywords": "ChatGPT;Named Entity Recognition;Zero-Shot;Reasoning;Large Language Models", "primary_area": "", "supplementary_material": "", "author": "Tingyu Xie;Qi Li;Jian Zhang;Yan Zhang;Zuozhu Liu;Hongwei Wang", "authorids": "~Tingyu_Xie1;~Qi_Li22;~Jian_Zhang39;~Yan_Zhang12;~Zuozhu_Liu1;~Hongwei_Wang6", "gender": "F;;M;M;M;M", "homepage": ";;https://scholar.google.com/citations?user=I-PBHTgAAAAJ&hl=en;;https://person.zju.edu.cn/en/lzz;https://zjui.intl.zju.edu.cn/en/node/778", "dblp": "249/9667;;;;173/9297;13/5641-1", "google_scholar": "7hUBBjEAAAAJ;;I-PBHTgAAAAJ;-oIMVnUAAAAJ;h602wLIAAAAJ;", "or_profile": "~Tingyu_Xie1;~Qi_Li22;~Jian_Zhang39;~Yan_Zhang12;~Zuozhu_Liu1;~Hongwei_Wang6", "aff": "Zhejiang University;;Zhejiang University;National University of Singapore;Zhejiang University;Zhejiang University", "aff_domain": "zju.edu.cn;;zju.edu.cn;nus.edu.sg;zju.edu.cn;zju.edu.cn", "position": "PhD student;;PhD student;Researcher;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nxie2023empirical,\ntitle={Empirical Study of Zero-Shot {NER} with Chat{GPT}},\nauthor={Tingyu Xie and Qi Li and Jian Zhang and Yan Zhang and Zuozhu Liu and Hongwei Wang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=WVs1qhIUms}\n}", "github": "", "project": "", "reviewers": "5LaW;B3Rk;5Z6K", "site": "https://openreview.net/forum?id=WVs1qhIUms", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;4", "excitement": "4;3;4", "reproducibility": "4;4;4", "correctness": "3;4;3", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0002-6342-0243;;0000-0002-7816-502X;", "linkedin": ";;;zhang-yan-1001940/;;", "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "Zhejiang University;National University of Singapore", "aff_unique_dep": ";", "aff_unique_url": 
"https://www.zju.edu.cn;https://www.nus.edu.sg", "aff_unique_abbr": "ZJU;NUS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0", "aff_country_unique": "China;Singapore" }, { "id": "Weszm4zCzP", "title": "M$^3$Seg: A Maximum-Minimum Mutual Information Paradigm for Unsupervised Topic Segmentation in ASR Transcripts", "track": "main", "status": "Short Main", "tldr": "", "abstract": "Topic segmentation aims to detect topic boundaries and split automatic speech recognition transcriptions (e.g., meeting transcripts) into segments that are bounded by thematic meanings. \nIn this work, we propose M$^3$Seg, a novel Maximum-Minimum Mutual information paradigm for linear topic segmentation without using any parallel data.\nSpecifically, by employing sentence representations provided by pre-trained language models, M$^3$Seg first learns a region-based segment encoder based on the maximization of mutual information between the global segment representation and the local contextual sentence representation.\nSecondly, an edge-based boundary detection module aims to segment the whole by topics based on minimizing the mutual information between different segments.\nExperiment results on two public datasets demonstrate the effectiveness of M$^3$Seg, which outperform the state-of-the-art methods by a significant (18\\%\u201337\\% improvement) margin.", "keywords": "Unsupervised topic segmentation;mutual information maximization/minimization;automatic-speech-recognition (ASR) transcripts structuring", "primary_area": "", "supplementary_material": "", "author": "Ke Wang;Xiutian Zhao;Yanghui Li;Wei Peng", "authorids": "~Ke_Wang2;~Xiutian_Zhao1;~Yanghui_Li1;~Wei_Peng6", "gender": "M;M;M;M", "homepage": ";https://xiutian.github.io;;https://www.rmit.edu.au/profiles/p/wei-peng3", "dblp": "https://dblp.uni-trier.de/pid/181/2613.html;362/7856;;", "google_scholar": "https://scholar.google.com/citations?hl=en;HfOmKncAAAAJ;;", "or_profile": "~Ke_Wang2;~Xiutian_Zhao1;~Yanghui_Li1;~Wei_Peng6", "aff": "Huawei Technologies Ltd.;;;Huawei Technologies Ltd.", "aff_domain": "huawei.com;;;huawei.com", "position": "Researcher;;;Principal Researcher", "bibtex": "@inproceedings{\nwang2023mseg,\ntitle={M\\${\\textasciicircum}3\\$Seg: A Maximum-Minimum Mutual Information Paradigm for Unsupervised Topic Segmentation in {ASR} Transcripts},\nauthor={Ke Wang and Xiutian Zhao and Yanghui Li and Wei Peng},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=Weszm4zCzP}\n}", "github": "", "project": "", "reviewers": "bouu;t5HK;ZX6U;mmRA", "site": "https://openreview.net/forum?id=Weszm4zCzP", "pdf_size": 0, "rating": "5;5;5;5", "confidence": "3;1;3;5", "excitement": "4;3;3;4", "reproducibility": "4;3;3;3", "correctness": "4;4;3;4", "rating_avg": 5.0, "confidence_avg": 3.0, "excitement_avg": 3.5, "reproducibility_avg": 3.25, "correctness_avg": 3.75, "replies_avg": 14, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0003-2300-0743;;0000-0002-5262-7775;", "linkedin": ";;;wei-peng-phd-in-ai-4515ba22/?originalSubdomain=au", "aff_unique_index": "0;0", "aff_unique_norm": "Huawei", "aff_unique_dep": "Huawei Technologies", "aff_unique_url": "https://www.huawei.com", "aff_unique_abbr": "Huawei", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "id": "WiKLXsWzBy", "title": "Don\u2019t Trust ChatGPT 
when your Question is not in English: A Study of Multilingual Abilities and Types of LLMs", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Large language models (LLMs) have demonstrated exceptional natural language understanding abilities, and have excelled in a variety of natural language processing (NLP) tasks. Despite the fact that most LLMs are trained predominantly on English, multiple studies have demonstrated their capabilities in a variety of languages. However, fundamental questions persist regarding how LLMs acquire their multilingual abilities and how performance varies across different languages. These inquiries are crucial for the study of LLMs since users and researchers often come from diverse language backgrounds, potentially influencing how they use LLMs and interpret their output. In this work, we propose a systematic way of qualitatively and quantitatively evaluating the multilingual capabilities of LLMs. We investigate the phenomenon of cross-language generalization in LLMs, wherein limited multilingual training data leads to advanced multilingual capabilities. To accomplish this, we employ a novel prompt back-translation method. The results demonstrate that LLMs, such as GPT, can effectively transfer learned knowledge across different languages, yielding relatively consistent results in translation-equivariant tasks, in which the correct output does not depend on the language of the input. However, LLMs struggle to provide accurate results in translation-variant tasks, which lack this property, requiring careful user judgment to evaluate the answers.", "keywords": "multilingual;LLM;GPT", "primary_area": "", "supplementary_material": "", "author": "Xiang Zhang;Senyu Li;Bradley Hauer;Ning Shi;Grzegorz Kondrak", "authorids": "~Xiang_Zhang17;~Senyu_Li1;~Bradley_Hauer1;~Ning_Shi1;~Grzegorz_Kondrak1", "gender": ";M;;M;Not Specified", "homepage": ";;https://webdocs.cs.ualberta.ca/~bmhauer/;https://sites.google.com/ualberta.ca/shining;http://webdocs.cs.ualberta.ca/~kondrak/", "dblp": ";216/6935;127/6967;67/3378;40/3774", "google_scholar": ";9MzYLOcAAAAJ;https://scholar.google.ca/citations?user=F3ZNBV4AAAAJ;qaqVNMQAAAAJ;https://scholar.google.com.tw/citations?user=TV3Tl_sAAAAJ", "or_profile": "~Xiang_Zhang17;~Senyu_Li1;~Bradley_Hauer1;~Ning_Shi1;~Grzegorz_Kondrak1", "aff": ";University of Alberta;University of Alberta;University of Alberta;University of Alberta", "aff_domain": ";ualberta.ca;ualberta.ca;ualberta.ca;ualberta.ca", "position": ";MS student;PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nzhang2023dont,\ntitle={Don{\\textquoteright}t Trust Chat{GPT} when your Question is not in English: A Study of Multilingual Abilities and Types of {LLM}s},\nauthor={Xiang Zhang and Senyu Li and Bradley Hauer and Ning Shi and Grzegorz Kondrak},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=WiKLXsWzBy}\n}", "github": "", "project": "", "reviewers": "2vNk;d1zp;kYZb", "site": "https://openreview.net/forum?id=WiKLXsWzBy", "pdf_size": 0, "rating": "5;5;5", "confidence": "3;4;3", "excitement": "4;4;4", "reproducibility": "4;5;4", "correctness": "4;4;4", "rating_avg": 5.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 4.0, "reproducibility_avg": 4.333333333333333, "correctness_avg": 4.0, "replies_avg": 10, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;0000-0002-9479-6999", "linkedin": ";;;stshining/;", 
"aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Alberta", "aff_unique_dep": "", "aff_unique_url": "https://www.ualberta.ca", "aff_unique_abbr": "UAlberta", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Canada" }, { "id": "WkpTWlXGHC", "title": "Scalable-DSC: A Structural Template Prompt Approach to Scalable Dialogue State Correction", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Dialogue state error correction has recently been proposed to correct wrong slot values in predicted dialogue states, thereby mitigating the error propagation problem for dialogue state tracking (DST). These approaches, though effective, are heavily intertwined with specific DST models, limiting their applicability to other DST models. To solve this problem, we propose Scalable Dialogue State Correction (Scalable-DSC), which can correct wrong slot values in the dialogue state predicted by any DST model. Specifically, we propose a Structural Template Prompt (STP) that converts predicted dialogue state from any DST models into a standardized natural language sequence as a part of the historical context, associates them with dialogue history information, and generates a corrected dialogue state sequence based on predefined template options. We further enhance Scalable-DSC by introducing two training strategies. The first employs a predictive state simulator to simulate the predicted dialogue states as the training data to enhance the generalization ability of the model. The second involves using the dialogue state predicted by DST as the training data, aiming at mitigating the inconsistent error type distribution between the training and inference. Experiments confirm that our model achieves state-of-the-art results on MultiWOZ 2.0-2.4.", "keywords": "Dialogue state tracking;Error propagation;Dialogue state correction", "primary_area": "", "supplementary_material": "", "author": "Haoxiang Su;Hongyan Xie;Hao Huang;Shuangyong Song;Ruiyu Fang;Xiaomeng Huang;Sijie Feng", "authorids": "~Haoxiang_Su1;~Hongyan_Xie1;~Hao_Huang9;~Shuangyong_Song2;~Ruiyu_Fang1;~Xiaomeng_Huang2;~Sijie_Feng1", "gender": "M;M;M;M;F;F;M", "homepage": "https://scholar.google.com/citations?view_op=list_works&hl=zh-CN&user=qkYO48UAAAAJ;;http://it.xju.edu.cn/info/1149/1706.htm;;https://github.com/mavis8368;https://scholar.google.com/citations?hl=zh-CN&user=7V6iSgEAAAAJ;", "dblp": ";;;165/9514.html;;;91/8381", "google_scholar": ";https://scholar.google.com.hk/citations?user=GizF4ncAAAAJ;https://scholar.google.com/citations?hl=zh-CN;;;;https://scholar.google.de/citations?user=cFHBCNEAAAAJ", "or_profile": "~Haoxiang_Su1;~Hongyan_Xie1;~Hao_Huang9;~Ruiyu_Fang1;~Xiaomeng_Huang2;~Sijie_Feng1;~Shuangyong_Song1", "aff": "Xinjiang University;JD.com;Xinjiang University;China Telecom;;Xinjiang University;ChinaTelecom", "aff_domain": "xju.edu.cn;jd.com;xju.edu.cn;chinatelecom.com.cn;;xju.edu.cn;chinatelecom.cn", "position": "PhD student;Researcher;Full Professor;Researcher;;MS student;Researcher", "bibtex": "@inproceedings{\nsu2023scalabledsc,\ntitle={Scalable-{DSC}: A Structural Template Prompt Approach to Scalable Dialogue State Correction},\nauthor={Haoxiang Su and Hongyan Xie and Hao Huang and Shuangyong Song and Ruiyu Fang and Xiaomeng Huang and Sijie Feng},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=WkpTWlXGHC}\n}", "github": "", "project": "", "reviewers": 
"QXpz;5vDD;kfrt", "site": "https://openreview.net/forum?id=WkpTWlXGHC", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;5;5", "excitement": "3;3;4", "reproducibility": "3;3;4", "correctness": "4;3;4", "rating_avg": 4.0, "confidence_avg": 4.666666666666667, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0001-6604-0951;;;;0000-0001-7465-1082", "linkedin": ";;;;;;https://www.linkedin.cn/incareer/in/songshuangyong", "aff_unique_index": "0;1;0;2;0;2", "aff_unique_norm": "Xinjiang University;JD.com;China Telecom", "aff_unique_dep": ";;", "aff_unique_url": "http://www.xju.edu.cn;https://www.jd.com;https://www.chinatelecom.com.cn", "aff_unique_abbr": "XJU;JD;CT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "WmpyDkTHvI", "title": "Text2Tree: Aligning Text Representation to the Label Tree Hierarchy for Imbalanced Medical Classification", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Deep learning approaches exhibit promising performances on various text tasks. However, they are still struggling on medical text classification since samples are often extremely imbalanced and scarce. Different from existing mainstream approaches that focus on supplementary semantics with external medical information, this paper aims to rethink the data challenges in medical texts and present a novel framework-agnostic algorithm called Text2Tree that only utilizes internal label hierarchy in training deep learning models. We embed the ICD code tree structure of labels into cascade attention modules for learning hierarchy-aware label representations. Two new learning schemes, Similarity Surrogate Learning (SSL) and Dissimilarity Mixup Learning (DML), are devised to boost text classification by reusing and distinguishing samples of other labels following the label representation hierarchy, respectively. Experiments on authoritative public datasets and real-world medical records show that our approach stably achieves superior performances over classical and advanced imbalanced classification methods. 
Our code is available at https://github.com/jyansir/Text2Tree.", "keywords": "text mining;medical text representation;imbalanced text classification", "primary_area": "", "supplementary_material": "", "author": "Jiahuan Yan;Haojun Gao;Zhang Kai;Weize Liu;Danny Chen;Jian Wu;Jintai Chen", "authorids": "~Jiahuan_Yan1;~Haojun_Gao1;~Zhang_Kai2;~Weize_Liu1;~Danny_Chen1;~Jian_Wu6;~Jintai_Chen1", "gender": "M;M;M;;Not Specified;M;M", "homepage": ";;https://www.linkedin.cn/incareer/in/ACoAADQzT6kBRwrVEDjjuTpJQWyUKBYbPnNOlr4;;https://engineering.nd.edu/faculty/danny-chen/;https://scholar.google.com/citations?hl=zh-TW&user=VO9XIXYAAAAJ;https://whatashot.github.io/", "dblp": "334/7537;;;;c/DannyZChen.html;96/2744-1;249/3929", "google_scholar": "_wQJGDcAAAAJ;;;;tRerdSIAAAAJ;https://scholar.google.com/citations?hl=zh-TW;https://scholar.google.com/citations?hl=en", "or_profile": "~Jiahuan_Yan1;~Haojun_Gao1;~Zhang_Kai2;~Weize_Liu1;~Danny_Chen1;~Jian_Wu6;~Jintai_Chen1", "aff": "Zhejiang University;Zhejiang University;Zhejiang University;;University of Notre Dame, USA;Zhejiang University;Zhejiang University", "aff_domain": "zju.edu.cn;zju.edu.cn;zju.edu.cn;;nd.edu;zju.edu.cn;zju.edu.cn", "position": "PhD student;MS student;MS student;;Full Professor;Full Professor;PhD student", "bibtex": "@inproceedings{\nyan2023texttree,\ntitle={Text2Tree: Aligning Text Representation to the Label Tree Hierarchy for Imbalanced Medical Classification},\nauthor={Jiahuan Yan and Haojun Gao and Zhang Kai and Weize Liu and Danny Chen and Jian Wu and Jintai Chen},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=WmpyDkTHvI}\n}", "github": "", "project": "", "reviewers": "xmzY;8duU;LHTo", "site": "https://openreview.net/forum?id=WmpyDkTHvI", "pdf_size": 0, "rating": "2;2;2", "confidence": "4;3;3", "excitement": "3;4;3", "reproducibility": "4;3;4", "correctness": "2;4;3", "rating_avg": 2.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-2002-2579;0000-0002-6265-2157;;;0000-0001-6565-2884;;0000-0002-3199-2597", "linkedin": ";;;;;;jintai-chen-3a09921b0/", "aff_unique_index": "0;0;0;1;0;0", "aff_unique_norm": "Zhejiang University;University of Notre Dame", "aff_unique_dep": ";", "aff_unique_url": "https://www.zju.edu.cn;https://www.nd.edu", "aff_unique_abbr": "ZJU;Notre Dame", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;0;0", "aff_country_unique": "China;United States" }, { "id": "Wom397PB55", "title": "TheoremQA: A Theorem-driven Question Answering Dataset", "track": "main", "status": "Long Main", "tldr": "", "abstract": "The recent LLMs like GPT-4 and PaLM-2 have made tremendous progress in solving fundamental math problems like GSM8K by achieving over 90% accuracy. However, their capabilities to solve more challenging math problems which require domain-specific knowledge (i.e. theorem) have yet to be investigated. In this paper, we introduce TheoremQA, the first theorem-driven question-answering dataset designed to evaluate AI models' capabilities to apply theorems to solve challenging science problems. TheoremQA is curated by domain experts containing 800 high-quality questions covering 350 theorems from Math, Physics, EE&CS, and Finance. 
We evaluate a wide spectrum of 16 large language and code models with different prompting strategies like Chain-of-Thoughts and Program-of-Thoughts. We found that GPT-4's capabilities to solve these problems are unparalleled, achieving an accuracy of 51% with Program-of-Thoughts Prompting. All the existing open-sourced models are below 15%, barely surpassing the random-guess baseline. Given the diversity and broad coverage of TheoremQA, we believe it can be used as a better benchmark to evaluate LLMs' capabilities to solve challenging science problems.", "keywords": "Language Model;Question Answering;Math;Theorem", "primary_area": "", "supplementary_material": "", "author": "Wenhu Chen;Ming Yin;Max Ku;Pan Lu;Yixin Wan;Xueguang Ma;Jianyu Xu;Xinyi Wang;Tony Xia", "authorids": "~Wenhu_Chen3;~Ming_Yin4;~Max_Ku1;~Pan_Lu2;~Yixin_Wan1;~Xueguang_Ma1;~Jianyu_Xu1;~Xinyi_Wang2;~Tony_Xia1", "gender": "M;M;F;M;M;F;M;M;M", "homepage": "https://mingyin0312.github.io;https://kuwingfung.github.io/;https://scholar.google.com/citations?user=hZPIICQAAAAJ&hl=en;;https://xu-jy.github.io/;https://wangxinyilinda.github.io/;https://lupantech.github.io/;https://tonyxia2001.github.io/;https://wenhuchen.github.io/", "dblp": "89/453.html;348/0574.html;320/5376;44/9030;;;;;136/0957.html", "google_scholar": "ncBRYIUAAAAJ;https://scholar.google.com.hk/citations?user=oCFgVhUAAAAJ;hZPIICQAAAAJ;4kvcmkQAAAAJ;3ubVhAMAAAAJ;3vvbplcAAAAJ;IyucsdQAAAAJ;;https://scholar.google.co.jp/citations?user=U8ShbhUAAAAJ", "or_profile": "~Ming_Yin4;~Max_Ku1;~Yixin_Wan1;~Xueguang_Ma1;~Jianyu_Xu1;~Xinyi_Wang2;~Pan_Lu1;~Tanglin_Xia1;~wenhu_chen1", "aff": "UC, Santa Barbara;Caritas Institute of Higher Education;University of California, Los Angeles;University of Waterloo;UC Santa Barbara;Microsoft;University of California, Los Angeles;University of California, Los Angeles;University of Waterloo", "aff_domain": "ucsb.edu;cihe.edu.hk;ucla.edu;uwaterloo.ca;ucsb.edu;microsoft.com;ucla.edu;ucla.edu;uwaterloo.ca", "position": "PhD student;Researcher;PhD student;PhD student;PhD student;Intern;PhD student;Undergrad student;Assistant Professor", "bibtex": "@inproceedings{\nchen2023theoremqa,\ntitle={Theorem{QA}: A Theorem-driven Question Answering Dataset},\nauthor={Wenhu Chen and Ming Yin and Max Ku and Pan Lu and Yixin Wan and Xueguang Ma and Jianyu Xu and Xinyi Wang and Tony Xia},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=Wom397PB55}\n}", "github": "", "project": "", "reviewers": "aN4a;CivJ;quPx", "site": "https://openreview.net/forum?id=Wom397PB55", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;4;3", "excitement": "4;4;4", "reproducibility": "5;4;3", "correctness": "4;4;3", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 4.0, "reproducibility_avg": 4.0, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 9, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0001-6458-0751;;;;;;;;", "linkedin": ";max-ku-650571172/;elaine-yixin-wan-8032b8136/;;;xinyi-wang-444385133/;pan-lu-9308909a/;;", "aff_unique_index": "0;1;2;3;0;4;2;2;3", "aff_unique_norm": "University of California, Santa Barbara;Caritas Institute of Higher Education;University of California, Los Angeles;University of Waterloo;Microsoft", "aff_unique_dep": ";;;;Microsoft Corporation", "aff_unique_url": "https://www.ucsb.edu;https://www.caritas.edu.hk;https://www.ucla.edu;https://uwaterloo.ca;https://www.microsoft.com", 
"aff_unique_abbr": "UCSB;;UCLA;UW;Microsoft", "aff_campus_unique_index": "0;1;2;0;2;2", "aff_campus_unique": "Santa Barbara;Hong Kong SAR;Los Angeles;", "aff_country_unique_index": "0;1;0;2;0;0;0;0;2", "aff_country_unique": "United States;China;Canada" }, { "id": "WuuxbObghx", "title": "Federated Learning of Large Language Models with Parameter-Efficient Prompt Tuning and Adaptive Optimization", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Federated learning (FL) is a promising paradigm to enable collaborative model training with decentralized data. However, the training process of Large Language Models (LLMs) generally incurs the update of significant parameters, which limits the applicability of FL techniques to tackle the LLMs in real scenarios. Prompt tuning can significantly reduce the number of parameters to update, but it either incurs performance degradation or low training efficiency. The straightforward utilization of prompt tuning in the FL often raises non-trivial communication costs and dramatically degrades performance. In addition, the decentralized data is generally non-Independent and Identically Distributed (non-IID), which brings client drift problems and thus poor performance. This paper proposes a Parameter-efficient prompt Tuning approach with Adaptive Optimization, i.e., FedPepTAO, to enable efficient and effective FL of LLMs. First, an efficient partial prompt tuning approach is proposed to improve performance and efficiency simultaneously. Second, a novel adaptive optimization method is developed to address the client drift problems on both the device and server sides to enhance performance further. Extensive experiments based on 10 datasets demonstrate the superb performance (up to 60.8% in terms of accuracy) and efficiency (up to 97.59% in terms of training time) of FedPepTAO compared with 9 baseline approaches. Our code is available at https://github.com/llm-eff/FedPepTAO.", "keywords": "Federated learning;Large Language Models;Prompt tuning;Prefix tuning", "primary_area": "", "supplementary_material": "", "author": "Tianshi Che;Ji Liu;Yang Zhou;Jiaxiang Ren;jiwen zhou;Victor S. Sheng;Huaiyu Dai;Dejing Dou", "authorids": "~Tianshi_Che1;~Ji_Liu8;~Yang_Zhou4;~Jiaxiang_Ren1;~jiwen_zhou1;~Victor_S._Sheng1;~Huaiyu_Dai2;~Dejing_Dou4", "gender": "M;M;;M;M;M;M;M", "homepage": "https://auburn.edu;;http://eng.auburn.edu/users/yangzhou/;;https://github.com/vincezhou;;https://ece.ncsu.edu/people/hdai/;https://ix.cs.uoregon.edu/~dou/", "dblp": "296/8746;51/4433-3;07/4580-1;205/7041;;36/4372;09/5360.html;26/2854.html", "google_scholar": "jkxujjEAAAAJ;C16EBHUAAAAJ;yvE8Po0AAAAJ;https://scholar.google.com/citations?hl=zh-CN;;0epc43IAAAAJ;HOSH65oAAAAJ;qBHsQ04AAAAJ", "or_profile": "~Tianshi_Che1;~Ji_Liu8;~Yang_Zhou4;~Jiaxiang_Ren1;~jiwen_zhou1;~Victor_S._Sheng1;~Huaiyu_Dai2;~Dejing_Dou4", "aff": "Auburn University;Baidu;Auburn University;Auburn University;Baidu;Texas Tech University;North Carolina State University;", "aff_domain": "auburn.edu;baidu.com;auburn.edu;auburn.edu;baidu.com;ttu.edu;ncsu.edu;", "position": "PhD student;Researcher;Assistant Professor;PhD student;Undergrad student;Associate Professor;Full Professor;", "bibtex": "@inproceedings{\nche2023federated,\ntitle={Federated Learning of Large Language Models with Parameter-Efficient Prompt Tuning and Adaptive Optimization},\nauthor={Tianshi Che and Ji Liu and Yang Zhou and Jiaxiang Ren and jiwen zhou and Victor S. 
Sheng and Huaiyu Dai and Dejing Dou},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=WuuxbObghx}\n}", "github": "", "project": "", "reviewers": "tTk3;ZSxS;nVG8;6pQ8", "site": "https://openreview.net/forum?id=WuuxbObghx", "pdf_size": 0, "rating": "4;4;4;4", "confidence": "3;3;3;3", "excitement": "4;4;3;4", "reproducibility": "3;4;2;3", "correctness": "4;3;2;3", "rating_avg": 4.0, "confidence_avg": 3.0, "excitement_avg": 3.75, "reproducibility_avg": 3.0, "correctness_avg": 3.0, "replies_avg": 13, "authors#_avg": 8, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0001-7839-4933;;;;;", "linkedin": "tianshi-che-5169891b7/;;;;;;;", "aff_unique_index": "0;1;0;0;1;2;3", "aff_unique_norm": "Auburn University;Baidu;Texas Tech University;North Carolina State University", "aff_unique_dep": ";Baidu, Inc.;;", "aff_unique_url": "https://www.auburn.edu;https://www.baidu.com;https://www.ttu.edu;https://www.ncsu.edu", "aff_unique_abbr": "Auburn;Baidu;TTU;NCSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;1;0;0", "aff_country_unique": "United States;China" }, { "id": "WxxYSpsv97", "title": "Generating Extractive Answers: Gated Recurrent Memory Reader for Conversational Question Answering", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "Conversational question answering (CQA) is a more complicated task than traditional single-turn machine reading comprehension (MRC). Different from large language models (LLMs) like ChatGPT, the models of CQA need to extract answers from given contents to answer follow-up questions according to conversation history. In this paper, we propose a novel architecture, i.e., Gated Recurrent Memory Reader (GRMR), which integrates traditional extractive MRC models into a generalized sequence-to-sequence framework. After the passage is encoded, the decoder will generate the extractive answers turn by turn. Different from previous models that concatenate the previous questions and answers as context superficially and redundantly, our model can use less storage space and consider historical memory deeply and selectively. 
Experiments on the Conversational Question Answering (CoQA) dataset show that our model achieves comparable results to most models with the least space occupancy.", "keywords": "Conversational Question Answering;Machine Reading Comprehension;Attention", "primary_area": "", "supplementary_material": "", "author": "Xuanyu Zhang;Qing Yang", "authorids": "~Xuanyu_Zhang1;~Qing_Yang11", "gender": "M;M", "homepage": ";https://www.duxiaoman.com/index", "dblp": ";47/3749", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;", "or_profile": "~Xuanyu_Zhang1;~Qing_Yang11", "aff": "DXM;Du Xiaoman Technology(BeiJing)", "aff_domain": "duxiaoman.com;duxiaoman.com", "position": "Researcher;Principal Researcher", "bibtex": "@inproceedings{\nzhang2023generating,\ntitle={Generating Extractive Answers: Gated Recurrent Memory Reader for Conversational Question Answering},\nauthor={Xuanyu Zhang and Qing Yang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=WxxYSpsv97}\n}", "github": "", "project": "", "reviewers": "3NnP;wcpV;G5eK", "site": "https://openreview.net/forum?id=WxxYSpsv97", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;5", "excitement": "2;3;4", "reproducibility": "3;2;5", "correctness": "2;4;4", "rating_avg": 3.0, "confidence_avg": 4.333333333333333, "excitement_avg": 3.0, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";", "linkedin": ";", "aff_unique_index": "0;1", "aff_unique_norm": "DXM;Du Xiaoman Technology", "aff_unique_dep": ";", "aff_unique_url": ";", "aff_unique_abbr": ";", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "1", "aff_country_unique": ";China" }, { "id": "Wy4adj2FUJ", "title": "A Sequence-to-Structure Approach to Document-level Targeted Sentiment Analysis", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Most previous studies on aspect-based sentiment analysis (ABSA) were carried out at the sentence level, while the research of document-level ABSA has not received enough attention. In this work, we focus on the document-level targeted sentiment analysis task, which aims to extract the opinion targets consisting of multi-level entities from a review document and predict their sentiments. We propose a Sequence-to-Structure (Seq2Struct) approach to address the task, which is able to explicitly model the hierarchical structure among multiple opinion targets in a document, and capture the long-distance dependencies among affiliated entities across sentences. In addition to the existing Seq2Seq approach, we further construct four strong baselines with different pretrained models. Experimental results on six domains show that our Seq2Struct approach outperforms all the baselines significantly. Aside from the performance advantage in outputting the multi-level target-sentiment pairs, our approach has another significant advantage - it can explicitly display the hierarchical structure of the opinion targets within a document. 
Our source code is publicly released at https://github.com/NUSTM/Doc-TSA-Seq2Struct.", "keywords": "Aspect-based Sentiment Analysis;Document-level;Targeted Sentiment Analysis", "primary_area": "", "supplementary_material": "", "author": "Nan Song;Hongjie Cai;Rui Xia;Jianfei Yu;Zhen Wu;Xinyu Dai", "authorids": "~Nan_Song3;~Hongjie_Cai1;~Rui_Xia1;~Jianfei_Yu1;~Zhen_Wu2;~Xinyu_Dai1", "gender": ";M;M;;M;M", "homepage": "https://github.com/yogurt-sn;;http://www.nustm.cn/member/rxia/;;https://wuzhen247.github.io/;http://cs.nju.edu.cn/daixinyu", "dblp": "08/6590;280/0160.html;;;16/4485-2;39/5815", "google_scholar": ";-BRuU2IAAAAJ;https://scholar.google.com.hk/citations?user=Znde6gwAAAAJ;;IoGlgtoAAAAJ;https://scholar.google.com/citations?hl=en", "or_profile": "~Nan_Song3;~Hongjie_Cai1;~Rui_Xia1;~Jianfei_Yu1;~Zhen_Wu2;~Xinyu_Dai1", "aff": "Nanjing University of Science and Technology;;Nanjing University of Science and Technology;;Nanjing University;Nanjing University", "aff_domain": "njust.edu.cn;;njust.edu.cn;;nju.edu.cn;nju.edu.cn", "position": "MS student;;Full Professor;;Researcher;Full Professor", "bibtex": "@inproceedings{\nsong2023a,\ntitle={A Sequence-to-Structure Approach to Document-level Targeted Sentiment Analysis},\nauthor={Nan Song and Hongjie Cai and Rui Xia and Jianfei Yu and Zhen Wu and Xinyu Dai},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=Wy4adj2FUJ}\n}", "github": "", "project": "", "reviewers": "7rjQ;vt9J;GrSt;moT5;F76R", "site": "https://openreview.net/forum?id=Wy4adj2FUJ", "pdf_size": 0, "rating": "4;4;4;4;4", "confidence": "4;4;3;4;4", "excitement": "2;3;3;2;3", "reproducibility": "4;4;3;3;3", "correctness": "3;4;3;2;3", "rating_avg": 4.0, "confidence_avg": 3.8, "excitement_avg": 2.6, "reproducibility_avg": 3.4, "correctness_avg": 3.0, "replies_avg": 17, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;0000-0002-7678-103X;", "linkedin": ";;;;;", "aff_unique_index": "0;0;1;1", "aff_unique_norm": "Nanjing University of Science and Technology;Nanjing University", "aff_unique_dep": ";", "aff_unique_url": "http://www.nust.edu.cn/;https://www.nju.edu.cn", "aff_unique_abbr": "NUST;Nanjing U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "Wyod73NboS", "title": "Are Language Models Worse than Humans at Following Prompts? It's Complicated", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "Prompts have been the center of progress in advancing language models' zero-shot and few-shot performance. However, recent work finds that models can perform surprisingly well when given intentionally irrelevant or misleading prompts. Such results may be interpreted as evidence that model behavior is not \"human like\". In this study, we challenge a central assumption in such work: that humans would perform badly when given pathological instructions. We find that humans are able to reliably ignore irrelevant instructions and thus, like models, perform well on the underlying task despite an apparent lack of signal regarding the task they are being asked to do. However, when given deliberately misleading instructions, humans follow the instructions faithfully, whereas models do not. 
Our findings caution that future research should not idealize human behaviors as a monolith and should not train or evaluate models to mimic assumptions about these behaviors without first validating humans\u2019 behaviors empirically.", "keywords": "instruction following;prompting;human study;natural language inference", "primary_area": "", "supplementary_material": "", "author": "Albert Webson;Alyssa Marie Loo;Qinan Yu;Ellie Pavlick", "authorids": "~Albert_Webson1;~Alyssa_Marie_Loo1;~Qinan_Yu1;~Ellie_Pavlick1", "gender": ";F;F;F", "homepage": "https://representations.ai;;https://www.linkedin.com/in/qinan-yu-9b50471b2/;http://cs.brown.edu/people/epavlick/", "dblp": "276/1456;;;141/4059", "google_scholar": "3OQplr0AAAAJ;;;sFyrSa8AAAAJ", "or_profile": "~Albert_Webson1;~Alyssa_Marie_Loo1;~Qinan_Yu1;~Ellie_Pavlick1", "aff": "Brown University;Brown University;Brown University;Brown University", "aff_domain": "brown.edu;brown.edu;brown.edu;brown.edu", "position": "PhD student;Undergrad student;Undergrad student;Assistant Professor", "bibtex": "@inproceedings{\nwebson2023are,\ntitle={Are Language Models Worse than Humans at Following Prompts? It's Complicated},\nauthor={Albert Webson and Alyssa Marie Loo and Qinan Yu and Ellie Pavlick},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=Wyod73NboS}\n}", "github": "", "project": "", "reviewers": "k3cH;YaYe;fzxT;3rDv", "site": "https://openreview.net/forum?id=Wyod73NboS", "pdf_size": 0, "rating": "3;3;3;3", "confidence": "4;2;2;2", "excitement": "3;3;4;2", "reproducibility": "4;3;3;4", "correctness": "4;4;4;2", "rating_avg": 3.0, "confidence_avg": 2.5, "excitement_avg": 3.0, "reproducibility_avg": 3.5, "correctness_avg": 3.5, "replies_avg": 14, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;", "linkedin": ";alyssamarieloo;;", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Brown University", "aff_unique_dep": "", "aff_unique_url": "https://www.brown.edu", "aff_unique_abbr": "Brown", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "Wz1jEwvpGO", "title": "Logic Unveils Truth, While Disguise Obscures It: Transition Logic Augmented Response Selection for Multi-Turn Dialogue", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Multi-turn response selection aims to retrieve a response for a dialogue context from a candidate pool and negative sampling is the key to its retrieval performance. However, previous methods of negative samples tend to yield false negatives due to the one-to-many property in open-domain dialogue, which is detrimental to the optimization process. To deal with the problem, we propose a sequential variational ladder auto-encoder to capture the diverse one-to-many transition pattern of multiple characteristics in open-domain dialogue. The learned transition logic thus assists in identifying potential positives in disguise. Meanwhile, we propose a TRIGGER framework to adjust negative sampling in the training process such that the scope of false negatives dynamically updates according to the model capacity. 
Extensive experiments on two benchmarks verify the effectiveness of our approach.", "keywords": "multi-turn dialogue;dialogue retrieval", "primary_area": "", "supplementary_material": "", "author": "Tingchen Fu;Xueliang Zhao;Lemao Liu;Rui Yan", "authorids": "~Tingchen_Fu1;~Xueliang_Zhao1;~Lemao_Liu3;~Rui_Yan2", "gender": "M;M;M;M", "homepage": ";;https://gsai.ruc.edu.cn/english/ruiyan;https://lemaoliu.github.io/homepage/", "dblp": "318/0986;;19/2405-1;41/10887.html", "google_scholar": ";h-87C9cAAAAJ;eLw6g-UAAAAJ;", "or_profile": "~Tingchen_Fu1;~Xueliang_Zhao1;~Rui_Yan2;~lemao_liu1", "aff": "Renmin University of China;The University of Hong Kong;Renmin University of China;Tencent", "aff_domain": "ruc.edu.cn;cs.hku.hk;ruc.edu.cn;tencent.com", "position": "MS student;PhD student;Associate Professor;Researcher", "bibtex": "@inproceedings{\nfu2023logic,\ntitle={Logic Unveils Truth, While Disguise Obscures It: Transition Logic Augmented Response Selection for Multi-Turn Dialogue},\nauthor={Tingchen Fu and Xueliang Zhao and Lemao Liu and Rui Yan},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=Wz1jEwvpGO}\n}", "github": "", "project": "", "reviewers": "kX1o;oe9s;wMUd", "site": "https://openreview.net/forum?id=Wz1jEwvpGO", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;4;4", "excitement": "3;2;3", "reproducibility": "4;3;4", "correctness": "4;3;4", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 2.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0002-3356-6823;", "linkedin": "%E5%BB%B7%E7%90%9B-%E4%BB%98-b00435181/;;;", "aff_unique_index": "0;1;0;2", "aff_unique_norm": "Renmin University of China;University of Hong Kong;Tencent", "aff_unique_dep": ";;Tencent Holdings Limited", "aff_unique_url": "http://www.ruc.edu.cn;https://www.hku.hk;https://www.tencent.com", "aff_unique_abbr": "RUC;HKU;Tencent", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "X2R4yhtenj", "title": "Effects of Human Adversarial and Affable Samples on BERT Generalization", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "BERT-based models have had strong performance on leaderboards, yet have been demonstrably worse in real-world settings requiring generalization. Limited quantities of training data are considered a key impediment to achieving generalizability in machine learning. In this paper, we examine the impact of training data quality, not quantity, on a model's generalizability. We consider two characteristics of training data: the portion of human-adversarial (h-adversarial) samples, i.e. sample pairs with seemingly minor differences but different ground-truth labels, and human-affable (h-affable) training samples, i.e. sample pairs with minor differences but the same ground-truth label. We find that for a fixed size of training samples, having 10-30% h-adversarial instances improves the precision, and therefore F_1, by up to 20 points in the tasks of text classification and relation extraction. Increasing h-adversarials beyond this range can result in performance plateaus or even degradation.
In contrast, h-affables may not contribute to a model's generalizability and may even degrade generalization performance.", "keywords": "BERT;Generalization;Robustness in NLP", "primary_area": "", "supplementary_material": "", "author": "Aparna Elangovan;Estrid He;Yuan Li;Karin Verspoor", "authorids": "~Aparna_Elangovan1;~Estrid_He1;~Yuan_Li5;~Karin_Verspoor1", "gender": ";F;M;F", "homepage": ";;;", "dblp": "232/4874;119/5963-2;86/6196-12.html;07/6465", "google_scholar": "eaow7uAAAAAJ;Lwso-psAAAAJ;qDDC09EAAAAJ;dUxHnbcAAAAJ", "or_profile": "~Aparna_Elangovan1;~Estrid_He1;~Yuan_Li5;~Karin_Verspoor1", "aff": "University of Melbourne;The University of Melbourne;The University of Melbourne;Royal Melbourne Institute of Technology", "aff_domain": "unimelb.edu.au;unimelb.edu.au;unimelb.edu.au;rmit.edu.au", "position": "PhD student;Postdoc;Postdoc;Professor & Dean", "bibtex": "@inproceedings{\nelangovan2023effects,\ntitle={Effects of Human Adversarial and Affable Samples on {BERT} Generalization},\nauthor={Aparna Elangovan and Estrid He and Yuan Li and Karin Verspoor},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=X2R4yhtenj}\n}", "github": "", "project": "", "reviewers": "6RJa;dy3Y;V7ws", "site": "https://openreview.net/forum?id=X2R4yhtenj", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;2;4", "excitement": "2;2;4", "reproducibility": "3;3;4", "correctness": "2;2;4", "rating_avg": 3.0, "confidence_avg": 3.0, "excitement_avg": 2.6666666666666665, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 2.6666666666666665, "replies_avg": 10, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0000-0002-8661-1544", "linkedin": ";;;karinverspoor/", "aff_unique_index": "0;0;0;1", "aff_unique_norm": "University of Melbourne;Royal Melbourne Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.unimelb.edu.au;https://www.rmit.edu.au", "aff_unique_abbr": "UniMelb;RMIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Australia" }, { "id": "X570XzeYSW", "title": "Task-Agnostic Low-Rank Adapters for Unseen English Dialects", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Large Language Models (LLMs) are trained on corpora disproportionally weighted in favor of Standard American English. As a result, speakers of other dialects experience significantly more failures when interacting with these technologies. In practice, these speakers often accommodate their speech to be better understood. Our work shares the belief that language technologies should be designed to accommodate the diversity in English dialects and not the other way around. However, prior work on dialect struggles with generalizing to evolving and emerging dialects in a scalable manner. To fill this gap, our method, HyperLoRA, leverages expert linguistic knowledge to enable resource-efficient adaptation via hypernetworks. By disentangling dialect-specific and cross-dialectal information, HyperLoRA improves generalization to unseen dialects in a task-agnostic fashion. Not only is HyperLoRA more scalable in the number of parameters, but it also achieves the best or most competitive performance across 5 dialects in a zero-shot setting.
In this way, our approach facilitates access to language technology for billions of English dialect speakers who are traditionally underrepresented.", "keywords": "Low-resource;Dialects;Hypernetworks;LoRA;Cross-dialectal Alignment", "primary_area": "", "supplementary_material": "", "author": "Zedian Xiao;William Barr Held;Yanchen Liu;Diyi Yang", "authorids": "~Zedian_Xiao1;~William_Barr_Held1;~Yanchen_Liu2;~Diyi_Yang2", "gender": ";M;M;F", "homepage": "https://www.linkedin.com/in/zedian-xiao/;https://williamheld.com/;https://liuyanchen1015.github.io/;https://cs.stanford.edu/~diyiy/", "dblp": ";245/8601.html;;70/11145", "google_scholar": ";SP9VJNkAAAAJ;https://scholar.google.com/citations?hl=en;j9jhYqQAAAAJ", "or_profile": "~Zedian_Xiao1;~William_Barr_Held1;~Yanchen_Liu2;~Diyi_Yang2", "aff": "Computer Science Department, Stanford University;Georgia Institute of Technology;Harvard University;Stanford University", "aff_domain": "cs.stanford.edu;gatech.edu;harvard.edu;stanford.edu", "position": "MS student;PhD student;MS student;Assistant Professor", "bibtex": "@inproceedings{\nxiao2023taskagnostic,\ntitle={Task-Agnostic Low-Rank Adapters for Unseen English Dialects},\nauthor={Zedian Xiao and William Barr Held and Yanchen Liu and Diyi Yang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=X570XzeYSW}\n}", "github": "", "project": "", "reviewers": "wWbU;ZQT3;i59w;g9kY", "site": "https://openreview.net/forum?id=X570XzeYSW", "pdf_size": 0, "rating": "3;3;3;3", "confidence": "5;3;4;2", "excitement": "4;4;4;4", "reproducibility": "4;5;4;4", "correctness": "4;4;4;4", "rating_avg": 3.0, "confidence_avg": 3.5, "excitement_avg": 4.0, "reproducibility_avg": 4.25, "correctness_avg": 4.0, "replies_avg": 14, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;", "linkedin": ";williambarrheld/;;", "aff_unique_index": "0;1;2;0", "aff_unique_norm": "Stanford University;Georgia Institute of Technology;Harvard University", "aff_unique_dep": "Computer Science Department;;", "aff_unique_url": "https://www.stanford.edu;https://www.gatech.edu;https://www.harvard.edu", "aff_unique_abbr": "Stanford;Georgia Tech;Harvard", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Stanford;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "X597Q58y1U", "title": "Enhancing Code-Switching for Cross-lingual SLU: A Unified View of Semantic and Grammatical Coherence", "track": "main", "status": "Short Main", "tldr": "", "abstract": "Despite the success of spoken language understanding (SLU) in high-resource languages, achieving similar performance in low-resource settings, such as zero-shot scenarios, remains challenging due to limited labeled training data. To improve zero-shot cross-lingual SLU, recent studies have explored code-switched sentences containing tokens from multiple languages. However, vanilla code-switched sentences often lack semantic and grammatical coherence. We ascribe this lack to two issues: (1) randomly replacing code-switched tokens with equal probability and (2) disregarding token-level dependency within each language. To tackle these issues, in this paper, we propose a novel method termed SoGo, for zero-shot cross-lingual SLU. First, we use a saliency-based substitution approach to extract keywords as substitution options. 
Then, we introduce a novel token-level alignment strategy that considers the similarity between the context and the code-switched tokens, ensuring grammatical coherence in code-switched sentences. Extensive experiments and analyses demonstrate the superior performance of SoGo across nine languages on MultiATIS++.", "keywords": "Cross-lingual SLU;Semantic Coherence;Grammatical Coherence", "primary_area": "", "supplementary_material": "", "author": "Zhihong Zhu;Xuxin Cheng;Zhiqi Huang;Dongsheng Chen;Yuexian Zou", "authorids": "~Zhihong_Zhu1;~Xuxin_Cheng3;~Zhiqi_Huang2;~Dongsheng_Chen1;~Yuexian_Zou2", "gender": ";;M;M;", "homepage": ";;https://zhiqi-huang.github.io/;;", "dblp": ";;;;", "google_scholar": ";;5JGMGCsAAAAJ;https://scholar.google.com.hk/citations?user=2sI1wsoAAAAJ;", "or_profile": "~Zhihong_Zhu1;~Xuxin_Cheng3;~Zhiqi_Huang2;~Dongsheng_Chen1;~Yuexian_Zou2", "aff": ";;Tencent Game;Peking University;", "aff_domain": ";;tencent.com;pku.edu.cn;", "position": ";;Researcher;MS student;", "bibtex": "@inproceedings{\nzhu2023enhancing,\ntitle={Enhancing Code-Switching for Cross-lingual {SLU}: A Unified View of Semantic and Grammatical Coherence},\nauthor={Zhihong Zhu and Xuxin Cheng and Zhiqi Huang and Dongsheng Chen and Yuexian Zou},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=X597Q58y1U}\n}", "github": "", "project": "", "reviewers": "nHnV;uvK4;YoWR;ncFV", "site": "https://openreview.net/forum?id=X597Q58y1U", "pdf_size": 0, "rating": "4;4;4;4", "confidence": "3;4;4;4", "excitement": "4;4;3;2", "reproducibility": "4;5;4;2", "correctness": "4;4;4;4", "rating_avg": 4.0, "confidence_avg": 3.75, "excitement_avg": 3.25, "reproducibility_avg": 3.75, "correctness_avg": 4.0, "replies_avg": 14, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;", "linkedin": ";;zhiqi-huang-133499142/;;", "aff_unique_index": "0;1", "aff_unique_norm": "Tencent;Peking University", "aff_unique_dep": "Tencent Game;", "aff_unique_url": "https://www.tencent.com;http://www.pku.edu.cn", "aff_unique_abbr": "Tencent;Peking U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "id": "X6DrwxlMD9", "title": "Evaluating Dependencies in Fact Editing for Language Models: Specificity and Implication Awareness", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "The potential of using a large language model (LLM) as a knowledge base (KB) has sparked significant interest. To maintain the knowledge acquired by LLMs, we need to ensure that the editing of learned facts respects internal logical constraints, which are known as dependency of knowledge. Existing work on editing LLMs has partially addressed the issue of dependency, when the editing of a fact should apply to its lexical variations without disrupting irrelevant ones. However, they neglect the dependency between a fact and its logical implications.\nWe propose an evaluation protocol with an accompanying question-answering dataset, StandUp, that provides a comprehensive assessment of the editing process considering the above notions of dependency. Our protocol involves setting up a controlled environment in which we edit facts and monitor their impact on LLMs, along with their implications based on If-Then rules. 
Extensive experiments on StandUp show that existing knowledge editing methods are sensitive to the surface form of knowledge, and that they have limited performance in inferring the implications of edited facts.", "keywords": "knowledge editing;large language models;knowledge base", "primary_area": "", "supplementary_material": "", "author": "Zichao Li;Ines Arous;Siva Reddy;Jackie CK Cheung", "authorids": "~Zichao_Li3;~Ines_Arous1;~Siva_Reddy1;~Jackie_CK_Cheung1", "gender": ";;M;M", "homepage": ";https://inesarous.github.io/;http://sivareddy.in;http://cs.mcgill.ca/~jcheung/", "dblp": "95/147-3;207/8093;64/8153;00/9012", "google_scholar": ";RWXHLa8AAAAJ;;https://scholar.google.com.tw/citations?user=Um-wmYQAAAAJ", "or_profile": "~Zichao_Li3;~Ines_Arous1;~Siva_Reddy1;~Jackie_CK_Cheung1", "aff": "McGill University;, McGill University;Mila, McGill University;Microsoft", "aff_domain": "mcgill.ca;cs.mcgill.ca;mila.quebec;microsoft.com", "position": "PhD student;Postdoc;Assistant Professor;Consulting Researcher", "bibtex": "@inproceedings{\nli2023evaluating,\ntitle={Evaluating Dependencies in Fact Editing for Language Models: Specificity and Implication Awareness},\nauthor={Zichao Li and Ines Arous and Siva Reddy and Jackie CK Cheung},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=X6DrwxlMD9}\n}", "github": "", "project": "", "reviewers": "uPob;UeeZ;A8T6", "site": "https://openreview.net/forum?id=X6DrwxlMD9", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;4;2", "excitement": "2;3;3", "reproducibility": "3;3;2", "correctness": "3;3;3", "rating_avg": 3.0, "confidence_avg": 3.0, "excitement_avg": 2.6666666666666665, "reproducibility_avg": 2.6666666666666665, "correctness_avg": 3.0, "replies_avg": 10, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0001-7513-6197;;", "linkedin": ";ines-arous/;;", "aff_unique_index": "0;0;0;1", "aff_unique_norm": "McGill University;Microsoft", "aff_unique_dep": ";Microsoft Corporation", "aff_unique_url": "https://www.mcgill.ca;https://www.microsoft.com", "aff_unique_abbr": "McGill;Microsoft", "aff_campus_unique_index": "1", "aff_campus_unique": ";Montreal", "aff_country_unique_index": "0;0;0;1", "aff_country_unique": "Canada;United States" }, { "id": "X6HDI4cqWF", "title": "End-to-End Autoregressive Retrieval via Bootstrapping for Smart Reply Systems", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Reply suggestion systems represent a staple component of many instant messaging and email systems. However, the requirement to produce sets of replies, rather than individual replies, makes the task poorly suited for out-of-the-box retrieval architectures, which only consider individual message-reply similarity. As a result, these system often rely on additional post-processing modules to diversify the outputs. However, these approaches are ultimately bottlenecked by the performance of the initial retriever, which in practice struggles to present a sufficiently diverse range of options to the downstream diversification module, leading to the suggestions being less relevant to the user. In this paper, we consider a novel approach that radically simplifies this pipeline through an autoregressive text-to-text retrieval model, that learns the smart reply task end-to-end from a dataset of (message, reply set) pairs obtained via bootstrapping. 
Empirical results show this method consistently outperforms a range of state-of-the-art baselines across three datasets, corresponding to a 5.1\\%-17.9\\% improvement in relevance, and a 0.5\\%-63.1\\% improvement in diversity compared to the best baseline approach. We make our code publicly available.", "keywords": "dialog;smart reply;reply suggestion", "primary_area": "", "supplementary_material": "", "author": "Benjamin Towle;Ke Zhou", "authorids": "~Benjamin_Towle1;~Ke_Zhou3", "gender": ";", "homepage": ";", "dblp": ";", "google_scholar": ";", "or_profile": "~Benjamin_Towle1;~Ke_Zhou3", "aff": ";", "aff_domain": ";", "position": ";", "bibtex": "@inproceedings{\ntowle2023endtoend,\ntitle={End-to-End Autoregressive Retrieval via Bootstrapping for Smart Reply Systems},\nauthor={Benjamin Towle and Ke Zhou},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=X6HDI4cqWF}\n}", "github": "", "project": "", "reviewers": "86dA;RPm6;YiYZ", "site": "https://openreview.net/forum?id=X6HDI4cqWF", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;3;3", "excitement": "2;4;3", "reproducibility": "3;4;4", "correctness": "3;3;3", "rating_avg": 3.0, "confidence_avg": 3.0, "excitement_avg": 3.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";", "linkedin": ";" }, { "id": "XB0u7RTXrV", "title": "SCENE: Self-Labeled Counterfactuals for Extrapolating to Negative Examples", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Detecting negatives (such as non-entailment relationships, unanswerable questions, and false claims) is an important and challenging aspect of many natural language understanding tasks. Though manually collecting challenging negative examples can help models detect them, it is both costly and domain-specific. In this work, we propose Self-labeled Counterfactuals for Extrapolating to Negative Examples (SCENE), an automatic method for synthesizing training data that greatly improves models' ability to detect challenging negative examples. In contrast with standard data augmentation, which synthesizes new examples for existing labels, SCENE can synthesize negative examples zero-shot from only positive ones. Given a positive example, SCENE perturbs it with a mask infilling model, then determines whether the resulting example is negative based on a self-training heuristic. With access to only answerable training examples, SCENE can close 69.6% of the performance gap on SQuAD 2.0, a dataset where half of the evaluation examples are unanswerable, compared to a model trained on SQuAD 2.0. 
Our method also extends to boolean question answering and recognizing textual entailment, and improves generalization from SQuAD to ACE-whQA, an out-of-domain extractive QA benchmark.", "keywords": "Question Answering;Counterfactual Generation;Data Augmentation", "primary_area": "", "supplementary_material": "", "author": "Deqing Fu;Ameya Godbole;Robin Jia", "authorids": "~Deqing_Fu1;~Ameya_Godbole1;~Robin_Jia1", "gender": "M;M;M", "homepage": "https://deqingfu.github.io/;https://ameyagodbole.github.io/;https://robinjia.github.io/", "dblp": "304/3030;213/8024;182/2556", "google_scholar": "fsbgfqEAAAAJ;https://scholar.google.co.in/citations?user=_nzzImgAAAAJ;ajZ-_O0AAAAJ", "or_profile": "~Deqing_Fu1;~Ameya_Godbole1;~Robin_Jia1", "aff": "University of Southern California;Google;University of Southern California", "aff_domain": "usc.edu;google.com;usc.edu", "position": "PhD student;Intern;Assistant Professor", "bibtex": "@inproceedings{\nfu2023scene,\ntitle={{SCENE}: Self-Labeled Counterfactuals for Extrapolating to Negative Examples},\nauthor={Deqing Fu and Ameya Godbole and Robin Jia},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=XB0u7RTXrV}\n}", "github": "", "project": "", "reviewers": "oQE5;NjSu;b1yf", "site": "https://openreview.net/forum?id=XB0u7RTXrV", "pdf_size": 0, "rating": "5;5;5", "confidence": "3;5;4", "excitement": "4;4;4", "reproducibility": "4;3;4", "correctness": "4;5;4", "rating_avg": 5.0, "confidence_avg": 4.0, "excitement_avg": 4.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 4.333333333333333, "replies_avg": 10, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": ";ameyag416/;", "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Southern California;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.usc.edu;https://www.google.com", "aff_unique_abbr": "USC;Google", "aff_campus_unique_index": "0;1;0", "aff_campus_unique": "Los Angeles;Mountain View", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "XEBHsJpFY9", "title": "Culturally Aware Natural Language Inference", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Humans produce and consume language in a particular cultural context, which includes knowledge about specific norms and practices. A listener's awareness of the cultural context is critical for interpreting the speaker's meaning. A simple expression like *I didn't leave a tip* implies a strong sense of dissatisfaction when tipping is assumed to be the norm. As NLP systems reach users from different cultures, achieving culturally aware language understanding becomes increasingly important. However, current research has focused on building cultural knowledge bases without studying how such knowledge leads to contextualized interpretations of texts. In this work, we operationalize cultural variations in language understanding through a natural language inference (NLI) task that surfaces cultural variations as label disagreement between annotators from different cultural groups. We introduce the first Culturally Aware Natural Language Inference (CALI) dataset with 2.7K premise-hypothesis pairs annotated by two cultural groups located in the U.S. and India. 
With CALI, we categorize how cultural norms affect language understanding and present an evaluation framework to assess at which levels large language models are culturally aware. Our dataset is available at https://github.com/SALT-NLP/CulturallyAwareNLI.", "keywords": "cultural norms;natural language inference", "primary_area": "", "supplementary_material": "", "author": "Jing Huang;Diyi Yang", "authorids": "~Jing_Huang2;~Diyi_Yang2", "gender": ";F", "homepage": "https://explanare.github.io/;https://cs.stanford.edu/~diyiy/", "dblp": "14/4834-14;70/11145", "google_scholar": "zM_wp_MAAAAJ;j9jhYqQAAAAJ", "or_profile": "~Jing_Huang2;~Diyi_Yang2", "aff": "Stanford University;Stanford University", "aff_domain": "stanford.edu;stanford.edu", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\nhuang2023culturally,\ntitle={Culturally Aware Natural Language Inference},\nauthor={Jing Huang and Diyi Yang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=XEBHsJpFY9}\n}", "github": "", "project": "", "reviewers": "PaX1;QEVL;61Gu", "site": "https://openreview.net/forum?id=XEBHsJpFY9", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;5", "excitement": "3;4;2", "reproducibility": "3;3;5", "correctness": "3;3;2", "rating_avg": 3.0, "confidence_avg": 4.333333333333333, "excitement_avg": 3.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 2.6666666666666665, "replies_avg": 11, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";", "linkedin": ";", "aff_unique_index": "0;0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "", "aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Stanford", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "XEwQ1fDbDN", "title": "Examining Inter-Consistency of Large Language Models Collaboration: An In-depth Analysis via Debate", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Large Language Models (LLMs) have shown impressive capabilities in various applications, but they still face various inconsistency issues. Existing works primarily focus on the inconsistency issues within a single LLM, while we complementarily explore the inter-consistency among multiple LLMs for collaboration. To examine whether LLMs can collaborate effectively to achieve a consensus for a shared goal, we focus on commonsense reasoning, and introduce a formal debate framework (FORD) to conduct a three-stage debate among LLMs with real-world scenarios alignment: fair debate, mismatched debate, and roundtable debate. Through extensive experiments on various datasets, LLMs can effectively collaborate to reach a consensus despite noticeable inter-inconsistencies, but imbalances in their abilities can lead to domination by superior LLMs. Leveraging a more advanced LLM like GPT-4 as an authoritative judge can boost collaboration performance. Our work contributes to understanding the inter-consistency among LLMs and lays the foundation for developing future collaboration methods. 
Codes and data are available at https://github.com/Waste-Wood/FORD.", "keywords": "Large Language Models;Inconsistency;Debate;Commonsense Reasoning", "primary_area": "", "supplementary_material": "", "author": "Kai Xiong;Xiao Ding;Yixin Cao;Ting Liu;Bing Qin", "authorids": "~Kai_Xiong2;~Xiao_Ding1;~Yixin_Cao2;~Ting_Liu2;~Bing_Qin2", "gender": "M;M;M;M;", "homepage": "https://waste-wood.github.io/;http://ir.hit.edu.cn/~xding/index_english.htm;https://sites.google.com/view/yixin-homepage;;http://ir.hit.edu.cn/~qinb", "dblp": "38/6410-2;;20/8038-2;52/5150-1;86/5934.html", "google_scholar": "https://scholar.google.com.hk/citations?hl=zh-CN;Mi9XXuAAAAAJ;https://scholar.google.co.uk/citations?user=CnhTvdoAAAAJ;zyMJ1V0AAAAJ;LKnCub0AAAAJ", "or_profile": "~Kai_Xiong2;~Xiao_Ding1;~Yixin_Cao2;~Ting_Liu2;~Bing_Qin2", "aff": "Singapore Management University;Harbin Institute of Technology;Singapore Management University;Harbin Institute of Technology;Harbin Institute of Technology", "aff_domain": "smu.edu.sg;hit.edu.cn;smu.edu.sg;hit.edu.cn;hit.edu.cn", "position": "Researcher;Full Professor;Assistant Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nxiong2023examining,\ntitle={Examining Inter-Consistency of Large Language Models Collaboration: An In-depth Analysis via Debate},\nauthor={Kai Xiong and Xiao Ding and Yixin Cao and Ting Liu and Bing Qin},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=XEwQ1fDbDN}\n}", "github": "", "project": "", "reviewers": "deKp;LsCJ;R6wy", "site": "https://openreview.net/forum?id=XEwQ1fDbDN", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;4", "excitement": "4;3;4", "reproducibility": "4;3;2", "correctness": "4;3;3", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-5909-3075;0000-0002-5838-0320;;;0000-0002-2543-5604", "linkedin": ";;;;", "aff_unique_index": "0;1;0;1;1", "aff_unique_norm": "Singapore Management University;Harbin Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.smu.edu.sg;http://www.hit.edu.cn/", "aff_unique_abbr": "SMU;HIT", "aff_campus_unique_index": "1;1;1", "aff_campus_unique": ";Harbin", "aff_country_unique_index": "0;1;0;1;1", "aff_country_unique": "Singapore;China" }, { "id": "XHftyT3k4j", "title": "Enhancing Argument Structure Extraction with Efficient Leverage of Contextual Information", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "Argument structure extraction (ASE) aims to identify the discourse structure of arguments within documents. Previous research has demonstrated that contextual information is crucial for developing an effective ASE model. However, we observe that merely concatenating sentences in a contextual window does not fully utilize contextual information and can sometimes lead to excessive attention on less informative sentences. To tackle this challenge, we propose an Efficient Context-aware ASE model (ECASE) that fully exploits contextual information by enhancing modeling capacity and augmenting training data. Specifically, we introduce a sequence-attention module and distance-weighted similarity loss to aggregate contextual information and argumentative information. 
Additionally, we augment the training data by randomly masking discourse markers and sentences, which reduces the model's reliance on specific words or less informative sentences. Our experiments on five datasets from various domains demonstrate that our model achieves state-of-the-art performance. Furthermore, ablation studies confirm the effectiveness of each module in our model.", "keywords": "Argument Mining;Argument structure extraction;Discourse Structure of Arguments", "primary_area": "", "supplementary_material": "", "author": "Yun Luo;Zhen Yang;Fandong Meng;Yingjie Li;Jie Zhou;Yue Zhang", "authorids": "~Yun_Luo1;~Zhen_Yang4;~Fandong_Meng3;~Yingjie_Li2;~Jie_Zhou8;~Yue_Zhang7", "gender": "M;M;M;;M;M", "homepage": ";;http://fandongmeng.github.io/;;;http://frcchang.github.io", "dblp": ";;117/4056.html;;00/5012-16;47/722-4", "google_scholar": "B_bdRlAAAAAJ;cuGFOQsAAAAJ;sA8U4S0AAAAJ;6OWRfPoAAAAJ;https://scholar.google.com.hk/citations?user=OijxQCMAAAAJ;", "or_profile": "~Yun_Luo1;~Zhen_Yang4;~Fandong_Meng3;~Yingjie_Li2;~Jie_Zhou8;~Yue_Zhang7", "aff": "westlake university;Tencent.inc;WeChat AI, Tencent Inc.;Westlake University;WeChat AI, Tencent Inc.;Westlake University", "aff_domain": "westlake.edu;tencent.com;tencent.com;westlake.edu;tencent.com;westlake.edu.cn", "position": "PhD student;Researcher;Principal Researcher;Postdoc;Principal Researcher;Full Professor", "bibtex": "@inproceedings{\nluo2023enhancing,\ntitle={Enhancing Argument Structure Extraction with Efficient Leverage of Contextual Information},\nauthor={Yun Luo and Zhen Yang and Fandong Meng and Yingjie Li and Jie Zhou and Yue Zhang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=XHftyT3k4j}\n}", "github": "", "project": "", "reviewers": "xDHq;d9xz;LkxQ", "site": "https://openreview.net/forum?id=XHftyT3k4j", "pdf_size": 0, "rating": "3;3;3", "confidence": "5;3;3", "excitement": "3;3;4", "reproducibility": "4;3;4", "correctness": "2;3;4", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0002-8158-2377;0000-0003-4015-4576;0000-0002-5899-5165;0000-0002-5214-2268", "linkedin": ";;;;;", "aff_unique_index": "0;1;1;0;1;0", "aff_unique_norm": "Westlake University;Tencent", "aff_unique_dep": ";Tencent", "aff_unique_url": "https://www.westlake.edu.cn;https://www.tencent.com", "aff_unique_abbr": "WU;Tencent", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "XIHl40UylS", "title": "STEER: Unified Style Transfer with Expert Reinforcement", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "While text style transfer has many applications across natural language processing, the core premise of transferring from a single source style is unrealistic in a real-world setting. In this work, we focus on arbitrary style transfer: rewriting a text from an arbitrary, unknown style to a target style.\n\nWe propose STEER: Unified Style Transfer with Expert Reinforcement, a unified frame-work developed to overcome the challenge of limited parallel data for style transfer. STEER involves automatically generating a corpus of style-transfer pairs using a product of experts during decoding. 
The generated offline data is then used to pre-train an initial policy before switching to online, off-policy reinforcement learning for further improvements via fine-grained reward signals. STEER is unified and can transfer to multiple target styles from an arbitrary, unknown source style, making it particularly flexible and efficient.\n\nExperimental results on a challenging dataset with text from a diverse set of styles demonstrate state-of-the-art results compared to competitive baselines. Remarkably, STEER outperforms the 175B parameter instruction-tuned GPT-3 on overall style transfer quality, despite being 226 times smaller in size. We also show STEER is robust, maintaining its style transfer capabilities on out-of-domain data, and surpassing nearly all baselines across various styles. The success of our method highlights the potential of RL algorithms when augmented with controllable decoding to overcome the challenge of limited data supervision.", "keywords": "style transfer;natural language generation;reinforcement learning;controllable decoding", "primary_area": "", "supplementary_material": "", "author": "Skyler Hallinan;Faeze Brahman;Ximing Lu;Jaehun Jung;Sean Welleck;Yejin Choi", "authorids": "~Skyler_Hallinan1;~Faeze_Brahman1;~Ximing_Lu1;~Jaehun_Jung1;~Sean_Welleck1;~Yejin_Choi1", "gender": "M;F;F;M;;F", "homepage": "https://skylerhallinan.com/;https://fabrahman.github.io;https://gloriaximinglu.github.io/;https://jaehunjung.com;;https://yejinc.github.io/", "dblp": "256/6863;276/6005;24/10879;192/7707;;89/579-1", "google_scholar": "mO_tZ94AAAAJ;viCG2ikAAAAJ;https://scholar.google.com/citations?hl=en;_bXzUGEAAAAJ;;vhP-tlcAAAAJ", "or_profile": "~Skyler_Hallinan1;~Faeze_Brahman1;~Ximing_Lu1;~Jaehun_Jung1;~Sean_Welleck1;~Yejin_Choi1", "aff": "University of Washington;Allen Institute for AI;University of Washington;University of Washington;;Department of Computer Science, University of Washington", "aff_domain": "uw.edu;allenai.org;cs.washington.edu;uw.edu;;cs.washington.edu", "position": "MS student;Postdoc;Undergrad student;PhD student;;Full Professor", "bibtex": "@inproceedings{\nhallinan2023steer,\ntitle={{STEER}: Unified Style Transfer with Expert Reinforcement},\nauthor={Skyler Hallinan and Faeze Brahman and Ximing Lu and Jaehun Jung and Sean Welleck and Yejin Choi},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=XIHl40UylS}\n}", "github": "", "project": "", "reviewers": "kv8D;kVMX;3tNE", "site": "https://openreview.net/forum?id=XIHl40UylS", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;3;4", "excitement": "3;3;3", "reproducibility": "4;4;3", "correctness": "3;3;3", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0000-0002-0292-3074;;", "linkedin": "skyler-hallinan/;;;;;", "aff_unique_index": "0;1;0;0;0", "aff_unique_norm": "University of Washington;Allen Institute for AI", "aff_unique_dep": ";", "aff_unique_url": "https://www.washington.edu;https://allenai.org", "aff_unique_abbr": "UW;AI2", "aff_campus_unique_index": "1", "aff_campus_unique": ";Seattle", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "XILoK6g4va", "title": "Hierarchical Fusion for Online Multimodal Dialog Act Classification", "track": "main", "status": "Long 
Findings", "tldr": "", "abstract": "We propose a framework for online multimodal dialog act (DA) classification based on raw audio and ASR-generated transcriptions of current and past utterances. Existing multimodal DA classification approaches are limited by ineffective audio modeling and late-stage fusion. We showcase significant improvements in multimodal DA classification by integrating modalities at a more granular level and incorporating recent advancements in large language and audio models for audio feature extraction. We further investigate the effectiveness of self-attention and cross-attention mechanisms in modeling utterances and dialogs for DA classification. We achieve a substantial increase of 3 percentage points in the F1 score relative to current state-of-the-art models on two prominent DA classification datasets, MRDA and EMOTyDA.", "keywords": "Dialog Act Classification;Multimodality;Early Fusion;Online Inference", "primary_area": "", "supplementary_material": "", "author": "Md Messal Monem Miah;Adarsh Pyarelal;Ruihong Huang", "authorids": "~Md_Messal_Monem_Miah1;~Adarsh_Pyarelal1;~Ruihong_Huang1", "gender": "M;M;F", "homepage": ";https://adarsh.cc;https://people.engr.tamu.edu/huangrh/index.html", "dblp": "362/8586;242/7424;42/4811.html", "google_scholar": "TP9RQXYAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com.tw/citations?user=NU2aHWUAAAAJ", "or_profile": "~Md_Messal_Monem_Miah1;~Adarsh_Pyarelal1;~Ruihong_Huang1", "aff": "Texas A&M University - College Station;University of Arizona;Texas A&M University", "aff_domain": "tamu.edu;arizona.edu;cse.tamu.edu", "position": "PhD student;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nmiah2023hierarchical,\ntitle={Hierarchical Fusion for Online Multimodal Dialog Act Classification},\nauthor={Md Messal Monem Miah and Adarsh Pyarelal and Ruihong Huang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=XILoK6g4va}\n}", "github": "", "project": "", "reviewers": "ybDR;YaT8;7qHe", "site": "https://openreview.net/forum?id=XILoK6g4va", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;4", "excitement": "3;3;4", "reproducibility": "1;4;4", "correctness": "4;3;4", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.0, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-1602-0386;", "linkedin": "md-messal-monem-miah-024358112/;adarshpyarelal/;", "aff_unique_index": "0;1;0", "aff_unique_norm": "Texas A&M University;University of Arizona", "aff_unique_dep": ";", "aff_unique_url": "https://www.tamu.edu;https://www.arizona.edu", "aff_unique_abbr": "TAMU;UA", "aff_campus_unique_index": "0", "aff_campus_unique": "College Station;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "XJRNw74kXK", "title": "POSQA: Probe the World Models of LLMs with Size Comparisons", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Embodied language comprehension emphasizes that language understanding is not solely a matter of mental processing in the brain but also involves interactions with the physical and social environment. 
With the explosive growth of Large Language Models (LLMs) and their already ubiquitous presence in our daily lives, it is becoming increasingly necessary to verify their real-world understanding. Inspired by cognitive theories, we propose POSQA: a Physical Object Size Question Answering dataset with simple size comparison questions to examine the extremity and analyze the potential mechanisms of the embodied comprehension of the latest LLMs.\n\nWe show that even the largest LLMs today perform poorly under the zero-shot setting. We then push their limits with advanced prompting techniques and external knowledge augmentation. Furthermore, we investigate whether their real-world comprehension primarily derives from contextual information or internal weights and analyse the impact of prompt formats and report bias of different objects. Our results show that real-world understanding that LLMs shaped from textual data can be vulnerable to deception and confusion by the surface form of prompts, which makes it less aligned with human behaviours.", "keywords": "Embodied language comprehension;World Model;Large Language Models;AI Alignment", "primary_area": "", "supplementary_material": "", "author": "Chang Shu;Jiuzhou Han;Fangyu Liu;Ehsan Shareghi;Nigel Collier", "authorids": "~Chang_Shu5;~Jiuzhou_Han1;~Fangyu_Liu1;~Ehsan_Shareghi1;~Nigel_Collier1", "gender": "M;M;M;M;M", "homepage": "https://ciaranshu.github.io;https://jiuzhouh.github.io;http://fangyuliu.me/about;https://eehsan.github.io/;https://sites.google.com/site/nhcollier/", "dblp": ";299/7553;84/11483-1;09/7859;90/2619", "google_scholar": "SxQjvCUAAAAJ;7mAsmgYAAAAJ;https://scholar.google.ch/citations?user=d19PiS0AAAAJ;https://scholar.google.com.au/citations?user=EhnQJFwAAAAJ;https://scholar.google.co.uk/citations?user=ZMelBa0AAAAJ", "or_profile": "~Chang_Shu5;~Jiuzhou_Han1;~Fangyu_Liu1;~Ehsan_Shareghi1;~Nigel_Collier1", "aff": "University of Cambridge;Monash University;University of Cambridge;Monash University;University of Cambridge", "aff_domain": "cam.ac.uk;monash.edu;cam.ac.uk;monash.edu;cam.ac.uk", "position": "PhD student;PhD student;PhD student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nshu2023posqa,\ntitle={{POSQA}: Probe the World Models of {LLM}s with Size Comparisons},\nauthor={Chang Shu and Jiuzhou Han and Fangyu Liu and Ehsan Shareghi and Nigel Collier},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=XJRNw74kXK}\n}", "github": "", "project": "", "reviewers": "266w;GsC7;gnhd", "site": "https://openreview.net/forum?id=XJRNw74kXK", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;3;3", "excitement": "3;4;4", "reproducibility": "4;3;2", "correctness": "3;4;4", "rating_avg": 3.0, "confidence_avg": 3.0, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.0, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0001-7038-3623;;0000-0002-7230-4164", "linkedin": ";;fangyu-liu-48a003b0/;;", "aff_unique_index": "0;1;0;1;0", "aff_unique_norm": "University of Cambridge;Monash University", "aff_unique_dep": ";", "aff_unique_url": "https://www.cam.ac.uk;https://www.monash.edu", "aff_unique_abbr": "Cambridge;Monash", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Cambridge;", "aff_country_unique_index": "0;1;0;1;0", "aff_country_unique": "United Kingdom;Australia" }, { "id": "XLXCWNNWvL", "title": "Training 
Simultaneous Speech Translation with Robust and Random Wait-k-Tokens Strategy", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Simultaneous Speech Translation (SimulST) is a task focused on ensuring high-quality translation of speech in low-latency situations. \nDespite this, the modality gap (\\emph{e.g.}, unknown word boundaries) between audio and text presents a challenge. \nThis gap hinders the effective application of policies from simultaneous text translation (SimulMT) and compromises the performance of offline speech translation. \nTo address this issue, we first leverage the Montreal Forced Aligner (MFA) and utilize audio transcription pairs in pre-training the acoustic encoder, and introduce a token-level cross-modal alignment that allows the wait-$k$ policy from SimulMT to better adapt to SimulST. \nThis token-level boundary alignment simplifies the decision-making process for predicting read/write actions, as if the decoder were directly processing text tokens. \nSubsequently, to optimize the SimulST task, we propose a robust and random wait-$k$-tokens strategy. \nThis strategy allows a single model to meet various latency requirements and minimizes error accumulation of boundary alignment during inference. \nOur experiments on the MuST-C dataset show that our method achieves better trade-off between translation quality and latency.", "keywords": "Simultaneous Speech Translation;Robust and Random Wait-k;Cross-modal alignment", "primary_area": "", "supplementary_material": "", "author": "Linlin Zhang;Kai Fan;Jiajun Bu;Zhongqiang Huang", "authorids": "~Linlin_Zhang1;~Kai_Fan1;~Jiajun_Bu1;~Zhongqiang_Huang1", "gender": "F;M;M;M", "homepage": ";https://scholar.google.com/citations?user=SQqkcdgAAAAJ&hl=zh;https://person.zju.edu.cn/bjj;", "dblp": ";20/3825-2.html;50/3147;10/3565", "google_scholar": ";SQqkcdgAAAAJ;OgZP2okAAAAJ;", "or_profile": "~Linlin_Zhang1;~Kai_Fan1;~Jiajun_Bu1;~Zhongqiang_Huang1", "aff": "Zhejiang University;Alibaba Group;Zhejiang University;Alibaba Group", "aff_domain": "zju.edu.cn;alibaba-inc.com;zju.edu.cn;alibaba-inc.com", "position": "PhD student;Researcher;Full Professor;Senior Staff Engineer", "bibtex": "@inproceedings{\nzhang2023training,\ntitle={Training Simultaneous Speech Translation with Robust and Random Wait-k-Tokens Strategy},\nauthor={Linlin Zhang and Kai Fan and Jiajun Bu and Zhongqiang Huang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=XLXCWNNWvL}\n}", "github": "", "project": "", "reviewers": "6LVe;j7oz;7R6j", "site": "https://openreview.net/forum?id=XLXCWNNWvL", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;4;4", "excitement": "4;3;4", "reproducibility": "4;2;4", "correctness": "4;3;4", "rating_avg": 5.0, "confidence_avg": 4.0, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.6666666666666665, "replies_avg": 10, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-8256-0807;0000-0002-1097-2044;", "linkedin": ";;;", "aff_unique_index": "0;1;0;1", "aff_unique_norm": "Zhejiang University;Alibaba Group", "aff_unique_dep": ";", "aff_unique_url": "https://www.zju.edu.cn;https://www.alibaba.com", "aff_unique_abbr": "ZJU;Alibaba", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "XMpzcC9L5z", "title": "How Predictable Are Large Language Model 
Capabilities? A Case Study on BIG-bench", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "We investigate the predictability of large language model (LLM) capabilities: given records of past experiments using different model families, numbers of parameters, tasks, and numbers of in-context examples, can we accurately predict LLM performance on new experiment configurations? Answering this question has practical implications for LLM users (e.g., deciding which models to try), developers (e.g., prioritizing evaluation on representative tasks), and the research community (e.g., identifying hard-to-predict capabilities that warrant further investigation).\n\nWe study the performance prediction problem on experiment records from BIG-bench. On a random train-test split, an MLP-based predictor achieves an $R^2$ score greater than 95%, indicating the presence of learnable patterns within the experiment records. We then formulate the problem of searching for \"small-bench,\" an informative subset of BIG-bench tasks from which the performance on the full set can be maximally recovered. We find a subset as informative as BIG-bench Hard for evaluating new model families, while being $3\\times$ smaller. Additionally, we find competitive subsets by clustering task representations learned by our MLP-based predictor and selecting tasks close to cluster centroids, highlighting the importance of task diversity in constructing \"small-bench.\"", "keywords": "large language models;performance prediction;benchmarking", "primary_area": "", "supplementary_material": "", "author": "Qinyuan Ye;Harvey Yiyun Fu;Xiang Ren;Robin Jia", "authorids": "~Qinyuan_Ye1;~Harvey_Yiyun_Fu1;~Xiang_Ren1;~Robin_Jia1", "gender": "F;M;M;M", "homepage": "http://yeqy.xyz/;https://harvey-fin.github.io/;https://shanzhenren.github.io/;https://robinjia.github.io/", "dblp": "239/5731;;36/360-1;182/2556", "google_scholar": "g230ERwAAAAJ;0ZBEwDUAAAAJ;_moJlrIAAAAJ;ajZ-_O0AAAAJ", "or_profile": "~Qinyuan_Ye1;~Harvey_Yiyun_Fu1;~Xiang_Ren1;~Robin_Jia1", "aff": "Microsoft;University of Southern California;University of Southern California;University of Southern California", "aff_domain": "microsoft.com;usc.edu;usc.edu;usc.edu", "position": "Intern;Undergrad student;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\nye2023how,\ntitle={How Predictable Are Large Language Model Capabilities? 
A Case Study on {BIG}-bench},\nauthor={Qinyuan Ye and Harvey Yiyun Fu and Xiang Ren and Robin Jia},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=XMpzcC9L5z}\n}", "github": "", "project": "", "reviewers": "6piE;WS63;Ku3n", "site": "https://openreview.net/forum?id=XMpzcC9L5z", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;5;3", "excitement": "3;4;3", "reproducibility": "5;4;4", "correctness": "3;5;2", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.333333333333333, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;", "linkedin": ";harvey-fu-yiyun/;xren7;", "aff_unique_index": "0;1;1;1", "aff_unique_norm": "Microsoft;University of Southern California", "aff_unique_dep": "Microsoft Corporation;", "aff_unique_url": "https://www.microsoft.com;https://www.usc.edu", "aff_unique_abbr": "Microsoft;USC", "aff_campus_unique_index": "1;1;1", "aff_campus_unique": ";Los Angeles", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "XNnFTKCacy", "title": "Coherent Entity Disambiguation via Modeling Topic and Categorical Dependency", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Previous entity disambiguation (ED) methods adopt a discriminative paradigm, where prediction is made based on matching scores between mention context and candidate entities using length-limited encoders. \nHowever, these methods often struggle to capture explicit discourse-level dependencies, resulting in incoherent predictions at the abstract level (e.g. topic or category).\nWe propose CoherentED, an ED system equipped with novel designs aimed at enhancing the coherence of entity predictions.\nOur method first introduces an unsupervised variational autoencoder (VAE) to extract latent topic vectors of context sentences.\nThis approach not only allows the encoder to handle longer documents more effectively, conserves valuable input space, but also keeps a topic-level coherence.\nAdditionally, we incorporate an external category memory, enabling the system to retrieve relevant categories for undecided mentions.\nBy employing step-by-step entity decisions, \nthis design facilitates the modeling of entity-entity interactions, thereby maintaining maximum coherence at the category level. \nWe achieve new state-of-the-art results on popular ED benchmarks, with an average improvement of 1.3 F1 points. 
Our model demonstrates particularly outstanding performance on challenging long-text scenarios.", "keywords": "Entity disambiguation;Knowledge base;Entity linking", "primary_area": "", "supplementary_material": "", "author": "Zilin Xiao;Linjun Shou;Xingyao Zhang;Jie Wu;MING GONG;Daxin Jiang", "authorids": "~Zilin_Xiao1;~Linjun_Shou1;~Xingyao_Zhang2;~Jie_Wu15;~MING_GONG2;~Daxin_Jiang2", "gender": "M;M;;M;;M", "homepage": "https://zilin.me/;https://www.microsoft.com/en-us/research/people/lisho/;;http://tobeadded.com;;https://www.microsoft.com/en-us/research/people/djiang/", "dblp": "330/7498;;;;;77/5094", "google_scholar": "IHDbVRoAAAAJ;Tj0DLa0AAAAJ;;;;N-wAHCoAAAAJ", "or_profile": "~Zilin_Xiao1;~Linjun_Shou1;~Xingyao_Zhang2;~Jie_Wu15;~MING_GONG2;~Daxin_Jiang2", "aff": "Microsoft;Microsoft;Microsoft;Microsoft;;Microsoft", "aff_domain": "microsoft.com;microsoft.com;microsoft.com;microsoft.com;;microsoft.com", "position": "Intern;Researcher;Researcher;Researcher;;Researcher/Scientist", "bibtex": "@inproceedings{\nxiao2023coherent,\ntitle={Coherent Entity Disambiguation via Modeling Topic and Categorical Dependency},\nauthor={Zilin Xiao and Linjun Shou and Xingyao Zhang and Jie Wu and MING GONG and Daxin Jiang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=XNnFTKCacy}\n}", "github": "", "project": "", "reviewers": "AHEW;Wj22;LkD8", "site": "https://openreview.net/forum?id=XNnFTKCacy", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;3;3", "excitement": "4;3;3", "reproducibility": "3;3;4", "correctness": "4;3;3", "rating_avg": 3.0, "confidence_avg": 3.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;", "linkedin": ";;xingyao-zhang-138474268/?originalSubdomain=hk;jiewu-ecnu/;;", "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Microsoft", "aff_unique_dep": "Microsoft Corporation", "aff_unique_url": "https://www.microsoft.com", "aff_unique_abbr": "Microsoft", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "XQm8tlPKgY", "title": "SCITAB: A Challenging Benchmark for Compositional Reasoning and Claim Verification on Scientific Tables", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Current scientific fact-checking benchmarks exhibit several shortcomings, such as biases arising from crowd-sourced claims and an over-reliance on text-based evidence. We present SCITAB, a challenging evaluation dataset consisting of 1.2K expert-verified scientific claims that 1) originate from authentic scientific publications and 2) require compositional reasoning for verification. The claims are paired with evidence-containing scientific tables annotated with labels. Through extensive evaluations, we demonstrate that SCITAB poses a significant challenge to state-of-the-art models, including table-based pretraining models and large language models. All models except GPT-4 achieved performance barely above random guessing. Popular prompting techniques, such as Chain-of-Thought, do not achieve much performance gains on SCITAB. Our analysis uncovers several unique challenges posed by SCITAB, including table grounding, claim ambiguity, and compositional reasoning. 
Our codes and data are publicly available at https://github.com/XinyuanLu00/SciTab.", "keywords": "Scientific Fact-Checking;Table Reasoning;Compositional Reasoning;Dataset", "primary_area": "", "supplementary_material": "", "author": "Xinyuan Lu;Liangming Pan;Qian Liu;Preslav Nakov;Min-Yen Kan", "authorids": "~Xinyuan_Lu1;~Liangming_Pan1;~Qian_Liu2;~Preslav_Nakov2;~Min-Yen_Kan1", "gender": "F;M;M;M;M", "homepage": "https://xinyuanlu00.github.io/;https://liangmingpan.bio;http://siviltaram.github.io/;https://mbzuai.ac.ae/study/faculty/preslav-nakov/;https://www.comp.nus.edu.sg/~kanmy/", "dblp": "88/422;186/9707;;https://dblp.uni-trier.de/pid/19/1947;k/MinYenKan", "google_scholar": "-NtdX2sAAAAJ;JcjjOTUAAAAJ;bcbeUo0AAAAJ;DfXsKZ4AAAAJ;https://scholar.google.com.tw/citations?user=aNVcd3EAAAAJ", "or_profile": "~Xinyuan_Lu1;~Liangming_Pan1;~Qian_Liu2;~Preslav_Nakov2;~Min-Yen_Kan1", "aff": "National University of Singapore;University of California, Santa Barbara;Sea AI Lab;Mohamed bin Zayed University of Artificial Intelligence;National University of Singapore", "aff_domain": "u.nus.edu;ucsb.edu;sea.com;mbzuai.ac.ae;nus.edu.sg", "position": "PhD student;Postdoc;Researcher;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nlu2023scitab,\ntitle={{SCITAB}: A Challenging Benchmark for Compositional Reasoning and Claim Verification on Scientific Tables},\nauthor={Xinyuan Lu and Liangming Pan and Qian Liu and Preslav Nakov and Min-Yen Kan},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=XQm8tlPKgY}\n}", "github": "", "project": "", "reviewers": "BWAe;P1EZ;z4dP", "site": "https://openreview.net/forum?id=XQm8tlPKgY", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;3;4", "excitement": "4;3;3", "reproducibility": "4;3;4", "correctness": "3;4;4", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0000-0002-3600-1510;", "linkedin": ";;;preslavnakov/;", "aff_unique_index": "0;1;2;3;0", "aff_unique_norm": "National University of Singapore;University of California, Santa Barbara;Sea AI Lab;Mohamed bin Zayed University of Artificial Intelligence", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.nus.edu.sg;https://www.ucsb.edu;;https://mbzuai.ac.ae", "aff_unique_abbr": "NUS;UCSB;;MBZUAI", "aff_campus_unique_index": "1", "aff_campus_unique": ";Santa Barbara", "aff_country_unique_index": "0;1;3;0", "aff_country_unique": "Singapore;United States;;United Arab Emirates" }, { "id": "XT1hoHqs12", "title": "ReadPrompt: A Readable Prompting Method for Reliable Knowledge Probing", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Knowledge probing is a task to assess the knowledge encoded within pre-trained language models (PLMs) by having the PLM complete prompts such as \"Italy is located in \\_\\_,\". The model's prediction precision serves as a lower bound for the amount of knowledge it contains. Subsequent works explore training a series of vectors as prompts to guide PLMs towards more accurate predictions. However, these methods compromise the readability of the prompts. We cannot directly understand these prompts from their literal meaning, making it difficult to verify whether they are correct. 
Consequently, the credibility of probing results derived from these prompts is diminished. To address the issue, we propose a novel method called ReadPrompt, which aims to identify meaningful sentences to serve as prompts. Experiments show that ReadPrompt achieves state-of-the-art performance on the current knowledge probing benchmark. Moreover, since the prompt is readable, we discovered a misalignment between constructed prompts and knowledge, which is also present in current prompting methods verified by an attack experiment. We claim that the probing outcomes of the current prompting methods are unreliable that overestimate the knowledge contained within PLMs.", "keywords": "Prompt;Pre-trained Language Model;Readability;Knowledge Probing;Fact Retrieval;LAMA Dataset.", "primary_area": "", "supplementary_material": "", "author": "Zezhong WANG;Luyao YE;Hongru WANG;Wai-Chung Kwan;David Ho;Kam-Fai Wong", "authorids": "~Zezhong_WANG1;~Luyao_YE1;~Hongru_WANG1;~Wai-Chung_Kwan2;~David_Ho2;~Kam-Fai_Wong2", "gender": "M;F;M;M;M;", "homepage": ";;https://rulegreen.github.io/;;http://www.se.cuhk.edu.hk/~kfwong;https://kwanwaichung.github.io/", "dblp": "217/9660.html;191/5718;72/1462-3;;w/KamFaiWong;", "google_scholar": "xfl6gcgAAAAJ;https://scholar.google.com/citations?hl=zh-CN;s6UtVYUAAAAJ;;;77Lyt1cAAAAJ", "or_profile": "~Zezhong_WANG1;~Luyao_YE1;~Hongru_WANG1;~David_Ho2;~Kam-Fai_Wong2;~Wai_Chung_Kwan1", "aff": "The Chinese University of Hong Kong;City University of Hong Kong;University of Edinburgh;Chinese University of Hong Kong;The Chinese University of Hong Kong;The Chinese University of Hong Kong", "aff_domain": "cuhk.edu.hk;cityu.edu.hk;ed.ac.uk;cuhk.hk;cuhk.edu.hk;cuhk.edu.hk", "position": "PhD student;PhD student;Visiting Student;Lecturer;Full Professor;PhD student", "bibtex": "@inproceedings{\nwang2023readprompt,\ntitle={ReadPrompt: A Readable Prompting Method for Reliable Knowledge Probing},\nauthor={Zezhong WANG and Luyao YE and Hongru WANG and Wai-Chung Kwan and David Ho and Kam-Fai Wong},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=XT1hoHqs12}\n}", "github": "", "project": "", "reviewers": "bnqg;5PKH;S7sg", "site": "https://openreview.net/forum?id=XT1hoHqs12", "pdf_size": 0, "rating": "4;4;4", "confidence": "1;3;4", "excitement": "3;3;4", "reproducibility": "3;4;4", "correctness": "4;2;4", "rating_avg": 4.0, "confidence_avg": 2.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0003-4079-0097;0000-0003-0094-5760;0000-0001-5027-0138;0000-0001-8097-4910;0000-0002-9427-5659;", "linkedin": ";luyao-ye-06a265221/?trk=public_profile_samename-profile&originalSubdomain=hk;;;;wai-chung-kwan-46a6bb152", "aff_unique_index": "0;1;2;0;0;0", "aff_unique_norm": "Chinese University of Hong Kong;City University of Hong Kong;University of Edinburgh", "aff_unique_dep": ";;", "aff_unique_url": "https://www.cuhk.edu.hk;https://www.cityu.edu.hk;https://www.ed.ac.uk", "aff_unique_abbr": "CUHK;CityU;Edinburgh", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;1;0;0;0", "aff_country_unique": "China;United Kingdom" }, { "id": "XW4t7P2hpN", "title": "Shall We Pretrain Autoregressive Language Models with Retrieval? 
A Comprehensive Study", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Large decoder-only language models (LMs) can be largely improved in terms of perplexity by retrieval (e.g., RETRO), but its impact on text generation quality and downstream task accuracy is unclear. Thus, it is still an open question: shall we pretrain large autoregressive LMs with retrieval? To answer it, we perform a comprehensive study on a scalable pre-trained retrieval-augmented LM (i.e., RETRO) compared with standard GPT and retrieval-augmented GPT incorporated at fine-tuning or inference stages. We first provide the recipe to reproduce RETRO up to 9.5B parameters while retrieving a text corpus with 330B tokens. Based on that, we have the following novel findings: i) RETRO outperforms GPT on text generation with much less degeneration (i.e., repetition), moderately higher factual accuracy, and slightly lower toxicity with a nontoxic retrieval database. ii) On the LM Evaluation Harness benchmark, RETRO largely outperforms GPT on knowledge-intensive tasks, but is on par with GPT on other tasks. Furthermore, we introduce a simple variant of the model, RETRO++, which largely improves open-domain QA results of original RETRO (e.g., EM score +8.6 on Natural Question) and significantly outperforms retrieval-augmented GPT across different model sizes. Our findings highlight the promising direction of pretraining autoregressive LMs with retrieval as future foundation models. We release our implementation at: https://github.com/NVIDIA/Megatron-LM/tree/main/tools/retro.", "keywords": "large language model;pretraining;retrieval augmentation;retro;knowledge retrieval", "primary_area": "", "supplementary_material": "", "author": "Boxin Wang;Wei Ping;Peng Xu;Lawrence McAfee;Zihan Liu;Mohammad Shoeybi;Yi Dong;Oleksii Kuchaiev;Bo Li;Chaowei Xiao;Anima Anandkumar;Bryan Catanzaro", "authorids": "~Boxin_Wang1;~Wei_Ping1;~Peng_Xu7;~Lawrence_McAfee1;~Zihan_Liu2;~Mohammad_Shoeybi1;~Yi_Dong4;~Oleksii_Kuchaiev1;~Bo_Li19;~Chaowei_Xiao2;~Anima_Anandkumar1;~Bryan_Catanzaro1", "gender": ";M;M;M;M;M;M;;F;M;F;M", "homepage": "https://wbx.life;https://wpingnet.github.io/;https://scholar.google.com.hk/citations?user=PQ26NTIAAAAJ&hl=en;https://nvidia.com;https://zliucr.github.io;;;http://www.kuchaev.com;http://boli.cs.illinois.edu/;https://ctnzr.io;http://tensorlab.cms.caltech.edu/users/anima/;https://xiaocw11.github.io/", "dblp": "236/6319;08/8399.html;84/586-8;;46/9231;53/9742;;;50/3402-26;14/4826;;150/3317", "google_scholar": "YOf2ATIAAAAJ;6gKEYRgAAAAJ;https://scholar.google.com.hk/citations?user=PQ26NTIAAAAJ;;LPabcsYAAAAJ;62ElavIAAAAJ;;qmmIGnwAAAAJ;K8vJkTcAAAAJ;UZ6kI2AAAAAJ;bEcLezcAAAAJ;Juoqtj8AAAAJ", "or_profile": "~Boxin_Wang1;~Wei_Ping1;~Peng_Xu7;~Lawrence_McAfee1;~Zihan_Liu2;~Mohammad_Shoeybi1;~Yi_Dong4;~Oleksii_Kuchaiev1;~Bo_Li19;~Bryan_Catanzaro1;~anima_anandkumar1;~chaowei_xiao1", "aff": "Department of Computer Science, University of Illinois, Urbana Champaign;NVIDIA;NVIDIA;NVIDIA;NVIDIA;NVIDIA;NVIDIA;NVIDIA;University of Illinois, Urbana Champaign;NVIDIA;California Institute of Technology;Arizona State University", "aff_domain": "cs.illinois.edu;nvidia.com;nvidia.com;nvidia.com;nvidia.com;nvidia.com;nvidia.com;nvidia.com;illinois.edu;nvidia.com;caltech.edu;asu.edu", "position": "PhD student;Principal Researcher;Researcher;Researcher;Researcher;Director of Applied Resesrch;Researcher;Principal Researcher;Assistant Professor;Vice President;Full Professor;Assistant Professor", "bibtex": 
"@inproceedings{\nwang2023shall,\ntitle={Shall We Pretrain Autoregressive Language Models with Retrieval? A Comprehensive Study},\nauthor={Boxin Wang and Wei Ping and Peng Xu and Lawrence McAfee and Zihan Liu and Mohammad Shoeybi and Yi Dong and Oleksii Kuchaiev and Bo Li and Chaowei Xiao and Anima Anandkumar and Bryan Catanzaro},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=XW4t7P2hpN}\n}", "github": "", "project": "", "reviewers": "HmWG;jg92;Q4QN", "site": "https://openreview.net/forum?id=XW4t7P2hpN", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;3;3", "excitement": "3;4;4", "reproducibility": "3;3;5", "correctness": "4;4;4", "rating_avg": 5.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 12, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;;;;;0000-0003-0034-7728;;0000-0002-7043-4926", "linkedin": ";wei-ping/;;;;shoeybi/;yi-dong-04057b18;oleksiikuchaiev/;;bryancatanzaro/;anima-anandkumar-35171b1/;", "aff_unique_index": "0;1;1;1;1;1;1;1;0;1;2;3", "aff_unique_norm": "University of Illinois Urbana-Champaign;NVIDIA;California Institute of Technology;Arizona State University", "aff_unique_dep": "Department of Computer Science;NVIDIA Corporation;;", "aff_unique_url": "https://illinois.edu;https://www.nvidia.com;https://www.caltech.edu;https://www.asu.edu", "aff_unique_abbr": "UIUC;NVIDIA;Caltech;ASU", "aff_campus_unique_index": "0;0;2", "aff_campus_unique": "Urbana-Champaign;;Pasadena", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "XX73vFMemG", "title": "Co-training and Co-distillation for Quality Improvement and Compression of Language Models", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Knowledge Distillation (KD) compresses computationally expensive pre-trained language models (PLMs) by transferring their knowledge to smaller models, allowing their use in resource-constrained or real-time settings. However, most smaller models fail to surpass the performance of the original larger model, resulting in sacrificing performance to improve inference speed. To address this issue, we propose Co-Training and Co-Distillation (CTCD), a novel framework that improves performance and inference speed together by co-training two models while mutually distilling knowledge. The CTCD framework successfully achieves this based on two significant findings: 1) Distilling knowledge from the smaller model to the larger model during co-training improves the performance of the larger model. 2) The enhanced performance of the larger model further boosts the performance of the smaller model. The CTCD framework shows promise as it can be combined with existing techniques like architecture design or data augmentation, replacing one-way KD methods, to achieve further performance improvement. 
Extensive ablation studies demonstrate the effectiveness of CTCD, and the small model distilled by CTCD outperforms the original larger model by a significant margin of 1.66 on the GLUE benchmark.", "keywords": "Knowledge Distillation", "primary_area": "", "supplementary_material": "", "author": "Hayeon Lee;Rui Hou;Jongpil Kim;Davis Liang;Hongbo Zhang;Sung Ju Hwang;Alexander Min", "authorids": "~Hayeon_Lee1;~Rui_Hou3;~Jongpil_Kim2;~Davis_Liang1;~Hongbo_Zhang6;~Sung_Ju_Hwang1;~Alexander_Min1", "gender": "F;M;M;M;M;;", "homepage": "https://hayeonlee.github.io/;;;https://www.davisliang.com;;;", "dblp": "246/4987;;45/5331;206/6843;;;", "google_scholar": "5DaLgBUAAAAJ;;;9lh2gH8AAAAJ;https://scholar.google.com/citations?hl=en;;yVmNA2MAAAAJ", "or_profile": "~Hayeon_Lee1;~Rui_Hou3;~Jongpil_Kim2;~Davis_Liang1;~Hongbo_Zhang6;~Sung_Ju_Hwang1;~Alexander_Min1", "aff": "Korea Advanced Institute of Science & Technology;Meta Inc. ;;Meta ;Meta Facebook;;Meta Facebook", "aff_domain": "kaist.ac.kr;meta.inc;;meta.com;facebook.com;;meta.com", "position": "PhD student;Research Scientist;;Researcher;Research Scientist;;Research Scientist", "bibtex": "@inproceedings{\nlee2023cotraining,\ntitle={Co-training and Co-distillation for Quality Improvement and Compression of Language Models},\nauthor={Hayeon Lee and Rui Hou and Jongpil Kim and Davis Liang and Hongbo Zhang and Sung Ju Hwang and Alexander Min},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=XX73vFMemG}\n}", "github": "", "project": "", "reviewers": "kfJK;Syen;BAue", "site": "https://openreview.net/forum?id=XX73vFMemG", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;4", "excitement": "3;3;3", "reproducibility": "4;3;3", "correctness": "3;4;3", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.0, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.3333333333333335, "replies_avg": 10, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;;", "linkedin": ";rayhou/;;;;;", "aff_unique_index": "0;1;1;1;1", "aff_unique_norm": "Korea Advanced Institute of Science and Technology;Meta", "aff_unique_dep": ";Meta Platforms, Inc.", "aff_unique_url": "https://www.kaist.ac.kr;https://www.meta.com", "aff_unique_abbr": "KAIST;Meta", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1;1", "aff_country_unique": "South Korea;United States" }, { "id": "XbcprEi57p", "title": "Referring Image Segmentation via Joint Mask Contextual Embedding Learning and Progressive Alignment Network", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Referring image segmentation is a task that aims to predict pixel-wise masks corresponding to objects in an image described by natural language expressions. Previous methods for referring image segmentation employ a cascade framework to break down complex problems into multiple stages. However, its defects also obvious: existing methods within the cascade framework may encounter challenges in both maintaining a strong focus on the most relevant information during specific stages of the referring image segmentation process and rectifying errors propagated from early stages, which can ultimately result in sub-optimal performance. To address these limitations, we propose the Joint Mask Contextual Embedding Learning Network (JMCELN). 
JMCELN is designed to enhance the Cascade Framework by incorporating a Learnable Contextual Embedding and a Progressive Alignment Network (PAN). The Learnable Contextual Embedding module dynamically stores and utilizes reasoning information based on the current mask prediction results, enabling the network to adaptively capture and refine pertinent information for improved mask prediction accuracy. Furthermore, the Progressive Alignment Network (PAN) is introduced as an integral part of JMCELN. PAN leverages the output from the previous layer as a filter for the current output, effectively reducing inconsistencies between predictions from different stages. By iteratively aligning the predictions, PAN guides the Learnable Contextual Embedding to incorporate more discriminative information for reasoning, leading to enhanced prediction quality and a reduction in error propagation. With these methods, we achieved state-of-the-art results on three commonly used benchmarks, especially in more intricate datasets. The code will be released.", "keywords": "segmentation;multi-modality", "primary_area": "", "supplementary_material": "", "author": "ZILING HUANG;Shin'ichi Satoh", "authorids": "~ZILING_HUANG3;~Shin'ichi_Satoh1", "gender": "F;M", "homepage": "http://www.satoh-lab.nii.ac.jp;http://www.satoh-lab.nii.ac.jp/", "dblp": ";50/290", "google_scholar": ";https://scholar.google.co.jp/citations?user=7aEF5cQAAAAJ", "or_profile": "~ZILING_HUANG3;~Shin'ichi_Satoh1", "aff": "The University of Tokyo;National Institute of Informatics", "aff_domain": "u-tokyo.ac.jp;nii.ac.jp", "position": "PhD student;Professor", "bibtex": "@inproceedings{\nhuang2023referring,\ntitle={Referring Image Segmentation via Joint Mask Contextual Embedding Learning and Progressive Alignment Network},\nauthor={ZILING HUANG and Shin'ichi Satoh},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=XbcprEi57p}\n}", "github": "", "project": "", "reviewers": "hADV;mgwW;i668", "site": "https://openreview.net/forum?id=XbcprEi57p", "pdf_size": 0, "rating": "5;5;5", "confidence": "3;3;3", "excitement": "3;3;4", "reproducibility": "4;4;4", "correctness": "4;3;4", "rating_avg": 5.0, "confidence_avg": 3.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.0, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0001-6995-6447", "linkedin": "ziling-huang-7a358b15a;shin-ichi-satoh-a8669573/", "aff_unique_index": "0;1", "aff_unique_norm": "University of Tokyo;National Institute of Informatics", "aff_unique_dep": ";", "aff_unique_url": "https://www.u-tokyo.ac.jp;https://www.nii.ac.jp/", "aff_unique_abbr": "UTokyo;NII", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Japan" }, { "id": "XcNXOVhNlN", "title": "Reasoning Makes Good Annotators : An Automatic Task-specific Rules Distilling Framework for Low-resource Relation Extraction", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Relation extraction is often challenged by insufficient labeled data. Previous methods exploit knowledge from unlabeled data by generating pseudo labels in a self-training pipeline, which suffers a gradual drift problem. Logic rules, a transferable and explainable form of expert knowledge, have achieved promising success by improving the model with weak labels. 
But manually writing comprehensive rules set is challenging and tedious. To alleviate the human labor of writing high-quality rules, in this work, we propose ARIA, an Automatic task-specific Rules distilling framework. Specifically, we guide the pre-trained language model to reason rules as experts and compose them into robust compound rules for data labeling. Besides, ARIA could continuously enrich the rules set to power the labeling ability by discovering reliable model-labeled data for distinguishable rules generation. Experiments on two public datasets demonstrate the effectiveness of ARIA in a low-resource scenario.", "keywords": "relation extraction;language model;rule mining and pattern mining", "primary_area": "", "supplementary_material": "", "author": "Yilin Lu;Juncheng Li;Xiaoqiang Wang;Haochen Shi;Tao Chen;Siliang Tang", "authorids": "~Yilin_Lu1;~Juncheng_Li3;~Xiaoqiang_Wang3;~Haochen_Shi1;~Tao_Chen8;~Siliang_Tang1", "gender": "F;M;M;;M;M", "homepage": "https://www.researchgate.net/profile/Yilin-Lu-8;;https://scholar.google.com/citations?user=Ox5mwngAAAAJ&hl=en;;;https://person.zju.edu.cn/en/siliang", "dblp": ";182/7674-6;72/5143-7;;;44/5693", "google_scholar": ";lm9s-QgAAAAJ;Ox5mwngAAAAJ;;iapJOaoAAAAJ;8e7H3PcAAAAJ", "or_profile": "~Yilin_Lu1;~Juncheng_Li3;~Xiaoqiang_Wang3;~Haochen_Shi1;~Tao_Chen8;~Siliang_Tang1", "aff": "Zhejiang University;Zhejiang University;Zhejiang University;;;Zhejiang University", "aff_domain": "zju.edu.cn;zju.edu.cn;zju.edu.cn;;;zju.edu.cn", "position": "MS student;PhD student;MS student;;;Full Professor", "bibtex": "@inproceedings{\nlu2023reasoning,\ntitle={Reasoning Makes Good Annotators : An Automatic Task-specific Rules Distilling Framework for Low-resource Relation Extraction},\nauthor={Yilin Lu and Juncheng Li and Xiaoqiang Wang and Haochen Shi and Tao Chen and Siliang Tang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=XcNXOVhNlN}\n}", "github": "", "project": "", "reviewers": "Hf2b;WGwB;g2bw", "site": "https://openreview.net/forum?id=XcNXOVhNlN", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;5;4", "excitement": "3;2;3", "reproducibility": "3;1;3", "correctness": "4;2;4", "rating_avg": 3.0, "confidence_avg": 4.333333333333333, "excitement_avg": 2.6666666666666665, "reproducibility_avg": 2.3333333333333335, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0003-2258-1291;;;;0000-0002-7356-9711", "linkedin": ";;xiaoqiang-wang-0596ba166/;;;siliang-tang-4734272a/", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Zhejiang University", "aff_unique_dep": "", "aff_unique_url": "https://www.zju.edu.cn", "aff_unique_abbr": "ZJU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "XclSRY9Wp8", "title": "Modeling Conceptual Attribute Likeness and Domain Inconsistency for Metaphor Detection", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Metaphor detection is an important and challenging task in natural language processing, which aims to distinguish between metaphorical and literal expressions in text. Previous studies mainly leverage the incongruity of source and target domains and contextual clues for detection, neglecting similar attributes shared between source and target concepts in metaphorical expressions. 
Based on conceptual metaphor theory, these similar attributes are essential to infer implicit meanings conveyed by the metaphor. Under the guidance of conceptual metaphor theory, in this paper, we model the likeness of attribute for the first time and propose a novel Attribute Likeness and Domain Inconsistency Learning framework (AIDIL) for word-pair metaphor detection. Specifically, we propose an attribute siamese network to mine similar attributes between source and target concepts. We then devise a domain contrastive learning strategy to learn the semantic inconsistency of concepts in source and target domains. Extensive experiments on four datasets verify that our method significantly outperforms the previous state-of-the-art methods, and demonstrate the generalization ability of our method.", "keywords": "Metaphor detection;Attribute likeness;Attribute siamese network;Conceptual metaphor theory", "primary_area": "", "supplementary_material": "", "author": "Yuan Tian;Nan Xu;Wenji Mao;Daniel Dajun Zeng", "authorids": "~Yuan_Tian10;~Nan_Xu1;~Wenji_Mao1;~Daniel_Dajun_Zeng1", "gender": ";M;F;M", "homepage": ";;;", "dblp": ";;16/2159.html;z/DanielDajunZeng", "google_scholar": ";https://scholar.google.com.hk/citations?user=oJcp__wAAAAJ;h6m4X_AAAAAJ;d-tAMlYAAAAJ", "or_profile": "~Yuan_Tian10;~Nan_Xu1;~Wenji_Mao1;~Daniel_Dajun_Zeng1", "aff": ";Institute of Automation\uff0cChinese Academy of Sciences;Institute of Automation, Chinese Academy of Sciences;Institute of Automation, Chinese Academy of Sciences", "aff_domain": ";ia.ac.cn;ia.ac.cn;ia.ac.cn", "position": ";Assistant Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\ntian2023modeling,\ntitle={Modeling Conceptual Attribute Likeness and Domain Inconsistency for Metaphor Detection},\nauthor={Yuan Tian and Nan Xu and Wenji Mao and Daniel Dajun Zeng},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=XclSRY9Wp8}\n}", "github": "", "project": "", "reviewers": "Gckv;kRQ4;Kgcm", "site": "https://openreview.net/forum?id=XclSRY9Wp8", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;2;4", "excitement": "3;4;4", "reproducibility": "4;2;3", "correctness": "3;4;3", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0000-0002-9046-222X", "linkedin": ";;;", "aff_unique_index": "0;0;0", "aff_unique_norm": "Chinese Academy of Sciences", "aff_unique_dep": "Institute of Automation", "aff_unique_url": "http://www.ia.cas.cn", "aff_unique_abbr": "CAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "Xd2A31vcLd", "title": "ReSee: Responding through Seeing Fine-grained Visual Knowledge in Open-domain Dialogue", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Incorporating visual knowledge into text-only dialogue systems has become a potential direction to imitate the way humans think, imagine, and communicate. However, existing multimodal dialogue systems are either confined by the scale and quality of available datasets or the coarse concept of visual knowledge. 
To address these issues, we provide a new paradigm of constructing multimodal dialogues as well as two datasets extended from text-only dialogues under such paradigm (ReSee-$\\texttt{WoW}$, ReSee-$\\texttt{DD}$). We propose to explicitly split the visual knowledge into finer granularity (\"turn-level\" and \"entity-level\"). To further boost the accuracy and diversity of augmented visual information, we retrieve them from the Internet or a large image dataset. To demonstrate the superiority and universality of the provided visual knowledge, we propose a simple but effective framework ReSee to add visual representation into vanilla dialogue models by modality concatenations. We also conduct extensive experiments and ablations w.r.t. different model configurations and visual knowledge settings. Empirical, encouraging results not only demonstrate the effectiveness of introducing visual knowledge at both entity and turn level but also verify the proposed model ReSee outperforms several state-of-the-art methods on automatic and human evaluations. By leveraging text and vision knowledge, ReSee can produce informative responses with real-world visual concepts. Our code is available at https://github.com/ImKeTT/ReSee.", "keywords": "visual dialogue;multimodal dataset;knowledge-enhanced dialogue;pre-trained language model", "primary_area": "", "supplementary_material": "", "author": "Haoqin Tu;Yitong Li;Fei Mi;Zhongliang Yang", "authorids": "~Haoqin_Tu1;~Yitong_Li2;~Fei_Mi1;~Zhongliang_Yang1", "gender": "M;M;M;", "homepage": "https://www.haqtu.me;https://lrank.github.io/;https://mifei.github.io/;", "dblp": "309/7386;;161/0068;", "google_scholar": "https://scholar.google.com/citations?hl=en;ho4FGlsAAAAJ;gX3493QAAAAJ;jQ8vpPkAAAAJ", "or_profile": "~Haoqin_Tu1;~Yitong_Li2;~Fei_Mi1;~Zhongliang_Yang1", "aff": "University of Chinese Academy of Sciences;Huawei Technologies Co., Ltd.;;", "aff_domain": "ucas.ac.cn;huawei.com;;", "position": "MS student;Research Scientist;;", "bibtex": "@inproceedings{\ntu2023resee,\ntitle={ReSee: Responding through Seeing Fine-grained Visual Knowledge in Open-domain Dialogue},\nauthor={Haoqin Tu and Yitong Li and Fei Mi and Zhongliang Yang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=Xd2A31vcLd}\n}", "github": "", "project": "", "reviewers": "ks6W;oqrU;cBSK", "site": "https://openreview.net/forum?id=Xd2A31vcLd", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;4", "excitement": "4;3;4", "reproducibility": "3;4;4", "correctness": "4;3;3", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;", "linkedin": ";;;", "aff_unique_index": "0;1", "aff_unique_norm": "University of Chinese Academy of Sciences;Huawei", "aff_unique_dep": ";Huawei Technologies", "aff_unique_url": "http://www.ucas.ac.cn;https://www.huawei.com", "aff_unique_abbr": "UCAS;Huawei", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "id": "XhR6ebeEXo", "title": "Good Meta-tasks Make A Better Cross-lingual Meta-transfer Learning for Low-resource Languages", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Model-agnostic meta-learning has garnered attention as a promising technique for enhancing few-shot 
cross-lingual transfer learning in low-resource scenarios. However, little attention was paid to the impact of data selection strategies on this cross-lingual meta-transfer method, particularly the sampling of cross-lingual meta-training data (i.e. meta-tasks) at the syntactic level to reduce language gaps. In this paper, we propose a Meta-Task Collector-based Cross-lingual Meta-Transfer framework (MeTaCo-XMT) to adapt different data selection strategies to construct meta-tasks for meta-transfer learning. Syntactic differences have an effect on transfer performance, so we consider a syntactic similarity sampling strategy and propose a syntactic distance metric model consisting of a syntactic encoder block based on the pre-trained model and a distance metric block using Word Mover's Distance (WMD). Additionally, we conduct experiments with three different data selection strategies to instantiate our framework and analyze their performance impact. Experimental results on two multilingual NLP datasets, Wikiann and TydiQA, demonstrate the significant superiority of our approach compared to existing strong baselines.", "keywords": "Few-shot Cross-lingual Transfer Learning;Low-resource Languages;Model-agnostic Meta-learning", "primary_area": "", "supplementary_material": "", "author": "Linjuan Wu;Zongyi Guo;Baoliang Cui;Haihong Tang;Weiming Lu", "authorids": "~Linjuan_Wu1;~Zongyi_Guo1;~Baoliang_Cui1;~Haihong_Tang1;~Weiming_Lu1", "gender": "F;M;M;F;", "homepage": ";https://www.linkedin.cn/incareer/in/ACoAABQq-0YB0_bM3S-4KPK0XxB8QNRYGGsLs5o;;;", "dblp": "https://dblp.uni-trier.de/pid/262/2608;;;234/6882;", "google_scholar": "https://scholar.google.com.hk/citations?user=lZbrKQEAAAAJ;;00Gxg34AAAAJ;https://scholar.google.com/citations?hl=en;", "or_profile": "~Linjuan_Wu1;~Zongyi_Guo1;~Baoliang_Cui1;~Haihong_Tang1;~Weiming_Lu1", "aff": "Zhejiang University;;;Alibaba Group;", "aff_domain": "zju.edu.cn;;;alibaba-inc.com;", "position": "PhD student;;;Researcher;", "bibtex": "@inproceedings{\nwu2023good,\ntitle={Good Meta-tasks Make A Better Cross-lingual Meta-transfer Learning for Low-resource Languages},\nauthor={Linjuan Wu and Zongyi Guo and Baoliang Cui and Haihong Tang and Weiming Lu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=XhR6ebeEXo}\n}", "github": "", "project": "", "reviewers": "5wKC;CVtE;cvjj", "site": "https://openreview.net/forum?id=XhR6ebeEXo", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;3;4", "excitement": "3;3;3", "reproducibility": "4;5;4", "correctness": "4;3;3", "rating_avg": 3.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.0, "reproducibility_avg": 4.333333333333333, "correctness_avg": 3.3333333333333335, "replies_avg": 10, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0000-0002-7103-975X;", "linkedin": ";;https://www.linkedin.cn/injobs/in/%E4%BF%9D%E8%89%AF-%E5%B4%94-7b11a592;;", "aff_unique_index": "0;1", "aff_unique_norm": "Zhejiang University;Alibaba Group", "aff_unique_dep": ";", "aff_unique_url": "https://www.zju.edu.cn;https://www.alibaba.com", "aff_unique_abbr": "ZJU;Alibaba", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "id": "XjwNxSE0v8", "title": "Learning Easily Updated General Purpose Text Representations with Adaptable Task-Specific Prefix", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "Many real-world 
applications require making multiple predictions from the same text. Fine-tuning a large pre-trained language model for each downstream task causes computational burdens in the inference time due to several times of forward passes. To amortize the computational cost, freezing the language model and building lightweight models for downstream tasks based on fixed text representations are common solutions. Accordingly, how to learn fixed but general text representations that can generalize well to unseen downstream tasks becomes a challenge. Previous works have shown that the generalizability of representations can be improved by fine-tuning the pre-trained language model with some source tasks in a multi-tasking way. In this work, we propose a prefix-based method to learn the fixed text representations with source tasks. We learn a task-specific prefix for each source task independently and combine them to get the final representations. Our experimental results show that prefix-based training performs better than multi-tasking training and can update the text representations at a smaller computational cost than multi-tasking training.", "keywords": "text representations;prefix tuning", "primary_area": "", "supplementary_material": "", "author": "Kuan-Hao Huang;Liang Tan;Rui Hou;Sinong Wang;Amjad Almahairi;Ruty Rinott", "authorids": "~Kuan-Hao_Huang1;~Liang_Tan1;~Rui_Hou3;~Sinong_Wang1;~Amjad_Almahairi1;~Ruty_Rinott1", "gender": "M;M;M;M;M;", "homepage": "https://khhuang.me;;;https://sites.google.com/site/snongwang/;;", "dblp": "24/255;;;140/0795;167/5984;29/8323", "google_scholar": "PIWnCdYAAAAJ;N3rZr9kAAAAJ;;CYMAfxsAAAAJ;https://scholar.google.ca/citations?user=WbYAa7IAAAAJ;", "or_profile": "~Kuan-Hao_Huang1;~Liang_Tan1;~Rui_Hou3;~Sinong_Wang1;~Amjad_Almahairi1;~Ruty_Rinott1", "aff": "University of California, Los Angeles;Meta Facebook;Meta Inc. 
;Meta Facebook;Meta Facebook;", "aff_domain": "ucla.edu;meta.com;meta.inc;fb.com;facebook.com;", "position": "PhD student;Researcher;Research Scientist;Research scientist;Research Scientist;", "bibtex": "@inproceedings{\nhuang2023learning,\ntitle={Learning Easily Updated General Purpose Text Representations with Adaptable Task-Specific Prefix},\nauthor={Kuan-Hao Huang and Liang Tan and Rui Hou and Sinong Wang and Amjad Almahairi and Ruty Rinott},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=XjwNxSE0v8}\n}", "github": "", "project": "", "reviewers": "asuG;hBJN;tLRw;VDm4", "site": "https://openreview.net/forum?id=XjwNxSE0v8", "pdf_size": 0, "rating": "3;3;3;3", "confidence": "4;4;3;3", "excitement": "3;3;4;2", "reproducibility": "4;4;4;3", "correctness": "4;4;4;2", "rating_avg": 3.0, "confidence_avg": 3.5, "excitement_avg": 3.0, "reproducibility_avg": 3.75, "correctness_avg": 3.5, "replies_avg": 13, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;", "linkedin": ";liang-tan-6646a484/;rayhou/;wang-s-simon-194512a7;;", "aff_unique_index": "0;1;1;1;1", "aff_unique_norm": "University of California, Los Angeles;Meta", "aff_unique_dep": ";Meta Platforms, Inc.", "aff_unique_url": "https://www.ucla.edu;https://meta.com", "aff_unique_abbr": "UCLA;Meta", "aff_campus_unique_index": "0", "aff_campus_unique": "Los Angeles;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "XkexLrJDss", "title": "Exploring the Potential of Large Language Models in Generating Code-Tracing Questions for Introductory Programming Courses", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "In this paper, we explore the application of large language models (LLMs) for generating code-tracing questions in introductory programming courses. We designed targeted prompts for GPT4, guiding it to generate code-tracing questions based on code snippets and descriptions. We established a set of human evaluation metrics to assess the quality of questions produced by the model compared to those created by human experts. Our analysis provides insights into the capabilities and potential of LLMs in generating diverse code-tracing questions. Additionally, we present a unique dataset of human and LLM-generated tracing questions, serving as a valuable resource for both the education and NLP research communities. 
This work contributes to the ongoing dialogue on the potential uses of LLMs in educational settings.", "keywords": "Large Language Model;Natural Language Processing;Computer Science Education;Novice Programming", "primary_area": "", "supplementary_material": "", "author": "Aysa Xuemo Fan;Haoran Ranran Zhang;Luc Paquette;Rui Zhang", "authorids": "~Aysa_Xuemo_Fan1;~Haoran_Ranran_Zhang1;~Luc_Paquette2;~Rui_Zhang7", "gender": "Non-Binary;M;M;M", "homepage": "https://github.com/aysafanxm;https://windchimeran.github.io;;https://ryanzhumich.github.io/", "dblp": ";;;60/2536-37", "google_scholar": ";aDqdjcUAAAAJ;8sFPUZcAAAAJ;nhuB5CEAAAAJ", "or_profile": "~Aysa_Xuemo_Fan1;~Haoran_Ranran_Zhang1;~Luc_Paquette2;~Rui_Zhang7", "aff": "University of Illinois, Urbana Champaign;Pennsylvania State University;University of Illinois Urbana-Champaign;Pennsylvania State University", "aff_domain": "uiuc.edu;psu.edu;education.illinois.edu;psu.edu", "position": "PhD student;PhD student;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\nfan2023exploring,\ntitle={Exploring the Potential of Large Language Models in Generating Code-Tracing Questions for Introductory Programming Courses},\nauthor={Aysa Xuemo Fan and Haoran Ranran Zhang and Luc Paquette and Rui Zhang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=XkexLrJDss}\n}", "github": "", "project": "", "reviewers": "Qaqj;TK2P;t9s4", "site": "https://openreview.net/forum?id=XkexLrJDss", "pdf_size": 0, "rating": "4;4;4", "confidence": "5;5;3", "excitement": "4;2;2", "reproducibility": "4;4;4", "correctness": "4;3;2", "rating_avg": 4.0, "confidence_avg": 4.333333333333333, "excitement_avg": 2.6666666666666665, "reproducibility_avg": 4.0, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;", "linkedin": "xuemo-aysa-fan/;;;", "aff_unique_index": "0;1;0;1", "aff_unique_norm": "University of Illinois Urbana-Champaign;Pennsylvania State University", "aff_unique_dep": ";", "aff_unique_url": "https://illinois.edu;https://www.psu.edu", "aff_unique_abbr": "UIUC;PSU", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Urbana-Champaign;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "XlIrJUKTgS", "title": "Improving Seq2Seq Grammatical Error Correction via Decoding Interventions", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "The sequence-to-sequence (Seq2Seq) approach has recently been widely used in grammatical error correction (GEC) and shows promising performance.\nHowever, the Seq2Seq GEC approach still suffers from two issues.\nFirst, a Seq2Seq GEC model can only be trained on parallel data, which, in GEC task, is often noisy and limited in quantity.\nSecond, the decoder of a Seq2Seq GEC model lacks an explicit awareness of the correctness of the token being generated.\nIn this paper, we propose a unified decoding intervention framework that employs an external critic to assess the appropriateness of the token to be generated incrementally, and then dynamically influence the choice of the next token.\nWe discover and investigate two types of critics: a pre-trained left-to-right language model critic and an incremental target-side grammatical error detector critic.\nThrough extensive experiments on English and Chinese datasets, our framework consistently outperforms strong baselines and achieves results 
competitive with state-of-the-art methods.", "keywords": "grammatical error correction;decoding;sequence-to-sequence;seq2seq", "primary_area": "", "supplementary_material": "", "author": "Houquan Zhou;Yumeng Liu;Zhenghua Li;Min Zhang;Bo Zhang;Chen Li;Ji Zhang;Fei Huang", "authorids": "~Houquan_Zhou1;~Yumeng_Liu2;~Zhenghua_Li1;~Min_Zhang9;~Bo_Zhang15;~Chen_Li20;~Ji_Zhang3;~Fei_Huang1", "gender": "M;M;M;M;M;Not Specified;;M", "homepage": "https://cv.hqzhou.com;https://github.com/ymliucs;https://web.suda.edu.cn/zhli13/;https://zhangmin-nlp-ai.github.io/;;;;https://sites.google.com/view/fei-huang", "dblp": "221/7847;;72/8937;83/5342-5;;;86/1953-11;h/FeiHuang.html", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;;https://scholar.google.ca/citations?user=faXAgZQAAAAJ;https://scholar.google.com/citations?hl=zh-CN;xo6U21oAAAAJ;FFISHC4AAAAJ;cgnuJDUAAAAJ;9r98PpoAAAAJ", "or_profile": "~Houquan_Zhou1;~Yumeng_Liu2;~Zhenghua_Li1;~Min_Zhang9;~Bo_Zhang15;~Chen_Li20;~Ji_Zhang3;~Fei_Huang2", "aff": "Soochow University, China;Soochow University, China;Soochow University;Harbin Institute of Technology, Shenzhen;;Alibaba Group;Alibaba Group;Alibaba Group US", "aff_domain": "suda.edu.cn;suda.edu.cn;suda.edu.cn;hit.edu.cn;;alibaba-inc.com;alibaba-inc.com;alibaba-inc.com", "position": "PhD student;MS student;Professor;Full Professor;;Principal Researcher;Senior Staff Engineer;Senior Research Director", "bibtex": "@inproceedings{\nzhou2023improving,\ntitle={Improving Seq2Seq Grammatical Error Correction via Decoding Interventions},\nauthor={Houquan Zhou and Yumeng Liu and Zhenghua Li and Min Zhang and Bo Zhang and Chen Li and Ji Zhang and Fei Huang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=XlIrJUKTgS}\n}", "github": "", "project": "", "reviewers": "wsFU;cq4q;izec;hcJu", "site": "https://openreview.net/forum?id=XlIrJUKTgS", "pdf_size": 0, "rating": "3;3;3;3", "confidence": "5;3;3;4", "excitement": "2;3;4;2", "reproducibility": "3;3;4;3", "correctness": "3;3;4;4", "rating_avg": 3.0, "confidence_avg": 3.75, "excitement_avg": 2.75, "reproducibility_avg": 3.25, "correctness_avg": 3.5, "replies_avg": 13, "authors#_avg": 8, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0009-0004-4607-7641;;0000-0002-3911-801X;;;;;", "linkedin": ";;;;;;;fei-huang-cas-cmu", "aff_unique_index": "0;0;0;1;2;2;2", "aff_unique_norm": "Soochow University;Harbin Institute of Technology;Alibaba Group", "aff_unique_dep": ";;", "aff_unique_url": "https://www.soochow.edu.cn;http://en.hhit.edu.cn/;https://www.alibaba.com", "aff_unique_abbr": "Soochow U;HIT;Alibaba", "aff_campus_unique_index": "1", "aff_campus_unique": ";Shenzhen", "aff_country_unique_index": "0;0;0;0;0;0;1", "aff_country_unique": "China;United States" }, { "id": "XmS9J3Lvip", "title": "Towards Formality-Aware Neural Machine Translation by Leveraging Context Information", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "Formality is one of the most important linguistic properties to determine the naturalness of translation. Although a target-side context contains formality-related tokens, the sparsity within the context makes it difficult for context-aware neural machine translation (NMT) models to properly discern them. In this paper, we introduce a novel training method to explicitly inform the NMT model by pinpointing key informative tokens using a formality classifier. 
Given a target context, the formality classifier guides the model to concentrate on the formality-related tokens within the context. Additionally, we modify the standard cross-entropy loss, especially toward the formality-related tokens obtained from the classifier. Experimental results show that our approaches not only improve overall translation quality but also reflect the appropriate formality from the target context.", "keywords": "Neural Machine Translation;Context-Aware Translation;Formality-Aware Translation;Formality Control", "primary_area": "", "supplementary_material": "", "author": "Dohee Kim;Yujin Baek;Soyoung Yang;Jaegul Choo", "authorids": "~Dohee_Kim1;~Yujin_Baek1;~Soyoung_Yang1;~Jaegul_Choo1", "gender": "F;F;F;M", "homepage": ";;;https://sites.google.com/site/jaegulchoo/", "dblp": ";225/4942;239/8032;07/2074", "google_scholar": "https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=en;5Mw3sVAAAAAJ;GHJYsLEAAAAJ", "or_profile": "~Dohee_Kim1;~Yujin_Baek1;~Soyoung_Yang1;~Jaegul_Choo1", "aff": "Korea Advanced Institute of Science & Technology;KAIST;SAIT;Korea Advanced Institute of Science & Technology", "aff_domain": "kaist.ac.kr;ee.kaist.ac.kr;samsung.co.kr;kaist.ac.kr", "position": "MS student;PhD student;Intern;Associate Professor", "bibtex": "@inproceedings{\nkim2023towards,\ntitle={Towards Formality-Aware Neural Machine Translation by Leveraging Context Information},\nauthor={Dohee Kim and Yujin Baek and Soyoung Yang and Jaegul Choo},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=XmS9J3Lvip}\n}", "github": "", "project": "", "reviewers": "P8XY;7DuG;9XVS", "site": "https://openreview.net/forum?id=XmS9J3Lvip", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;4", "excitement": "3;3;2", "reproducibility": "4;4;3", "correctness": "3;3;3", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 2.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;", "linkedin": ";;soyoung-yang-b96032166/;", "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology;Southern Alberta Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.kaist.ac.kr;https://www.sait.ca", "aff_unique_abbr": "KAIST;SAIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "South Korea;Canada" }, { "id": "XpK2LCt8iM", "title": "Turn-Level Active Learning for Dialogue State Tracking", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Dialogue state tracking (DST) plays an important role in task-oriented dialogue systems. However, collecting a large amount of turn-by-turn annotated dialogue data is costly and inefficient. In this paper, we propose a novel turn-level active learning framework for DST to actively select turns in dialogues to annotate. Given the limited labelling budget, experimental results demonstrate the effectiveness of selective annotation of dialogue turns. 
Additionally, our approach can effectively achieve comparable DST performance to traditional training approaches with significantly less annotated data, which provides a more efficient way to annotate new dialogue data.", "keywords": "dialogue state tracking;active learning;data annotation", "primary_area": "", "supplementary_material": "", "author": "Zihan Zhang;Meng Fang;Fanghua Ye;Ling Chen;Mohammad Reza Namazi Rad", "authorids": "~Zihan_Zhang3;~Meng_Fang1;~Fanghua_Ye1;~Ling_Chen5;~Mohammad_Reza_Namazi_Rad2", "gender": "M;M;M;F;M", "homepage": "https://zhangzihangit.github.io/;;https://www.fanghuaye.xyz/;https://profiles.uts.edu.au/Ling.Chen;https://www.linkedin.com/in/mo-namazi/", "dblp": ";67/463;203/0957;17/1237-6;", "google_scholar": "https://scholar.google.com.au/citations?hl=en;IcNYP1oAAAAJ;UXN7iUsAAAAJ;https://scholar.google.com.au/citations?user=L5aYWQcAAAAJ;https://scholar.google.com.au/citations?user=uoGBVTYAAAAJ", "or_profile": "~Zihan_Zhang3;~Meng_Fang1;~Fanghua_Ye1;~Ling_Chen5;~Mohammad_Reza_Namazi_Rad2", "aff": "University of Technology Sydney;Eindhoven University of Technology;University College London;University of Technology Sydney;", "aff_domain": "uts.edu.au;tue.nl;ucl.ac.uk;uts.edu.au;", "position": "PhD student;Assistant Professor;PhD student;Full Professor;", "bibtex": "@inproceedings{\nzhang2023turnlevel,\ntitle={Turn-Level Active Learning for Dialogue State Tracking},\nauthor={Zihan Zhang and Meng Fang and Fanghua Ye and Ling Chen and Mohammad Reza Namazi Rad},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=XpK2LCt8iM}\n}", "github": "", "project": "", "reviewers": "ttCV;Es6J;fWKM", "site": "https://openreview.net/forum?id=XpK2LCt8iM", "pdf_size": 0, "rating": "5;5;5", "confidence": "5;4;4", "excitement": "4;4;4", "reproducibility": "4;4;3", "correctness": "4;4;4", "rating_avg": 5.0, "confidence_avg": 4.333333333333333, "excitement_avg": 4.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0000-0002-6468-5729;", "linkedin": "zihan-zhang-a40855172/;;fanghua-ye-81084587/;;", "aff_unique_index": "0;1;2;0", "aff_unique_norm": "University of Technology Sydney;Eindhoven University of Technology;University College London", "aff_unique_dep": ";;", "aff_unique_url": "https://www.uts.edu.au;https://www.tue.nl;https://www.ucl.ac.uk", "aff_unique_abbr": "UTS;TU/e;UCL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2;0", "aff_country_unique": "Australia;Netherlands;United Kingdom" }, { "id": "Xqhdpk0Qrj", "title": "GLEN: Generative Retrieval via Lexical Index Learning", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Generative retrieval shed light on a new paradigm of document retrieval, aiming to directly generate the identifier of a relevant document for a query. While it takes advantage of bypassing the construction of auxiliary index structures, existing studies face two significant challenges: (i) the discrepancy between the knowledge of pre-trained language models and identifiers and (ii) the gap between training and inference that poses difficulty in learning to rank. To overcome these challenges, we propose a novel generative retrieval method, namely Generative retrieval via LExical iNdex learning (GLEN). 
For training, GLEN effectively exploits a dynamic lexical identifier using a two-phase index learning strategy, enabling it to learn meaningful lexical identifiers and relevance signals between queries and documents. For inference, GLEN utilizes collision-free inference, using identifier weights to rank documents without additional overhead. Experimental results prove that GLEN achieves state-of-the-art or competitive performance against existing generative retrieval methods on various benchmark datasets, e.g., NQ320k, MS MARCO, and BEIR. The code is available at https://github.com/skleee/GLEN.", "keywords": "Generative retrieval;Document retrieval;Lexical index", "primary_area": "", "supplementary_material": "", "author": "Sunkyung Lee;Minjin Choi;Jongwuk Lee", "authorids": "~Sunkyung_Lee1;~Minjin_Choi1;~Jongwuk_Lee1", "gender": "F;M;M", "homepage": "https://skleee.github.io/;https://sites.google.com/view/minjinchoi;", "dblp": "200/5020-1;226/4796;04/3445", "google_scholar": "https://scholar.google.com/citations?hl=ko;https://scholar.google.co.kr/citations?user=FwmzoJkAAAAJ;", "or_profile": "~Sunkyung_Lee1;~Minjin_Choi1;~Jongwuk_Lee1", "aff": "SungKyunKwan University;SungKyunKwan University;Sungkyunkwan University", "aff_domain": "skku.edu;skku.edu;skku.edu", "position": "MS student;PhD student;Associate Professor", "bibtex": "@inproceedings{\nlee2023glen,\ntitle={{GLEN}: Generative Retrieval via Lexical Index Learning},\nauthor={Sunkyung Lee and Minjin Choi and Jongwuk Lee},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=Xqhdpk0Qrj}\n}", "github": "", "project": "", "reviewers": "L2RC;XCay;CqNg", "site": "https://openreview.net/forum?id=Xqhdpk0Qrj", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;4;4", "excitement": "3;4;4", "reproducibility": "3;5;4", "correctness": "4;5;3", "rating_avg": 5.0, "confidence_avg": 4.0, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.0, "correctness_avg": 4.0, "replies_avg": 10, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-8178-6708;0000-0001-5151-6056;", "linkedin": "sunkyunglee;minjin-choi-720832159/;", "aff_unique_index": "0;0;0", "aff_unique_norm": "Sungkyunkwan University", "aff_unique_dep": "", "aff_unique_url": "https://www.skku.edu", "aff_unique_abbr": "SKKU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "South Korea" }, { "id": "Xt1JbFofwP", "title": "TabPrompt: Graph-based Pre-training and Prompting for Few-shot Table Understanding", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Table Understanding (TU) is a crucial aspect of information extraction that enables machines to comprehend the semantics behind tabular data. However, existing methods of TU cannot deal with the scarcity of labeled tabular data. In addition, these methods primarily focus on the textual content within the table, disregarding the inherent topological information of the table. This can lead to a misunderstanding of the tabular semantics. In this paper, we propose TabPrompt, a new framework to tackle the above challenges. Prompt-based learning has gained popularity due to its exceptional performance in few-shot learning. Thus, we introduce prompt-based learning to handle few-shot TU. 
Furthermore, Graph Contrastive Learning (Graph CL) demonstrates remarkable capabilities in capturing topological information, making Graph Neural Networks an ideal method for encoding tables. Hence, we develop a novel Graph CL method tailored to tabular data. This method serves as the pretext task during the pre-training phase, allowing the generation of vector representations that incorporate the table's topological information. Experimental results, in which our method outperforms all strong baselines, demonstrate its strength in few-shot table understanding tasks.", "keywords": "Table Understanding;Prompt-based Learning;Graph Contrastive Learning;Graph Neural Network", "primary_area": "", "supplementary_material": "", "author": "Rihui Jin;Jianan Wang;Wei Tan;Yongrui Chen;Guilin Qi;Wang Hao", "authorids": "~Rihui_Jin1;~Jianan_Wang4;~Wei_Tan5;~Yongrui_Chen1;~Guilin_Qi2;~Wang_Hao4", "gender": ";F;F;M;M;M", "homepage": ";https://www.zhihu.com/people/feb3c159e6854aaf003881e61a0eb776;http://alibaba-inc.com;;https://cse.seu.edu.cn/_s191/2023/1024/c23024a469541/page.psp;", "dblp": "362/8622;;;143/0948-2.html;71/5935;", "google_scholar": ";;;8ZjIHyEAAAAJ;;", "or_profile": "~Rihui_Jin1;~Jianan_Wang4;~Wei_Tan5;~Yongrui_Chen1;~Guilin_Qi2;~Wang_Hao4", "aff": "Southeast University;;;Southeast University;Southeast University;", "aff_domain": "seu.edu.cn;;;seu.edu.cn;seu.edu.cn;", "position": "MS student;;;PhD student;Full Professor;", "bibtex": "@inproceedings{\njin2023tabprompt,\ntitle={TabPrompt: Graph-based Pre-training and Prompting for Few-shot Table Understanding},\nauthor={Rihui Jin and Jianan Wang and Wei Tan and Yongrui Chen and Guilin Qi and Wang Hao},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=Xt1JbFofwP}\n}", "github": "", "project": "", "reviewers": "ss3f;MxSf;k7YW", "site": "https://openreview.net/forum?id=Xt1JbFofwP", "pdf_size": 0, "rating": "2;2;2", "confidence": "3;4;4", "excitement": "4;3;3", "reproducibility": "3;4;4", "correctness": "3;4;3", "rating_avg": 2.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-2384-2505;;;0000-0001-8934-3920;0000-0003-0150-7236;", "linkedin": ";;;;;https://www.linkedin.cn/incareer/in/ACoAAERCs1YBdcl0VRd8kzVwigPvyr9S6_KRU_4", "aff_unique_index": "0;0;0", "aff_unique_norm": "Southeast University", "aff_unique_dep": "", "aff_unique_url": "https://www.seu.edu.cn/", "aff_unique_abbr": "SEU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "XtlquCY7qs", "title": "MADNet: Maximizing Addressee Deduction Expectation for Multi-Party Conversation Generation", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Modeling multi-party conversations (MPCs) with graph neural networks has been proven effective at capturing complicated and graphical information flows. However, existing methods rely heavily on the necessary addressee labels and can only be applied to an ideal setting where each utterance must be tagged with an \u201c@\u201d or other equivalent addressee label. 
To study the scarcity of addressee labels which is a common issue in MPCs, we propose MADNet that maximizes addressee deduction expectation in heterogeneous graph neural networks for MPC generation. Given an MPC with a few addressee labels missing, existing methods fail to build a consecutively connected conversation graph, but only a few separate conversation fragments instead. To ensure message passing between these conversation fragments, four additional types of latent edges are designed to complete a fully-connected graph. Besides, to optimize the edge-type-dependent message passing for those utterances without addressee labels, an Expectation-Maximization-based method that iteratively generates silver addressee labels (E step), and optimizes the quality of generated responses (M step), is designed. Experimental results on two Ubuntu IRC channel benchmarks show that MADNet outperforms various baseline models on the task of MPC generation, especially under the more common and challenging setting where part of addressee labels are missing.", "keywords": "Dialogue System;Multi-Party Conversation;Addressee Deduction;Latent Edge;Expectation-Maximization", "primary_area": "", "supplementary_material": "", "author": "Jia-Chen Gu;Chao-Hong Tan;Caiyuan Chu;Zhen-Hua Ling;Chongyang Tao;Quan Liu;Cong Liu", "authorids": "~Jia-Chen_Gu1;~Chao-Hong_Tan1;~Caiyuan_Chu1;~Zhen-Hua_Ling1;~Chongyang_Tao1;~Quan_Liu1;~Cong_Liu6", "gender": "M;;M;M;M;M;M", "homepage": "https://jasonforjoy.github.io/;;http://staff.ustc.edu.cn/~zhling/;;http://home.ustc.edu.cn/~quanliu/;;", "dblp": "93/3604.html;282/0435;70/5210;;;95/6404-6.html;314/9706", "google_scholar": "https://scholar.google.com/citations?hl=en;FkWdcrcAAAAJ;f8jRR3EAAAAJ;x_cOKuwAAAAJ;https://scholar.google.com.hk/citations?user=ha4mKNcAAAAJ;;", "or_profile": "~Jia-Chen_Gu1;~Chao-Hong_Tan1;~Zhen-Hua_Ling1;~Chongyang_Tao1;~Quan_Liu1;~Cong_Liu6;~Chu_Caiyuan1", "aff": "University of Science and Technology of China;University of Science and Technology of China;University of Science and Technology of China;Microsoft;IFLYTEK CO.LTD.;iFLYTEK;Chongqing University", "aff_domain": "ustc.edu.cn;ustc.edu.cn;ustc.edu.cn;microsoft.com;iflytek.com;iflytek.com;cqu.edu.cn", "position": "Postdoc;PhD student;Professor;Researcher;Principal Researcher;Researcher;MS student", "bibtex": "@inproceedings{\ngu2023madnet,\ntitle={{MADN}et: Maximizing Addressee Deduction Expectation for Multi-Party Conversation Generation},\nauthor={Jia-Chen Gu and Chao-Hong Tan and Caiyuan Chu and Zhen-Hua Ling and Chongyang Tao and Quan Liu and Cong Liu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=XtlquCY7qs}\n}", "github": "", "project": "", "reviewers": "rqCf;JM8Y;wap4;dY2D;SdPB", "site": "https://openreview.net/forum?id=XtlquCY7qs", "pdf_size": 0, "rating": "4;4;4;4;4", "confidence": "4;3;4;4;3", "excitement": "3;4;3;4;4", "reproducibility": "4;4;3;4;3", "correctness": "4;4;4;3;4", "rating_avg": 4.0, "confidence_avg": 3.6, "excitement_avg": 3.6, "reproducibility_avg": 3.6, "correctness_avg": 3.8, "replies_avg": 16, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;0009-0007-1116-9046;;", "linkedin": ";;;;;;", "aff_unique_index": "0;0;0;1;2;2;3", "aff_unique_norm": "University of Science and Technology of China;Microsoft;iFLYTEK;Chongqing University", "aff_unique_dep": ";Microsoft Corporation;;", "aff_unique_url": 
"http://www.ustc.edu.cn;https://www.microsoft.com;https://www.iflytek.com;https://www.cqu.edu.cn", "aff_unique_abbr": "USTC;Microsoft;iFLYTEK;CQU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;0;0;0", "aff_country_unique": "China;United States" }, { "id": "XySNnzF9Ir", "title": "Automatic Evaluate Dialogue Appropriateness by Using Dialogue Act", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Evaluation of dialogue systems requires assessing various aspects, among which appropriateness holds significance as a core element of communicative language competence. However, current evaluations heavily rely on human judgments, which are time-consuming, labor-intensive, prone to biases, and lacking objectivity. In this paper, we introduce Dialogue Act Appropriateness (DAA), a novel method that utilizes the underlying patterns of dialogue act transitions to evaluate the appropriateness of chatbot responses. We learn transition patterns from human-human dialogue corpora, evaluating chatbot appropriateness by measuring the similarity of their transition patterns to those observed in human-human dialogues. To validate DAA, we annotate a test dataset by manually evaluating the appropriateness of dialogues from multiple chatbot systems. The experimental results demonstrate a strong correlation between our evaluation metric and human ratings, establishing the reliability of DAA as a measure of dialogue appropriateness.", "keywords": "Automatic Dialogue System Evaluation; Dialogue Evaluation; Dialogue System", "primary_area": "", "supplementary_material": "", "author": "Bao Chen;Yuanjie Wang;Zeming Liu;Yuhang Guo", "authorids": "~Bao_Chen1;~Yuanjie_Wang2;~Zeming_Liu1;~Yuhang_Guo1", "gender": "M;M;;", "homepage": "https://github.com/xba0;;;", "dblp": ";;;74/10083-1", "google_scholar": "zTWwkhgAAAAJ;https://scholar.google.com.hk/citations?user=iODIim4AAAAJ;;", "or_profile": "~Bao_Chen1;~Yuanjie_Wang2;~Zeming_Liu1;~Yuhang_Guo1", "aff": "Beijing Institute of Technology;;;Beijing Institute of Technology", "aff_domain": "bit.edu.cn;;;bit.edu.cn", "position": "MS student;;;Lecturer", "bibtex": "@inproceedings{\nchen2023automatic,\ntitle={Automatic Evaluate Dialogue Appropriateness by Using Dialogue Act},\nauthor={Bao Chen and Yuanjie Wang and Zeming Liu and Yuhang Guo},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=XySNnzF9Ir}\n}", "github": "", "project": "", "reviewers": "gNvL;LjnF;sYa4;e6re", "site": "https://openreview.net/forum?id=XySNnzF9Ir", "pdf_size": 0, "rating": "2;2;2;2", "confidence": "3;3;4;4", "excitement": "4;3;2;4", "reproducibility": "3;2;2;4", "correctness": "3;2;2;4", "rating_avg": 2.0, "confidence_avg": 3.5, "excitement_avg": 3.25, "reproducibility_avg": 2.75, "correctness_avg": 2.75, "replies_avg": 13, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;", "linkedin": ";;;", "aff_unique_index": "0;0", "aff_unique_norm": "Beijing Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "http://www.bit.edu.cn/", "aff_unique_abbr": "BIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "id": "XySq36VD0U", "title": "A Lightweight Method to Generate Unanswerable Questions in English", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "If a question cannot be answered with the available 
information, robust systems for question answering (QA) should know *not* to answer.\nOne way to build QA models that do this is with additional training data comprised of unanswerable questions, created either by employing annotators or through automated methods for unanswerable question generation.\nTo show that the model complexity of existing automated approaches is not justified, we examine a simpler data augmentation method for unanswerable question generation in English: performing antonym and entity swaps on answerable questions.\nCompared to the prior state-of-the-art, data generated with our training-free and lightweight strategy results in better models (+1.6 F1 points on SQuAD 2.0 data with BERT-large), and has higher human-judged relatedness and readability.\nWe quantify the raw benefits of our approach compared to no augmentation across multiple encoder models, using different amounts of generated data, and also on TydiQA-MinSpan data (+9.3 F1 points with BERT-large).\nOur results establish swaps as a simple but strong baseline for future work.", "keywords": "extractive question answering;machine reading comprehension;data augmentation", "primary_area": "", "supplementary_material": "", "author": "Vagrant Gautam;Miaoran Zhang;Dietrich Klakow", "authorids": "~Vagrant_Gautam1;~Miaoran_Zhang1;~Dietrich_Klakow1", "gender": "Agender;F;M", "homepage": "https://dippedrusk.com/;https://mrzhang11.github.io/;https://www.lsv.uni-saarland.de/", "dblp": "344/3375;302/4697;00/1846", "google_scholar": "BG7ORjIAAAAJ;_FuKQZoAAAAJ;https://scholar.google.de/citations?user=_HtGYmoAAAAJ", "or_profile": "~Vagrant_Gautam1;~Miaoran_Zhang1;~Dietrich_Klakow1", "aff": "Saarland University;Saarland University;Saarland University", "aff_domain": "uni-saarland.de;uni-saarland.de;saarland.de", "position": "PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\ngautam2023a,\ntitle={A Lightweight Method to Generate Unanswerable Questions in English},\nauthor={Vagrant Gautam and Miaoran Zhang and Dietrich Klakow},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=XySq36VD0U}\n}", "github": "", "project": "", "reviewers": "iyDW;YiU3;HhfT;1Std", "site": "https://openreview.net/forum?id=XySq36VD0U", "pdf_size": 0, "rating": "3;3;3;3", "confidence": "4;4;4;4", "excitement": "2;3;3;3", "reproducibility": "4;4;4;4", "correctness": "3;4;3;2", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 2.75, "reproducibility_avg": 4.0, "correctness_avg": 3.0, "replies_avg": 12, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-7263-8578;;0000-0002-4147-9690", "linkedin": "dippedrusk/;;https://www.linkedin.com/feed/?trk=DACH-SEM_google-adwords_brand-ghpwwww.l", "aff_unique_index": "0;0;0", "aff_unique_norm": "Saarland University", "aff_unique_dep": "", "aff_unique_url": "https://www.uni-saarland.de", "aff_unique_abbr": "UdS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Germany" }, { "id": "Xyb8Qh6vxU", "title": "A Comprehensive Evaluation of Large Language Models on Legal Judgment Prediction", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Large language models (LLMs) have demonstrated great potential for domain-specific applications, such as the law domain. 
However, recent disputes over GPT-4's law evaluation raise questions concerning their performance in real-world legal tasks. To systematically investigate their competency in the law, we design practical baseline solutions based on LLMs and test on the task of legal judgment prediction. In our solutions, LLMs can work alone to answer open questions or coordinate with an information retrieval (IR) system to learn from similar cases or solve simplified multi-choice questions.\nWe show that similar cases and multi-choice options, namely label candidates, included in prompts can help LLMs recall domain knowledge that is critical for expertise legal reasoning.\nWe additionally present an intriguing paradox wherein an IR system surpasses the performance of LLM+IR due to limited gains acquired by weaker LLMs from powerful IR systems. In such case, the role of LLMs becomes redundant. Our evaluation pipeline can be easily extended into other tasks to facilitate evaluations in other domains.\nCode is available at https://github.com/srhthu/LM-CompEval-Legal", "keywords": "Language Model;Law;Legal Judgment Prediction;Large Language Model;Language Model Evaluation", "primary_area": "", "supplementary_material": "", "author": "Ruihao Shui;Yixin Cao;Xiang Wang;Tat-Seng Chua", "authorids": "~Ruihao_Shui1;~Yixin_Cao2;~Xiang_Wang6;~Tat-Seng_Chua2", "gender": "M;M;M;M", "homepage": ";https://sites.google.com/view/yixin-homepage;https://github.com/xiangwang1223;http://www.comp.nus.edu.sg/~chuats/", "dblp": "264/5119;20/8038-2;31/2864-10;", "google_scholar": "psTK6aQAAAAJ;https://scholar.google.co.uk/citations?user=CnhTvdoAAAAJ;https://scholar.google.com.sg/citations?user=HdhaQB0AAAAJ;https://scholar.google.com.tw/citations?user=Z9DWCBEAAAAJ", "or_profile": "~Ruihao_Shui1;~Yixin_Cao2;~Xiang_Wang6;~Tat-seng_Chua1", "aff": "National University of Singapore;Singapore Management University;University of Science and Technology of China;National University of Singapore", "aff_domain": "u.nus.edu;smu.edu.sg;ustc.edu.cn;nus.edu.sg", "position": "PhD student;Assistant Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nshui2023a,\ntitle={A Comprehensive Evaluation of Large Language Models on Legal Judgment Prediction},\nauthor={Ruihao Shui and Yixin Cao and Xiang Wang and Tat-Seng Chua},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=Xyb8Qh6vxU}\n}", "github": "", "project": "", "reviewers": "9huf;JuZH;gyVE", "site": "https://openreview.net/forum?id=Xyb8Qh6vxU", "pdf_size": 0, "rating": "2;2;2", "confidence": "4;3;4", "excitement": "3;3;3", "reproducibility": "4;4;4", "correctness": "4;3;3", "rating_avg": 2.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.0, "reproducibility_avg": 4.0, "correctness_avg": 3.3333333333333335, "replies_avg": 10, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-0944-3188;;0000-0002-6148-6329;0000-0001-6097-7807", "linkedin": ";;;", "aff_unique_index": "0;1;2;0", "aff_unique_norm": "National University of Singapore;Singapore Management University;University of Science and Technology of China", "aff_unique_dep": ";;", "aff_unique_url": "https://www.nus.edu.sg;https://www.smu.edu.sg;http://www.ustc.edu.cn", "aff_unique_abbr": "NUS;SMU;USTC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "Singapore;China" }, { "id": "Xyy1p1IGvn", "title": "Semantic 
matching for text classification with complex class descriptions", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Text classifiers are an indispensable tool for machine learning practitioners, but adapting them to new classes is expensive. To reduce the cost of new classes, previous work exploits class descriptions and/or labels from existing classes. However, these approaches leave a gap in the model development cycle as they support either zero- or few-shot learning, but not both. Existing classifiers either do not work on zero-shot problems, or fail to improve much with few-shot labels. Further, prior work is aimed at concise class descriptions, which may be insufficient for complex classes. We overcome these shortcomings by casting text classification as a matching problem, where a model matches examples with relevant class descriptions. This formulation lets us leverage labels and complex class descriptions to perform zero- and few-shot learning on new classes. We compare this approach with numerous baselines on text classification tasks with complex class descriptions and find that it achieves strong zero-shot performance and scales well with few-shot samples, beating strong baselines by 22.48% (average precision) in the 10-shot setting. Furthermore, we extend the popular Model-Agnostic Meta-Learning algorithm to the zero-shot matching setting and show it improves zero-shot performance by 4.29%. Our results show that expressing text classification as a matching problem is a cost-effective way to address new classes. This strategy enables zero-shot learning for cold-start scenarios and few-shot learning so the model can improve until it is capable enough to deploy.", "keywords": "natural language processing;few-shot learning;zero-shot learning;semantic matching", "primary_area": "", "supplementary_material": "", "author": "Brian M De Silva;Kuan-Wen Huang;Gwang Gook Lee;Karen Hovsepian;Yan Xu;Mingwei Shen", "authorids": "~Brian_M_De_Silva1;~Kuan-Wen_Huang1;~Gwang_Gook_Lee1;~Karen_Hovsepian1;~Yan_Xu11;~Mingwei_Shen1", "gender": "M;M;M;M;F;M", "homepage": "https://www.briandesilva.com;;;;https://www.linkedin.com/in/yanxu001/;https://www.amazon.science/author/mingwei-shen", "dblp": ";;;82/3906.html;;47/10080", "google_scholar": ";bnAVWQIAAAAJ;E-QavWMAAAAJ;1aYamLoAAAAJ;;UBcc9-IAAAAJ", "or_profile": "~Brian_M_De_Silva1;~Kuan-Wen_Huang1;~Gwang_Gook_Lee1;~Karen_Hovsepian1;~Yan_Xu11;~Mingwei_Shen1", "aff": "Amazon;;Amazon;;Amazon;Amazon", "aff_domain": "amazon.com;;amazon.com;;amazon.com;amazon.com", "position": "Researcher;;Researcher;;Researcher;Researcher", "bibtex": "@inproceedings{\nsilva2023semantic,\ntitle={Semantic matching for text classification with complex class descriptions},\nauthor={Brian M De Silva and Kuan-Wen Huang and Gwang Gook Lee and Karen Hovsepian and Yan Xu and Mingwei Shen},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=Xyy1p1IGvn}\n}", "github": "", "project": "", "reviewers": "6T3q;LCiA;LVKW", "site": "https://openreview.net/forum?id=Xyy1p1IGvn", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;1;4", "excitement": "4;3;3", "reproducibility": "5;3;3", "correctness": "5;3;4", "rating_avg": 4.0, "confidence_avg": 2.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 4.0, "replies_avg": 10, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": 
"0000-0003-0944-900X;;;;;", "linkedin": "brian-de-silva-22a82a38/;;gwang-gook-lee-07187753/;;;mingweishen", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Amazon", "aff_unique_dep": "Amazon.com, Inc.", "aff_unique_url": "https://www.amazon.com", "aff_unique_abbr": "Amazon", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "Y0PN9Eic8T", "title": "Dynamic Stashing Quantization for Efficient Transformer Training", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "Large Language Models (LLMs) have demonstrated impressive performance on a range of Natural Language Processing (NLP) tasks. Unfortunately, the immense amount of computations and memory accesses required for LLM training makes them prohibitively expensive in terms of hardware cost, and thus challenging to deploy in use cases such as on-device learning. In this paper, motivated by the observation that LLM training is memory-bound, we propose a novel dynamic quantization strategy, termed Dynamic Stashing Quantization (DSQ), that puts a special focus on reducing the memory operations, but also enjoys the other benefits of low precision training, such as the reduced arithmetic cost. We conduct a thorough study on two translation tasks (trained-from-scratch) and three classification tasks (fine-tuning). DSQ reduces the amount of arithmetic operations by $20.95\\times$ and the number of DRAM operations by $2.55\\times$ on IWSLT17 compared to the standard 16-bit fixed-point, which is widely used in on-device learning.", "keywords": "Transformer;Training;Dynamic Stashing Quantization", "primary_area": "", "supplementary_material": "", "author": "Guo Yang;Daniel Lo;Robert D. Mullins;Yiren Zhao", "authorids": "~Guo_Yang2;~Daniel_Lo1;~Robert_D._Mullins1;~Yiren_Zhao2", "gender": "M;;M;M", "homepage": "https://github.com/gy261;;https://aaronzhao.me;https://www.csat.cam.ac.uk/~rdm34", "dblp": ";08/7863.html;https://dblp.uni-trier.de/pers/hd/z/Zhao:Yiren;31/789", "google_scholar": ";;lOOmgEgAAAAJ;zjXO2HMAAAAJ", "or_profile": "~Guo_Yang2;~Daniel_Lo1;~Yiren_Zhao2;~Robert_Mullins1", "aff": "Computer Laboratory, University of Cambridge;Microsoft;Imperial College London;University of Cambridge", "aff_domain": "cl.cam.ac.uk;microsoft.com;ic.ac.uk;cam.ac.uk", "position": "Researcher;Engineer;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nyang2023dynamic,\ntitle={Dynamic Stashing Quantization for Efficient Transformer Training},\nauthor={Guo Yang and Daniel Lo and Robert D. 
Mullins and Yiren Zhao},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=Y0PN9Eic8T}\n}", "github": "", "project": "", "reviewers": "Qa9X;3Ppe;vpzW;jF2i", "site": "https://openreview.net/forum?id=Y0PN9Eic8T", "pdf_size": 0, "rating": "5;5;5;5", "confidence": "3;3;4;2", "excitement": "4;4;3;4", "reproducibility": "3;4;3;3", "correctness": "4;3;3;3", "rating_avg": 5.0, "confidence_avg": 3.0, "excitement_avg": 3.75, "reproducibility_avg": 3.25, "correctness_avg": 3.25, "replies_avg": 12, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0009-0006-8777-5432;;;", "linkedin": "guo-yang-1b492a21b/;;yiren-aaron-zhao-baa8b5116/;", "aff_unique_index": "0;1;2;0", "aff_unique_norm": "University of Cambridge;Microsoft;Imperial College London", "aff_unique_dep": "Computer Laboratory;Microsoft Corporation;", "aff_unique_url": "https://www.cam.ac.uk;https://www.microsoft.com;https://www.imperial.ac.uk", "aff_unique_abbr": "Cambridge;Microsoft;ICL", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Cambridge;", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "United Kingdom;United States" }, { "id": "Y13EvAJlhQ", "title": "Instructive Dialogue Summarization with Query Aggregations", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Conventional dialogue summarization methods directly generate summaries and do not consider user's specific interests. This poses challenges in cases where the users are more focused on particular topics or aspects. With the advancement of instruction-finetuned language models, we introduce instruction-tuning to dialogues to expand the capability set of dialogue summarization models. To overcome the scarcity of instructive dialogue summarization data, we propose a three-step approach to synthesize high-quality query-based summarization triples. This process involves summary-anchored query generation, query filtering and query-based summary generation. By training a unified model called InstructDS (Instructive Dialogue Summarization) on three summarization datasets with multi-purpose instructive triples, we expand the capability of dialogue summarization models. We evaluate our method on four datasets, including dialogue summarization and dialogue reading comprehension. Experimental results show that our approach outperforms the state-of-the-art models and even models with larger sizes. Additionally, our model exhibits higher generalizability and faithfulness, as confirmed by human subjective evaluations.", "keywords": "Dialogue Summarization;Query-based Dialogue Summarizaiton;Dialogue Reading Comprehension", "primary_area": "", "supplementary_material": "", "author": "Bin Wang;Zhengyuan Liu;Nancy F. Chen", "authorids": "~Bin_Wang14;~Zhengyuan_Liu2;~Nancy_F._Chen1", "gender": "M;M;", "homepage": "https://binwang28.github.io/;;http://alum.mit.edu/www/nancychen", "dblp": "13/1898-40;229/9236;84/8761", "google_scholar": "jUrRMv4AAAAJ;;https://scholar.google.com.sg/citations?user=K3Z9UiAAAAAJ", "or_profile": "~Bin_Wang14;~Zhengyuan_Liu2;~Nancy_F._Chen1", "aff": "National University of Singapore;I2R;I2R, A*STAR", "aff_domain": "nus.edu.sg;astar.edu.sg;i2r.a-star.edu.sg", "position": "Research Fellow;Researcher;Principal Researcher", "bibtex": "@inproceedings{\nwang2023instructive,\ntitle={Instructive Dialogue Summarization with Query Aggregations},\nauthor={Bin Wang and Zhengyuan Liu and Nancy F. 
Chen},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=Y13EvAJlhQ}\n}", "github": "", "project": "", "reviewers": "M5kz;hwUr;YtPF", "site": "https://openreview.net/forum?id=Y13EvAJlhQ", "pdf_size": 0, "rating": "4;4;4", "confidence": "5;4;4", "excitement": "4;3;4", "reproducibility": "5;4;4", "correctness": "4;3;4", "rating_avg": 4.0, "confidence_avg": 4.333333333333333, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.333333333333333, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0001-9760-8343;;0000-0003-0872-5877", "linkedin": "bin-wang-3b7054140/;;nancy-chen-4644865/?originalSubdomain=sg", "aff_unique_index": "0;1;2", "aff_unique_norm": "National University of Singapore;Institute for Infocomm Research;A*STAR", "aff_unique_dep": ";;Institute for Infocomm Research", "aff_unique_url": "https://www.nus.edu.sg;https://www.i2r.a-star.edu.sg;https://www.a-star.edu.sg", "aff_unique_abbr": "NUS;I2R;A*STAR", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Singapore" }, { "id": "Y28GzovPql", "title": "RoBoCoP: A Comprehensive ROmance BOrrowing COgnate Package and Benchmark for Multilingual Cognate Identification", "track": "main", "status": "Long Main", "tldr": "", "abstract": "The identification of cognates is a fundamental process in historical linguistics, on which any further research is based. Even though there are several cognate databases for Romance languages, they are rather scattered, incomplete, noisy, contain unreliable information, or have uncertain availability.\nIn this paper we introduce a comprehensive database of Romance cognates and borrowings based on the etymological information provided by the dictionaries. We extract pairs of cognates between any two Romance languages by parsing electronic dictionaries of Romanian, Italian, Spanish, Portuguese and French. Based on this resource, we propose a strong benchmark for the automatic detection of cognates, by applying machine learning and deep learning based methods on any two pairs of Romance languages. 
We find that automatic identification of cognates is possible with accuracy averaging around 94% for the more difficult task formulations.", "keywords": "cognates;borrowings;historical linguistics;database;romance languages;language resources;lexicon", "primary_area": "", "supplementary_material": "", "author": "Liviu P Dinu;Ana Sabina Uban;Alina Maria Cristea;Anca Daniela Dinu;Ioan-Bogdan Iordache;Simona Georgescu;Laurentiu Zoicas", "authorids": "~Liviu_P_Dinu1;~Ana_Sabina_Uban1;~Alina_Maria_Cristea1;~Anca_Daniela_Dinu1;~Ioan-Bogdan_Iordache1;~Simona_Georgescu1;~Laurentiu_Zoicas1", "gender": "M;F;F;F;M;F;M", "homepage": "https://nlp.unibuc.ro/people/liviu.html;;https://nlp.unibuc.ro/people/alina.html;http://limbimoderne.lls.unibuc.ro/catedra/;;https://unibuc.ro/user/simona.georgescu/;", "dblp": "50/3644.html;174/7148.html;127/0096;58/6009;305/9693;;", "google_scholar": "https://scholar.google.ro/citations?user=2SHcMNAAAAAJ;https://scholar.google.gr/citations?hl=en;tG1KT38AAAAJ;Y4kIfCoAAAAJ;wOvradYAAAAJ;;", "or_profile": "~Liviu_P_Dinu1;~Ana_Sabina_Uban1;~Alina_Maria_Cristea1;~Anca_Daniela_Dinu1;~Ioan-Bogdan_Iordache1;~Simona_Georgescu1;~Laurentiu_Zoicas1", "aff": "University of Bucharest;Universitatea Bucuresti;;University of Bucharest;University of Bucharest;University of Bucharest;University of Bucharest", "aff_domain": "unibuc.ro;unibuc.ro;;unibuc.ro;unibuc.ro;unibuc.ro;unibuc.ro", "position": "Full Professor;Assistant Professor;;Lecturer;MS student;Associate Professor;Associate Professor", "bibtex": "@inproceedings{\ndinu2023robocop,\ntitle={RoBoCoP: A Comprehensive {RO}mance {BO}rrowing {CO}gnate Package and Benchmark for Multilingual Cognate Identification},\nauthor={Liviu P Dinu and Ana Sabina Uban and Alina Maria Cristea and Anca Daniela Dinu and Ioan-Bogdan Iordache and Simona Georgescu and Laurentiu Zoicas},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=Y28GzovPql}\n}", "github": "", "project": "", "reviewers": "kDVS;U5uj;HFjy", "site": "https://openreview.net/forum?id=Y28GzovPql", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;5;4", "excitement": "4;3;4", "reproducibility": "4;4;4", "correctness": "4;3;4", "rating_avg": 5.0, "confidence_avg": 4.333333333333333, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.0, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0000-0002-4611-3516;;;0000-0001-9175-7233", "linkedin": ";ana-uban;;;;;", "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "University of Bucharest", "aff_unique_dep": "", "aff_unique_url": "https://www.unibuc.ro", "aff_unique_abbr": "Unibuc", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "Romania" }, { "id": "Y2wUa9n7sr", "title": "VISTA: Visual-Textual Knowledge Graph Representation Learning", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Knowledge graphs represent human knowledge using triplets composed of entities and relations. While most existing knowledge graph embedding methods only consider the structure of a knowledge graph, a few recently proposed multimodal methods utilize images or text descriptions of entities in a knowledge graph. 
In this paper, we propose visual-textual knowledge graphs (VTKGs), where not only entities but also triplets can be explained using images, and both entities and relations can accompany text descriptions. By compiling visually expressible commonsense knowledge, we construct new benchmark datasets where triplets themselves are explained by images, and the meanings of entities and relations are described using text. We propose VISTA, a knowledge graph representation learning method for VTKGs, which incorporates the visual and textual representations of entities and relations using entity encoding, relation encoding, and triplet decoding transformers. Experiments show that VISTA outperforms state-of-the-art knowledge graph completion methods in real-world VTKGs.", "keywords": "Knowledge Graph;Multimodality;Representation Learning;Knowledge Graph Completion;Transformer", "primary_area": "", "supplementary_material": "", "author": "Jaejun Lee;Chanyoung Chung;Hochang Lee;Sungho Jo;Joyce Jiyoung Whang", "authorids": "~Jaejun_Lee1;~Chanyoung_Chung1;~Hochang_Lee1;~Sungho_Jo1;~Joyce_Jiyoung_Whang2", "gender": ";M;M;M;F", "homepage": "https://jaejunlee714.github.io/;;;http://nmail.kaist.ac.kr/wordpress/index.php/professor-jo-sungho/;http://bdi-lab.kaist.ac.kr/", "dblp": ";297/0341;;18/3943;121/4230", "google_scholar": "G5UMYkUAAAAJ;Qg9oTYoAAAAJ;;;TLrKglQAAAAJ", "or_profile": "~Jaejun_Lee1;~Chanyoung_Chung1;~Hochang_Lee1;~Sungho_Jo1;~Joyce_Jiyoung_Whang2", "aff": "Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;KAIST", "aff_domain": "kaist.edu;kaist.ac.kr;kaist.edu;kaist.ac.kr;kaist.ac.kr", "position": "MS student;PhD student;PhD student;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nlee2023vista,\ntitle={{VISTA}: Visual-Textual Knowledge Graph Representation Learning},\nauthor={Jaejun Lee and Chanyoung Chung and Hochang Lee and Sungho Jo and Joyce Jiyoung Whang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=Y2wUa9n7sr}\n}", "github": "", "project": "", "reviewers": "V1gZ;pVWp;iJiR;6NUN;bFL1;jTyB;K6Ph", "site": "https://openreview.net/forum?id=Y2wUa9n7sr", "pdf_size": 0, "rating": "3;3;3;3;3;3;3", "confidence": "3;4;5;3;3;2;3", "excitement": "3;3;5;3;3;3;3", "reproducibility": "2;3;5;2;3;4;3", "correctness": "3;2;5;3;3;3;3", "rating_avg": 3.0, "confidence_avg": 3.2857142857142856, "excitement_avg": 3.2857142857142856, "reproducibility_avg": 3.142857142857143, "correctness_avg": 3.142857142857143, "replies_avg": 21, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-6948-6462;0000-0003-4891-3901;0000-0002-5172-8136;0000-0002-7618-362X;0000-0002-4773-3194", "linkedin": ";;;;", "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.kaist.ac.kr", "aff_unique_abbr": "KAIST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "South Korea" }, { "id": "Y30NTg87od", "title": "Implicit Sense-labeled Connective Recognition as Text Generation", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "Implicit Discourse Relation Recognition (IDRR) involves identifying the sense label of an implicit connective between adjacent 
text spans. \nThis has traditionally been approached as a classification task. \nHowever, some downstream tasks require more than just a sense label as well as the specific connective used.\nThis paper presents Implicit Sense-labeled Connective Recognition (ISCR), which identifies the implicit connectives and their sense labels between adjacent text spans. \nISCR can be treated as a classification task, but a large number of potential categories, sense labels, and uneven distribution of instances among them make this difficult. \nInstead, this paper handles the task as a text-generation task, using an encoder-decoder model to generate both connectives and their sense labels. \nHere, we explore a classification method and three kinds of text-generation methods.\nFrom our evaluation results on PDTB-3.0, we found that our method outperforms the conventional classification-based method.", "keywords": "Implicit Discourse Relation Recognition;Implicit Sense-labeled Connective Recognition;Encoder-Decoder;PDTB-3.0", "primary_area": "", "supplementary_material": "", "author": "Yui Oka;Tsutomu Hirao", "authorids": "~Yui_Oka1;~Tsutomu_Hirao2", "gender": "F;M", "homepage": "https://okayu1015.github.io/profile/;", "dblp": "280/0114;68/6820.html", "google_scholar": ";https://scholar.google.co.jp/citations?user=bvfYQ4oAAAAJ", "or_profile": "~Yui_Oka1;~Tsutomu_Hirao2", "aff": "NTT Communication Science Laboratories;NTT Communication Science Laboratories", "aff_domain": "hco.ntt.co.jp;ntt.com", "position": "Researcher;Researher", "bibtex": "@inproceedings{\noka2023implicit,\ntitle={Implicit Sense-labeled Connective Recognition as Text Generation},\nauthor={Yui Oka and Tsutomu Hirao},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=Y30NTg87od}\n}", "github": "", "project": "", "reviewers": "ZGAK;jhFv;mtYh", "site": "https://openreview.net/forum?id=Y30NTg87od", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;4", "excitement": "2;4;2", "reproducibility": "4;4;3", "correctness": "3;3;3", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 2.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";", "linkedin": ";", "aff_unique_index": "0;0", "aff_unique_norm": "NTT Communication Science Laboratories", "aff_unique_dep": "", "aff_unique_url": "https://www.ntt-csl.com", "aff_unique_abbr": "NTT CSL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Japan" }, { "id": "Y6w2prqvjM", "title": "On the Challenges of Using Black-Box APIs for Toxicity Evaluation in Research", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Perception of toxicity evolves over time and often differs between geographies and cultural backgrounds. Similarly, black-box commercially available APIs for detecting toxicity, such as the Perspective API, are not static, but frequently retrained to address any unattended weaknesses and biases. We evaluate the implications of these changes on the reproducibility of findings that compare the relative merits of models and methods that aim to curb toxicity. Our findings suggest that research that relied on inherited automatic toxicity scores to compare models and techniques may have resulted in inaccurate findings. 
Rescoring all models from HELM, a widely respected living benchmark, for toxicity with the recent version of the API led to a different ranking of widely used foundation models. We suggest caution in applying apples-to-apples comparisons between studies and call for a more structured approach to evaluating toxicity over time.", "keywords": "toxicity;black-box API;HELM;evaluation;Real Toxicity Prompts;benchmarking", "primary_area": "", "supplementary_material": "", "author": "Luiza Amador Pozzobon;Beyza Ermis;Patrick Lewis;Sara Hooker", "authorids": "~Luiza_Amador_Pozzobon1;~Beyza_Ermis1;~Patrick_Lewis2;~Sara_Hooker2", "gender": "F;F;M;", "homepage": ";https://www.cmpe.boun.edu.tr/people/beyza.ermi%C5%9F;https://patricklewis.io;https://www.sarahooker.me/", "dblp": ";117/9290;227/3197;210/2611", "google_scholar": "vaCOqncAAAAJ;v2cMiCAAAAAJ;JN7Zg-kAAAAJ;2xy6h3sAAAAJ", "or_profile": "~Luiza_Amador_Pozzobon1;~Beyza_Ermis1;~Patrick_Lewis2;~Sara_Hooker1", "aff": "Cohere For AI;Cohere AI;Cohere;Cohere For AI", "aff_domain": "cohere.com;cohere.com;cohere.ai;cohere.com", "position": "Researcher;Researcher;Research Scientist;Principal Researcher", "bibtex": "@inproceedings{\npozzobon2023on,\ntitle={On the Challenges of Using Black-Box {API}s for Toxicity Evaluation in Research},\nauthor={Luiza Amador Pozzobon and Beyza Ermis and Patrick Lewis and Sara Hooker},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=Y6w2prqvjM}\n}", "github": "", "project": "", "reviewers": "5s8T;Lwgv;U24C", "site": "https://openreview.net/forum?id=Y6w2prqvjM", "pdf_size": 0, "rating": "5;5;5", "confidence": "5;4;4", "excitement": "4;4;3", "reproducibility": "5;4;3", "correctness": "4;4;4", "rating_avg": 5.0, "confidence_avg": 4.333333333333333, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.0, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0002-2192-9543;", "linkedin": "luizapozzobon/;;patrick-s-h-lewis/;", "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Cohere;Cohere AI", "aff_unique_dep": "Cohere AI;", "aff_unique_url": "https://cohere.ai;https://cohere.ai", "aff_unique_abbr": "Cohere;Cohere AI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "United States;Canada" }, { "id": "Y7Wx7usMtc", "title": "Natural Disaster Tweets Classification Using Multimodal Data", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Social media platforms are extensively used for expressing opinions or conveying information. The information available on such platforms can be used for various humanitarian and disaster-related tasks as distributing messages in different formats through social media is quick and easy. Often this useful information during disaster events goes to waste as efficient systems don\u2019t exist which can turn these unstructured data into meaningful format which can ultimately assist aid agencies. In disaster identification and assessment, information available is naturally multimodal, however, most existing work has been solely focused on single modalities e.g. images or texts separately. When information from different modalities are integrated , it produces significantly better results. 
In this paper, we have explored different models which can lead to the development of a system that deals with multimodal datasets and can perform sequential hierarchical classification. Specifically, we aim to find the damage and its severity along with classifying the data into humanitarian categories. The different stages in the hierarchical classification have had their respective models selected by researching with many different modality specific models and approaches of multimodal classification including multi task learning. The hierarchical model can give results at different abstraction levels according to the use cases. Through extensive quantitative and qualitative analysis, we show how our system is effective in classifying the multimodal tweets along with an excellent computational efficiency and assessment performance. \nWith the help of our approach, we aim to support disaster management through identification of situations involving humanitarian tragedies and aid in assessing the severity and type of damage.", "keywords": "Multimodal Data;Multi task learning;NLP of Social Media data;AI for social good", "primary_area": "", "supplementary_material": "", "author": "Mohammad Abdul Basit;Bashir Alam;Zubaida Fatima;Salman Shaikh", "authorids": "~Mohammad_Abdul_Basit1;~Bashir_Alam1;~Zubaida_Fatima1;~Salman_Shaikh1", "gender": "M;M;F;M", "homepage": ";;;https://cemse.kaust.edu.sa/cs/people/person/salman-g-shaikh", "dblp": ";;;", "google_scholar": ";V4_vFnAAAAAJ;;", "or_profile": "~Mohammad_Abdul_Basit1;~Bashir_Alam1;~Zubaida_Fatima1;~Salman_Shaikh1", "aff": "Publicis Sapient;Jamia Millia Islamia ;Indraprastha Institute of Information Technology, Delhi;King Abdullah University of Science and Technology", "aff_domain": "publicissapient.com;jmi.ac.in;iiitd.ac.in;kaust.edu.sa", "position": "Software Engineer;Full Professor;Undergrad student;MS student", "bibtex": "@inproceedings{\nbasit2023natural,\ntitle={Natural Disaster Tweets Classification Using Multimodal Data},\nauthor={Mohammad Abdul Basit and Bashir Alam and Zubaida Fatima and Salman Shaikh},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=Y7Wx7usMtc}\n}", "github": "", "project": "", "reviewers": "j8SL;SknH;iDm4", "site": "https://openreview.net/forum?id=Y7Wx7usMtc", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;4;4", "excitement": "4;4;4", "reproducibility": "3;3;5", "correctness": "4;4;4", "rating_avg": 5.0, "confidence_avg": 4.0, "excitement_avg": 4.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 4.0, "replies_avg": 10, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;", "linkedin": "mohammad-abdul-basit-0526091a4/;;zubaida-fatima-1a3502256/;", "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Publicis Sapient;Jamia Millia Islamia;Indraprastha Institute of Information Technology;King Abdullah University of Science and Technology", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.publicissapient.com;https://www.jmi.ac.in;http://www.iiitd.ac.in;https://www.kast.kau.edu.sa", "aff_unique_abbr": "Publicis Sapient;JMI;IIIT-D;KAUST", "aff_campus_unique_index": "1", "aff_campus_unique": ";Delhi", "aff_country_unique_index": "0;1;1;2", "aff_country_unique": "United States;India;Saudi Arabia" }, { "id": "Y7kK2HcxDK", "title": "GDA: Grammar-based Data Augmentation for Text Classification using Slot Information", "track": "main", "status": "Long Findings", "tldr": "", 
"abstract": "Recent studies propose various data augmentation approaches to resolve the low-resource problem in natural language processing tasks. Data augmentation is a successful solution to this problem and recent strategies give variation on sentence structures to boost performance. However, these approaches can potentially lead to semantic errors and produce semantically noisy data due to the unregulated variation of sentence structures. In an effort to combat these semantic errors, we leverage slot information, the representation of the context of keywords from a sentence, and form a data augmentation strategy which we propose, called GDA. Our strategy employs algorithms that construct and manipulate rules of context-aware grammar, utilizing this slot information. The algorithms extract recurrent patterns by distinguishing words with slots and form the \"rules of grammar\"---a set of injective relations between a sentence's semantics and its syntactical structure---to augment the dataset. The augmentation is done in an automated manner with the constructed rules and thus, GDA is explainable and reliable without any human intervention. We evaluate GDA with state-of-the-art data augmentation techniques, including those using pre-trained language models, and the result illustrates that GDA outperforms all other data augmentation methods by 19.38%. Extensive experiments show that GDA is an effective data augmentation strategy that incorporates word semantics for more accurate and diverse data.", "keywords": "data augmentation;rules of grammar;text classification", "primary_area": "", "supplementary_material": "", "author": "Joonghyuk Hahn;Hyunjoon Cheon;Elizabeth Grace Orwig;Su-Hyeon Kim;Sang-Ki Ko;Yo-Sub Han", "authorids": "~Joonghyuk_Hahn1;~Hyunjoon_Cheon1;~Elizabeth_Grace_Orwig1;~Su-Hyeon_Kim1;~Sang-Ki_Ko1;~Yo-Sub_Han1", "gender": "M;;F;M;;M", "homepage": "https://peer0.github.io;;;https://sites.google.com/site/sangkikotoc/home;http://toc.yonsei.ac.kr/~emmous/;https://github.com/suhyeon0123", "dblp": "304/4027;228/6835;;71/9491.html;h/YoSubHan;296/0145", "google_scholar": "08ccS2oAAAAJ;;;https://scholar.google.com/scholar?hl=en;yDOh26sAAAAJ;", "or_profile": "~Joonghyuk_Hahn1;~Hyunjoon_Cheon1;~Elizabeth_Grace_Orwig1;~Sang-Ki_Ko1;~Yo-Sub_Han1;~Kim_Su-Hyeon1", "aff": "Yonsei University;Yonsei University;Yonsei University;Kangwon National University;Yonsei University;Kangwon National University", "aff_domain": "yonsei.ac.kr;yonsei.ac.kr;yonsei.ac.kr;kangwon.ac.kr;yonsei.ac.kr;kangwon.ac.kr", "position": "PhD student;PhD student;PhD student;Assistant Professor;Full Professor;MS student", "bibtex": "@inproceedings{\nhahn2023gda,\ntitle={{GDA}: Grammar-based Data Augmentation for Text Classification using Slot Information},\nauthor={Joonghyuk Hahn and Hyunjoon Cheon and Elizabeth Grace Orwig and Su-Hyeon Kim and Sang-Ki Ko and Yo-Sub Han},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=Y7kK2HcxDK}\n}", "github": "", "project": "", "reviewers": "YPDx;VFyi;2Dha;LAee", "site": "https://openreview.net/forum?id=Y7kK2HcxDK", "pdf_size": 0, "rating": "3;3;3;3", "confidence": "3;3;3;4", "excitement": "3;3;4;3", "reproducibility": "4;3;3;4", "correctness": "3;3;3;3", "rating_avg": 3.0, "confidence_avg": 3.25, "excitement_avg": 3.25, "reproducibility_avg": 3.5, "correctness_avg": 3.0, "replies_avg": 14, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": 
"0009-0000-5890-4916;;;;;", "linkedin": "joonghyuk-hahn;;elizabeth-orwig/;;;", "aff_unique_index": "0;0;0;1;0;1", "aff_unique_norm": "Yonsei University;Kangwon National University", "aff_unique_dep": ";", "aff_unique_url": "https://www.yonsei.ac.kr;http://www.kangwon.ac.kr", "aff_unique_abbr": "Yonsei;KNU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "South Korea" }, { "id": "YGK9cd0bHz", "title": "WiCE: Real-World Entailment for Claims in Wikipedia", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Textual entailment models are increasingly applied in settings like fact-checking, presupposition verification in question answering, or summary evaluation. However, these represent a significant domain shift from existing entailment datasets, and models underperform as a result. We propose WiCE, a new fine-grained textual entailment dataset built on natural claim and evidence pairs extracted from Wikipedia. In addition to standard claim-level entailment, WiCE provides entailment judgments over sub-sentence units of the claim, and a minimal subset of evidence sentences that support each subclaim. To support this, we propose an automatic claim decomposition strategy using GPT-3.5 which we show is also effective at improving entailment models' performance on multiple datasets at test time. Finally, we show that real claims in our dataset involve challenging verification and retrieval problems that existing models fail to address.", "keywords": "natural language inference;textual entailment;fact-checking", "primary_area": "", "supplementary_material": "", "author": "Ryo Kamoi;Tanya Goyal;Juan Diego Rodriguez;Greg Durrett", "authorids": "~Ryo_Kamoi1;~Tanya_Goyal1;~Juan_Diego_Rodriguez1;~Greg_Durrett1", "gender": "M;F;;M", "homepage": "https://ryokamoi.github.io/;;;http://www.cs.utexas.edu/~gdurrett/", "dblp": "254/2890;176/9145;;69/7968", "google_scholar": "4OWTLKAAAAAJ;w72MSFoAAAAJ;;https://scholar.google.com.tw/citations?user=EpQ_sDEAAAAJ", "or_profile": "~Ryo_Kamoi1;~Tanya_Goyal1;~Juan_Diego_Rodriguez1;~Greg_Durrett1", "aff": "Pennsylvania State University;University of Texas, Austin;;University of Texas, Austin", "aff_domain": "psu.edu;utexas.edu;;utexas.edu", "position": "PhD student;PhD student;;Assistant Professor", "bibtex": "@inproceedings{\nkamoi2023wice,\ntitle={Wi{CE}: Real-World Entailment for Claims in Wikipedia},\nauthor={Ryo Kamoi and Tanya Goyal and Juan Diego Rodriguez and Greg Durrett},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=YGK9cd0bHz}\n}", "github": "", "project": "", "reviewers": "rBnT;rRJ9;SxkM", "site": "https://openreview.net/forum?id=YGK9cd0bHz", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;3;4", "excitement": "4;4;4", "reproducibility": "4;4;3", "correctness": "4;4;3", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 4.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-8442-4171;;;", "linkedin": "ryokamoi/;;;", "aff_unique_index": "0;1;1", "aff_unique_norm": "Pennsylvania State University;University of Texas at Austin", "aff_unique_dep": ";", "aff_unique_url": "https://www.psu.edu;https://www.utexas.edu", "aff_unique_abbr": "PSU;UT Austin", "aff_campus_unique_index": "1;1", "aff_campus_unique": 
";Austin", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "YGUUT6CkbB", "title": "StrAE: Autoencoding for Pre-Trained Embeddings using Explicit Structure", "track": "main", "status": "Long Main", "tldr": "", "abstract": "This work presents StrAE: a Structured Autoencoder framework that through strict adherence to explicit structure, and use of a novel contrastive objective over tree-structured representations, enables effective learning of multi-level representations. Through comparison over different forms of structure, we verify that our results are directly attributable to the informativeness of the structure provided as input, and show that this is not the case for existing tree models. We then further extend StrAE to allow the model to define its own compositions using a simple localised-merge algorithm. This variant, called Self-StrAE, outperforms baselines that don't involve explicit hierarchical compositions, and is comparable to models given informative structure (e.g. constituency parses). Our experiments are conducted in a data-constrained (circa 10M tokens) setting to help tease apart the contribution of the inductive bias to effective learning. However, we find that this framework can be robust to scale, and when extended to a much larger dataset (circa 100M tokens), our 430 parameter model performs comparably to a 6-layer RoBERTa many orders of magnitude larger in size. Our findings support the utility of incorporating explicit composition as an inductive bias for effective representation learning.", "keywords": "NLP;Representation Learning;Structure;Semantics;Syntax;Induction;Composition", "primary_area": "", "supplementary_material": "", "author": "Mattia Opper;Victor Prokhorov;Siddharth N", "authorids": "~Mattia_Opper1;~Victor_Prokhorov1;~Siddharth_N1", "gender": "M;;M", "homepage": "https://mopper97.github.io/;https://victorprokhorov.github.io/;https://homepages.inf.ed.ac.uk/snaraya3/", "dblp": ";203/8964;67/8366", "google_scholar": "02E6E3EAAAAJ;https://scholar.google.co.uk/citations?user=IQlUyHEAAAAJ;V7D7hxMAAAAJ", "or_profile": "~Mattia_Opper1;~Victor_Prokhorov1;~Siddharth_N1", "aff": "University of Edinburgh, University of Edinburgh;University of Edinburgh, University of Edinburgh;University of Edinburgh", "aff_domain": "ed.ac.uk;ed.ac.uk;ed.ac.uk", "position": "PhD student;Postdoc;Reader (Associate Professor)", "bibtex": "@inproceedings{\nopper2023strae,\ntitle={Str{AE}: Autoencoding for Pre-Trained Embeddings using Explicit Structure},\nauthor={Mattia Opper and Victor Prokhorov and Siddharth N},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=YGUUT6CkbB}\n}", "github": "", "project": "", "reviewers": "xyAG;D1Qk;pRFX", "site": "https://openreview.net/forum?id=YGUUT6CkbB", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;4", "excitement": "4;3;4", "reproducibility": "4;4;4", "correctness": "4;3;4", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.0, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0003-4911-7333", "linkedin": "mattia-opper-45243b105/;;", "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Edinburgh", "aff_unique_dep": "", "aff_unique_url": "https://www.ed.ac.uk", "aff_unique_abbr": "Edinburgh", "aff_campus_unique_index": "", "aff_campus_unique": "", 
"aff_country_unique_index": "0;0;0", "aff_country_unique": "United Kingdom" }, { "id": "YGYaxZVsJK", "title": "Pseudointelligence: A Unifying Lens on Language Model Evaluation", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "With large language models surpassing human performance on an increasing number of benchmarks, we must take a principled approach for targeted evaluation of model capabilities. Inspired by pseudorandomness, we propose pseudointelligence, which captures the maxim that \"(perceived) intelligence lies in the eye of the beholder.\" That is, that claims of intelligence are meaningful only when their evaluator is taken into account. Concretely, we propose a complexity-theoretic framework of model evaluation cast as a dynamic interaction between a model and a learned evaluator. We demonstrate that this framework can be used to reason about two case studies in language model evaluation, as well as analyze existing evaluation methods.", "keywords": "Evaluation;Large Language Models;Learning Theory;Computational Complexity", "primary_area": "", "supplementary_material": "", "author": "Shikhar Murty;Orr Paradise;Pratyusha Sharma", "authorids": "~Shikhar_Murty1;~Orr_Paradise1;~Pratyusha_Sharma1", "gender": "M;M;F", "homepage": "https://murtyshikhar.github.io/;https://people.eecs.berkeley.edu/~orrp/;https://pratyushasharma.github.io/", "dblp": "202/2040;236/4369;228/7904", "google_scholar": "https://scholar.google.ca/citations?user=ubAcojQAAAAJ;9At07_kAAAAJ;RGiCLUgAAAAJ", "or_profile": "~Shikhar_Murty1;~Orr_Paradise1;~Pratyusha_Sharma1", "aff": "Stanford University;University of California, Berkeley;Massachusetts Institute of Technology", "aff_domain": "cs.stanford.edu;berkeley.edu;mit.edu", "position": "PhD student;PhD student;PhD student", "bibtex": "@inproceedings{\nmurty2023pseudointelligence,\ntitle={Pseudointelligence: A Unifying Lens on Language Model Evaluation},\nauthor={Shikhar Murty and Orr Paradise and Pratyusha Sharma},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=YGYaxZVsJK}\n}", "github": "", "project": "", "reviewers": "WYM6;Mo8o;6eqq", "site": "https://openreview.net/forum?id=YGYaxZVsJK", "pdf_size": 0, "rating": "4;4;4", "confidence": "5;4;4", "excitement": "2;4;4", "reproducibility": "", "correctness": "2;3;4", "rating_avg": 4.0, "confidence_avg": 4.333333333333333, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 0, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": ";;", "aff_unique_index": "0;1;2", "aff_unique_norm": "Stanford University;University of California, Berkeley;Massachusetts Institute of Technology", "aff_unique_dep": ";;", "aff_unique_url": "https://www.stanford.edu;https://www.berkeley.edu;https://web.mit.edu", "aff_unique_abbr": "Stanford;UC Berkeley;MIT", "aff_campus_unique_index": "0;1", "aff_campus_unique": "Stanford;Berkeley;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "YHWXlESeS8", "title": "Have LLMs Advanced Enough? A Challenging Problem Solving Benchmark For Large Language Models", "track": "main", "status": "Long Main", "tldr": "", "abstract": "The performance of large language models (LLMs) on existing reasoning benchmarks has significantly improved over the past years. 
In response, we present JEEBench, a considerably more challenging benchmark dataset for evaluating the problem solving abilities of LLMs. We curate 515 challenging pre-engineering mathematics, physics and chemistry problems from the highly competitive IIT JEE-Advanced exam. Long-horizon reasoning on top of deep in-domain knowledge is essential for solving problems in this benchmark. Our evaluation on various open-source and proprietary models reveals that the highest performance, even after using techniques like self-consistency, self-refinement and chain-of-thought prompting, is less than 40%. The typical failure modes of GPT-4, the best model, are errors in algebraic manipulation, difficulty in grounding abstract concepts into mathematical equations accurately and failure in retrieving relevant domain-specific concepts. We also observe that by mere prompting, GPT-4 is unable to assess risk introduced by negative marking for incorrect answers. For this, we develop a post-hoc confidence-thresholding method over self-consistency, which enables effective response selection. We hope that our challenging benchmark will guide future research in problem-solving using LLMs.", "keywords": "Reasoning;Large Language Model;Benchmark;Problem Solving", "primary_area": "", "supplementary_material": "", "author": "Daman Arora;Himanshu Gaurav Singh;Mausam .", "authorids": "~Daman_Arora1;~Himanshu_Gaurav_Singh1;~Mausam_.1", "gender": "M;M;M", "homepage": "https://daman1209arora.github.io/;https://hgaurav2k.github.io;http://www.cse.iitd.ac.in/~mausam", "dblp": "331/1657;;30/6391.html", "google_scholar": "vMOWEMAAAAAJ;qav_fQYAAAAJ;https://scholar.google.co.in/citations?hl=en", "or_profile": "~Daman_Arora1;~Himanshu_Gaurav_Singh1;~Mausam_Mausam2", "aff": "Indian Institute of Technology, Delhi;Indian Institute of Technology, Delhi;Indian Institute of Technology Delhi", "aff_domain": "iitd.ac.in;iitd.ac.in;iitd.ac.in", "position": "Undergrad student;Undergrad student;Full Professor", "bibtex": "@inproceedings{\narora2023have,\ntitle={Have {LLM}s Advanced Enough? 
A Challenging Problem Solving Benchmark For Large Language Models},\nauthor={Daman Arora and Himanshu Gaurav Singh and Mausam .},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=YHWXlESeS8}\n}", "github": "", "project": "", "reviewers": "8fFX;Kktx;Yhxr", "site": "https://openreview.net/forum?id=YHWXlESeS8", "pdf_size": 0, "rating": "5;5;5", "confidence": "3;4;5", "excitement": "4;4;4", "reproducibility": "4;4;4", "correctness": "4;4;4", "rating_avg": 5.0, "confidence_avg": 4.0, "excitement_avg": 4.0, "reproducibility_avg": 4.0, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0003-4088-4296", "linkedin": ";himanshu-gaurav-singh/;", "aff_unique_index": "0;0;0", "aff_unique_norm": "Indian Institute of Technology Delhi", "aff_unique_dep": "", "aff_unique_url": "https://www.iitdelhi.ac.in", "aff_unique_abbr": "IIT Delhi", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Delhi", "aff_country_unique_index": "0;0;0", "aff_country_unique": "India" }, { "id": "YJMUVwLcEi", "title": "Lexical Repetitions Lead to Rote Learning: Unveiling the Impact of Lexical Overlap in Train and Test Reference Summaries", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Ideal summarization models should generalize to novel summary-worthy content without remembering reference training summaries by rote. However, a single average performance score on the entire test set is inadequate in determining such model competencies. We propose a fine-grained evaluation protocol by partitioning a test set based on the lexical similarity of reference test summaries with training summaries. We observe up to a 5x (1.2x) difference in ROUGE-2 (entity recall) scores between the subsets with the lowest and highest similarity. Next, we show that such training repetitions also make a model vulnerable to rote learning, reproducing data artifacts such as factual errors, especially when reference test summaries are lexically close to training summaries. Consequently, we propose to limit lexical repetitions in training summaries during both supervised fine-tuning and likelihood calibration stages to improve the performance on novel test cases while retaining average performance. 
Our automatic and human evaluations on novel test subsets and recent news articles show that limiting lexical repetitions in training summaries can prevent rote learning and improve generalization.", "keywords": "Lexical Diversity;Summarization;Data-Centric AI", "primary_area": "", "supplementary_material": "", "author": "Prafulla Kumar Choubey;Alexander Fabbri;Caiming Xiong;Chien-Sheng Wu", "authorids": "~Prafulla_Kumar_Choubey2;~Alexander_Fabbri1;~Caiming_Xiong1;~Chien-Sheng_Wu1", "gender": "M;M;M;M", "homepage": ";https://alex-fabbri.github.io;http://cmxiong.com/;http://jasonwu0731.github.io", "dblp": "203/8260;203/8539;80/7282;180/5537", "google_scholar": "k7aMOCsAAAAJ;GgfJdhwAAAAJ;vaSdahkAAAAJ;1G4GV2EAAAAJ", "or_profile": "~Prafulla_Kumar_Choubey2;~Alexander_Fabbri1;~Caiming_Xiong1;~Chien-Sheng_Wu1", "aff": "SalesForce.com;SalesForce.com;Salesforce Research;Salesforce AI", "aff_domain": "salesforce.com;salesforce.com;salesforce.com;salesforce.com", "position": "Researcher;Researcher;Research Scientist;Researcher", "bibtex": "@inproceedings{\nchoubey2023lexical,\ntitle={Lexical Repetitions Lead to Rote Learning: Unveiling the Impact of Lexical Overlap in Train and Test Reference Summaries},\nauthor={Prafulla Kumar Choubey and Alexander Fabbri and Caiming Xiong and Chien-Sheng Wu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=YJMUVwLcEi}\n}", "github": "", "project": "", "reviewers": "Luya;ewgz;BtBR", "site": "https://openreview.net/forum?id=YJMUVwLcEi", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;2;4", "excitement": "4;2;4", "reproducibility": "3;4;3", "correctness": "4;3;3", "rating_avg": 4.0, "confidence_avg": 3.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;", "linkedin": ";;caiming-xiong-150a1417;chien-sheng-jason-wu/", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Salesforce", "aff_unique_dep": "", "aff_unique_url": "https://www.salesforce.com", "aff_unique_abbr": "Salesforce", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "YKKcbwztwH", "title": "Parameter-Efficient Cross-lingual Transfer of Vision and Language Models via Translation-based Alignment", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Pre-trained vision and language models such as CLIP have witnessed remarkable success in connecting images and texts with a primary focus on English texts. Despite recent efforts to extend CLIP to support other languages, disparities in performance among different languages have been observed due to uneven resource availability.\nAdditionally, current cross-lingual transfer methods of those pre-trained models would consume excessive resources for a large number of languages. \nTherefore, we propose a new parameter-efficient cross-lingual transfer learning framework that utilizes a translation-based alignment method to mitigate multilingual disparities and explores parameter-efficient fine-tuning methods for parameter-efficient cross-lingual transfer. 
\nExtensive experiments on XTD and Multi30K datasets, covering 11 languages under zero-shot, few-shot, and full-dataset learning scenarios, show that our framework significantly reduces the multilingual disparities among languages and improves cross-lingual transfer results, especially in low-resource scenarios, while only keeping and fine-tuning an extremely small number of parameters compared to the full model (e.g., Our framework only requires 0.16\\% additional parameters of a full-model for each language in the few-shot learning scenario).", "keywords": "Cross-lingual Transfer;Vision and Language;Parameter Efficiency", "primary_area": "", "supplementary_material": "", "author": "Zhen Zhang;Jialu Wang;Xin Eric Wang", "authorids": "~Zhen_Zhang16;~Jialu_Wang1;~Xin_Eric_Wang2", "gender": "M;;M", "homepage": "https://namezhenzhang.github.io/;https://people.ucsc.edu/~jwang470/;https://eric-xw.github.io", "dblp": "19/5112-15.html;195/2701;10/5630-61", "google_scholar": "MzTxKGkAAAAJ;HOtDeN0AAAAJ;YjqluE0AAAAJ", "or_profile": "~Zhen_Zhang16;~Jialu_Wang1;~Xin_Eric_Wang2", "aff": "Tsinghua University;University of California, Santa Cruz;University of California, Santa Cruz", "aff_domain": "tsinghua.edu.cn;ucsc.edu;ucsc.edu", "position": "Undergrad student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nzhang2023parameterefficient,\ntitle={Parameter-Efficient Cross-lingual Transfer of Vision and Language Models via Translation-based Alignment},\nauthor={Zhen Zhang and Jialu Wang and Xin Eric Wang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=YKKcbwztwH}\n}", "github": "", "project": "", "reviewers": "tqnJ;WwLy;qRQE", "site": "https://openreview.net/forum?id=YKKcbwztwH", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;4", "excitement": "2;2;2", "reproducibility": "4;3;4", "correctness": "3;3;4", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 2.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0009-0002-5585-9114;;0000-0003-2605-5504", "linkedin": ";;", "aff_unique_index": "0;1;1", "aff_unique_norm": "Tsinghua University;University of California, Santa Cruz", "aff_unique_dep": ";", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.ucsc.edu", "aff_unique_abbr": "THU;UCSC", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Santa Cruz", "aff_country_unique_index": "0;1;1", "aff_country_unique": "China;United States" }, { "id": "YMlYb8cWgE", "title": "RECAL: Sample-Relation Guided Confidence Calibration over Tabular Data", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Tabular-format data is widely adopted in various real-world applications. Various machine learning models have achieved remarkable success in both industrial applications and data-science competitions. Despite these successes, most current machine learning methods for tabular data lack accurate confidence estimation, which is needed by some high-risk sensitive applications such as credit modeling and financial fraud detection. In this paper, we study the confidence estimation of machine learning models applied to tabular data. The key finding of our paper is that a real-world tabular dataset typically contains implicit sample relations, and this can further help to obtain a more accurate estimation. 
To this end, we introduce a general post-training confidence calibration framework named RECAL to calibrate the predictive confidence of current machine learning models by employing graph neural networks to model the relations between different samples. We perform extensive experiments on tabular datasets with both implicit and explicit graph structures and show that RECAL can significantly improve the calibration quality compared to the conventional method without considering the sample relations.", "keywords": "Confidence Calibration;Tabular Data;Element-Wise Temperature Scaling", "primary_area": "", "supplementary_material": "", "author": "Wang HaoTian;Zhen Zhang;Mengting Hu;Qichao Wang;Liang Chen;Yatao Bian;Bingzhe Wu", "authorids": "~Wang_HaoTian2;~Zhen_Zhang30;~Mengting_Hu1;~Qichao_Wang1;~Liang_Chen17;~Yatao_Bian1;~Bingzhe_Wu1", "gender": "M;F;M;M;M;M;M", "homepage": ";https://hmt2014.github.io/homepage/;https://qichaos-wang.github.io/;;https://chenliang.tech/;https://yataobian.com;", "dblp": ";;;207/4843;https://dblp.uni-trier.de/pid/01/5394-1;222/2694;", "google_scholar": ";cYxJCNIAAAAJ;;_3hgtf8AAAAJ;pGZtPjcAAAAJ;oZBTlBkAAAAJ;", "or_profile": "~Wang_HaoTian2;~Mengting_Hu1;~Qichao_Wang1;~Bingzhe_Wu1;~Liang_Chen7;~An_Bian1;~zhen_zhang28", "aff": "Nankai University;Nankai University;SUN YAT-SEN UNIVERSITY;Tencent AI Lab;;Tencent AI Lab;Nankai University", "aff_domain": "nankai.edu.cn;nankai.edu.cn;sysu.edu.cn;tencent.com;;tencent.com;nankai.edu.cn", "position": "PhD student;Assistant Professor;MS student;Researcher;;Senior researcher ;MS student", "bibtex": "@inproceedings{\nhaotian2023recal,\ntitle={{RECAL}: Sample-Relation Guided Confidence Calibration over Tabular Data},\nauthor={Wang HaoTian and Zhen Zhang and Mengting Hu and Qichao Wang and Liang Chen and Yatao Bian and Bingzhe Wu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=YMlYb8cWgE}\n}", "github": "", "project": "", "reviewers": "tBxh;nFCr;XNjM", "site": "https://openreview.net/forum?id=YMlYb8cWgE", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;2;4", "excitement": "3;3;4", "reproducibility": "4;4;3", "correctness": "4;4;4", "rating_avg": 3.0, "confidence_avg": 3.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0001-5010-0315;0000-0003-1536-5400;;;;0000-0002-2368-4084;", "linkedin": ";;;;;;zhenzhangleon/", "aff_unique_index": "0;0;1;2;2;0", "aff_unique_norm": "Nankai University;Sun Yat-sen University;Tencent", "aff_unique_dep": ";;Tencent AI Lab", "aff_unique_url": "http://www.nankai.edu.cn;http://www.sysu.edu.cn;https://ai.tencent.com", "aff_unique_abbr": "NKU;SYSU;Tencent AI Lab", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "YN6FtojPxD", "title": "Quantifying the Dialect Gap and its Correlates Across Languages", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Historically, researchers and consumers have noticed a decrease in quality when applying NLP tools to minority variants of languages (i.e. Puerto Rican Spanish or Swiss German), but studies exploring this have been limited to a select few languages. 
Additionally, past studies have mainly been conducted in a monolingual context, so cross-linguistic trends have not been identified and tied to external factors. In this work, we conduct a comprehensive evaluation of the most influential, state-of-the-art large language models (LLMs) across two high-use applications, machine translation and automatic speech recognition, to assess their functionality on the regional dialects of several high- and low-resource languages. Additionally, we analyze how the regional dialect gap is correlated with economic, social, and linguistic factors. The impact of training data, including related factors like dataset size and its construction procedure, is shown to be significant but not consistent across models or languages, meaning a one-size-fits-all approach cannot be taken in solving the dialect gap. This work will lay the foundation for furthering the field of dialectal NLP by laying out evident disparities and identifying possible pathways for addressing them through mindful data collection.", "keywords": "dialect gap;machine translation;automatic speech recognition;performance correlation;multilingual", "primary_area": "", "supplementary_material": "", "author": "Anjali Kantharuban;Ivan Vuli\u0107;Anna Korhonen", "authorids": "~Anjali_Kantharuban1;~Ivan_Vuli\u01071;~Anna_Korhonen1", "gender": "F;M;", "homepage": "http://www.anjaliruban.com;https://sites.google.com/site/ivanvulic/;https://sites.google.com/site/annakorhonen/", "dblp": ";77/9768;14/6532", "google_scholar": ";ZX8js60AAAAJ;https://scholar.google.co.uk/citations?user=SCoVoOYAAAAJ", "or_profile": "~Anjali_Kantharuban1;~Ivan_Vuli\u01071;~Anna_Korhonen1", "aff": "University of Cambridge;PolyAI Limited;University of Cambridge", "aff_domain": "cam.ac.uk;poly-ai.com;cam.ac.uk", "position": "MS student;Senior Scientist;Professor", "bibtex": "@inproceedings{\nkantharuban2023quantifying,\ntitle={Quantifying the Dialect Gap and its Correlates Across Languages},\nauthor={Anjali Kantharuban and Ivan Vuli{\\'c} and Anna Korhonen},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=YN6FtojPxD}\n}", "github": "", "project": "", "reviewers": "J4pE;7UB3;V96V;rTFa", "site": "https://openreview.net/forum?id=YN6FtojPxD", "pdf_size": 0, "rating": "3;3;3;3", "confidence": "5;4;4;2", "excitement": "2;4;4;3", "reproducibility": "5;4;4;5", "correctness": "3;4;4;3", "rating_avg": 3.0, "confidence_avg": 3.75, "excitement_avg": 3.25, "reproducibility_avg": 4.5, "correctness_avg": 3.5, "replies_avg": 14, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": ";ivan-vuli%C4%87-286b4a81/;anna-korhonen-534a9b5/", "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Cambridge;PolyAI Limited", "aff_unique_dep": ";", "aff_unique_url": "https://www.cam.ac.uk;https://www.poly.ai", "aff_unique_abbr": "Cambridge;PolyAI", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Cambridge;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United Kingdom" }, { "id": "YO2VZBcinK", "title": "Topic-DPR: Topic-based Prompts for Dense Passage Retrieval", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Prompt-based learning's efficacy across numerous natural language processing tasks has led to its integration into dense passage retrieval. 
Prior research has mainly focused on enhancing the semantic understanding of pre-trained language models by optimizing a single vector as a continuous prompt. This approach, however, leads to a semantic space collapse; identical semantic information seeps into all representations, causing their distributions to converge in a restricted region. This hinders differentiation between relevant and irrelevant passages during dense retrieval. To tackle this issue, we present Topic-DPR, a dense passage retrieval model that uses topic-based prompts. Unlike the single prompt method, multiple topic-based prompts are established over a probabilistic simplex and optimized simultaneously through contrastive learning. This encourages representations to align with their topic distributions, improving space uniformity. Furthermore, we introduce a novel positive and negative sampling strategy, leveraging semi-structured data to boost dense retrieval efficiency. Experimental results from two datasets affirm that our method surpasses previous state-of-the-art retrieval techniques.", "keywords": "Continuous Prompt;Dense Passage Retrieval;Topic Modeling", "primary_area": "", "supplementary_material": "", "author": "Qingfa Xiao;Shuangyin Li;Lei Chen", "authorids": "~Qingfa_Xiao1;~Shuangyin_Li1;~Lei_Chen7", "gender": "M;M;M", "homepage": ";http://www.shuangyinli.cn/;http://www.cs.ust.hk/~leichen/", "dblp": "352/5387;133/1966;c/LeiChen0002", "google_scholar": ";LQITVaEAAAAJ;gtglwgYAAAAJ", "or_profile": "~Qingfa_Xiao1;~Shuangyin_Li1;~Lei_Chen7", "aff": "Hong Kong University of Science and Technology (Guangzhou);South China Normal University;Hong Kong University of Science and Technology", "aff_domain": "hkust-gz.edu.cn;scnu.edu.cn;hkust.edu", "position": "Intern;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nxiao2023topicdpr,\ntitle={Topic-{DPR}: Topic-based Prompts for Dense Passage Retrieval},\nauthor={Qingfa Xiao and Shuangyin Li and Lei Chen},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=YO2VZBcinK}\n}", "github": "", "project": "", "reviewers": "q4iy;ZAsx;P8b2", "site": "https://openreview.net/forum?id=YO2VZBcinK", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;3;3", "excitement": "2;4;3", "reproducibility": "4;4;3", "correctness": "3;4;3", "rating_avg": 4.0, "confidence_avg": 3.0, "excitement_avg": 3.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 10, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-3481-4403;0000-0001-6404-3438;0000-0002-8257-5806", "linkedin": ";shuangyinli/;", "aff_unique_index": "0;1;0", "aff_unique_norm": "Hong Kong University of Science and Technology;South China Normal University", "aff_unique_dep": ";", "aff_unique_url": "https://www.ust.hk;http://www.scnu.edu.cn", "aff_unique_abbr": "HKUST;SCNU", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "YQzgk43sFB", "title": "Entity Disambiguation on a Tight Labeling Budget", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "Many real-world NLP applications face the challenge of training an entity disambiguation model for a specific domain with a small labeling budget. In this setting there is often access to a large unlabeled pool of documents. 
It is then natural to ask the question: which samples\nshould be selected for annotation? In this paper we propose a solution that combines feature diversity with low rank correction. Our sampling strategy is formulated in the context of bilinear tensor models. Our experiments show that the proposed approach can significantly reduce the amount of labeled data necessary to achieve a given performance.", "keywords": "entity linking;learning under a budget;tensor bilinear model", "primary_area": "", "supplementary_material": "", "author": "Audi Primadhanty;Ariadna Quattoni", "authorids": "~Audi_Primadhanty1;~Ariadna_Quattoni2", "gender": "F;F", "homepage": ";https://www.cs.upc.edu/~aquattoni/", "dblp": "131/5718;77/3898", "google_scholar": ";https://scholar.google.es/citations?user=D1okUccAAAAJ", "or_profile": "~Audi_Primadhanty1;~Ariadna_Quattoni2", "aff": "Universitat Politecnica de Catalunya;Universidad Polit\u00e9cnica de Cataluna", "aff_domain": "cs.upc.edu;upc.edu", "position": "Postdoc;Researcher", "bibtex": "@inproceedings{\nprimadhanty2023entity,\ntitle={Entity Disambiguation on a Tight Labeling Budget},\nauthor={Audi Primadhanty and Ariadna Quattoni},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=YQzgk43sFB}\n}", "github": "", "project": "", "reviewers": "XGHN;d5Yd;U5Tk", "site": "https://openreview.net/forum?id=YQzgk43sFB", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;2;2", "excitement": "3;3;3", "reproducibility": "3;4;5", "correctness": "3;4;3", "rating_avg": 3.0, "confidence_avg": 2.3333333333333335, "excitement_avg": 3.0, "reproducibility_avg": 4.0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";", "linkedin": ";", "aff_unique_index": "0;0", "aff_unique_norm": "Universitat Polit\u00e8cnica de Catalunya", "aff_unique_dep": "", "aff_unique_url": "https://www.upc.edu", "aff_unique_abbr": "UPC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Spain" }, { "id": "YSWLs0G5va", "title": "Joint Entity and Relation Extraction with Span Pruning and Hypergraph Neural Networks", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Entity and Relation Extraction (ERE) is an important task in information extraction. Recent marker-based pipeline models achieve state-of-the-art performance, but still suffer from the error propagation issue. Also, most current ERE models do not take into account higher-order interactions between multiple entities and relations, while higher-order modeling could be beneficial. In this work, we propose HyperGraph neural network for ERE (HGERE), which is built upon the PL-marker (a state-of-the-art marker-based pipeline model). To alleviate error propagation, we use a high-recall pruner mechanism to transfer the burden of entity identification and labeling from the NER module to the joint module of our model. For higher-order modeling, we build a hypergraph, where nodes are entities (provided by the span pruner) and relations thereof, and hyperedges encode interactions between two different relations or between a relation and its associated subject and object entities. We then run a hypergraph neural network for higher-order inference by applying message passing over the built hypergraph. 
Experiments on three widely used benchmarks (ACE2004, ACE2005 and SciERC) for ERE task show significant improvements over the previous state-of-the-art PL-marker.", "keywords": "Entity and Relation Extraction;high-order inference;hypergraph neural network", "primary_area": "", "supplementary_material": "", "author": "Zhaohui Yan;Songlin Yang;Wei Liu;Kewei Tu", "authorids": "~Zhaohui_Yan1;~Songlin_Yang1;~Wei_Liu25;~Kewei_Tu1", "gender": "M;F;M;M", "homepage": ";https://sustcsonglin.github.io;https://vpeterv.github.io/;https://faculty.sist.shanghaitech.edu.cn/faculty/tukw/", "dblp": "50/1907;;49/3283-131;22/918", "google_scholar": "R5bvjGMAAAAJ;1chlis0AAAAJ;https://scholar.google.com/citations?hl=zh-CN;5gi3Pm0AAAAJ", "or_profile": "~Zhaohui_Yan1;~Songlin_Yang1;~Wei_Liu25;~Kewei_Tu1", "aff": "Shanghaitech University;ShanghaiTech University;ShanghaiTech University;ShanghaiTech University", "aff_domain": "shanghaitech.edu.cn;shanghaitech.edu.cn;shanghaitech.edu.cn;shanghaitech.edu.cn", "position": "PhD student;MS student;MS student;Associate Professor", "bibtex": "@inproceedings{\nyan2023joint,\ntitle={Joint Entity and Relation Extraction with Span Pruning and Hypergraph Neural Networks},\nauthor={Zhaohui Yan and Songlin Yang and Wei Liu and Kewei Tu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=YSWLs0G5va}\n}", "github": "", "project": "", "reviewers": "tBFS;YBzL;g3Cy", "site": "https://openreview.net/forum?id=YSWLs0G5va", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;4", "excitement": "3;3;3", "reproducibility": "4;4;5", "correctness": "4;3;3", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 3.0, "reproducibility_avg": 4.333333333333333, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0003-2195-2310;", "linkedin": ";;;", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "ShanghaiTech University", "aff_unique_dep": "", "aff_unique_url": "https://www.shanghaitech.edu.cn", "aff_unique_abbr": "ShanghaiTech", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "YTo9KGNZ3U", "title": "Measuring and Mitigating Constraint Violations of In-Context Learning for Utterance-to-API Semantic Parsing", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "In executable task-oriented semantic parsing, the system aims to translate users' utterances in natural language to machine-interpretable programs (API calls) that can be executed according to pre-defined API specifications.\nWith the popularity of Large Language Models (LLMs), in-context learning offers a strong baseline for such scenarios, especially in data-limited regimes.\nHowever, LLMs are known to hallucinate and therefore pose a formidable challenge in constraining generated content. \nThus, it remains uncertain if LLMs can effectively perform task-oriented utterance-to-API generation, where respecting the API's structural and task-specific constraints is crucial.\nIn this work, we seek to measure, analyze and mitigate such constraints violations. First, we identify the categories of various constraints in obtaining API-semantics from task-oriented utterances, and define fine-grained metrics that complement traditional ones. 
Second, we leverage these metrics to conduct a detailed error analysis of constraints violations seen in state-of-the-art LLMs, which motivates us to investigate two popular mitigation strategies-- Semantic-Retrieval of Demonstrations (SRD) and API-aware Constrained Decoding (API-CD). \nOur experiments show that these strategies are effective at reducing constraints violations and improving the quality of the generated API calls, but require careful consideration given their implementation complexity and latency.", "keywords": "executable semantic parsing;task-oriented semantic parsing;utterance-to-API generation;in-context learning", "primary_area": "", "supplementary_material": "", "author": "Shufan Wang;S\u00e9bastien Jean;Sailik Sengupta;James Gung;Nikolaos Pappas;Yi Zhang", "authorids": "~Shufan_Wang1;~S\u00e9bastien_Jean1;~Sailik_Sengupta1;~James_Gung1;~Nikolaos_Pappas1;~Yi_Zhang13", "gender": "M;M;M;M;M;M", "homepage": "https://people.cs.umass.edu/~shufanwang/;;https://sailik1991.github.io/;https://jgung.github.io/about/;http://nik0spapp.github.io/;", "dblp": "192/1552;75/4203;139/7992;116/0530;36/8968-2.html;64/6544-3", "google_scholar": ";;Hlm-ti8AAAAJ;Xqc8wk0AAAAJ;https://scholar.google.ch/citations?user=daiFj_cAAAAJ;sxs6h_wAAAAJ", "or_profile": "~Shufan_Wang1;~S\u00e9bastien_Jean1;~Sailik_Sengupta1;~James_Gung1;~Nikolaos_Pappas1;~Yi_Zhang13", "aff": "University of Massachusetts, Amherst;Amazon;Amazon;AWS AI Labs;AWS AI Labs;Amazon", "aff_domain": "umass.edu;amazon.com;amazon.com;amazon.com;amazon.com;amazon.com", "position": "PhD student;Amazon;Researcher;Researcher;Researcher;Principal Researcher", "bibtex": "@inproceedings{\nwang2023measuring,\ntitle={Measuring and Mitigating Constraint Violations of In-Context Learning for Utterance-to-{API} Semantic Parsing},\nauthor={Shufan Wang and S{\\'e}bastien Jean and Sailik Sengupta and James Gung and Nikolaos Pappas and Yi Zhang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=YTo9KGNZ3U}\n}", "github": "", "project": "", "reviewers": "dJAL;13ew;56zM", "site": "https://openreview.net/forum?id=YTo9KGNZ3U", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;3", "excitement": "3;3;3", "reproducibility": "5;4;4", "correctness": "4;3;3", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.0, "reproducibility_avg": 4.333333333333333, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;0000-0002-2004-8111;", "linkedin": ";;sailiks/;;nik0spapp/;", "aff_unique_index": "0;1;1;1;1;1", "aff_unique_norm": "University of Massachusetts Amherst;Amazon", "aff_unique_dep": ";Amazon.com, Inc.", "aff_unique_url": "https://www.umass.edu;https://www.amazon.com", "aff_unique_abbr": "UMass Amherst;Amazon", "aff_campus_unique_index": "0", "aff_campus_unique": "Amherst;", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "YWbEDZh5ga", "title": "On Robustness of Finetuned Transformer-based NLP Models", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Transformer-based pretrained models like BERT, GPT-2 and T5 have been finetuned for a large number of natural language processing (NLP) tasks, and have been shown to be very effective. However, while finetuning, what changes across layers in these models with respect to pretrained checkpoints is under-studied. 
Further, how robust are these models to perturbations in input text? Does the robustness vary depending on the NLP task for which the models have been finetuned? While there exists some work on studying robustness of BERT finetuned for a few NLP tasks, there is no rigorous study which compares this robustness across encoder only, decoder only and encoder-decoder models. \n\nIn this paper, we characterize changes between pretrained and finetuned language model representations across layers using two metrics: CKA and STIR. Further, we study the robustness of three language models (BERT, GPT-2 and T5) with eight different text perturbations on classification tasks from General Language Understanding Evaluation (GLUE) benchmark, and generation tasks like summarization, free-form generation and question generation. GPT-2 representations are more robust than BERT and T5 across multiple types of input perturbation. Although models exhibit good robustness broadly, dropping nouns, verbs or changing characters are the most impactful.\nOverall, this study provides valuable insights into perturbation-specific weaknesses of popular Transformer-based models which should be kept in mind when passing inputs.", "keywords": "Transformers;Language Models;finetuning;Perturbations;Robustness;Representation Similarity;CKA;STIR", "primary_area": "", "supplementary_material": "", "author": "Pavan Kalyan Reddy Neerudu;SUBBA REDDY OOTA;mounika marreddy;venkateswara Rao Kagita;Manish Gupta", "authorids": "~Pavan_Kalyan_Reddy_Neerudu1;~SUBBA_REDDY_OOTA1;~mounika_marreddy1;~venkateswara_Rao_Kagita1;~Manish_Gupta1", "gender": ";M;F;;M", "homepage": "https://pavanneerudu.github.io/;https://sites.google.com/view/subbareddyoota300/home?authuser=0;;;https://sites.google.com/view/manishg/", "dblp": ";190/1709;206/3366;;g/ManishGupta1.html", "google_scholar": ";https://scholar.google.co.in/citations?user=4Uz0LngAAAAJ;Ikqyo5sAAAAJ;https://scholar.google.co.in/citations?user=aso5fJ0AAAAJ;https://scholar.google.co.in/citations?user=eX9PSu0AAAAJ", "or_profile": "~Pavan_Kalyan_Reddy_Neerudu1;~SUBBA_REDDY_OOTA1;~mounika_marreddy1;~venkateswara_Rao_Kagita1;~Manish_Gupta1", "aff": "National Institute of Technology Warangal;MPI-SWS;Rheinische Friedrich-Wilhelms Universit\u00e4t Bonn;National Institute of Technology Warangal;Microsoft", "aff_domain": "nitw.ac.in;mpi-sws.org;uni-bonn.de;nitw.ac.in;microsoft.com", "position": "Undergrad student;Visiting Scholar;Postdoc;Assistant Professor;Principal Researcher", "bibtex": "@inproceedings{\nneerudu2023on,\ntitle={On Robustness of Finetuned Transformer-based {NLP} Models},\nauthor={Pavan Kalyan Reddy Neerudu and SUBBA REDDY OOTA and mounika marreddy and venkateswara Rao Kagita and Manish Gupta},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=YWbEDZh5ga}\n}", "github": "", "project": "", "reviewers": "C8jx;vTiM;4B1d", "site": "https://openreview.net/forum?id=YWbEDZh5ga", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;3", "excitement": "3;3;2", "reproducibility": "3;3;3", "correctness": "3;4;3", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 2.6666666666666665, "reproducibility_avg": 3.0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-5975-622X;;;0000-0002-2843-3110", "linkedin": 
"https://linkedin.com/in/pavankalyanreddyneerudu;subba-reddy-oota-11a91254/;;;manishsgupta/", "aff_unique_index": "0;1;2;0;3", "aff_unique_norm": "National Institute of Technology, Warangal;Max Planck Institute for Software Systems;Rheinische Friedrich-Wilhelms Universit\u00e4t Bonn;Microsoft", "aff_unique_dep": ";;;Microsoft Corporation", "aff_unique_url": "https://www.nitw.ac.in;https://www.mpi-sws.org;https://www.uni-bonn.de/;https://www.microsoft.com", "aff_unique_abbr": "NIT Warangal;MPI-SWS;Uni Bonn;Microsoft", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Warangal;", "aff_country_unique_index": "0;1;1;0;2", "aff_country_unique": "India;Germany;United States" }, { "id": "YZJ3oewPcu", "title": "Language Model Quality Correlates with Psychometric Predictive Power in Multiple Languages", "track": "main", "status": "Short Main", "tldr": "", "abstract": "Surprisal theory (Hale, 2001; Levy, 2008) posits that a word\u2019s reading time is proportional to its surprisal (i.e., to its negative log probability given the proceeding context). Since we are unable to access a word\u2019s ground-truth probability, surprisal theory has been empirically tested using surprisal estimates from language models (LMs). Under the premise that surprisal theory holds, we would expect that higher quality language models provide more powerful predictors of human reading behavior---a conjecture we dub the quality--power (QP) hypothesis. Unfortunately, empirical support for the QP hypothesis is mixed. Some studies in English have found correlations between LM quality and predictive power, but other studies using Japanese data, as well as using larger English LMs, find no such correlations. In this work, we conduct a systematic crosslinguistic assessment of the QP hypothesis. We train LMs from scratch on small- and medium-sized datasets from 13 languages (across five language families) and assess their ability to predict eye tracking data. 
We find correlations between LM quality and power in eleven of these thirteen languages, suggesting that, within the range of model classes and sizes tested, better language models are indeed better predictors of human language processing behaviors.", "keywords": "Cognitive Modeling;Language Models;Eye Tracking Data;Cross-linguistic Analysis", "primary_area": "", "supplementary_material": "", "author": "Ethan Wilcox;Clara Meister;Ryan Cotterell;Tiago Pimentel", "authorids": "~Ethan_Wilcox1;~Clara_Meister1;~Ryan_Cotterell1;~Tiago_Pimentel1", "gender": ";M;F;Not Specified", "homepage": "https://wilcoxeg.github.io/;https://tpimentelms.github.io/;https://cimeister.github.io/;https://rycolab.io/", "dblp": "227/3505;203/8292;245/7485.html;146/4361.html", "google_scholar": "5jzLBBwAAAAJ;XjZ8NRsAAAAJ;quJhNH8AAAAJ;DexOqtoAAAAJ", "or_profile": "~Ethan_Wilcox1;~Tiago_Pimentel1;~Clara_Isabel_Meister1;~Ryan_D_Cotterell1", "aff": "Georgetown University;University of Cambridge;Swiss Federal Institute of Technology;Swiss Federal Institute of Technology", "aff_domain": "georgetown.edu;cam.ac.uk;ethz.ch;ethz.ch", "position": "Assistant Professor;PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nwilcox2023language,\ntitle={Language Model Quality Correlates with Psychometric Predictive Power in Multiple Languages},\nauthor={Ethan Wilcox and Clara Meister and Ryan Cotterell and Tiago Pimentel},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=YZJ3oewPcu}\n}", "github": "", "project": "", "reviewers": "fGt4;6sX4;Z8nG", "site": "https://openreview.net/forum?id=YZJ3oewPcu", "pdf_size": 0, "rating": "5;5;5", "confidence": "5;4;5", "excitement": "5;4;4", "reproducibility": "5;2;1", "correctness": "5;2;3", "rating_avg": 5.0, "confidence_avg": 4.666666666666667, "excitement_avg": 4.333333333333333, "reproducibility_avg": 2.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-5128-9890;;0000-0002-3775-4426;", "linkedin": ";;;", "aff_unique_index": "0;1;2;2", "aff_unique_norm": "Georgetown University;University of Cambridge;Swiss Federal Institute of Technology", "aff_unique_dep": ";;", "aff_unique_url": "https://www.georgetown.edu;https://www.cam.ac.uk;https://www.ethz.ch", "aff_unique_abbr": "GU;Cambridge;ETH Zurich", "aff_campus_unique_index": "1", "aff_campus_unique": ";Cambridge", "aff_country_unique_index": "0;1;2;2", "aff_country_unique": "United States;United Kingdom;Switzerland" }, { "id": "YaxyQwG2TP", "title": "Content- and Topology-Aware Representation Learning for Scientific Multi-Literature", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Representation learning forms an essential building block in the development of natural language processing architectures. To date, mainstream approaches focus on learning textual information at the sentence- or document-level, unfortunately, overlooking the inter-document connections. This omission decreases the potency of downstream applications, particularly in multi-document settings. To address this issue, embeddings equipped with latent semantic and rich relatedness information are needed. In this paper, we propose SMRC$^{2}$, which extends representation learning to the multi-document level. 
Our model jointly learns latent semantic information from content and rich relatedness information from topological networks. Unlike previous studies, our work takes multi-document as input and integrates both semantic and relatedness information using a shared space via language model and graph structure. Our extensive experiments confirm the superiority and effectiveness of our approach. To encourage further research in scientific multi-literature representation learning, we will release our code and a new dataset from the biomedical domain.", "keywords": "Representation Learning;Graph-Text Joint Learning;Wasserstein Distance", "primary_area": "", "supplementary_material": "", "author": "Kai Zhang;Kaisong Song;Yangyang Kang;Xiaozhong Liu", "authorids": "~Kai_Zhang18;~Kaisong_Song1;~Yangyang_Kang1;~Xiaozhong_Liu2", "gender": "M;M;M;M", "homepage": "https://users.wpi.edu/~kzhang8/;https://sites.google.com/site/kaisongsong;;https://www.wpi.edu/people/faculty/xliu14", "dblp": ";30/11037;162/0109;11/6389.html", "google_scholar": "jL6dEN4AAAAJ;Ms678voAAAAJ;https://scholar.google.com/citations?hl=zh-CN;1BUByMcAAAAJ", "or_profile": "~Kai_Zhang18;~Kaisong_Song1;~Yangyang_Kang1;~Xiaozhong_Liu2", "aff": "Worcester Polytechnic Institute;Alibaba Group;Alibaba Group;Worcester Polytechnic Institute", "aff_domain": "wpi.edu;alibaba-inc.com;alibaba.com;wpi.edu", "position": "PhD student;Algorithm Expert;Staff Algorithm Engineer;Associate Professor", "bibtex": "@inproceedings{\nzhang2023content,\ntitle={Content- and Topology-Aware Representation Learning for Scientific Multi-Literature},\nauthor={Kai Zhang and Kaisong Song and Yangyang Kang and Xiaozhong Liu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=YaxyQwG2TP}\n}", "github": "", "project": "", "reviewers": "FNgn;phqf;LfTX", "site": "https://openreview.net/forum?id=YaxyQwG2TP", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;2;4", "excitement": "3;4;4", "reproducibility": "3;4;4", "correctness": "3;4;3", "rating_avg": 5.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 10, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0003-4783-6705;0000-0002-5979-7769;;", "linkedin": "kai-z-b527201b9/;;;", "aff_unique_index": "0;1;1;0", "aff_unique_norm": "Worcester Polytechnic Institute;Alibaba Group", "aff_unique_dep": ";", "aff_unique_url": "https://www.wpi.edu;https://www.alibaba.com", "aff_unique_abbr": "WPI;Alibaba", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;0", "aff_country_unique": "United States;China" }, { "id": "YbCkHTqZGn", "title": "Faithful Model Evaluation for Model-Based Metrics", "track": "main", "status": "Short Main", "tldr": "", "abstract": "Statistical significance testing is used in natural language processing (NLP) to determine whether the results of a study or experiment are likely to be due to chance or if they reflect a genuine relationship. A key step in significance testing is the estimation of confidence interval which is a function of sample variance. Sample variance calculation is straightforward when evaluating against ground truth. However, in many cases, a metric model is often used for evaluation. For example, to compare toxicity of two large language models, a toxicity classifier is used for evaluation. 
Existing works usually do not consider the variance change due to metric model errors, which can lead to wrong conclusions. In this work, we establish the mathematical foundation of significance testing for model-based metrics. With experiments on public benchmark datasets and a production system, we show that considering metric model errors to calculate sample variances for model-based metrics changes the conclusions in certain experiments.", "keywords": "Evaluation;Model-Based Metrics", "primary_area": "", "supplementary_material": "", "author": "Qian Hu;Palash Goyal;Rahul Gupta", "authorids": "~Qian_Hu4;~Palash_Goyal1;~Rahul_Gupta3", "gender": "M;M;M", "homepage": ";;", "dblp": ";183/3699;", "google_scholar": "CLleKDAAAAAJ;kNeah3kAAAAJ;1CFrm2YAAAAJ", "or_profile": "~Qian_Hu4;~Palash_Goyal1;~Rahul_Gupta3", "aff": "Amazon;Amazon;Amazon", "aff_domain": "amazon.com;amazon.com;amazon.com", "position": "Researcher;Researcher;Researcher", "bibtex": "@inproceedings{\nhu2023faithful,\ntitle={Faithful Model Evaluation for Model-Based Metrics},\nauthor={Qian Hu and Palash Goyal and Rahul Gupta},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=YbCkHTqZGn}\n}", "github": "", "project": "", "reviewers": "eZXe;sWNL;ZfFZ", "site": "https://openreview.net/forum?id=YbCkHTqZGn", "pdf_size": 0, "rating": "5;5;5", "confidence": "3;4;3", "excitement": "4;3;3", "reproducibility": "4;5;4", "correctness": "4;3;3", "rating_avg": 5.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.333333333333333, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0003-2455-2160;", "linkedin": ";palash-goyal-19b11445/;", "aff_unique_index": "0;0;0", "aff_unique_norm": "Amazon", "aff_unique_dep": "Amazon.com, Inc.", "aff_unique_url": "https://www.amazon.com", "aff_unique_abbr": "Amazon", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "Yg5uDwWQti", "title": "Beat LLMs at Their Own Game: Zero-Shot LLM-Generated Text Detection via Querying ChatGPT", "track": "main", "status": "Short Main", "tldr": "", "abstract": "Large language models (LLMs), e.g., ChatGPT, have revolutionized the domain of natural language processing because of their excellent performance on various tasks. Despite their great potential, LLMs also incur serious concerns as they are likely to be misused. There are already reported cases of academic cheating by using LLMs. Thus, it is a pressing problem to identify LLM-generated texts. In this work, we design a zero-shot black-box method for detecting LLM-generated texts. The key idea is to revise the text to be detected using the ChatGPT model. Our method is based on the intuition that the ChatGPT model will make fewer revisions to LLM-generated texts than it does to human-written texts, because the texts generated by LLMs are more in accord with the generation logic and statistical patterns learned by LLMs like ChatGPT. Thus, if the text to be detected and its ChatGPT-revised version have a higher degree of similarity, the text is more likely to be LLM-generated. Extensive experiments on various datasets and tasks show that our method can effectively detect LLM-generated texts. 
Moreover, compared with other detection methods, our method has better generalization ability and is more stable across various datasets. The codes are publicly available at https://github.com/thunlp/LLM-generated-text-detection.", "keywords": "LLM-generated text detection;large language model", "primary_area": "", "supplementary_material": "", "author": "Biru Zhu;Lifan Yuan;Ganqu Cui;Yangyi Chen;Chong Fu;Bingxiang He;Yangdong Deng;Zhiyuan Liu;Maosong Sun;Ming Gu", "authorids": "~Biru_Zhu1;~Lifan_Yuan1;~Ganqu_Cui1;~Yangyi_Chen1;~Chong_Fu2;~Bingxiang_He1;~Yangdong_Deng1;~Zhiyuan_Liu1;~Maosong_Sun1;~Ming_Gu2", "gender": "F;;M;M;;M;M;M;M;F", "homepage": ";;https://cgq15.github.io/;https://yangyi-chen.github.io/;;https://hbx-hbx.github.io/;http://www.thss.tsinghua.edu.cn/publish/soften/3131/2014/20140115102144786540201/20140115102144786540201_.html;http://nlp.csai.tsinghua.edu.cn/~lzy;https://www.cs.tsinghua.edu.cn/csen/info/1312/4394.htm;", "dblp": "286/7971.html;;232/3064;05/10083;;322/5932;90/5987;53/3245-1;95/3291-1;76/2502-1", "google_scholar": ";;3IVSzZgAAAAJ;https://scholar.google.com/citations?hl=en;;mb36VikAAAAJ;;dT0v5u0AAAAJ;https://scholar.google.com.tw/citations?user=zIgT0HMAAAAJ;", "or_profile": "~Biru_Zhu1;~Lifan_Yuan1;~Ganqu_Cui1;~Yangyi_Chen1;~Chong_Fu2;~Bingxiang_He1;~Yangdong_Deng1;~Zhiyuan_Liu1;~Maosong_Sun1;~Ming_Gu2", "aff": "Tsinghua University;;Tsinghua University;Department of Computer Science, University of Illinois at Urbana-Champaign;;Tsinghua University;Tsinghua University;Tsinghua University;Tsinghua University;Tsinghua University", "aff_domain": "tsinghua.edu.cn;;tsinghua.edu.cn;cs.illinois.edu;;tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn", "position": "PhD student;;PhD student;PhD student;;Undergrad student;Associate Professor;Associate Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nzhu2023beat,\ntitle={Beat {LLM}s at Their Own Game: Zero-Shot {LLM}-Generated Text Detection via Querying Chat{GPT}},\nauthor={Biru Zhu and Lifan Yuan and Ganqu Cui and Yangyi Chen and Chong Fu and Bingxiang He and Yangdong Deng and Zhiyuan Liu and Maosong Sun and Ming Gu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=Yg5uDwWQti}\n}", "github": "", "project": "", "reviewers": "s23n;zd6D;EYmp", "site": "https://openreview.net/forum?id=Yg5uDwWQti", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;4;3", "excitement": "3;3;3", "reproducibility": "3;4;4", "correctness": "4;4;3", "rating_avg": 3.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 10, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;;;0000-0002-7709-2543;;", "linkedin": ";;;yangyi-chen-4006a11b2/;;;;;;", "aff_unique_index": "0;0;1;0;0;0;0;0", "aff_unique_norm": "Tsinghua University;University of Illinois Urbana-Champaign", "aff_unique_dep": ";Department of Computer Science", "aff_unique_url": "https://www.tsinghua.edu.cn;https://illinois.edu", "aff_unique_abbr": "THU;UIUC", "aff_campus_unique_index": "1", "aff_campus_unique": ";Urbana-Champaign", "aff_country_unique_index": "0;0;1;0;0;0;0;0", "aff_country_unique": "China;United States" }, { "id": "YiITnAhKQd", "title": "Somali Information Retrieval Corpus: Bridging the Gap between Query Translation and Dedicated Language Resources", "track": "main", "status": "Short 
Main", "tldr": "", "abstract": "Despite the growing use of the Somali language in various online domains, research on Somali language information retrieval remains limited and primarily relies on query translation due to the lack of a dedicated corpus. To address this problem, we collaborated with language experts and natural language processing (NLP) researchers to create an annotated corpus for Somali information retrieval. This corpus comprises 2335 documents collected from various well-known online sites, such as hiiraan online, dhacdo net, and Somali poetry books. We explain how the corpus was constructed, and develop a Somali language information retrieval system using a pseudo-relevance feedback (PRF) query expansion technique on the corpus. Note that collecting such a data set for the low-resourced Somali language can help overcome NLP barriers, such as the lack of electronically available data sets. Which, if available, can enable the development of various NLP tools and applications such as question-answering and text classification. It also provides researchers with a valuable resource for investigating and developing new techniques and approaches for Somali.", "keywords": "Somali language;low-resource;information retrieval", "primary_area": "", "supplementary_material": "", "author": "Abdisalam Mahamed Badel;Ting Zhong;Wenxin Tai;Fan Zhou", "authorids": "~Abdisalam_Mahamed_Badel1;~Ting_Zhong2;~Wenxin_Tai1;~Fan_Zhou11", "gender": ";F;M;M", "homepage": ";;https://wxtai.github.io/;https://sise.uestc.edu.cn/info/1035/9375.htm", "dblp": ";73/9481.html;284/4234;63/3122-2", "google_scholar": ";Mdr0XDkAAAAJ;YyxocAIAAAAJ;https://scholar.google.com.hk/citations?hl=zh-CN", "or_profile": "~Abdisalam_Mahamed_Badel1;~Ting_Zhong2;~Wenxin_Tai1;~Fan_Zhou11", "aff": ";University of Electronic Science and Technology of China;University of Electronic Science and Technology of China;University of Electronic Science and Technology of China", "aff_domain": ";uestc.edu.cn;uestc.edu.cn;uestc.edu.cn", "position": ";Full Professor;PhD student;Full Professor", "bibtex": "@inproceedings{\nbadel2023somali,\ntitle={Somali Information Retrieval Corpus: Bridging the Gap between Query Translation and Dedicated Language Resources},\nauthor={Abdisalam Mahamed Badel and Ting Zhong and Wenxin Tai and Fan Zhou},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=YiITnAhKQd}\n}", "github": "", "project": "", "reviewers": "ReAG;PkYX;gWGd", "site": "https://openreview.net/forum?id=YiITnAhKQd", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;3", "excitement": "4;4;4", "reproducibility": "4;4;4", "correctness": "3;3;4", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 4.0, "reproducibility_avg": 4.0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-8163-3146;0000-0001-7364-8324;0000-0002-8038-8150", "linkedin": ";;;", "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Electronic Science and Technology of China", "aff_unique_dep": "", "aff_unique_url": "https://www.uestc.edu.cn", "aff_unique_abbr": "UESTC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "YivRtscaFW", "title": "Universal Self-Adaptive Prompting", "track": "main", "status": "Long Main", "tldr": "", "abstract": "A hallmark of modern 
large language models (LLMs) is their impressive general zero-shot and few-shot abilities, often elicited through in-context learning (ICL) via prompting. However, while highly coveted and being the most general, zero-shot performances in LLMs are still typically weaker due to the lack of guidance and the difficulty of applying existing automatic prompt design methods in general tasks when ground-truth labels are unavailable. In this study, we address this by presenting Universal Self-Adaptive Prompting (USP), an automatic prompt design approach specifically tailored for zero-shot learning (while compatible with few-shot). Requiring only a small amount of unlabeled data and an inference-only LLM, USP is highly versatile: to achieve universal prompting, USP categorizes a possible NLP task into one of the three possible task types and then uses a corresponding selector to select the most suitable queries and zero-shot model-generated responses as pseudo-demonstrations, thereby generalizing ICL to the zero-shot setup in a fully automated way. We evaluate USP with PaLM and PaLM 2 models and demonstrate performances that are considerably stronger than standard zero-shot baselines and often comparable to or even superior to few-shot baselines across more than 40 natural language understanding, natural language generation, and reasoning tasks.", "keywords": "large language models;prompting;zero-shot;in-context learning", "primary_area": "", "supplementary_material": "", "author": "Xingchen Wan;Ruoxi Sun;Hootan Nakhost;Hanjun Dai;Julian Martin Eisenschlos;Sercan O Arik;Tomas Pfister", "authorids": "~Xingchen_Wan1;~Ruoxi_Sun2;~Hootan_Nakhost1;~Hanjun_Dai1;~Julian_Martin_Eisenschlos1;~Sercan_O_Arik1;~Tomas_Pfister1", "gender": "M;F;M;M;M;M;M", "homepage": "https://xingchen.one;;;https://hanjun-dai.github.io;https://eisenjulian.github.io/;https://www.sercanarik.com/;http://tomas.pfister.fi", "dblp": "255/7214;72/7683;56/5649;144/7311;262/3990;;14/8360", "google_scholar": "6KkohssAAAAJ;ut1-7LAAAAAJ;https://scholar.google.ca/citations?user=Bk-fDi0AAAAJ;obpl7GQAAAAJ;2uAC2NQAAAAJ;;ahSpJOAAAAAJ", "or_profile": "~Xingchen_Wan1;~Ruoxi_Sun2;~Hootan_Nakhost1;~Hanjun_Dai1;~Julian_Martin_Eisenschlos1;~Sercan_O_Arik1;~Tomas_Pfister1", "aff": "University of Oxford;Google;Google;Google Research;Universidad Nacional de C\u00f3rdoba;Google;Google", "aff_domain": "robots.ox.ac.uk;google.com;google.com;google.com;unc.edu.ar;google.com;google.com", "position": "PhD student;Google;Researcher;Researcher;PhD student;Research Scientist;Head of Research @ Cloud AI", "bibtex": "@inproceedings{\nwan2023universal,\ntitle={Universal Self-Adaptive Prompting},\nauthor={Xingchen Wan and Ruoxi Sun and Hootan Nakhost and Hanjun Dai and Julian Martin Eisenschlos and Sercan O Arik and Tomas Pfister},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=YivRtscaFW}\n}", "github": "", "project": "", "reviewers": "uuzn;GeRr;4kJc", "site": "https://openreview.net/forum?id=YivRtscaFW", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;3;3", "excitement": "4;4;4", "reproducibility": "2;4;4", "correctness": "4;3;3", "rating_avg": 5.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 4.0, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.3333333333333335, "replies_avg": 10, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0003-0074-0597;;;;;0000-0001-6333-1729;0009-0004-4088-8718", 
"linkedin": ";;;hanjun-dai;eisenjulian/;;", "aff_unique_index": "0;1;1;1;2;1;1", "aff_unique_norm": "University of Oxford;Google;Universidad Nacional de C\u00f3rdoba", "aff_unique_dep": ";Google;", "aff_unique_url": "https://www.ox.ac.uk;https://www.google.com;https://www.unc.edu.ar", "aff_unique_abbr": "Oxford;Google;UNC", "aff_campus_unique_index": "1;1;1;1;1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;1;1;1;2;1;1", "aff_country_unique": "United Kingdom;United States;Argentina" }, { "id": "YllS5zEzVq", "title": "Probing LLMs for Joint Encoding of Linguistic Categories", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Large Language Models (LLMs) exhibit impressive performance on a range of NLP tasks, due to the general-purpose linguistic knowledge acquired during pretraining. Existing model interpretability research (Tenney et al., 2019) suggests that a linguistic hierarchy emerges in the LLM layers, with lower layers better suited to solving syntactic tasks and higher layers employed for semantic processing. Yet, little is known about how encodings of different linguistic phenomena interact within the models and to what extent processing of linguistically-related categories relies on the same, shared model representations. In this paper, we propose a framework for testing the joint encoding of linguistic categories in LLMs. Focusing on syntax, we find evidence of joint encoding both at the same (related part-of-speech (POS) classes) and different (POS classes and related syntactic dependency relations) levels of linguistic hierarchy. Our cross-lingual experiments show that the same patterns hold across languages in multilingual LLMs.", "keywords": "language model;transformer;interpretability;probing;syntax;pos;dependency;multilingual", "primary_area": "", "supplementary_material": "", "author": "Giulio Starace;Konstantinos Papakostas;Rochelle Choenni;Apostolos Panagiotopoulos;Matteo Rosati;Alina Leidinger;Ekaterina Shutova", "authorids": "~Giulio_Starace1;~Konstantinos_Papakostas1;~Rochelle_Choenni1;~Apostolos_Panagiotopoulos1;~Matteo_Rosati1;~Alina_Leidinger1;~Ekaterina_Shutova1", "gender": "M;M;F;M;M;;F", "homepage": "https://www.giuliostarace.com;https://din0s.me;https://rochellechoenni.github.io/;;;;https://www.shutova.org/", "dblp": "321/9886;344/5427;238/0597;307/2685.html;;;33/8156", "google_scholar": "S57H0dgAAAAJ;a-ZfdusAAAAJ;https://scholar.google.nl/citations?user=-_WbyoMAAAAJ;https://scholar.google.gr/citations?user=0bU9eNEAAAAJ;;;jqOFBGoAAAAJ", "or_profile": "~Giulio_Starace1;~Konstantinos_Papakostas1;~Rochelle_Choenni1;~Apostolos_Panagiotopoulos1;~Matteo_Rosati1;~Alina_Leidinger1;~Ekaterina_Shutova1", "aff": "University of Amsterdam;University of Amsterdam;University of Amsterdam;University of Amsterdam;University of Amsterdam;;University of Amsterdam", "aff_domain": "uva.nl;uva.nl;uva.nl;uva.nl;uva.nl;;uva.nl", "position": "MS student;MS student;PhD student;MS student;MS student;;Associate Professor", "bibtex": "@inproceedings{\nstarace2023probing,\ntitle={Probing {LLM}s for Joint Encoding of Linguistic Categories},\nauthor={Giulio Starace and Konstantinos Papakostas and Rochelle Choenni and Apostolos Panagiotopoulos and Matteo Rosati and Alina Leidinger and Ekaterina Shutova},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=YllS5zEzVq}\n}", "github": "", "project": "", "reviewers": "ZArK;HBxT;neqX", "site": 
"https://openreview.net/forum?id=YllS5zEzVq", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;3;4", "excitement": "3;3;3", "reproducibility": "4;4;4", "correctness": "4;4;3", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.0, "reproducibility_avg": 4.0, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0001-5284-4238;;;;;;", "linkedin": "https://linkedin.com/in/giuliostarace;https://linkedin.com/in/dinos-papakostas/;;apostpanag/;matteo-rosati-49233a170/;;", "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "University of Amsterdam", "aff_unique_dep": "", "aff_unique_url": "https://www.uva.nl", "aff_unique_abbr": "UvA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "Netherlands" }, { "id": "Ynxo6lene2", "title": "Symbolic Planning and Code Generation for Grounded Dialogue", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Large language models (LLMs) excel at processing and generating text and code. However, LLMs have had limited applicability in grounded task-oriented dialogue as they are difficult to steer toward task objectives and fail to handle novel grounding. We present a modular and interpretable grounded dialogue system that addresses these shortcomings by composing LLMs with a symbolic planner and grounded code execution. Our system, consists of a reader and planner: the reader leverages an LLM to convert partner utterances into executable code, calling functions that perform grounding. The translated code's output is stored to track dialogue state, while a symbolic planner determines the next appropriate response. We evaluate our system's performance on the demanding OneCommon dialogue task, involving collaborative reference resolution on abstract images of scattered dots. 
Our system substantially outperforms the previous state-of-the-art, including improving task success in human evaluations from 56\\% to 69\\% in the most challenging setting.", "keywords": "dialogue;information;planning;code;grounding", "primary_area": "", "supplementary_material": "", "author": "Justin T Chiu;Wenting Zhao;Derek Chen;Saujas Vaduguru;Alexander M Rush;Daniel Fried", "authorids": "~Justin_T_Chiu1;~Wenting_Zhao1;~Derek_Chen1;~Saujas_Vaduguru1;~Alexander_M_Rush1;~Daniel_Fried1", "gender": ";M;M;M;M;M", "homepage": ";https://www.morethanoneturn.com;https://saujasv.github.io/;http://rush.seas.harvard.edu/;https://dpfried.github.io/;", "dblp": "41/10049-2.html;225/7737;294/8886;http://dblp.uni-trier.de/pers/hd/r/Rush:Alexander_M=;117/4804;278/2437.html", "google_scholar": "sycHskQAAAAJ;Xmv0998AAAAJ;U2MUXuMAAAAJ;LIjnUGgAAAAJ;sJDqACEAAAAJ;043r6toAAAAJ", "or_profile": "~Wenting_Zhao1;~Derek_Chen1;~Saujas_Vaduguru1;~Alexander_M_Rush1;~Daniel_Fried1;~Justin_Chiu1", "aff": "Cornell University;Columbia University;Carnegie Mellon University;School of Engineering and Applied Sciences, Harvard University;Carnegie Mellon University;Cornell University", "aff_domain": "cornell.edu;columbia.edu;cmu.edu;seas.harvard.edu;cmu.edu;cornell.edu", "position": "PhD student;Researcher;PhD student;Assistant Professor;Assistant Professor;PhD student", "bibtex": "@inproceedings{\nchiu2023symbolic,\ntitle={Symbolic Planning and Code Generation for Grounded Dialogue},\nauthor={Justin T Chiu and Wenting Zhao and Derek Chen and Saujas Vaduguru and Alexander M Rush and Daniel Fried},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=Ynxo6lene2}\n}", "github": "", "project": "", "reviewers": "pMuZ;soD2;mUba", "site": "https://openreview.net/forum?id=Ynxo6lene2", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;3;3", "excitement": "3;3;4", "reproducibility": "3;2;3", "correctness": "3;4;4", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 2.6666666666666665, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0000-0002-9900-1606;;", "linkedin": ";derekchen14/;;sasha-rush-a69b6917/;;", "aff_unique_index": "0;1;2;3;2;0", "aff_unique_norm": "Cornell University;Columbia University;Carnegie Mellon University;Harvard University", "aff_unique_dep": ";;;School of Engineering and Applied Sciences", "aff_unique_url": "https://www.cornell.edu;https://www.columbia.edu;https://www.cmu.edu;https://www.harvard.edu", "aff_unique_abbr": "Cornell;Columbia;CMU;Harvard", "aff_campus_unique_index": "1", "aff_campus_unique": ";Cambridge", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "YoKptDpMtt", "title": "Effects of sub-word segmentation on performance of transformer language models", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Language modeling is a fundamental task in natural language processing, which has been thoroughly explored with various architectures and hyperparameters. However, few studies focus on the effect of sub-word segmentation on the performance of language models (LMs). In this paper, we compare GPT and BERT models trained with the statistical segmentation algorithm BPE vs. two unsupervised algorithms for morphological segmentation \u2014 Morfessor and StateMorph. 
We train the models for several languages \u2014 including ones with very rich morphology \u2014 and compare their performance with different segmentation algorithms, vocabulary sizes, and model sizes.\nThe results show that training with morphological segmentation allows the LMs to:\n(1) achieve lower perplexity,\n(2) converge more efficiently in terms of training time,\nand\n(3) achieve equivalent or better evaluation scores on downstream tasks. \nLastly, we show that (4) LMs of smaller size using morphological segmentation can perform comparably to models of larger size trained with BPE \u2014 both in terms of (1) perplexity and (3) scores on downstream tasks. Points (2) and (4) impact on sustainability, since they reduce the model cost; and while 2 reduces cost only in the training phase, 4 does so also in the inference phase.", "keywords": "Language modeling;Natural language processing;Morphological segmentation", "primary_area": "", "supplementary_material": "", "author": "Jue Hou;Anisia Katinskaia;Anh-Duc Vu;Roman Yangarber", "authorids": "~Jue_Hou1;~Anisia_Katinskaia1;~Anh-Duc_Vu1;~Roman_Yangarber2", "gender": "M;F;M;M", "homepage": ";https://researchportal.helsinki.fi/en/persons/anisia-katinskaia;;", "dblp": ";204/1361;56/4582.html;", "google_scholar": "_TTPb5EAAAAJ;Fefa8gkAAAAJ;jb5mzrwAAAAJ;K33mwBEAAAAJ", "or_profile": "~Jue_Hou1;~Anisia_Katinskaia1;~Roman_Yangarber2;~Duc_Anh_Vu2", "aff": "University of Helsinki;University of Helsinki;University of Helsinki;University of Helsinki, University of Helsinki", "aff_domain": "helsinki.fi;helsinki.fi;helsinki.fi;cs.helsinki.fi", "position": "PhD student;PhD student;Full Professor;PhD student", "bibtex": "@inproceedings{\nhou2023effects,\ntitle={Effects of sub-word segmentation on performance of transformer language models},\nauthor={Jue Hou and Anisia Katinskaia and Anh-Duc Vu and Roman Yangarber},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=YoKptDpMtt}\n}", "github": "", "project": "", "reviewers": "knhq;HPcL;VFFv", "site": "https://openreview.net/forum?id=YoKptDpMtt", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;3", "excitement": "4;4;3", "reproducibility": "4;4;4", "correctness": "4;4;3", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.0, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0001-9404-2022;;0000-0001-5264-9870;0009-0005-1186-0510", "linkedin": "https://linkedin.com/in/jue-hou-8bb238136;anisiakatinskaia/;;anh-duc-vu-b37627158/", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Helsinki", "aff_unique_dep": "", "aff_unique_url": "https://www.helsinki.fi", "aff_unique_abbr": "UH", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Finland" }, { "id": "YokfK5VOoz", "title": "Copyright Violations and Large Language Models", "track": "main", "status": "Short Main", "tldr": "", "abstract": "Language models may memorize more than just facts, including entire chunks of texts seen during training. Fair use exemptions to copyright laws typically allow for limited use of copyrighted material without permission from the copyright holder, but typically for extraction of information from copyrighted materials, rather than {\\em verbatim} reproduction. 
This work explores the issue of copyright violations and large language models through the lens of verbatim memorization, focusing on possible redistribution of copyrighted text. We present experiments with a range of language models over a collection of popular books and coding problems, providing a conservative characterization of the extent to which language models can redistribute these materials. Overall, this research highlights the need for further examination and the potential impact on future developments in natural language processing to ensure adherence to copyright regulations. Code is at https://github.com/coastalcph/CopyrightLLMs.", "keywords": "language models;copyright;NLP;LLMs", "primary_area": "", "supplementary_material": "", "author": "Antonia Karamolegkou;Jiaang Li;Li Zhou;Anders S\u00f8gaard", "authorids": "~Antonia_Karamolegkou1;~Jiaang_Li3;~Li_Zhou4;~Anders_S\u00f8gaard1", "gender": "F;;F;M", "homepage": "https://antoniakrm.github.io/;;https://lizhou21.github.io/;https://anderssoegaard.github.io/", "dblp": "294/2937;;;30/2756", "google_scholar": "hibFL4QAAAAJ;;https://scholar.google.com.hk/citations?user=BLWhoYcAAAAJ;https://scholar.google.com.tw/citations?user=x3I4CrYAAAAJ", "or_profile": "~Antonia_Karamolegkou1;~Jiaang_Li3;~Li_Zhou4;~Anders_S\u00f8gaard1", "aff": "University of Copenhagen;;University of Electronic Science and Technology of China;Copenhagen University", "aff_domain": "diku.dk;;uestc.edu.cn;ku.dk", "position": "PhD student;;PhD student;Full Professor", "bibtex": "@inproceedings{\nkaramolegkou2023copyright,\ntitle={Copyright Violations and Large Language Models},\nauthor={Antonia Karamolegkou and Jiaang Li and Li Zhou and Anders S{\\o}gaard},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=YokfK5VOoz}\n}", "github": "", "project": "", "reviewers": "AggV;AChs;8WJ6", "site": "https://openreview.net/forum?id=YokfK5VOoz", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;2;3", "excitement": "3;4;4", "reproducibility": "4;5;2", "correctness": "3;4;3", "rating_avg": 4.0, "confidence_avg": 3.0, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-6458-0986;;;", "linkedin": ";;;", "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Copenhagen;University of Electronic Science and Technology of China", "aff_unique_dep": ";", "aff_unique_url": "https://www.ku.dk;https://www.uestc.edu.cn", "aff_unique_abbr": "UCPH;UESTC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "Denmark;China" }, { "id": "Yt4QAWQJ2o", "title": "Stylized Dialogue Generation with Feature-Guided Knowledge Augmentation", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Stylized dialogue generation systems aim to produce coherent and context-aware dialogues while effectively emulating the desired style. \nGenerating stylized dialogue is valuable yet challenging due to the scarce parallel data. Existing methods often synthesize pseudo data through back translation, yet suffer from noisy and context-agnostic style signals caused by insufficient guidance on target style features. 
\nTo address this, we propose the knowledge-augmented stylized dialogue generation model, which includes a feature-guided style knowledge selection module that utilizes context and response features. Specifically, we retrieve dialogue-related style sentences from style corpus to explicitly provide clear style signals. We design a feature-guided selection module with response-related contrastive learning and style responsiveness Kullback-Leibler losses to enhance generation at both semantic and stylized levels. Our approach demonstrates satisfactory performance on two public stylized dialogue benchmarks in both automatic and human evaluations.", "keywords": "Stylized Dialogue Generation;Knowledge Augmentation;Feature-guided Selection", "primary_area": "", "supplementary_material": "", "author": "Jinpeng Li;Zekai Zhang;Xiuying Chen;Dongyan Zhao;Rui Yan", "authorids": "~Jinpeng_Li2;~Zekai_Zhang4;~Xiuying_Chen1;~Dongyan_Zhao2;~Rui_Yan2", "gender": "M;F;M;M;M", "homepage": ";https://iriscxy.github.io/;https://www.wict.pku.edu.cn/zhaodongyan/en/;https://gsai.ruc.edu.cn/english/ruiyan;", "dblp": ";33/11343.html;63/1870;19/2405-1;95/2448-3.html", "google_scholar": ";COUnAF4AAAAJ;lhR8-68AAAAJ;eLw6g-UAAAAJ;https://scholar.google.com/citations?hl=zh-CN", "or_profile": "~Zekai_Zhang4;~Xiuying_Chen1;~Dongyan_Zhao2;~Rui_Yan2;~Jin_Peng_Li1", "aff": ";King Abdullah University of Science and Technology;Peking University;Renmin University of China;Peking University", "aff_domain": ";kaust.edu.sa;pku.edu.cn;ruc.edu.cn;pku.edu.cn", "position": ";PhD student;Full Professor;Associate Professor;PhD student", "bibtex": "@inproceedings{\nli2023stylized,\ntitle={Stylized Dialogue Generation with Feature-Guided Knowledge Augmentation},\nauthor={Jinpeng Li and Zekai Zhang and Xiuying Chen and Dongyan Zhao and Rui Yan},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=Yt4QAWQJ2o}\n}", "github": "", "project": "", "reviewers": "Dqb9;ZsDS;ehjU", "site": "https://openreview.net/forum?id=Yt4QAWQJ2o", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;3;4", "excitement": "3;3;3", "reproducibility": "4;4;4", "correctness": "4;3;3", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.0, "reproducibility_avg": 4.0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0000-0002-3356-6823;0000-0003-4501-5110", "linkedin": "zekai-zhang-65924526a/;;;;", "aff_unique_index": "0;1;2;1", "aff_unique_norm": "King Abdullah University of Science and Technology;Peking University;Renmin University of China", "aff_unique_dep": ";;", "aff_unique_url": "https://www.kast.kau.edu.sa;http://www.pku.edu.cn;http://www.ruc.edu.cn", "aff_unique_abbr": "KAUST;Peking U;RUC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1", "aff_country_unique": "Saudi Arabia;China" }, { "id": "YvzA0hFCF3", "title": "A Challenging Multimodal Video Summary: Simultaneously Extracting and Generating Keyframe-Caption Pairs from Video", "track": "main", "status": "Long Main", "tldr": "", "abstract": "This paper proposes a practical multimodal video summarization task setting and a dataset to train and evaluate the task. The target task involves summarizing a given video into a predefined number of keyframe-caption pairs and displaying them in a listable format to grasp the video content quickly. 
This task aims to extract crucial scenes from the video in the form of images (keyframes) and generate corresponding captions explaining each keyframe's situation. This task is useful as a practical application and presents a highly challenging problem worthy of study. Specifically, achieving simultaneous optimization of the keyframe selection performance and caption quality necessitates careful consideration of the mutual dependence on both preceding and subsequent keyframes and captions. To facilitate subsequent research in this field, we also construct a dataset by expanding upon existing datasets and propose an evaluation framework.\nFurthermore, we develop two baseline systems and report their respective performance.", "keywords": "Video Summarization;Multimodality", "primary_area": "", "supplementary_material": "", "author": "Keito Kudo;Haruki Nagasawa;Jun Suzuki;Nobuyuki Shimizu", "authorids": "~Keito_Kudo1;~Haruki_Nagasawa1;~Jun_Suzuki1;~Nobuyuki_Shimizu1", "gender": "M;M;M;M", "homepage": "http://www.cl.ecei.tohoku.ac.jp/~keitonlp/;https://right-lynx-4dd.notion.site/92049ea770c1473386ae2a6ba469c3da?pvs=4;https://www.nlp.ecei.tohoku.ac.jp/~jun/;", "dblp": "340/3857;;78/6923;84/2107", "google_scholar": "https://scholar.google.com/citations?view_op=list_works;;https://scholar.google.co.jp/citations?user=XO5CrIsAAAAJ;https://scholar.google.co.jp/citations?user=_3zBwwQAAAAJ", "or_profile": "~Keito_Kudo1;~Haruki_Nagasawa1;~Jun_Suzuki1;~Nobuyuki_Shimizu1", "aff": "Tohoku University;Tohoku University;Tohoku University;Yahoo Japan Corporation", "aff_domain": "tohoku.ac.jp;tohoku.ac.jp;tohoku.ac.jp;yahoo-corp.jp", "position": "MS student;MS student;Full Professor;Researcher", "bibtex": "@inproceedings{\nkudo2023a,\ntitle={A Challenging Multimodal Video Summary: Simultaneously Extracting and Generating Keyframe-Caption Pairs from Video},\nauthor={Keito Kudo and Haruki Nagasawa and Jun Suzuki and Nobuyuki Shimizu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=YvzA0hFCF3}\n}", "github": "", "project": "", "reviewers": "nTTm;wL5Q;ZpMr", "site": "https://openreview.net/forum?id=YvzA0hFCF3", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;5", "excitement": "4;3;4", "reproducibility": "4;4;5", "correctness": "5;3;4", "rating_avg": 4.0, "confidence_avg": 4.333333333333333, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.333333333333333, "correctness_avg": 4.0, "replies_avg": 10, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0003-2108-1340;", "linkedin": ";;;", "aff_unique_index": "0;0;0;1", "aff_unique_norm": "Tohoku University;Yahoo Japan Corporation", "aff_unique_dep": ";", "aff_unique_url": "https://www.tohoku.ac.jp;https://www.yahoo.co.jp", "aff_unique_abbr": "Tohoku U;Yahoo Japan", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Japan" }, { "id": "Yz4VKLeZMG", "title": "From Heuristic to Analytic: Cognitively Motivated Strategies for Coherent Physical Commonsense Reasoning", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Pre-trained language models (PLMs) have shown impressive performance in various language tasks. However, they are prone to spurious correlations, and often generate illusory information. 
In real-world applications, PLMs should justify decisions with formalized, coherent reasoning chains, but this challenge remains under-explored. Cognitive psychology theorizes that humans are capable of utilizing fast and intuitive *heuristic* thinking to make decisions based on past experience, then rationalizing the decisions through slower and deliberative *analytic* reasoning. We incorporate these interlinked dual processes in fine-tuning and in-context learning with PLMs, applying them to two language understanding tasks that require coherent physical commonsense reasoning. We show that our proposed Heuristic-Analytic Reasoning (HAR) strategies drastically improve the coherence of rationalizations for model decisions, yielding state-of-the-art results on Tiered Reasoning for Intuitive Physics (TRIP). We also find that this improved coherence is a direct result of more faithful attention to relevant language context in each step of reasoning. Our findings suggest that human-like reasoning strategies can effectively improve the coherence and reliability of PLM reasoning.", "keywords": "commonsense reasoning;physical commonsense;language model analysis;cognitive modeling", "primary_area": "", "supplementary_material": "", "author": "Zheyuan Zhang;Shane Storks;Fengyuan Hu;Sungryull Sohn;Moontae Lee;Honglak Lee;Joyce Chai", "authorids": "~Zheyuan_Zhang4;~Shane_Storks1;~Fengyuan_Hu2;~Sungryull_Sohn1;~Moontae_Lee1;~Honglak_Lee2;~Joyce_Chai2", "gender": "M;M;;M;;;", "homepage": "https://cozheyuanzhangde.github.io/;https://www.shanestorks.com;;;https://moontae.people.uic.edu;;", "dblp": ";239/4098;;172/9884;132/1761;;", "google_scholar": "hYMYxOQAAAAJ;;;https://scholar.google.com/citations?hl=en;BMvYy9cAAAAJ;;", "or_profile": "~Zheyuan_Zhang4;~Shane_Storks1;~Fengyuan_Hu2;~Sungryull_Sohn1;~Moontae_Lee1;~Honglak_Lee2;~Joyce_Chai2", "aff": "University of Michigan - Ann Arbor;University of Michigan - Ann Arbor;;LG AI Research;University of Illinois, Chicago;;", "aff_domain": "umich.edu;umich.edu;;lgresearch.ai;uic.edu;;", "position": "MS student;PhD student;;Researcher;Assistant Professor;;", "bibtex": "@inproceedings{\nzhang2023from,\ntitle={From Heuristic to Analytic: Cognitively Motivated Strategies for Coherent Physical Commonsense Reasoning},\nauthor={Zheyuan Zhang and Shane Storks and Fengyuan Hu and Sungryull Sohn and Moontae Lee and Honglak Lee and Joyce Chai},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=Yz4VKLeZMG}\n}", "github": "", "project": "", "reviewers": "5psW;CnKW;nRZC;KHzS", "site": "https://openreview.net/forum?id=Yz4VKLeZMG", "pdf_size": 0, "rating": "4;4;4;4", "confidence": "4;4;3;3", "excitement": "3;4;4;3", "reproducibility": "4;4;4;4", "correctness": "4;3;3;4", "rating_avg": 4.0, "confidence_avg": 3.5, "excitement_avg": 3.5, "reproducibility_avg": 4.0, "correctness_avg": 3.5, "replies_avg": 14, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;0000-0001-5542-3463;;", "linkedin": "zheyuan-brian-zhang;;;;moontae-lee-975248123/;;", "aff_unique_index": "0;0;1;2", "aff_unique_norm": "University of Michigan;LG;University of Illinois at Chicago", "aff_unique_dep": ";LG AI Research;", "aff_unique_url": "https://www.umich.edu;https://www.lgaires.com;https://www.uic.edu", "aff_unique_abbr": "UM;LG AI;UIC", "aff_campus_unique_index": "0;0;2", "aff_campus_unique": "Ann Arbor;;Chicago", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": 
"United States;South Korea" }, { "id": "Yzi6LM20E2", "title": "Two Directions for Clinical Data Generation with Large Language Models: Data-to-Label and Label-to-Data", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Large language models (LLMs) can generate natural language texts for various domains and tasks, but their potential for clinical text mining, a domain with scarce, sensitive, and imbalanced medical data, is under-explored. We investigate whether LLMs can augment clinical data for detecting Alzheimer's Disease (AD)-related signs and symptoms from electronic health records (EHRs), a challenging task that requires high expertise. We create a novel pragmatic taxonomy for AD sign and symptom progression based on expert knowledge and generated three datasets: (1) a gold dataset annotated by human experts on longitudinal EHRs of AD patients; (2) a silver dataset created by the data-to-label method, which labels sentences from a public EHR collection with AD-related signs and symptoms; and (3) a bronze dataset created by the label-to-data method which generates sentences with AD-related signs and symptoms based on the label definition.\nWe train a system to detect AD-related signs and symptoms from EHRs. We find that the silver and bronze datasets improves the system performance, outperforming the system using only the gold dataset. This shows that LLMs can generate synthetic clinical data for a complex task by incorporating expert knowledge, and our label-to-data method can produce datasets that are free of sensitive information, while maintaining acceptable quality.", "keywords": "Large language model;clinical text processing;Alzheimer's Disease;data augmentation", "primary_area": "", "supplementary_material": "", "author": "Rumeng Li;Xun Wang;hong yu", "authorids": "~Rumeng_Li1;~Xun_Wang5;~hong_yu1", "gender": "F;;F", "homepage": ";;http://bio-nlp.org/", "dblp": "153/9602;;", "google_scholar": ";;TyXe64wAAAAJ", "or_profile": "~Rumeng_Li1;~Xun_Wang5;~hong_yu1", "aff": "University of Massachusetts, Amherst;;University of Massachusetts at Lowell", "aff_domain": "umass.edu;;uml.edu", "position": "PhD student;;Full Professor", "bibtex": "@inproceedings{\nli2023two,\ntitle={Two Directions for Clinical Data Generation with Large Language Models: Data-to-Label and Label-to-Data},\nauthor={Rumeng Li and Xun Wang and hong yu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=Yzi6LM20E2}\n}", "github": "", "project": "", "reviewers": "AkGZ;Qqnk;5zTH", "site": "https://openreview.net/forum?id=Yzi6LM20E2", "pdf_size": 0, "rating": "3;3;3", "confidence": "2;5;3", "excitement": "2;3;2", "reproducibility": "2;1;2", "correctness": "2;3;3", "rating_avg": 3.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 2.3333333333333335, "reproducibility_avg": 1.6666666666666667, "correctness_avg": 2.6666666666666665, "replies_avg": 10, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": ";;", "aff_unique_index": "0;1", "aff_unique_norm": "University of Massachusetts Amherst;University of Massachusetts Lowell", "aff_unique_dep": ";", "aff_unique_url": "https://www.umass.edu;https://www.uml.edu", "aff_unique_abbr": "UMass Amherst;UMass Lowell", "aff_campus_unique_index": "0;1", "aff_campus_unique": "Amherst;Lowell", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "Z1wGHeHBrk", "title": "VIPHY: Probing 
\u201cVisible\u201d Physical Commonsense Knowledge", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Vision-language models (VLMs) have shown remarkable performance on visual reasoning tasks (e.g. attributes, location). While such tasks measure the requisite knowledge to ground and reason over a given visual instance, they do not, however, measure the ability of VLMs to retain and generalize such knowledge. In this work, we evaluate VLMs' ability to acquire \"visible\" physical knowledge -- the information that is easily accessible from images of static scenes, particularly along the dimensions of object color, size, and space. We build an automatic pipeline to derive a comprehensive knowledge resource for calibrating and probing these models. Our results indicate a severe gap between model and human performance across all three dimensions. Furthermore, we demonstrate that a caption pretrained LM significantly outperforms VLMs on both size and spatial tasks -- highlighting that despite sufficient access to ground language with visual modality, they struggle to retain such knowledge.", "keywords": "Commonsense;Evaluation;Vision-Language Model", "primary_area": "", "supplementary_material": "", "author": "Shikhar Singh;Ehsan Qasemi;Muhao Chen", "authorids": "~Shikhar_Singh1;~Ehsan_Qasemi1;~Muhao_Chen1", "gender": ";M;M", "homepage": ";http://ehsanqasemi.com/;https://muhaochen.github.io/", "dblp": ";161/4621.html;173/2608", "google_scholar": "4lf0UeoAAAAJ;2snRpBQAAAAJ;k79yEZkAAAAJ", "or_profile": "~Shikhar_Singh1;~Ehsan_Qasemi1;~Muhao_Chen1", "aff": "University of Southern California;USC/ISI;University of Southern California", "aff_domain": "usc.edu;isi.edu;usc.edu", "position": "MS student;Researcher;Assistant Research Professor", "bibtex": "@inproceedings{\nsingh2023viphy,\ntitle={{VIPHY}: Probing {\\textquotedblleft}Visible{\\textquotedblright} Physical Commonsense Knowledge},\nauthor={Shikhar Singh and Ehsan Qasemi and Muhao Chen},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=Z1wGHeHBrk}\n}", "github": "", "project": "", "reviewers": "Vefi;2fNQ;jjqR;btZN", "site": "https://openreview.net/forum?id=Z1wGHeHBrk", "pdf_size": 0, "rating": "3;3;3;3", "confidence": "3;4;3;3", "excitement": "3;4;2;2", "reproducibility": "4;4;4;5", "correctness": "3;3;3;4", "rating_avg": 3.0, "confidence_avg": 3.25, "excitement_avg": 2.75, "reproducibility_avg": 4.25, "correctness_avg": 3.25, "replies_avg": 14, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0003-0118-3147", "linkedin": ";;", "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Southern California", "aff_unique_dep": "", "aff_unique_url": "https://www.usc.edu", "aff_unique_abbr": "USC", "aff_campus_unique_index": "0;1;0", "aff_campus_unique": "Los Angeles;ISI", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "Z2JBNkaJ7k", "title": "CiteBench: A Benchmark for Scientific Citation Text Generation", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Science progresses by building upon the prior body of knowledge documented in scientific publications. The acceleration of research makes it hard to stay up-to-date with the recent developments and to summarize the ever-growing body of prior work. 
To address this, the task of citation text generation aims to produce accurate textual summaries given a set of papers-to-cite and the citing paper context. Due to otherwise rare explicit anchoring of cited documents in the citing paper, citation text generation provides an excellent opportunity to study how humans aggregate and synthesize textual knowledge from sources. Yet, existing studies are based upon widely diverging task definitions, which makes it hard to study this task systematically. To address this challenge, we propose CiteBench: a benchmark for citation text generation that unifies multiple diverse datasets and enables standardized evaluation of citation text generation models across task designs and domains. Using the new benchmark, we investigate the performance of multiple strong baselines, test their transferability between the datasets, and deliver new insights into the task definition and evaluation to guide future research in citation text generation. We make the code for CiteBench publicly available at https://github.com/UKPLab/citebench.", "keywords": "related work generation;citation text generation;scientific document processing;multi-document summarization;summarization;benchmark;evaluation", "primary_area": "", "supplementary_material": "", "author": "Martin Funkquist;Ilia Kuznetsov;Yufang Hou;Iryna Gurevych", "authorids": "~Martin_Funkquist2;~Ilia_Kuznetsov1;~Yufang_Hou2;~Iryna_Gurevych1", "gender": "M;;F;", "homepage": ";;https://yufanghou.github.io/;", "dblp": "336/4475.html;;;", "google_scholar": "66O8kRAAAAAJ;;-fBym-EAAAAJ;", "or_profile": "~Martin_Funkquist2;~Ilia_Kuznetsov1;~Yufang_Hou2;~Iryna_Gurevych1", "aff": "Link\u00f6ping University;;IBM Research Ireland;", "aff_domain": "liu.se;;ibm.com;", "position": "PhD student;;Principal Researcher;", "bibtex": "@inproceedings{\nfunkquist2023citebench,\ntitle={CiteBench: A Benchmark for Scientific Citation Text Generation},\nauthor={Martin Funkquist and Ilia Kuznetsov and Yufang Hou and Iryna Gurevych},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=Z2JBNkaJ7k}\n}", "github": "", "project": "", "reviewers": "Z3w2;cNNt;GP3Z", "site": "https://openreview.net/forum?id=Z2JBNkaJ7k", "pdf_size": 0, "rating": "4;4;4", "confidence": "5;5;4", "excitement": "4;3;5", "reproducibility": "5;3;5", "correctness": "4;3;5", "rating_avg": 4.0, "confidence_avg": 4.666666666666667, "excitement_avg": 4.0, "reproducibility_avg": 4.333333333333333, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0003-2354-1486;;;", "linkedin": "martin-funkquist-534a49b3/;;;", "aff_unique_index": "0;1", "aff_unique_norm": "Link\u00f6ping University;IBM", "aff_unique_dep": ";Research", "aff_unique_url": "https://www.liu.se;https://www.ibm.com/research", "aff_unique_abbr": "LiU;IBM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "Sweden;Ireland" }, { "id": "Z5VthlliRt", "title": "A Study on Accessing Linguistic Information in Pre-Trained Language Models by Using Prompts", "track": "main", "status": "Short Main", "tldr": "", "abstract": "We study whether linguistic information in pre-trained multilingual language models can be accessed by human language: So far, there is no easy method to directly obtain linguistic information and gain insights into the linguistic principles encoded in such models. 
We use the technique of prompting and formulate linguistic tasks to test the LM\u2019s access to explicit grammatical principles and study how effective this method is at providing access to linguistic features. Our experiments on German, Icelandic and Spanish show that some linguistic properties can in fact be accessed through prompting, whereas others are harder to capture.", "keywords": "prompting for linguistic information;morphology;morphological features", "primary_area": "", "supplementary_material": "", "author": "Marion Di Marco;Katharina H\u00e4mmerl;Alexander Fraser", "authorids": "~Marion_Di_Marco1;~Katharina_H\u00e4mmerl1;~Alexander_Fraser1", "gender": "F;;M", "homepage": "https://www.cis.lmu.de/~dimarco/;;https://alexfraser.github.io/", "dblp": "60/8154;316/9896;145/8377.html", "google_scholar": ";nEet7wwAAAAJ;4ZIZK08AAAAJ", "or_profile": "~Marion_Di_Marco1;~Katharina_H\u00e4mmerl1;~Alexander_Fraser1", "aff": "LMU M\u00fcnchen, Center for Information and Language Processing;CIS, LMU Munich;LMU Munich", "aff_domain": "lmu.de;lmu.de;lmu.de", "position": "Postdoc;PhD student;Full Professor", "bibtex": "@inproceedings{\nmarco2023a,\ntitle={A Study on Accessing Linguistic Information in Pre-Trained Language Models by Using Prompts},\nauthor={Marion Di Marco and Katharina H{\\\"a}mmerl and Alexander Fraser},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=Z5VthlliRt}\n}", "github": "", "project": "", "reviewers": "uGEU;CAHM;SkRy", "site": "https://openreview.net/forum?id=Z5VthlliRt", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;4", "excitement": "3;4;4", "reproducibility": "3;4;4", "correctness": "3;4;3", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": ";kat-haem/;", "aff_unique_index": "0;0;0", "aff_unique_norm": "Ludwig Maximilian University of Munich", "aff_unique_dep": "Center for Information and Language Processing", "aff_unique_url": "https://www.lmu.de", "aff_unique_abbr": "LMU", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Munich", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Germany" }, { "id": "Z65Wq2dvxB", "title": "Speak, Memory: An Archaeology of Books Known to ChatGPT/GPT-4", "track": "main", "status": "Long Main", "tldr": "", "abstract": "In this work, we carry out a data archaeology to infer books that are known to ChatGPT and GPT-4 using a name cloze membership inference query. We find that OpenAI models have memorized a wide collection of copyrighted materials, and that the degree of memorization is tied to the frequency with which passages of those books appear on the web. The ability of these models to memorize an unknown set of books complicates assessments of measurement validity for cultural analytics by contaminating test data; we show that models perform much better on memorized books than on non-memorized books for downstream tasks. We argue that this supports a case for open models whose training data is known.", "keywords": "membership inference;memorization;cultural analytics;large language models", "primary_area": "", "supplementary_material": "", "author": "Kent K. 
Chang;Mackenzie Hanh Cramer;Sandeep Soni;David Bamman", "authorids": "~Kent_K._Chang1;~Mackenzie_Hanh_Cramer1;~Sandeep_Soni1;~David_Bamman1", "gender": "M;F;M;M", "homepage": "https://kentkc.org;;http://sandeepsoni.github.io/;http://people.ischool.berkeley.edu/~dbamman/", "dblp": "345/9888;;130/2538;39/5799", "google_scholar": "Lp9MREUAAAAJ;;_OzUlMkAAAAJ;https://scholar.google.com.tw/citations?user=RkA1y54AAAAJ", "or_profile": "~Kent_K._Chang1;~Mackenzie_Hanh_Cramer1;~Sandeep_Soni1;~David_Bamman1", "aff": "University of California, Berkeley;University of California, Berkeley;University of California, Berkeley;University of California, Berkeley", "aff_domain": "berkeley.edu;berkeley.edu;berkeley.edu;berkeley.edu", "position": "PhD student;Undergrad student;Postdoc;Associate Professor", "bibtex": "@inproceedings{\nchang2023speak,\ntitle={Speak, Memory: An Archaeology of Books Known to Chat{GPT}/{GPT}-4},\nauthor={Kent K. Chang and Mackenzie Hanh Cramer and Sandeep Soni and David Bamman},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=Z65Wq2dvxB}\n}", "github": "", "project": "", "reviewers": "swER;gM9J;2wkf", "site": "https://openreview.net/forum?id=Z65Wq2dvxB", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;4;4", "excitement": "4;4;4", "reproducibility": "3;5;5", "correctness": "4;4;5", "rating_avg": 5.0, "confidence_avg": 4.0, "excitement_avg": 4.0, "reproducibility_avg": 4.333333333333333, "correctness_avg": 4.333333333333333, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;", "linkedin": ";mackenzie-cramer-068134225/;;", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of California, Berkeley", "aff_unique_dep": "", "aff_unique_url": "https://www.berkeley.edu", "aff_unique_abbr": "UC Berkeley", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Berkeley", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "Z7O1kA3pjB", "title": "Summarizing Multiple Documents with Conversational Structure for Meta-Review Generation", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "We present PeerSum, a novel dataset for generating meta-reviews of scientific papers. The meta-reviews can be interpreted as abstractive summaries of reviews, multi-turn discussions and the paper abstract. These source documents have a rich inter-document relationship with an explicit hierarchical conversational structure, cross-references and (occasionally) conflicting information. To introduce the structural inductive bias into pre-trained language models, we introduce RAMMER (Relationship-aware Multi-task Meta-review Generator), a model that uses sparse attention based on the conversational structure and a multi-task training objective that predicts metadata features (e.g., review ratings). Our experimental results show that RAMMER outperforms other strong baseline models in terms of a suite of automatic evaluation metrics. 
Further analyses, however, reveal that RAMMER and other models struggle to handle conflicts in source documents, suggesting meta-review generation is a challenging task and a promising avenue for further research.", "keywords": "Multi-document Summarization;Text Generation;Multi-task Learning;Meta-review Generation;Inter-document Relationships", "primary_area": "", "supplementary_material": "", "author": "Miao Li;Eduard Hovy;Jey Han Lau", "authorids": "~Miao_Li2;~Eduard_Hovy1;~Jey_Han_Lau2", "gender": "M;;M", "homepage": "https://oaimli.github.io/;https://jeyhan.my/;http://www.cs.cmu.edu/~hovy", "dblp": ";32/9014.html;47/2454", "google_scholar": "ySkFXwoAAAAJ;https://scholar.google.com.au/citations?user=MFi65f4AAAAJ;https://scholar.google.com.tw/citations?user=PUFxrroAAAAJ", "or_profile": "~Miao_Li2;~Jey_Han_Lau2;~Eduard_H_Hovy1", "aff": "The University of Melbourne;The University of Melbourne;Carnegie Mellon University", "aff_domain": "unimelb.edu.au;unimelb.edu.au;cmu.edu", "position": "PhD student;Senior Lecturer;Adjunct Professor", "bibtex": "@inproceedings{\nli2023summarizing,\ntitle={Summarizing Multiple Documents with Conversational Structure for Meta-Review Generation},\nauthor={Miao Li and Eduard Hovy and Jey Han Lau},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=Z7O1kA3pjB}\n}", "github": "", "project": "", "reviewers": "h33m;ULo8;iSYN", "site": "https://openreview.net/forum?id=Z7O1kA3pjB", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;3;3", "excitement": "4;4;3", "reproducibility": "4;5;3", "correctness": "4;3;3", "rating_avg": 3.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-1669-7063;0000-0002-1647-4628;", "linkedin": "oaimli/;;", "aff_unique_index": "0;0;1", "aff_unique_norm": "University of Melbourne;Carnegie Mellon University", "aff_unique_dep": ";", "aff_unique_url": "https://www.unimelb.edu.au;https://www.cmu.edu", "aff_unique_abbr": "UniMelb;CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1", "aff_country_unique": "Australia;United States" }, { "id": "Z8p4FX15fa", "title": "Simple Temporal Adaptation to Changing Label Sets: Hashtag Prediction via Dense KNN", "track": "main", "status": "Short Main", "tldr": "", "abstract": "User-generated social media data is constantly changing as new trends influence online discussion and personal information is deleted due to privacy concerns.\nHowever, traditional NLP models rely on fixed training datasets, which means they are unable to adapt to temporal change---both test distribution shift and deleted training data---without frequent, costly re-training.\nIn this paper, we study temporal adaptation through the task of longitudinal hashtag prediction and propose a non-parametric dense retrieval technique, which does not require re-training, as a simple but effective solution.\nIn experiments on a newly collected, publicly available, year-long Twitter dataset exhibiting temporal distribution shift, our method improves by 64% over the best static parametric baseline while avoiding costly gradient-based re-training.\nOur approach is also particularly well-suited to dynamically deleted user data in line with data privacy laws, with negligible computational cost/performance 
loss.", "keywords": "Retrieval Based Models;Ethics;Deletion;Temporal Adaptation;non-parametric", "primary_area": "", "supplementary_material": "", "author": "Niloofar Mireshghallah;Nikolai Vogler;Junxian He;Omar Florez;Ahmed El-Kishky;Taylor Berg-Kirkpatrick", "authorids": "~Niloofar_Mireshghallah1;~Nikolai_Vogler1;~Junxian_He1;~Omar_Florez1;~Ahmed_El-Kishky1;~Taylor_Berg-Kirkpatrick1", "gender": ";M;M;;M;F", "homepage": ";https://jxhe.github.io;https://www.linkedin.com/in/omar-u-florez-35338015/;https://ahelk.github.io;https://cseweb.ucsd.edu/~tberg/;https://homes.cs.washington.edu/~niloofar/", "dblp": "201/7173;188/6127.html;;119/2313;22/8160;241/9430.html", "google_scholar": ";BIFGeoUAAAAJ;;A4frpBcAAAAJ;mN6_BKAAAAAJ;WUCu45YAAAAJ", "or_profile": "~Nikolai_Vogler1;~Junxian_He1;~Omar_Florez1;~Ahmed_El-Kishky1;~Taylor_Berg-Kirkpatrick1;~Fatemehsadat_Mireshghallah1", "aff": "University of California, San Diego;Hong Kong University of Science and Technology;Twitter;Twitter;University of California, San Diego;University of California, San Diego", "aff_domain": "ucsd.edu;ust.hk;twitter.com;twitter.com;ucsd.edu;ucsd.edu", "position": "PhD student;Assistant Professor;Researcher;Researcher;Associate Professor;PhD student", "bibtex": "@inproceedings{\nmireshghallah2023simple,\ntitle={Simple Temporal Adaptation to Changing Label Sets: Hashtag Prediction via Dense {KNN}},\nauthor={Niloofar Mireshghallah and Nikolai Vogler and Junxian He and Omar Florez and Ahmed El-Kishky and Taylor Berg-Kirkpatrick},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=Z8p4FX15fa}\n}", "github": "", "project": "", "reviewers": "9dfY;aHrb;sW7e", "site": "https://openreview.net/forum?id=Z8p4FX15fa", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;3;3", "excitement": "3;3;4", "reproducibility": "3;3;3", "correctness": "3;3;4", "rating_avg": 3.0, "confidence_avg": 3.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;", "linkedin": ";;omar-u-florez-35338015/;;;", "aff_unique_index": "0;1;2;2;0;0", "aff_unique_norm": "University of California, San Diego;Hong Kong University of Science and Technology;Twitter, Inc.", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ucsd.edu;https://www.ust.hk;https://twitter.com", "aff_unique_abbr": "UCSD;HKUST;Twitter", "aff_campus_unique_index": "0;1;0;0", "aff_campus_unique": "San Diego;Hong Kong SAR;", "aff_country_unique_index": "0;1;0;0;0;0", "aff_country_unique": "United States;China" }, { "id": "ZAHyZ3CBds", "title": "JointMatch: A Unified Approach for Diverse and Collaborative Pseudo-Labeling to Semi-Supervised Text Classification", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Semi-supervised text classification (SSTC) has gained increasing attention due to its ability to leverage unlabeled data. However, existing approaches based on pseudo-labeling suffer from the issues of pseudo-label bias and error accumulation. In this paper, we propose JointMatch, a holistic approach for SSTC that addresses these challenges by unifying ideas from recent semi-supervised learning and the task of learning with noise. JointMatch adaptively adjusts classwise thresholds based on the learning status of different classes to mitigate model bias towards current easy classes. 
Additionally, JointMatch alleviates error accumulation by utilizing two differently initialized networks to teach each other in a cross-labeling manner. To maintain divergence between the two networks for mutual learning, we introduce a strategy that weighs more disagreement data while also allowing the utilization of high-quality agreement data for training. Experimental results on benchmark datasets demonstrate the superior performance of JointMatch, achieving a significant 5.13\\% improvement on average. Notably, JointMatch delivers impressive results even in the extremely-scarce-label setting, obtaining 86\\% accuracy on AG News with only 5 labels per class. We make our code available at https://github.com/HenryPengZou/JointMatch.", "keywords": "Semi-supervised learning;text classification;pseudo-labeling;adaptive local thresholding;cross-labeling;weighted disagreement and agreement update.", "primary_area": "", "supplementary_material": "", "author": "Henry Peng Zou;Cornelia Caragea", "authorids": "~Henry_Peng_Zou1;~Cornelia_Caragea2", "gender": "Not Specified;", "homepage": "https://github.com/HenryPengZou;https://www.cs.uic.edu/~cornelia/", "dblp": "359/3792;69/6680.html", "google_scholar": "1qN70bIAAAAJ;vkX6VV4AAAAJ", "or_profile": "~Henry_Peng_Zou1;~Cornelia_Caragea2", "aff": "University of Illinois at Chicago;University of Illinois at Chicago", "aff_domain": "uic.edu;uic.edu", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\nzou2023jointmatch,\ntitle={JointMatch: A Unified Approach for Diverse and Collaborative Pseudo-Labeling to Semi-Supervised Text Classification},\nauthor={Henry Peng Zou and Cornelia Caragea},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=ZAHyZ3CBds}\n}", "github": "", "project": "", "reviewers": "zxwi;Tckf;3ZiN", "site": "https://openreview.net/forum?id=ZAHyZ3CBds", "pdf_size": 0, "rating": "4;4;4", "confidence": "5;3;4", "excitement": "2;4;4", "reproducibility": "4;4;4", "correctness": "3;4;4", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.0, "correctness_avg": 3.6666666666666665, "replies_avg": 10, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";", "linkedin": "henry-peng-zou/;", "aff_unique_index": "0;0", "aff_unique_norm": "University of Illinois at Chicago", "aff_unique_dep": "", "aff_unique_url": "https://www.uic.edu", "aff_unique_abbr": "UIC", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Chicago", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "ZB66oX17CQ", "title": "A Fine-Grained Taxonomy of Replies to Hate Speech", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Countering rather than censoring hate speech has emerged as a promising strategy to address hatred. There are many types of counterspeech in user-generated content: addressing the hateful content or its author, generic requests, well-reasoned counter arguments, insults, etc. The effectiveness of counterspeech, which we define as subsequent incivility, depends on these types. In this paper, we present a theoretically grounded taxonomy of replies to hate speech and a new corpus. We work with real, user-generated hate speech and all the replies it elicits rather than replies generated by a third party. 
Our analyses provide insights into the content real users reply with as well as which replies are empirically most effective. We also experiment with models to characterize the replies to hate speech, thereby opening the door to estimating whether a reply to hate speech will result in further incivility.", "keywords": "counterspeech;hate speech;taxonomy", "primary_area": "", "supplementary_material": "", "author": "Xinchen Yu;Ashley Zhao;Eduardo Blanco;Lingzi Hong", "authorids": "~Xinchen_Yu1;~Ashley_Zhao1;~Eduardo_Blanco1;~Lingzi_Hong1", "gender": "F;F;M;F", "homepage": "https://sites.google.com/view/xinchen-yu/;https://www.facebook.com/profile.php?id=100081762356783&mibextid=LQQJ4d;https://eduardoblanco.github.io/;", "dblp": "275/9495;;32/369-2;144/3339", "google_scholar": "vJFepfoAAAAJ;;AqGa3-MAAAAJ;H9ymNRQAAAAJ", "or_profile": "~Xinchen_Yu1;~Ashley_Zhao1;~Eduardo_Blanco1;~Lingzi_Hong1", "aff": "University of North Texas;University of North Texas;University of Arizona;University of North Texas", "aff_domain": "unt.edu;unt.edu;arizona.edu;unt.edu", "position": "PhD student;Undergrad student;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\nyu2023a,\ntitle={A Fine-Grained Taxonomy of Replies to Hate Speech},\nauthor={Xinchen Yu and Ashley Zhao and Eduardo Blanco and Lingzi Hong},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=ZB66oX17CQ}\n}", "github": "", "project": "", "reviewers": "hSjz;LEi4;v5qd", "site": "https://openreview.net/forum?id=ZB66oX17CQ", "pdf_size": 0, "rating": "4;4;4", "confidence": "5;4;4", "excitement": "3;4;3", "reproducibility": "4;4;3", "correctness": "4;4;3", "rating_avg": 4.0, "confidence_avg": 4.333333333333333, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0001-8608-8653;;;0000-0001-8412-8180", "linkedin": ";;;", "aff_unique_index": "0;0;1;0", "aff_unique_norm": "University of North Texas;University of Arizona", "aff_unique_dep": ";", "aff_unique_url": "https://www.unt.edu;https://www.arizona.edu", "aff_unique_abbr": "UNT;UA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "ZBi4ijmOzs", "title": "End-to-End Single-Channel Speaker-Turn Aware Conversational Speech Translation", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Conventional speech-to-text translation (ST) systems are trained on single-speaker utterances, and they may not generalize to real-life scenarios where the audio contains conversations by multiple speakers. In this paper, we tackle single-channel multi-speaker conversational ST with an end-to-end and multi-task training model, named Speaker-Turn Aware Conversational Speech Translation, that combines automatic speech recognition, speech translation and speaker turn detection using special tokens in a serialized labeling format. We run experiments on the Fisher-CALLHOME corpus, which we adapted by merging the two single-speaker channels into one multi-speaker channel, thus representing the more realistic and challenging scenario with multi-speaker turns and cross-talk. 
Experimental results across single- and multi-speaker conditions and against conventional ST systems show that our model outperforms the reference systems on the multi-speaker condition, while attaining comparable performance on the single-speaker condition. We release scripts for data processing and model training.", "keywords": "speech translation;conversation;speaker-turn detection", "primary_area": "", "supplementary_material": "", "author": "Juan Pablo Zuluaga Gomez;Zhaocheng Huang;Xing Niu;Rohit Paturi;Sundararajan Srinivasan;Prashant Mathur;Brian Thompson;Marcello Federico", "authorids": "~Juan_Pablo_Zuluaga_Gomez1;~Zhaocheng_Huang1;~Xing_Niu1;~Rohit_Paturi1;~Sundararajan_Srinivasan1;~Prashant_Mathur1;~Brian_Thompson3;~Marcello_Federico1", "gender": "M;M;;M;M;M;;M", "homepage": "https://juanpzuluaga.github.io/;https://sites.google.com/site/zhaochenghuang7;http://xingniu.org/;;;;;http://www.marcellofederico.net", "dblp": "251/8496;170/5320.html;87/9555;173/6480;94/8761.html;74/11032;;f/MarcelloFederico", "google_scholar": "https://scholar.google.ru/citations?user=_9_Ja2MAAAAJ;b7EJv_0AAAAJ;45heFpgAAAAJ;L4SicBgAAAAJ;IQM_q_QAAAAJ;QgWJzakAAAAJ;;WaGw_qYAAAAJ", "or_profile": "~Juan_Pablo_Zuluaga_Gomez1;~Zhaocheng_Huang1;~Xing_Niu1;~Rohit_Paturi1;~Sundararajan_Srinivasan1;~Prashant_Mathur1;~Brian_Thompson3;~Marcello_Federico1", "aff": "Idiap Research Institute;Amazon;Amazon;Amazon / AWS AI Labs;Amazon;Amazon;;Amazon", "aff_domain": "idiap.ch;amazon.com;amazon.com;amazon.com;amazon.com;amazon.com;;amazon.com", "position": "PhD student;Applied Scientist;Applied Scientist;Senior Applied Scientist;Principal Researcher;Senior Applied Scientist;;Principal Researcher", "bibtex": "@inproceedings{\ngomez2023endtoend,\ntitle={End-to-End Single-Channel Speaker-Turn Aware Conversational Speech Translation},\nauthor={Juan Pablo Zuluaga Gomez and Zhaocheng Huang and Xing Niu and Rohit Paturi and Sundararajan Srinivasan and Prashant Mathur and Brian Thompson and Marcello Federico},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=ZBi4ijmOzs}\n}", "github": "", "project": "", "reviewers": "4M4i;JKE7;3SN2;WTTK;RZ8H;BkF5", "site": "https://openreview.net/forum?id=ZBi4ijmOzs", "pdf_size": 0, "rating": "5;5;5;5;5;5", "confidence": "3;4;5;3;4;4", "excitement": "4;3;4;3;4;3", "reproducibility": "3;4;4;4;4;3", "correctness": "4;4;3;3;4;3", "rating_avg": 5.0, "confidence_avg": 3.8333333333333335, "excitement_avg": 3.5, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.5, "replies_avg": 19, "authors#_avg": 8, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-6947-2706;0000-0002-7915-6790;;;;;;0000-0001-8095-0038", "linkedin": "juan-pablo-zuluaga-gomez-co/;zhaocheng-david-huang-74593892/;;rohit-paturi-02147526/;sundararajan-srinivasan-28024225;;;marcello-federico-82a7516", "aff_unique_index": "0;1;1;1;1;1;1", "aff_unique_norm": "Idiap Research Institute;Amazon", "aff_unique_dep": ";Amazon.com, Inc.", "aff_unique_url": "https://www.idiap.ch;https://www.amazon.com", "aff_unique_abbr": "Idiap;Amazon", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1;1;1;1", "aff_country_unique": "Switzerland;United States" }, { "id": "ZE6fN4OO18", "title": "Leveraging Multiple Teachers for Test-Time Adaptation of Language-Guided Classifiers", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Recent approaches have explored language-guided 
classifiers capable of classifying examples from novel tasks when provided with task-specific natural language explanations, instructions or prompts (Sanh et al., 2022; R. Menon et al., 2022). While these classifiers can generalize in zero-shot settings, their task performance often varies substantially between different language explanations in unpredictable ways (Lu et al., 2022; Gonen et al., 2022). Also, current approaches fail to leverage unlabeled examples that may be available in many scenarios. Here, we introduce TALC, a framework that uses data programming to adapt a language-guided classifier for a new task during inference when provided with explanations from multiple teachers and unlabeled test examples. Our results show that TALC consistently outperforms a competitive baseline from prior work by an impressive 9.3% (relative improvement). Further, we demonstrate the robustness of TALC to variations in the quality and quantity of provided explanations, highlighting its potential in scenarios where learning from multiple teachers or a crowd is involved. Our code is available at: https://github.com/WeiKangda/TALC.git.", "keywords": "test-time adaptation;learning from language;data programming", "primary_area": "", "supplementary_material": "", "author": "Kangda Wei;Sayan Ghosh;Rakesh R Menon;Shashank Srivastava", "authorids": "~Kangda_Wei1;~Sayan_Ghosh2;~Rakesh_R_Menon3;~Shashank_Srivastava1", "gender": "M;M;M;M", "homepage": "https://weikangda.github.io/kangda.github.io/;https://sgdgp.github.io/;https://www.ssriva.com/;https://cs.unc.edu/~rrmenon", "dblp": "276/1064.html;http://dblp.uni-trier.de/pers/hd/g/Ghosh_0002:Sayan;;206/6504.html", "google_scholar": "hQ1bio8AAAAJ;https://scholar.google.com/citations?hl=en;-vKI5s0AAAAJ;GyFb98kAAAAJ", "or_profile": "~Kangda_Wei1;~Sayan_Ghosh2;~Shashank_Srivastava1;~Rakesh_R_Menon2", "aff": "Department of Computer Science, University of North Carolina at Chapel Hill;Department of Computer Science, University of North Carolina, Chapel Hill;University of North Carolina at Chapel Hill;Department of Computer Science, University of North Carolina, Chapel Hill", "aff_domain": "cs.unc.edu;cs.unc.edu;unc.edu;cs.unc.edu", "position": "MS student;PhD student;Assistant Professor;PhD student", "bibtex": "@inproceedings{\nwei2023leveraging,\ntitle={Leveraging Multiple Teachers for Test-Time Adaptation of Language-Guided Classifiers},\nauthor={Kangda Wei and Sayan Ghosh and Rakesh R Menon and Shashank Srivastava},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=ZE6fN4OO18}\n}", "github": "", "project": "", "reviewers": "382n;7rv9;U5MY", "site": "https://openreview.net/forum?id=ZE6fN4OO18", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;2;2", "excitement": "3;3;4", "reproducibility": "3;3;4", "correctness": "4;3;3", "rating_avg": 3.0, "confidence_avg": 2.3333333333333335, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;", "linkedin": "kangdawei-40ab541b4/;;;", "aff_unique_index": "0;1;1;1", "aff_unique_norm": "University of North Carolina at Chapel Hill;University of North Carolina", "aff_unique_dep": "Department of Computer Science;Department of Computer Science", "aff_unique_url": "https://www.unc.edu;https://www.unc.edu", "aff_unique_abbr": "UNC Chapel Hill;UNC", 
"aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Chapel Hill", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "ZF8Ye9xWZc", "title": "RoMQA: A Benchmark for Robust, Multi-evidence, Multi-answer Question Answering", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "We introduce RoMQA, the first benchmark for robust, multi-evidence, multi-answer question answering (QA). RoMQA contains clusters of questions that are derived from related constraints mined from the Wikidata knowledge graph. RoMQA evaluates robustness of QA models to varying constraints by measuring worst-case performance within each question cluster. Compared to prior QA datasets, RoMQA has more human-written questions that require reasoning over more evidence text and have, on average, many more correct answers. In addition, human annotators rate RoMQA questions as more natural or likely to be asked by people. We evaluate state-of-the-art large language models in zero-shot, few-shot, and fine-tuning settings, and find that RoMQA is challenging: zeroshot and few-shot models perform similarly to naive baselines, while supervised retrieval methods perform well below gold evidence upper bounds. Moreover, existing models are not robust to variations in question constraints, but can be made more robust by tuning on clusters of related questions. Our results show that RoMQA is a challenging benchmark for large language models, and provides a quantifiable test to build more robust QA methods.", "keywords": "Question answering", "primary_area": "", "supplementary_material": "", "author": "Victor Zhong;Weijia Shi;Wen-tau Yih;Luke Zettlemoyer", "authorids": "~Victor_Zhong1;~Weijia_Shi1;~Wen-tau_Yih1;~Luke_Zettlemoyer1", "gender": "M;;M;M", "homepage": "http://www.victorzhong.com;https://weijiashi.notion.site/;http://scottyih.org;https://www.cs.washington.edu/people/faculty/lsz/", "dblp": "182/8931;132/80601;07/7129;21/6793", "google_scholar": "lT3YoNkAAAAJ;https://scholar.google.com/citations?hl=en;8rDNIMsAAAAJ;https://scholar.google.com.tw/citations?user=UjpbO6IAAAAJ", "or_profile": "~Victor_Zhong1;~Weijia_Shi1;~Wen-tau_Yih1;~Luke_Zettlemoyer1", "aff": "University of Washington;University of Washington, Seattle;Meta Platforms, Inc.;Meta", "aff_domain": "washington.edu;uw.edu;meta.com;meta.com", "position": "PhD student;PhD student;Research Scientist;Researcher", "bibtex": "@inproceedings{\nzhong2023romqa,\ntitle={Ro{MQA}: A Benchmark for Robust, Multi-evidence, Multi-answer Question Answering},\nauthor={Victor Zhong and Weijia Shi and Wen-tau Yih and Luke Zettlemoyer},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=ZF8Ye9xWZc}\n}", "github": "", "project": "", "reviewers": "zaFs;EMCn;Tpkz", "site": "https://openreview.net/forum?id=ZF8Ye9xWZc", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;1;3", "excitement": "3;3;3", "reproducibility": "5;3;3", "correctness": "4;2;3", "rating_avg": 3.0, "confidence_avg": 2.6666666666666665, "excitement_avg": 3.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-3200-0000-0011;0000-0003-4263-395X;", "linkedin": "victorzhong;weijia-shi-773768112;scottyih/;luke-zettlemoyer-a0109b226/", "aff_unique_index": "0;0;1;1", "aff_unique_norm": "University of Washington;Meta", "aff_unique_dep": ";Meta Platforms, Inc.", 
"aff_unique_url": "https://www.washington.edu;https://www.meta.com", "aff_unique_abbr": "UW;Meta", "aff_campus_unique_index": "1", "aff_campus_unique": ";Seattle", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "ZJrEDp19kC", "title": "Visually Grounded Continual Language Learning with Selective Specialization", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "A desirable trait of an artificial agent acting in the visual world is to continually learn a sequence of language-informed tasks while striking a balance between sufficiently specializing in each task and building a generalized knowledge for transfer. Selective specialization, i.e., a careful selection of model components to specialize in each task, is a strategy to provide control over this trade-off. However, the design of selection strategies requires insights on the role of each model component in learning rather specialized or generalizable representations, which poses a gap in current research. Thus, our aim with this work is to provide an extensive analysis of selection strategies for visually grounded continual language learning. Due to the lack of suitable benchmarks for this purpose, we introduce two novel diagnostic datasets that provide enough control and flexibility for a thorough model analysis. We assess various heuristics for module specialization strategies as well as quantifiable measures for two different types of model architectures. Finally, we design conceptually simple approaches based on our analysis that outperform common continual learning baselines. Our results demonstrate the need for further efforts towards better aligning continual learning algorithms with the learning behaviors of individual model parts.", "keywords": "continual learning;lifelong learning;vision-language;language grounding", "primary_area": "", "supplementary_material": "", "author": "Kyra Ahrens;Lennart Bengtson;Jae Hee Lee;Stefan Wermter", "authorids": "~Kyra_Ahrens1;~Lennart_Bengtson1;~Jae_Hee_Lee3;~Stefan_Wermter1", "gender": ";;;M", "homepage": ";;;https://www.inf.uni-hamburg.de/en/inst/ab/wtm/people/wermter.html", "dblp": ";;;03/3914", "google_scholar": ";;;uIeaxuAAAAAJ", "or_profile": "~Kyra_Ahrens1;~Lennart_Bengtson1;~Jae_Hee_Lee3;~Stefan_Wermter1", "aff": ";;;University of Hamburg", "aff_domain": ";;;uni-hamburg.de", "position": ";;;Full Professor", "bibtex": "@inproceedings{\nahrens2023visually,\ntitle={Visually Grounded Continual Language Learning with Selective Specialization},\nauthor={Kyra Ahrens and Lennart Bengtson and Jae Hee Lee and Stefan Wermter},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=ZJrEDp19kC}\n}", "github": "", "project": "", "reviewers": "LwBz;FZU5;F5yK;Lw5h", "site": "https://openreview.net/forum?id=ZJrEDp19kC", "pdf_size": 0, "rating": "3;3;3;3", "confidence": "3;4;3;4", "excitement": "2;3;4;3", "reproducibility": "3;5;4;3", "correctness": "3;3;4;3", "rating_avg": 3.0, "confidence_avg": 3.5, "excitement_avg": 3.0, "reproducibility_avg": 3.75, "correctness_avg": 3.25, "replies_avg": 13, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0000-0003-1343-4775", "linkedin": ";;;stefan-wermter/", "aff_unique_index": "0", "aff_unique_norm": "University of Hamburg", "aff_unique_dep": "", "aff_unique_url": "https://www.uni-hamburg.de", "aff_unique_abbr": "UHH", "aff_country_unique_index": "0", "aff_country_unique": 
"Germany" }, { "id": "ZJua8VeHCh", "title": "OssCSE: Overcoming Surface Structure Bias in Contrastive Learning for Unsupervised Sentence Embedding", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Contrastive learning has been demonstrated effective in unsupervised sentence representation learning. Given one sentence, positive pairs are obtained by passing the sentence to the encoder twice using the different dropout masks, and negative pairs are obtained by taking another sentence in the same mini-batch. However, the method suffers from the surface structure bias, i.e., sentences with similar surface structures will be regarded as close in semantics while sentences with dissimilar surface structures will be viewed as distinct in semantics. This leads to the result that paraphrasing a sentence that is dissimilar in surface structure will receive a lower semantic similarity score than inserting a negative word into the sentence. In this paper, we first verify the bias by collecting a sentence transformation testset. Then we systematically probe the existing models by proposing novel splits based on benchmark datasets in accordance with semantic and surface structure similarity. We tackle the bias in two aspects: balancing the learning target by augmenting with data that counters the bias, and meanwhile preserving word semantics by leveraging recall loss to prevent catastrophic forgetting. We evaluate our model on standard semantic textual similarity (STS) tasks using different pre-trained backbones and achieve state-of-the-art averaged performance across the STS benchmarks. Particularly, our models that are fine-tuned with $RoBERTa_{base}$ and $RoBERTa_{large}$ achieve significantly better performance on most benchmark datasets.", "keywords": "Unsupervised Sentence Embedding;Contrastive Learning", "primary_area": "", "supplementary_material": "", "author": "Zhan Shi;Guoyin Wang;Ke Bai;Jiwei Li;Xiang Li;qingjun cui;Belinda Zeng;Trishul Chilimbi;Xiaodan Zhu", "authorids": "~Zhan_Shi4;~Guoyin_Wang1;~Ke_Bai1;~Jiwei_Li1;~Xiang_Li43;~qingjun_cui1;~Belinda_Zeng1;~Trishul_Chilimbi1;~Xiaodan_Zhu1", "gender": "M;M;F;M;M;M;Not Specified;;M", "homepage": ";;;https://nlp.stanford.edu/~bdlijiwei/;;https://www.linkedin.com/in/qingjun-cui-70212762/;;;http://www.xiaodanzhu.com", "dblp": ";05/3838-2;33/8570-1;73/5746-1;;;;265/6085.html;93/310.html", "google_scholar": "yROrWMkAAAAJ;https://scholar.google.com/citations?hl=en;;PwU16JEAAAAJ;CwKgYWsAAAAJ;wvaWrz8AAAAJ;;DrNeo_0AAAAJ;https://scholar.google.ca/citations?user=a6MYnuUAAAAJ", "or_profile": "~Zhan_Shi4;~Guoyin_Wang1;~Ke_Bai1;~Jiwei_Li1;~Xiang_Li43;~qingjun_cui1;~Belinda_Zeng1;~Trishul_Chilimbi1;~Xiaodan_Zhu1", "aff": "ByteDance Inc.;Amazon;Duke University;Zhejiang University;Amazon;Amazon;Amazon;Amazon;Queen's University", "aff_domain": "bytedance.com;amazon.com;duke.edu;zju.edu.cn;amazon.com;amazon.com;amazon.com;amazon.com;queensu.ca", "position": "Researcher;Researcher;PhD student;Assistant Professor;Researcher;Researcher;Researcher;Researcher;Associate Professor", "bibtex": "@inproceedings{\nshi2023osscse,\ntitle={Oss{CSE}: Overcoming Surface Structure Bias in Contrastive Learning for Unsupervised Sentence Embedding},\nauthor={Zhan Shi and Guoyin Wang and Ke Bai and Jiwei Li and Xiang Li and qingjun cui and Belinda Zeng and Trishul Chilimbi and Xiaodan Zhu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=ZJua8VeHCh}\n}", "github": "", 
"project": "", "reviewers": "w2Dv;u5Vf;ApYm;nVw5", "site": "https://openreview.net/forum?id=ZJua8VeHCh", "pdf_size": 0, "rating": "5;5;5;5", "confidence": "5;4;4;3", "excitement": "4;3;4;4", "reproducibility": "3;4;4;3", "correctness": "4;3;4;4", "rating_avg": 5.0, "confidence_avg": 4.0, "excitement_avg": 3.75, "reproducibility_avg": 3.5, "correctness_avg": 3.75, "replies_avg": 14, "authors#_avg": 9, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;;;;0000-0003-3856-3696", "linkedin": ";;;;xiangli729/;qingjun-cui-70212762/;belindazeng/;;xiaodan-zhu-066833101/?originalSubdomain=ca", "aff_unique_index": "0;1;2;3;1;1;1;1;4", "aff_unique_norm": "ByteDance;Amazon;Duke University;Zhejiang University;Queen's University", "aff_unique_dep": ";Amazon.com, Inc.;;;", "aff_unique_url": "https://www.bytedance.com;https://www.amazon.com;https://www.duke.edu;https://www.zju.edu.cn;https://www.queensu.ca", "aff_unique_abbr": "ByteDance;Amazon;Duke;ZJU;Queen's", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;0;1;1;1;1;2", "aff_country_unique": "China;United States;Canada" }, { "id": "ZNQh02cCxt", "title": "Enhancing Abstractiveness of Summarization Models through Calibrated Distillation", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "In this paper, we propose a novel approach named DisCal to enhance the level of abstractiveness (measured by n-gram overlap) without sacrificing the informativeness (measured by ROUGE) of generated summaries. DisCal exposes diverse pseudo summaries with two supervision to the student model. Firstly, the best pseudo summary is identified in terms of abstractiveness and informativeness and used for sequence-level distillation. Secondly, their ranks are used to ensure the student model to assign higher prediction scores to summaries with higher ranks. 
Our experiments show that DisCal outperforms prior methods in abstractive summarization distillation, producing highly abstractive and informative summaries.", "keywords": "abstractive summarization;knowledge distillation", "primary_area": "", "supplementary_material": "", "author": "Hwanjun Song;Igor Shalyminov;Hang Su;Siffi Singh;Kaisheng Yao;Saab Mansour", "authorids": "~Hwanjun_Song2;~Igor_Shalyminov1;~Hang_Su7;~Siffi_Singh1;~Kaisheng_Yao2;~Saab_Mansour1", "gender": "M;M;M;F;;Not Specified", "homepage": "https://songhwanjun.github.io/;https://shalyminov.com;;https://scholar.google.com/citations?user=zaXmGr8AAAAJ&hl=en;;", "dblp": "204/3381;205/8962;;;83/766;03/8053", "google_scholar": "Ijzuc-8AAAAJ;TVs0lP8AAAAJ;UxOvKVUAAAAJ;;https://scholar.google.com/citations?hl=en;https://scholar.google.de/citations?user=1tCbwIQAAAAJ", "or_profile": "~Hwanjun_Song2;~Igor_Shalyminov1;~Hang_Su7;~Siffi_Singh1;~Kaisheng_Yao2;~Saab_Mansour1", "aff": "Amazon Web Services;Amazon;Amazon;;Amazon;Amazon", "aff_domain": "amazon.com;amazon.com;amazon.com;;amazon.com;amazon.com", "position": "Research Scientist;Researcher;Researcher;;Principal Applied Scientist;Amazon", "bibtex": "@inproceedings{\nsong2023enhancing,\ntitle={Enhancing Abstractiveness of Summarization Models through Calibrated Distillation},\nauthor={Hwanjun Song and Igor Shalyminov and Hang Su and Siffi Singh and Kaisheng Yao and Saab Mansour},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=ZNQh02cCxt}\n}", "github": "", "project": "", "reviewers": "U7yc;XooF;1fso", "site": "https://openreview.net/forum?id=ZNQh02cCxt", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;4", "excitement": "3;3;3", "reproducibility": "4;4;4", "correctness": "3;4;4", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.0, "reproducibility_avg": 4.0, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-1105-0818;0000-0001-9664-1774;;;;", "linkedin": ";ishalyminov/;;;kaishengyao/;", "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Amazon", "aff_unique_dep": "Amazon Web Services", "aff_unique_url": "https://aws.amazon.com", "aff_unique_abbr": "AWS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "ZQV5iRPAua", "title": "Evaluating Verifiability in Generative Search Engines", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Generative search engines directly generate responses to user queries, along with in-line citations. A prerequisite trait of a trustworthy generative search engine is verifiability, i.e., systems should cite comprehensively (high citation recall; all statements are fully supported by citations) and accurately (high citation precision; every cite supports its associated statement). We conduct human evaluation to audit four popular generative search engines\u2014Bing Chat, NeevaAI, perplexity.ai, and YouChat\u2014across a diverse set of queries from a variety of sources (e.g., historical Google user queries, dynamically-collected open-ended questions on Reddit, etc.). 
We find that responses from existing generative search engines are fluent and appear informative, but frequently contain unsupported statements and inaccurate citations: on average, a mere 51.5% of generated sentences are fully supported by citations and only 74.5% of citations support their associated sentence. We believe that these results are concerningly low for systems that may serve as a primary tool for information-seeking users, especially given their facade of trustworthiness. We hope that our results further motivate the development of trustworthy generative search engines and help researchers and users better understand the shortcomings of existing commercial systems.", "keywords": "generative;search;engines;verifiability", "primary_area": "", "supplementary_material": "", "author": "Nelson F. Liu;Tianyi Zhang;Percy Liang", "authorids": "~Nelson_F._Liu1;~Tianyi_Zhang2;~Percy_Liang1", "gender": "M;M;", "homepage": "http://nelsonliu.me;;https://cs.stanford.edu/~pliang/", "dblp": "203/9152;17/322;04/1701", "google_scholar": "ghGDz7MAAAAJ;https://scholar.google.com/citations?hl=en;pouyVyUAAAAJ", "or_profile": "~Nelson_F._Liu1;~Tianyi_Zhang2;~Percy_Liang1", "aff": "Stanford University;Stanford University;Stanford University", "aff_domain": "stanford.edu;stanford.edu;stanford.edu", "position": "PhD student;PhD student;Associate Professor", "bibtex": "@inproceedings{\nliu2023evaluating,\ntitle={Evaluating Verifiability in Generative Search Engines},\nauthor={Nelson F. Liu and Tianyi Zhang and Percy Liang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=ZQV5iRPAua}\n}", "github": "", "project": "", "reviewers": "hMXQ;XbQo;qPBo", "site": "https://openreview.net/forum?id=ZQV5iRPAua", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;4", "excitement": "4;3;2", "reproducibility": "5;4;2", "correctness": "4;4;2", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 3.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": ";;", "aff_unique_index": "0;0;0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "", "aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Stanford", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "ZQrRDCxfhW", "title": "Task-Attentive Transformer Architecture for Continual Learning of Vision-and-Language Tasks Using Knowledge Distillation", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "The size and the computational load of fine-tuning large-scale pre-trained neural networks are becoming two major obstacles in adopting machine learning in many applications. Continual learning (CL) can serve as a remedy through enabling knowledge-transfer across sequentially arriving tasks, which relaxes the need to fine-tune all network weights from scratch. However, existing CL algorithms primarily consider learning unimodal vision-only or language-only tasks. We develop a transformer-based CL architecture for learning bimodal vision-and-language tasks based on increasing the number of learnable parameters dynamically and using knowledge distillation. The new additional parameters are used to specialize the network for each task. 
Our approach enables sharing information between the tasks while addressing the challenge of catastrophic forgetting. Our approach scales to learning a large number of tasks because it requires little memory and time overhead. Our model reaches state-of-the-art performance on challenging vision-and-language tasks.", "keywords": "Multimodal Learning;Continual Learning;Catastrophic forgetting", "primary_area": "", "supplementary_material": "", "author": "Yuliang Cai;Jesse Thomason;Mohammad Rostami", "authorids": "~Yuliang_Cai1;~Jesse_Thomason1;~Mohammad_Rostami1", "gender": "M;M;M", "homepage": ";https://jessethomason.com/;https://viterbi.usc.edu/directory/faculty/Rostami/Mohammad", "dblp": ";130/2863;83/9890", "google_scholar": "4wBqvhkAAAAJ;8BeTDr0AAAAJ;Uzx8nLoAAAAJ", "or_profile": "~Yuliang_Cai1;~Jesse_Thomason1;~Mohammad_Rostami1", "aff": "University of Southern California;Amazon;USC/ISI", "aff_domain": "usc.edu;amazon.com;isi.edu", "position": "PhD student;Visiting Academic;Research Scientist", "bibtex": "@inproceedings{\ncai2023taskattentive,\ntitle={Task-Attentive Transformer Architecture for Continual Learning of Vision-and-Language Tasks Using Knowledge Distillation},\nauthor={Yuliang Cai and Jesse Thomason and Mohammad Rostami},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=ZQrRDCxfhW}\n}", "github": "", "project": "", "reviewers": "94xT;inPu;c6E7", "site": "https://openreview.net/forum?id=ZQrRDCxfhW", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;3;3", "excitement": "3;2;3", "reproducibility": "4;4;4", "correctness": "3;3;4", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 2.6666666666666665, "reproducibility_avg": 4.0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0001-9199-0633;", "linkedin": ";jesse-thomason-034746171/;", "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Southern California;Amazon", "aff_unique_dep": ";Amazon.com, Inc.", "aff_unique_url": "https://www.usc.edu;https://www.amazon.com", "aff_unique_abbr": "USC;Amazon", "aff_campus_unique_index": "0;2", "aff_campus_unique": "Los Angeles;;ISI", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "ZSHcpMXWxX", "title": "DUMB: A Dutch Model Benchmark", "track": "main", "status": "Long Main", "tldr": "", "abstract": "We introduce the Dutch Model Benchmark: DUMB. The benchmark includes a diverse set of datasets for low-, medium- and high-resource tasks. The total set of nine tasks includes four tasks that were previously not available in Dutch. Instead of relying on a mean score across tasks, we propose Relative Error Reduction (RER), which compares the DUMB performance of language models to a strong baseline which can be referred to in the future even when assessing different sets of language models. Through a comparison of 14 pre-trained\nlanguage models (mono- and multi-lingual, of varying sizes), we assess the internal consistency of the benchmark tasks, as well as the factors that likely enable high performance. Our results indicate that current Dutch monolingual models under-perform and suggest training larger Dutch models with other architectures and pre-training objectives. At present, the highest performance is achieved by DeBERTaV3 (large), XLM-R (large) and mDeBERTaV3 (base). 
In addition to highlighting best strategies for training larger Dutch models, DUMB\nwill foster further research on Dutch. A public leaderboard is available at https://dumbench.nl.", "keywords": "Dutch;Benchmark;GLUE;Evaluation;Language Models", "primary_area": "", "supplementary_material": "", "author": "Wietse de Vries;Martijn Wieling;Malvina Nissim", "authorids": "~Wietse_de_Vries2;~Martijn_Wieling1;~Malvina_Nissim1", "gender": "M;F;M", "homepage": "https://www.martijnwieling.nl;https://malvinanissim.github.io;", "dblp": "35/2985;91/2392;245/4242", "google_scholar": "Fzv0QJAAAAAJ;hnTpEOAAAAAJ;https://scholar.google.nl/citations?user=gZkWURYAAAAJ", "or_profile": "~Martijn_Wieling1;~Malvina_Nissim1;~Wietse_De_Vries1", "aff": "University of Groningen;University of Groningen;University of Groningen", "aff_domain": "rug.nl;rug.nl;rug.nl", "position": "Associate Professor;Full Professor;PhD student", "bibtex": "@inproceedings{\nvries2023dumb,\ntitle={{DUMB}: A Dutch Model Benchmark},\nauthor={Wietse de Vries and Martijn Wieling and Malvina Nissim},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=ZSHcpMXWxX}\n}", "github": "", "project": "", "reviewers": "i3Ck;2NuP;ourb", "site": "https://openreview.net/forum?id=ZSHcpMXWxX", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;3", "excitement": "4;4;4", "reproducibility": "4;5;4", "correctness": "4;4;4", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 4.0, "reproducibility_avg": 4.333333333333333, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0003-0434-1526;;", "linkedin": "wieling/;;", "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Groningen", "aff_unique_dep": "", "aff_unique_url": "https://www.rug.nl", "aff_unique_abbr": "RUG", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Netherlands" }, { "id": "ZT3yJWAsrq", "title": "'Person' == Light-skinned, Western Man, and Sexualization of Women of Color: Stereotypes in Stable Diffusion", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "We study stereotypes embedded within one of the most popular text-to-image generators: Stable Diffusion. We answer the question: what stereotypes of gender and nationality/continental identity does Stable Diffusion display in the absence of such information i.e. what gender and nationality/continental identity is assigned to 'a person,' or to 'a person from Asia.' Using CLIP-cosine similarity for zero-shot classification of images generated by CLIP-based Stable Diffusion v2.1 verified by manual examination, we chronicle results from 136 prompts (50 results/prompt) of front-facing images of faces from 6 different continents, 27 countries and 3 genders. We observe how Stable Diffusion results of `a person' without any additional gender/nationality information correspond closest to images of men (avg. similarity 0.64) and least with persons of nonbinary gender (avg. similarity 0.41), and to persons from Europe/North America (avg. similarities 0.71 and 0.68, respectively) over Africa/Asia (avg. similarities 0.43 and 0.41, respectively), pointing towards Stable Diffusion having a concerning representation of personhood to be a European/North American man. We also show continental stereotypes and resultant harms e.g. 
a person from Oceania is deemed to be Australian/New Zealander (avg. similarities 0.77 and 0.74, respectively) over Papua New Guinean (avg. similarity 0.31), pointing to the erasure of Indigenous Oceanic peoples, who form a majority over descendants of colonizers both in Papua New Guinea and in Oceania overall. Finally, we unexpectedly observe a pattern of sexualization of women, specifically Latin American, Mexican, Indian and Egyptian women, confirmed through an NSFW detector and verified by manual examination. This demonstrates how Stable Diffusion perpetuates Western fetishization of women of color through objectification in media, which if left unchecked will worsen this stereotypical representation. All code and relevant data will be made publicly available.", "keywords": "Stable Diffusion;text-to-image generation;representation;bias;stereotypes;sexualization;national identity;nonbinary gender", "primary_area": "", "supplementary_material": "", "author": "Sourojit Ghosh;Aylin Caliskan", "authorids": "~Sourojit_Ghosh1;~Aylin_Caliskan1", "gender": "M;Unspecified", "homepage": "https://sourojitghosh.github.io/;https://faculty.washington.edu/aylin/", "dblp": ";116/4680", "google_scholar": "fNxrh48AAAAJ;zxzZAi0AAAAJ", "or_profile": "~Sourojit_Ghosh1;~Aylin_Caliskan1", "aff": "University of Washington;University of Washington", "aff_domain": "uw.edu;uw.edu", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\nghosh2023person,\ntitle={'Person' == Light-skinned, Western Man, and Sexualization of Women of Color: Stereotypes in Stable Diffusion},\nauthor={Sourojit Ghosh and Aylin Caliskan},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=ZT3yJWAsrq}\n}", "github": "", "project": "", "reviewers": "4Bgr;gPrz;m6ZP", "site": "https://openreview.net/forum?id=ZT3yJWAsrq", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;4;3", "excitement": "4;3;3", "reproducibility": "3;4;3", "correctness": "4;3;2", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.0, "replies_avg": 10, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";", "linkedin": ";", "aff_unique_index": "0;0", "aff_unique_norm": "University of Washington", "aff_unique_dep": "", "aff_unique_url": "https://www.washington.edu", "aff_unique_abbr": "UW", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "ZTM90jlGAm", "title": "Sentiment Analysis on Streaming User Reviews via Dual-Channel Dynamic Graph Neural Network", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Sentiment analysis on user reviews has achieved great success thanks to the rapid growth of deep learning techniques. The large number of online streaming reviews also provides the opportunity to model temporal dynamics for users and products on the timeline. However, existing methods model users and products in the real world based on a static assumption and neglect their time-varying characteristics. In this paper, we present DC-DGNN, a dual-channel framework based on a dynamic graph neural network (DGNN) that models temporal user and product dynamics for sentiment analysis. Specifically, a dual-channel text encoder is employed to extract current local and global contexts from review documents for users and products. 
Moreover, user review streams are integrated into the dynamic graph neural network by treating users and products as nodes and reviews as new edges. Node representations are dynamically updated along with the evolution of the dynamic graph and used for the final score prediction. Experimental results on five real-world datasets demonstrate the superiority of the proposed method.", "keywords": "Sentiment Analysis; Streaming User Reviews; Dynamic Graph Neural Network; Online Review Websites", "primary_area": "", "supplementary_material": "", "author": "Xin Zhang;Linhai Zhang;Deyu Zhou", "authorids": "~Xin_Zhang39;~Linhai_Zhang1;~Deyu_Zhou1", "gender": "F;M;M", "homepage": "https://www.nactem.ac.uk/people.php;;http://palm.seu.edu.cn/zhoudeyu/Home.html", "dblp": ";256/3827.html;79/2854", "google_scholar": "https://scholar.google.co.uk/citations?hl=zh-CN;ZPHhl0AAAAAJ;DvVelLcAAAAJ", "or_profile": "~Xin_Zhang39;~Linhai_Zhang1;~Deyu_Zhou1", "aff": "Southeast University;Southeast University;Southeast University", "aff_domain": "seu.edu.cn;seu.edu.cn;seu.edu.cn", "position": "MS student;PhD student;Full Professor", "bibtex": "@inproceedings{\nzhang2023sentiment,\ntitle={Sentiment Analysis on Streaming User Reviews via Dual-Channel Dynamic Graph Neural Network},\nauthor={Xin Zhang and Linhai Zhang and Deyu Zhou},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=ZTM90jlGAm}\n}", "github": "", "project": "", "reviewers": "axgv;tvcR;XFAW", "site": "https://openreview.net/forum?id=ZTM90jlGAm", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;4;4", "excitement": "3;2;4", "reproducibility": "3;3;4", "correctness": "3;3;4", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.0, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0003-0237-9539;;", "linkedin": ";linhai-zhang-5ab370330/;", "aff_unique_index": "0;0;0", "aff_unique_norm": "Southeast University", "aff_unique_dep": "", "aff_unique_url": "https://www.seu.edu.cn/", "aff_unique_abbr": "SEU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "ZVy8L79f5f", "title": "Linking Surface Facts to Large-Scale Knowledge Graphs", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Open Information Extraction (OIE) methods extract facts from natural language text in the form of (\"subject\"; \"relation\"; \"object\") triples. These facts are, however, merely surface forms, the ambiguity of which impedes their downstream usage; e.g., the surface phrase \"Michael Jordan\" may refer to either the former basketball player or the university professor. Knowledge Graphs (KGs), on the other hand, contain facts in a canonical (i.e., unambiguous) form, but their coverage is limited by a static schema (i.e., a fixed set of entities and predicates). To bridge this gap, we need the best of both worlds: (i) high coverage of free-text OIEs, and (ii) semantic precision (i.e., monosemy) of KGs. In order to achieve this goal, we propose a new benchmark with novel evaluation protocols that can, for example, measure fact linking performance on a granular triple slot level, while also measuring if a system has the ability to recognize that a surface form has no match in the existing KG. 
Our extensive evaluation of several baselines show that detection of out-of-KG entities and predicates is more difficult than accurate linking to existing ones, thus calling for more research efforts on this difficult task. We publicly release all resources (data, benchmark and code) on https://github.com/nec-research/fact-linking.", "keywords": "information extraction;fact linking;knowledge graphs;open information extraction", "primary_area": "", "supplementary_material": "", "author": "Gorjan Radevski;Kiril Gashteovski;Chia-Chien Hung;Carolin Lawrence;Goran Glava\u0161", "authorids": "~Gorjan_Radevski1;~Kiril_Gashteovski1;~Chia-Chien_Hung1;~Carolin_Lawrence1;~Goran_Glava\u01611", "gender": "M;M;;;M", "homepage": "https://gorjanradevski.github.io/personal/about/;https://dws.informatik.uni-mannheim.de/en/people/researchers/kiril-gashteovski/;;https://carolinlawrence.github.io/;https://sites.google.com/view/goranglavas", "dblp": "278/2232;205/9043;;191/6056;50/11059", "google_scholar": "GKJnjkgAAAAJ;ZO5DW7MAAAAJ;;9xtF8-MAAAAJ;Ym0myOwAAAAJ", "or_profile": "~Gorjan_Radevski1;~Kiril_Gashteovski1;~Chia-Chien_Hung1;~Carolin_Lawrence1;~Goran_Glava\u01611", "aff": "Department of Electrical Engineering, KU Leuven, Belgium, KU Leuven;NEC Laboratories Europe;;NEC Laboratories Europe;Julius-Maximilians-Universit\u00e4t W\u00fcrzburg", "aff_domain": "esat.kuleuven.be;neclab.eu;;neclab.eu;uni-wuerzburg.de", "position": "PhD student;Researcher;;Researcher;Full Professor", "bibtex": "@inproceedings{\nradevski2023linking,\ntitle={Linking Surface Facts to Large-Scale Knowledge Graphs},\nauthor={Gorjan Radevski and Kiril Gashteovski and Chia-Chien Hung and Carolin Lawrence and Goran Glava{\\v{s}}},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=ZVy8L79f5f}\n}", "github": "", "project": "", "reviewers": "WyCA;PQB3;kopq", "site": "https://openreview.net/forum?id=ZVy8L79f5f", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;3;4", "excitement": "3;3;3", "reproducibility": "4;4;3", "correctness": "3;4;4", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;", "linkedin": "gorjan-radevski/;;;carolin-lawrence/;goran-glava\u0161-8484b420", "aff_unique_index": "0;1;1;2", "aff_unique_norm": "KU Leuven;NEC Laboratories Europe;Julius-Maximilians-Universit\u00e4t W\u00fcrzburg", "aff_unique_dep": "Department of Electrical Engineering;;", "aff_unique_url": "https://www.kuleuven.be;https://www.nec-labs.eu;https://www.uni-wuerzburg.de", "aff_unique_abbr": "KU Leuven;NEC LE;JMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;2", "aff_country_unique": "Belgium;Unknown;Germany" }, { "id": "ZWpJFq6RRU", "title": "Accented Speech Recognition With Accent-specific Codebooks", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Speech accents pose a significant challenge to state-of-the-art automatic speech recognition (ASR) systems. Degradation in performance across underrepresented accents is a severe deterrent to the inclusive adoption of ASR. In this work, we propose a novel accent adaptation approach for end-to-end ASR systems using cross-attention with a trainable set of codebooks. 
These learnable codebooks capture accent-specific information and are integrated within the ASR encoder layers. The model is trained on accented English speech, while the test data also contained accents which were not seen during training. On the Mozilla Common Voice multi-accented dataset, we show that our proposed approach yields significant performance gains not only on the seen English accents (up to 37% relative improvement in word error rate) but also on the unseen accents (up to 5% relative improvement in WER). Further, we illustrate benefits for a zero-shot transfer setup on the L2Artic dataset. We also compare the performance with other approaches based on accent adversarial training.", "keywords": "accented speech recognition;cross-attention;codebooks;conformer;domain adaptation", "primary_area": "", "supplementary_material": "", "author": "Darshan Deepak Prabhu;Preethi Jyothi;Sriram Ganapathy;Vinit Unni", "authorids": "~Darshan_Deepak_Prabhu1;~Preethi_Jyothi2;~Sriram_Ganapathy1;~Vinit_Unni2", "gender": "M;F;M;", "homepage": "https://www.cse.iitb.ac.in/~darshanp/;http://www.cse.iitb.ac.in/~pjyothi;http://leap.ee.iisc.ac.in/sriram/;https://www.cse.iitb.ac.in/~vinit", "dblp": ";01/9014;23/4298.html;", "google_scholar": "F_EUt6YAAAAJ;https://scholar.google.co.in/citations?user=QN_uhu8AAAAJ;cgpzrtcAAAAJ;https://scholar.google.fr/citations?user=NU250BoAAAAJ", "or_profile": "~Darshan_Deepak_Prabhu1;~Preethi_Jyothi2;~Sriram_Ganapathy1;~Vinit_Unni2", "aff": "Indian Institute of Technology, Bombay;Indian Institute of Technology Bombay;Google DeepMind;Indian Institute of Technology Bombay", "aff_domain": "cse.iitb.ac.in;iitb.ac.in;google.com;iitb.ac.in", "position": "MS student;Associate Professor;Researcher;PhD student", "bibtex": "@inproceedings{\nprabhu2023accented,\ntitle={Accented Speech Recognition With Accent-specific Codebooks},\nauthor={Darshan Deepak Prabhu and Preethi Jyothi and Sriram Ganapathy and Vinit Unni},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=ZWpJFq6RRU}\n}", "github": "", "project": "", "reviewers": "78aE;ndKt;tMje", "site": "https://openreview.net/forum?id=ZWpJFq6RRU", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;3;4", "excitement": "4;4;4", "reproducibility": "4;3;4", "correctness": "4;3;3", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 4.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;", "linkedin": "darshan-prabhu/;;;vinit-unni/?originalSubdomain=in", "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Indian Institute of Technology Bombay;Google", "aff_unique_dep": ";Google DeepMind", "aff_unique_url": "https://www.iitb.ac.in;https://deepmind.com", "aff_unique_abbr": "IIT Bombay;DeepMind", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Bombay;", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "India;United Kingdom" }, { "id": "ZZ3PL3qT9f", "title": "Causal Document-Grounded Dialogue Pre-training", "track": "main", "status": "Long Main", "tldr": "", "abstract": "The goal of document-grounded dialogue (DocGD) is to generate a response by anchoring the evidence in a supporting document in accordance with the dialogue context. This entails four causally interconnected variables. 
While task-specific pre-training has significantly enhanced performances on numerous downstream tasks, existing DocGD methods still rely on general pre-trained language models without a specifically tailored pre-training approach that explicitly captures the causal relationships. To address this, we present the first causally-complete dataset construction strategy for developing million-scale DocGD pre-training corpora. Additionally, we propose a causally-perturbed pre-training strategy to better capture causality by introducing perturbations on the variables and optimizing the overall causal effect. Experiments conducted on three benchmark datasets demonstrate that our causal pre-training yields substantial and consistent improvements in fully-supervised, low-resource, few-shot, and zero-shot settings.", "keywords": "Document-grounded dialogue;task specific pretraining;causal effect", "primary_area": "", "supplementary_material": "", "author": "Yingxiu Zhao;Bowen Yu;Bowen Li;Haiyang Yu;Jinyang Li;Chao Wang;Fei Huang;Yongbin Li;Nevin L. Zhang", "authorids": "~Yingxiu_Zhao1;~Bowen_Yu3;~Bowen_Li8;~Haiyang_Yu3;~Jinyang_Li4;~Chao_Wang10;~Fei_Huang1;~Yongbin_Li2;~Nevin_L._Zhang1", "gender": "F;M;;M;M;;M;M;M", "homepage": ";https://yubowen-ph.github.io/;;;http://jinyang-li.me/;;https://yongbin-li.github.io/;https://sites.google.com/view/fei-huang;https://cse.hkust.edu.hk/~lzhang/teach/courses.html", "dblp": ";95/10266-2.html;75/10470-2;90/6643-3;79/572-3;;;h/FeiHuang.html;https://dblp.uni-trier.de/pid/z/NevinLianwenZhang.html", "google_scholar": "https://scholar.google.com/citations?hl=en;oHoEp34AAAAJ;RLWXNf8AAAAJ;VhWV-1wAAAAJ;https://scholar.google.com/citations?hl=en;;xF5VrokAAAAJ;9r98PpoAAAAJ;", "or_profile": "~Yingxiu_Zhao1;~Bowen_Yu3;~Bowen_Li8;~Haiyang_Yu3;~Jinyang_Li4;~Chao_Wang10;~Yongbin_Li2;~Fei_Huang2;~Nevin_Zhang1", "aff": "Hong Kong University of Science and Technology;Alibaba Group;International Innovation Center of Tsinghua University, Shanghai;Alibaba Group;The University of Hong Kong;;Alibaba Group;Alibaba Group US;Hong Kong University of Science and Technology", "aff_domain": "ust.hk;alibaba-inc.com;tsinghua.edu.cn;alibaba-inc.com;hku.hk;;alibaba-inc.com;alibaba-inc.com;ust.hk", "position": "PhD student;Researcher;Researcher;Researcher;PhD student;;Researcher;Senior Research Director;Full Professor", "bibtex": "@inproceedings{\nzhao2023causal,\ntitle={Causal Document-Grounded Dialogue Pre-training},\nauthor={Yingxiu Zhao and Bowen Yu and Bowen Li and Haiyang Yu and Jinyang Li and Chao Wang and Fei Huang and Yongbin Li and Nevin L. 
Zhang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=ZZ3PL3qT9f}\n}", "github": "", "project": "", "reviewers": "DA1s;oY7u;y1SZ", "site": "https://openreview.net/forum?id=ZZ3PL3qT9f", "pdf_size": 0, "rating": "4;4;4", "confidence": "1;4;3", "excitement": "3;4;3", "reproducibility": "3;4;4", "correctness": "3;4;3", "rating_avg": 4.0, "confidence_avg": 2.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 9, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-5218-9920;0000-0002-6804-1859;;;;;;;", "linkedin": ";;;;;;;fei-huang-cas-cmu;", "aff_unique_index": "0;1;2;1;3;1;1;0", "aff_unique_norm": "Hong Kong University of Science and Technology;Alibaba Group;Tsinghua University;University of Hong Kong", "aff_unique_dep": ";;International Innovation Center;", "aff_unique_url": "https://www.ust.hk;https://www.alibaba.com;https://www.tsinghua.edu.cn;https://www.hku.hk", "aff_unique_abbr": "HKUST;Alibaba;THU;HKU", "aff_campus_unique_index": "0;2;0;0", "aff_campus_unique": "Hong Kong SAR;;Shanghai", "aff_country_unique_index": "0;0;0;0;0;0;1;0", "aff_country_unique": "China;United States" }, { "id": "Ze2IIzaSF3", "title": "HiddenTables and PyQTax: A Cooperative Game and Dataset For TableQA to Ensure Scale and Data Privacy Across a Myriad of Taxonomies", "track": "main", "status": "Long Main", "tldr": "", "abstract": "A myriad of different Large Language Models (LLMs) face a common challenge in contextually analyzing table question-answering tasks. These challenges are engendered from (1) finite context windows for large tables, (2) multi-faceted discrepancies amongst tokenization patterns against cell boundaries, and (3) various limitations stemming from data confidentiality in the process of using external models such as gpt-35-turbo. We propose a cooperative game dubbed \"HiddenTables\" as a potential resolution to this challenge. In essence, \"HiddenTables\" is played between the code-generating LLM \"Solver\" and the \"Oracle\" which evaluates the ability of the LLM agents to solve TableQA tasks. This game is based on natural language schemas and importantly, ensures the security of the underlying data. We provide evidential experiments on a diverse set of tables that demonstrate an LLM's collective inability to generalize and perform on complex queries, handle compositional dependencies, and align natural language to programmatic commands when concrete table schemas are provided. Unlike encoder-based models, we have pushed the boundaries of \"HiddenTables\" to not be limited by the number of rows - therefore we exhibit improved efficiency in prompt and completion tokens. Our infrastructure has spawned a new dataset \"PyQTax\" that spans across 116,671 question-table-answer triplets and provides additional fine-grained breakdowns and labels for varying question taxonomies. 
Therefore, in tandem with our academic contributions regarding LLMs' deficiency in TableQA tasks, \"HiddenTables\" is a tactile manifestation of how LLMs can interact with massive datasets while ensuring data security and minimizing generation costs.", "keywords": "table question answering;dataset;large language models;data privacy;agents;cooperative game", "primary_area": "", "supplementary_material": "", "author": "William Watson;Nicole Cho;Tucker Balch;Manuela Veloso", "authorids": "~William_Watson2;~Nicole_Cho1;~Tucker_Balch2;~Manuela_Veloso1", "gender": ";F;M;F", "homepage": ";;;https://www.cs.cmu.edu/~mmv/", "dblp": "56/8636.html;361/4988.html;;v/ManuelaMVeloso", "google_scholar": "4pujax8AAAAJ;;jM1cT4QAAAAJ;https://scholar.google.com.tw/citations?user=2FbkAzYAAAAJ", "or_profile": "~William_Watson2;~Nicole_Cho1;~Tucker_Balch2;~Manuela_Veloso1", "aff": "J.P. Morgan Chase;J.P. Morgan Chase;J.P. Morgan Chase;School of Computer Science, Carnegie Mellon University", "aff_domain": "jpmorgan.com;jpmorgan.com;jpmorgan.com;cs.cmu.edu", "position": "Researcher;MS student;Managing Director;Full Professor", "bibtex": "@inproceedings{\nwatson2023hiddentables,\ntitle={HiddenTables and Py{QT}ax: A Cooperative Game and Dataset For Table{QA} to Ensure Scale and Data Privacy Across a Myriad of Taxonomies},\nauthor={William Watson and Nicole Cho and Tucker Balch and Manuela Veloso},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=Ze2IIzaSF3}\n}", "github": "", "project": "", "reviewers": "6csL;xnAr;fyQe", "site": "https://openreview.net/forum?id=Ze2IIzaSF3", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;3", "excitement": "3;4;4", "reproducibility": "3;4;4", "correctness": "3;3;3", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0001-5516-262X;;0000-0002-5148-2033;", "linkedin": "nextbillyonair;nicole-cho-680/;;", "aff_unique_index": "0;0;0;1", "aff_unique_norm": "JPMorgan Chase & Co.;Carnegie Mellon University", "aff_unique_dep": ";School of Computer Science", "aff_unique_url": "https://www.jpmorganchase.com;https://www.cmu.edu", "aff_unique_abbr": "JPM;CMU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Pittsburgh", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "ZgJSDBU3px", "title": "CaseEncoder: A Knowledge-enhanced Pre-trained Model for Legal Case Encoding", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Legal case retrieval is a critical process for modern legal information systems. While recent studies have utilized pre-trained language models (PLMs) based on the general domain self-supervised pre-training paradigm to build models for legal case retrieval, there are limitations in using general domain PLMs as backbones. Specifically, these models may not fully capture the underlying legal features in legal case documents. To address this issue, we propose CaseEncoder, a legal document encoder that leverages fine-grained legal knowledge in both the data sampling and pre-training phases. In the data sampling phase, we enhance the quality of the training data by utilizing fine-grained law article information to guide the selection of positive and negative examples. 
In the pre-training phase, we design legal-specific pre-training tasks that align with the judging criteria of relevant legal cases. Based on these tasks, we introduce an innovative loss function called Biased Circle Loss to enhance the model's ability to recognize case relevance in fine grains. Experimental results on multiple benchmarks demonstrate that CaseEncoder significantly outperforms both existing general pre-training models and legal-specific pre-training models in zero-shot legal case retrieval. The source code of CaseEncoder can be found at https://github.com/Anonymous-EMNLP2023/CaseEncoder.", "keywords": "Legal case retrieval;pre-trained language model;knowledge", "primary_area": "", "supplementary_material": "", "author": "Yixiao Ma;Yueyue WU;Weihang Su;Qingyao Ai;Yiqun LIU", "authorids": "~Yixiao_Ma1;~Yueyue_WU1;~Weihang_Su1;~Qingyao_Ai1;~Yiqun_LIU1", "gender": "M;;M;Not Specified;M", "homepage": ";http://www.thuir.cn/members/1_post_wuyueyue.html;;https://qingyaoai.github.io;http://www.thuir.cn/group/~YQLiu/", "dblp": ";;301/7966;169/1808;49/1579", "google_scholar": "gF6c6YcAAAAJ;;xEJc8cgAAAAJ;UKqaI5IAAAAJ;NJOnxh4AAAAJ", "or_profile": "~Yixiao_Ma1;~Yueyue_WU1;~Weihang_Su1;~Qingyao_Ai1;~Yiqun_LIU1", "aff": "Tsinghua University;, Tsinghua University;, Tsinghua University;Tsinghua University;Tsinghua University", "aff_domain": "mail.tsinghua.edu.cn;cs.tsinghua.edu.cn;cs.tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn", "position": "MS student;Postdoc;PhD student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nma2023caseencoder,\ntitle={CaseEncoder: A Knowledge-enhanced Pre-trained Model for Legal Case Encoding},\nauthor={Yixiao Ma and Yueyue WU and Weihang Su and Qingyao Ai and Yiqun LIU},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=ZgJSDBU3px}\n}", "github": "", "project": "", "reviewers": "8osR;E28B;aaUB", "site": "https://openreview.net/forum?id=ZgJSDBU3px", "pdf_size": 0, "rating": "5;5;5", "confidence": "2;5;3", "excitement": "3;4;4", "reproducibility": "2;4;3", "correctness": "3;5;4", "rating_avg": 5.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.0, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0000-0002-5030-709X;", "linkedin": ";;;qingyao-ai-4ab8306a;", "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Tsinghua University", "aff_unique_dep": "", "aff_unique_url": "https://www.tsinghua.edu.cn", "aff_unique_abbr": "THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "ZhZFUOV5hb", "title": "Auto Search Indexer for End-to-End Document Retrieval", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Generative retrieval, which is a new advanced paradigm for document retrieval, has recently attracted research interests, since it encodes all documents into the model and directly generates the retrieved documents. However, its power is still underutilized since it heavily relies on the ``preprocessed'' document identifiers (docids), thus limiting its retrieval performance and ability to retrieve new documents. In this paper, we propose a novel fully end-to-end retrieval paradigm. 
It can not only end-to-end learn the best docids for existing and new documents automatically via a semantic indexing module, but also perform end-to-end document retrieval via an encoder-decoder-based generative model, namely Auto Search Indexer (ASI). Besides, we design a reparameterization mechanism to combine the above two modules into a joint optimization framework. Extensive experimental results demonstrate the superiority of our model over advanced baselines on both public and industrial datasets and also verify the ability to deal with new documents.", "keywords": "Document Retrieval;Generative Retrieval;End to End", "primary_area": "", "supplementary_material": "", "author": "Tianchi Yang;Minghui Song;Zihan Zhang;Haizhen Huang;Weiwei Deng;Feng Sun;Qi Zhang", "authorids": "~Tianchi_Yang1;~Minghui_Song1;~Zihan_Zhang4;~Haizhen_Huang1;~Weiwei_Deng2;~Feng_Sun1;~Qi_Zhang19", "gender": "Not Specified;;M;M;M;M;M", "homepage": ";https://github.com/TriLoo;;;;;", "dblp": "20/2167;;;304/7795;311/3565.html;09/3224;", "google_scholar": "H69Qi-4AAAAJ;;;;;;", "or_profile": "~Tianchi_Yang1;~Minghui_Song1;~Zihan_Zhang4;~Haizhen_Huang1;~Weiwei_Deng2;~Feng_Sun1;~Qi_Zhang19", "aff": "Microsoft;Microsoft;;;Microsoft;;Microsoft", "aff_domain": "microsoft.com;microsoft.com;;;microsoft.com;;microsoft.com", "position": "Researcher;Researcher;;;Researcher;;Researcher", "bibtex": "@inproceedings{\nyang2023auto,\ntitle={Auto Search Indexer for End-to-End Document Retrieval},\nauthor={Tianchi Yang and Minghui Song and Zihan Zhang and Haizhen Huang and Weiwei Deng and Feng Sun and Qi Zhang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=ZhZFUOV5hb}\n}", "github": "", "project": "", "reviewers": "WTWX;agxP;rkeY;CdLZ", "site": "https://openreview.net/forum?id=ZhZFUOV5hb", "pdf_size": 0, "rating": "3;3;3;3", "confidence": "4;3;4;3", "excitement": "3;4;3;4", "reproducibility": "3;3;2;4", "correctness": "4;3;2;4", "rating_avg": 3.0, "confidence_avg": 3.5, "excitement_avg": 3.5, "reproducibility_avg": 3.0, "correctness_avg": 3.25, "replies_avg": 14, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0003-1215-8676;;;;0009-0001-4793-9715;;", "linkedin": ";;https://cn.linkedin.com/in/zihan-zhang-916bb7101;haizhen-huang-21a58824/;;feng-sun/;qizhang07/", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Microsoft", "aff_unique_dep": "Microsoft Corporation", "aff_unique_url": "https://www.microsoft.com", "aff_unique_abbr": "Microsoft", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "ZjWkQz9qXn", "title": "Query-based Image Captioning from Multi-context 360\u00b0 Images", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "A 360-degree image captures the entire scene without the limitations of a camera's field of view, which makes it difficult to describe all the contexts in a single caption. We propose a novel task called Query-based Image Captioning (QuIC) for 360-degree images, where a query (words or short phrases) specifies the context to describe. This task is more challenging than the conventional image captioning task, which describes salient objects in images, as it requires fine-grained scene understanding to select the contents consistent with user's intent based on the query. 
We construct a dataset for the new task that comprises 3,940 360-degree images and 18,459 pairs of queries and captions annotated manually. Experiments demonstrate that fine-tuning image captioning models further on our dataset can generate more diverse and controllable captions from multiple contexts of 360-degree images.", "keywords": "Image Captioning;360-degree image;Vision and Language", "primary_area": "", "supplementary_material": "", "author": "Koki Maeda;Shuhei Kurita;Taiki Miyanishi;Naoaki Okazaki", "authorids": "~Koki_Maeda1;~Shuhei_Kurita1;~Taiki_Miyanishi1;~Naoaki_Okazaki2", "gender": "M;;M;M", "homepage": "https://sites.google.com/view/silviase/english;;http://miyatai.org/;http://www.chokkan.org/", "dblp": ";;45/8008;49/4018", "google_scholar": "https://scholar.google.co.jp/citations?user=TOHpU1IAAAAJ;;https://scholar.google.co.jp/citations?user=yViS5hwAAAAJ;", "or_profile": "~Koki_Maeda1;~Shuhei_Kurita1;~Taiki_Miyanishi1;~Naoaki_Okazaki2", "aff": "Tokyo Institute of Technology;;ATR;Tokyo Institute of Technology", "aff_domain": "titech.ac.jp;;atr.jp;titech.ac.jp", "position": "MS student;;Researcher;Full Professor", "bibtex": "@inproceedings{\nmaeda2023querybased,\ntitle={Query-based Image Captioning from Multi-context 360{\\textdegree} Images},\nauthor={Koki Maeda and Shuhei Kurita and Taiki Miyanishi and Naoaki Okazaki},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=ZjWkQz9qXn}\n}", "github": "", "project": "", "reviewers": "WuwK;4vda;jdnx", "site": "https://openreview.net/forum?id=ZjWkQz9qXn", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;3;3", "excitement": "3;3;3", "reproducibility": "3;4;3", "correctness": "4;3;3", "rating_avg": 3.0, "confidence_avg": 3.0, "excitement_avg": 3.0, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0009-0008-0529-3152;;;", "linkedin": ";;;", "aff_unique_index": "0;1;0", "aff_unique_norm": "Tokyo Institute of Technology;Advanced Telecommunications Research Institute", "aff_unique_dep": ";", "aff_unique_url": "https://www.titech.ac.jp;https://www.atr.jp", "aff_unique_abbr": "Titech;ATR", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Japan" }, { "id": "ZjfclaOF7M", "title": "UniMath: A Foundational and Multimodal Mathematical Reasoner", "track": "main", "status": "Short Main", "tldr": "", "abstract": "While significant progress has been made in natural language processing (NLP), existing methods exhibit limitations in effectively interpreting and processing diverse mathematical modalities. Therefore, we introduce UniMath, a versatile and unified system designed for multimodal mathematical reasoning tasks. Tackling complex problem-solving in arithmetic, geometry, and table-based math, UniMath utilizes a fine-tuned T5 model augmented with a variational autoencoder (VAE)-based image tokenizer. By jointly training and evaluating the model on three diverse datasets - SVAMP, GeoQA, and TableMWP, UniMath achieves state-of-the-art performance. The model's generalization ability is further demonstrated via fine-tuning on two additional datasets, MathQA and Geo-Proving. 
Through comprehensive evaluations, we showcase that joint training across diverse math tasks improves overall model performance and enhances its ability to generalize across different mathematical reasoning tasks. This pioneering approach provides a blueprint and inspires further efforts on unified mathematical reasoning with deep learning systems.", "keywords": "Multimodal Math Reasoning;Math Word Problem Solving;Geometry Problem Solving", "primary_area": "", "supplementary_material": "", "author": "Zhenwen Liang;Tianyu Yang;Jipeng Zhang;Xiangliang Zhang", "authorids": "~Zhenwen_Liang1;~Tianyu_Yang6;~Jipeng_Zhang1;~Xiangliang_Zhang1", "gender": "M;;M;F", "homepage": "https://zhenwen-nlp.github.io/;;https://2003pro.github.io/;https://sites.nd.edu/xiangliang-zhang/", "dblp": "226/6083;;;74/1890-1", "google_scholar": "4rKhF2AAAAAJ;;q0De288AAAAJ;BhRJe4wAAAAJ", "or_profile": "~Zhenwen_Liang1;~Tianyu_Yang6;~Jipeng_Zhang1;~Xiangliang_Zhang1", "aff": "University of Notre Dame;;Department of Computer Science and Engineering, The Hong Kong University of Science and Technology;University of Notre Dame", "aff_domain": "nd.edu;;cse.ust.hk;nd.edu", "position": "PhD student;;PhD student;Associate Professor", "bibtex": "@inproceedings{\nliang2023unimath,\ntitle={UniMath: A Foundational and Multimodal Mathematical Reasoner},\nauthor={Zhenwen Liang and Tianyu Yang and Jipeng Zhang and Xiangliang Zhang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=ZjfclaOF7M}\n}", "github": "", "project": "", "reviewers": "xULr;FwBq;U2A1", "site": "https://openreview.net/forum?id=ZjfclaOF7M", "pdf_size": 0, "rating": "5;5;5", "confidence": "3;4;3", "excitement": "4;4;3", "reproducibility": "4;5;3", "correctness": "3;4;3", "rating_avg": 5.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0000-0002-3574-5665", "linkedin": ";;;", "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Notre Dame;Hong Kong University of Science and Technology", "aff_unique_dep": ";Department of Computer Science and Engineering", "aff_unique_url": "https://www.nd.edu;https://www.ust.hk", "aff_unique_abbr": "Notre Dame;HKUST", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;1;0", "aff_country_unique": "United States;China" }, { "id": "ZkR2bWvRpZ", "title": "Prompt-Based Monte-Carlo Tree Search for Goal-oriented Dialogue Policy Planning", "track": "main", "status": "Short Main", "tldr": "", "abstract": "Planning for goal-oriented dialogue often requires simulating future dialogue interactions and estimating task progress. Many approaches thus consider training neural networks to perform look-ahead search algorithms such as A* search and Monte Carlo Tree Search (MCTS). However, this training often require abundant annotated data, which creates challenges when faced with noisy annotations or low-resource settings. We introduce GDP-Zero, an approach using Open-Loop MCTS to perform goal-oriented dialogue policy planning without any model training. GDP-Zero prompts a large language model to act as a policy prior, value function, user simulator, and system model during the tree search. 
We evaluate GDP-Zero on the goal-oriented task PersuasionForGood, and find that its responses are preferred over ChatGPT up to 59.32% of the time, and are rated more persuasive than ChatGPT during interactive evaluations.", "keywords": "Prompting;MCTS;Dialogue Policy Planning", "primary_area": "", "supplementary_material": "", "author": "Xiao Yu;Maximillian Chen;Zhou Yu", "authorids": "~Xiao_Yu4;~Maximillian_Chen1;~Zhou_Yu1", "gender": "M;F;M", "homepage": ";http://www.cs.columbia.edu/~zhouyu/;https://cs.columbia.edu/~maxchen", "dblp": ";83/3205;271/5890", "google_scholar": "QblBy88AAAAJ;https://scholar.google.com.tw/citations?user=jee2Dy0AAAAJ;EoHN6rAAAAAJ", "or_profile": "~Xiao_Yu4;~Zhou_Yu1;~Max_Chen1", "aff": "Columbia University;Columbia University;Amazon", "aff_domain": "columbia.edu;columbia.edu;amazon.com", "position": "Undergrad student;Assistant Professor;Intern", "bibtex": "@inproceedings{\nyu2023promptbased,\ntitle={Prompt-Based Monte-Carlo Tree Search for Goal-oriented Dialogue Policy Planning},\nauthor={Xiao Yu and Maximillian Chen and Zhou Yu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=ZkR2bWvRpZ}\n}", "github": "", "project": "", "reviewers": "qWAy;e7SF;hJkT", "site": "https://openreview.net/forum?id=ZkR2bWvRpZ", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;2;4", "excitement": "3;3;3", "reproducibility": "4;4;4", "correctness": "4;4;4", "rating_avg": 4.0, "confidence_avg": 3.0, "excitement_avg": 3.0, "reproducibility_avg": 4.0, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": ";;maximillianchen/", "aff_unique_index": "0;0;1", "aff_unique_norm": "Columbia University;Amazon", "aff_unique_dep": ";Amazon.com, Inc.", "aff_unique_url": "https://www.columbia.edu;https://www.amazon.com", "aff_unique_abbr": "Columbia;Amazon", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "Zlm7F7g9FK", "title": "NLI4CT: Multi-Evidence Natural Language Inference for Clinical Trial Reports", "track": "main", "status": "Long Main", "tldr": "", "abstract": "How can we interpret and retrieve medical evidence to support clinical decisions? Clinical trial reports (CTR) amassed over the years contain indispensable information for the development of personalized medicine. However, it is practically infeasible to manually inspect over 400,000+ clinical trial reports in order to find the best evidence for experimental treatments. Natural Language Inference (NLI) offers a potential solution to this problem, by allowing the scalable computation of textual entailment. However, existing NLI models perform poorly on biomedical corpora, and previously published datasets fail to capture the full complexity of inference over CTRs.\n\nIn this work, we present a novel resource to advance research on NLI for reasoning on CTRs. The resource includes two main tasks. Firstly, to determine the inference relation between a natural language statement, and a CTR. Secondly, to retrieve supporting facts to justify the predicted relation. We provide NLI4CT, a corpus of 2400 statements and CTRs, annotated for these tasks. Baselines on this corpus expose the limitations of existing NLI approaches, with 6 state-of-the-art NLI models achieving a maximum F1 score of 0.627. 
To the best of our knowledge, we are the first to design a task that covers the interpretation of full CTRs. To encourage further work on this challenging dataset, we make the corpus, competition leaderboard, and website, available on CodaLab, and code to replicate the baseline experiments on GitHub.", "keywords": "NLI;Clinical Trial;Textual entailment;Evidence retrieval;NLP", "primary_area": "", "supplementary_material": "", "author": "Mael Jullien;Marco Valentino;Hannah Ruth Frost;Paul O'Regan;D\u00f3nal Landers;Andre Freitas", "authorids": "~Mael_Jullien2;~Marco_Valentino1;~Hannah_Ruth_Frost1;~Paul_O'Regan1;~D\u00f3nal_Landers1;~Andre_Freitas1", "gender": ";M;;Not Specified;M;", "homepage": "https://www.linkedin.com/in/mael-jullien-874506179/;https://www.marcovalentino.net/;;https://digitalecmt.org/;https://www.delondraoncology.com;http://andrefreitas.org", "dblp": ";212/3533;;;;47/9409.html", "google_scholar": ";nnaBYcIAAAAJ;;;;ExmHmMoAAAAJ", "or_profile": "~Mael_Jullien2;~Marco_Valentino1;~Hannah_Ruth_Frost1;~Paul_O'Regan1;~D\u00f3nal_Landers1;~Andre_Freitas1", "aff": "University of Manchester;Idiap Research Institute;;University of Manchester;;University of Manchester", "aff_domain": "manchester.ac.uk;idiap.ch;;cs.manchester.ac.uk;;manchester.ac.uk", "position": "PhD student;Postdoc;;Postdoc;;Associate Professor", "bibtex": "@inproceedings{\njullien2023nlict,\ntitle={{NLI}4{CT}: Multi-Evidence Natural Language Inference for Clinical Trial Reports},\nauthor={Mael Jullien and Marco Valentino and Hannah Ruth Frost and Paul O'Regan and D{\\'o}nal Landers and Andre Freitas},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=Zlm7F7g9FK}\n}", "github": "", "project": "", "reviewers": "hG7J;6bps;iVg5", "site": "https://openreview.net/forum?id=Zlm7F7g9FK", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;3;5", "excitement": "4;4;4", "reproducibility": "4;5;3", "correctness": "3;4;4", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 4.0, "reproducibility_avg": 4.0, "correctness_avg": 3.6666666666666665, "replies_avg": 10, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;0000-0001-8376-9779;", "linkedin": ";marco-valentino-844a5ab1/;;;https://linkedin.com/in/donal-landers;andrefreitas/", "aff_unique_index": "0;1;0;0", "aff_unique_norm": "University of Manchester;Idiap Research Institute", "aff_unique_dep": ";", "aff_unique_url": "https://www.manchester.ac.uk;https://www.idiap.ch", "aff_unique_abbr": "UoM;Idiap", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "United Kingdom;Switzerland" }, { "id": "ZskD7TlNVZ", "title": "Translating away Translationese without Parallel Data", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Translated texts exhibit systematic linguistic differences compared to original texts in the same language, and these differences are referred to as translationese. Translationese has effects on various cross-lingual natural language processing tasks, potentially leading to biased results. In this paper, we explore a novel approach to reduce translationese in translated texts: translation-based style transfer. As there are no parallel human-translated and original data in the same language, we use a self-supervised approach that can learn from comparable (rather than parallel) mono-lingual original and translated data. 
However, even this self-supervised approach requires some parallel data for validation. We show how we can eliminate the need for parallel validation data by combining the self-supervised loss with an unsupervised loss. This unsupervised loss leverages the original language model loss over the style-transferred output and a semantic similarity loss between the input and style-transferred output. We evaluate our approach in terms of original vs. translationese binary classification in addition to measuring content preservation and target-style fluency. The results show that our approach is able to reduce translationese classifier accuracy to a level of a random classifier after style transfer while adequately preserving the content and fluency in the target original style.", "keywords": "Translationese Mitigation;Text Style Transfer;Unsupervised Training;Bias Mitigation", "primary_area": "", "supplementary_material": "", "author": "Rricha Jalota;Koel Dutta Chowdhury;Cristina Espa\u00f1a-Bonet;Josef van Genabith", "authorids": "~Rricha_Jalota1;~Koel_Dutta_Chowdhury2;~Cristina_Espa\u00f1a-Bonet1;~Josef_van_Genabith1", "gender": "F;F;F;M", "homepage": ";;https://www.cs.upc.edu/~cristinae/CV/cv.php;", "dblp": ";205/8970.html;59/7935;82/3447", "google_scholar": "ZBzbtGYAAAAJ;t5TOnGkAAAAJ;;rl8S6a8AAAAJ", "or_profile": "~Rricha_Jalota1;~Koel_Dutta_Chowdhury2;~Cristina_Espa\u00f1a-Bonet1;~Josef_van_Genabith1", "aff": "Universit\u00e4t des Saarlandes;Universit\u00e4t des Saarlandes;German Research Center for AI;Universit\u00e4t des Saarlandes", "aff_domain": "uni-saarland.de;uni-saarland.de;dfki.de;uni-saarland.de", "position": "MS student;PhD student;Researcher;Full Professor", "bibtex": "@inproceedings{\njalota2023translating,\ntitle={Translating away Translationese without Parallel Data},\nauthor={Rricha Jalota and Koel Dutta Chowdhury and Cristina Espa{\\~n}a-Bonet and Josef van Genabith},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=ZskD7TlNVZ}\n}", "github": "", "project": "", "reviewers": "whSa;k8r3;PG8x", "site": "https://openreview.net/forum?id=ZskD7TlNVZ", "pdf_size": 0, "rating": "5;5;5", "confidence": "3;2;4", "excitement": "4;4;4", "reproducibility": "4;4;4", "correctness": "4;4;4", "rating_avg": 5.0, "confidence_avg": 3.0, "excitement_avg": 4.0, "reproducibility_avg": 4.0, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0001-5414-4710;", "linkedin": ";koel-dutta-chowdhury-a53a39123/;;", "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Universit\u00e4t des Saarlandes;German Research Center for Artificial Intelligence", "aff_unique_dep": ";", "aff_unique_url": "https://www.uni-saarland.de;https://www.dfki.de/", "aff_unique_abbr": "UDS;DFKI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Germany" }, { "id": "ZsuPbCxPnA", "title": "Non-autoregressive Text Editing with Copy-aware Latent Alignments", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Recent work has witnessed a paradigm shift from Seq2Seq to Seq2Edit in the field of text editing, with the aim of addressing the slow autoregressive inference problem posed by the former. Despite promising results, Seq2Edit approaches still face several challenges such as inflexibility in generation and difficulty in generalizing to other languages. 
In this work, we propose a novel non-autoregressive text editing method to circumvent the above issues, by modeling the edit process with latent CTC alignments. We make a crucial extension to CTC by introducing the copy operation into the edit space, thus enabling more efficient management of textual overlap in editing. We conduct extensive experiments on GEC and sentence fusion tasks, showing that our proposed method significantly outperforms existing Seq2Edit models and achieves similar or even better results than Seq2Seq with over $4\\times$ speedup. Moreover, it demonstrates good generalizability on German and Russian. In-depth analyses reveal the strengths of our method in terms of the robustness under various scenarios and generating fluent and flexible outputs.", "keywords": "text generation;text editing;grammatical error correction;sentence fusion", "primary_area": "", "supplementary_material": "", "author": "Yu Zhang;Yue Zhang;Leyang Cui;Guohong Fu", "authorids": "~Yu_Zhang36;~Yue_Zhang12;~Leyang_Cui1;~Guohong_Fu1", "gender": "M;M;M;M", "homepage": "https://yzhang.site;https://hillzhang1999.github.io/;https://github.com/Nealcly;http://web.suda.edu.cn/ghfu/", "dblp": "50/671-92;;247/6181;23/5204", "google_scholar": "y3JK-1oAAAAJ;wYEAchYAAAAJ;6YVwZgkAAAAJ;ueOZz5QAAAAJ", "or_profile": "~Yu_Zhang36;~Yue_Zhang12;~Leyang_Cui1;~Guohong_Fu1", "aff": "Soochow University, China;Suzhou University;Tencent AI Lab;Soochow University, China,", "aff_domain": "suda.edu.cn;suda.edu.cn;tencent.com;suda.edu.cn", "position": "PhD student;MS student;Researcher;Full Professor", "bibtex": "@inproceedings{\nzhang2023nonautoregressive,\ntitle={Non-autoregressive Text Editing with Copy-aware Latent Alignments},\nauthor={Yu Zhang and Yue Zhang and Leyang Cui and Guohong Fu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=ZsuPbCxPnA}\n}", "github": "", "project": "", "reviewers": "BNoe;M58B;L81y", "site": "https://openreview.net/forum?id=ZsuPbCxPnA", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;4;1", "excitement": "2;4;4", "reproducibility": "4;4;3", "correctness": "4;3;4", "rating_avg": 4.0, "confidence_avg": 2.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-8345-3835;;;0000-0001-6882-6181", "linkedin": ";;;", "aff_unique_index": "0;1;2;0", "aff_unique_norm": "Soochow University;Suzhou University;Tencent", "aff_unique_dep": ";;Tencent AI Lab", "aff_unique_url": "https://www.soochow.edu.cn;https://www.suda.edu.cn;https://ai.tencent.com", "aff_unique_abbr": "Soochow U;Suda;Tencent AI Lab", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "a0yFO9gKc5", "title": "Benchmarking and Improving Text-to-SQL Generation under Ambiguity", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Research in Text-to-SQL conversion has been largely benchmarked against datasets where each text query corresponds to one correct SQL. However, natural language queries over real-life databases frequently involve significant ambiguity about the intended SQL due to overlapping schema names and multiple confusing relationship paths. 
To bridge this gap, we develop a novel benchmark called AmbiQT with over 3000 examples where each text is interpretable as two plausible SQLs due to lexical and/or structural ambiguity. \n\nWhen faced with ambiguity, an ideal top-$k$ decoder should generate all valid interpretations for possible disambiguation by the user. We evaluate several Text-to-SQL systems and decoding algorithms, including those employing state-of-the-art LLMs, and find them to be far from this ideal. The primary reason is that the prevalent beam search algorithm and its variants, treat SQL queries as a string and produce unhelpful token-level diversity in the top-$k$.\n\nWe propose LogicalBeam, a new decoding algorithm that navigates the SQL logic space using a blend of plan-based template generation and constrained infilling. Counterfactually generated plans diversify templates while in-filling with a beam-search that branches solely on schema names provides value diversity. LogicalBeam is up to $2.5$ times more effective than state-of-the-art models at generating all candidate SQLs in the top-$k$ ranked outputs. It also enhances the top-$5$ Exact and Execution Match Accuracies on SPIDER and Kaggle DBQA.", "keywords": "Semantic Parsing;Text-to-SQL;Ambiguity;Beam Search;ChatGPT;LLMs for Text-to-SQL", "primary_area": "", "supplementary_material": "", "author": "Adithya Bhaskar;Tushar Tomar;Ashutosh Sathe;Sunita Sarawagi", "authorids": "~Adithya_Bhaskar2;~Tushar_Tomar1;~Ashutosh_Sathe1;~Sunita_Sarawagi1", "gender": "M;;M;F", "homepage": "https://adithyabh.github.io;;https://ashutoshbsathe.github.io;https://www.cse.iitb.ac.in/~sunita/", "dblp": "334/7656;;332/0994.html;s/SunitaSarawagi", "google_scholar": ";;f3T-T-AAAAAJ;https://scholar.google.com.tw/citations?user=Hg4HmTAAAAAJ", "or_profile": "~Adithya_Bhaskar2;~Tushar_Tomar1;~Ashutosh_Sathe1;~Sunita_Sarawagi1", "aff": "Indian Institute of Technology Bombay, Indian Institute of Technology, Bombay;Indian Institute of Technology, Bombay;Indian Institute of Technology, Bombay;IIT Bombay", "aff_domain": "cse.iitb.ac.in;iitb.ac.in;cse.iitb.ac.in;iitb.ac.in", "position": "Undergrad student;M. 
Tech Student;MS student;Full Professor", "bibtex": "@inproceedings{\nbhaskar2023benchmarking,\ntitle={Benchmarking and Improving Text-to-{SQL} Generation under Ambiguity},\nauthor={Adithya Bhaskar and Tushar Tomar and Ashutosh Sathe and Sunita Sarawagi},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=a0yFO9gKc5}\n}", "github": "", "project": "", "reviewers": "XzKB;8D4B;ePD9", "site": "https://openreview.net/forum?id=a0yFO9gKc5", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;4;3", "excitement": "3;4;3", "reproducibility": "4;3;3", "correctness": "4;4;4", "rating_avg": 5.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;", "linkedin": ";tushar-tomar-a0666219a;;", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Indian Institute of Technology Bombay", "aff_unique_dep": "", "aff_unique_url": "https://www.iitb.ac.in", "aff_unique_abbr": "IIT Bombay", "aff_campus_unique_index": "0;0;0;1", "aff_campus_unique": "Bombay;Mumbai", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "India" }, { "id": "aB3Hwh4UzP", "title": "A Mechanistic Interpretation of Arithmetic Reasoning in Language Models using Causal Mediation Analysis", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Mathematical reasoning in large language models (LMs) has garnered significant attention in recent work, but there is a limited understanding of how these models process and store information related to arithmetic tasks within their architecture.\nIn order to improve our understanding of this aspect of language models, we present a mechanistic interpretation of Transformer-based LMs on arithmetic questions using a causal mediation analysis framework.\nBy intervening on the activations of specific model components and measuring the resulting changes in predicted probabilities, we identify the subset of parameters responsible for specific predictions.\nThis provides insights into how information related to arithmetic is processed by LMs.\nOur experimental results indicate that LMs process the input by transmitting the information relevant to the query from mid-sequence early layers to the final token using the attention mechanism. 
Then, this information is processed by a set of MLP modules, which generate result-related information that is incorporated into the residual stream.\nTo assess the specificity of the observed activation dynamics, we compare the effects of different model components on arithmetic queries with other tasks, including number retrieval from prompts and factual knowledge questions.", "keywords": "LLMs;Arithmetic Reasoning;Interpretability;Causality;Causal Mediation Analysis;Reasoning", "primary_area": "", "supplementary_material": "", "author": "Alessandro Stolfo;Yonatan Belinkov;Mrinmaya Sachan", "authorids": "~Alessandro_Stolfo1;~Yonatan_Belinkov1;~Mrinmaya_Sachan3", "gender": "M;M;M", "homepage": "https://alestolfo.github.io;https://www.belinkov.com;https://sites.google.com/site/mrinsachan/", "dblp": "329/3838;136/8705;86/10440.html", "google_scholar": "Fx50TZQAAAAJ;https://scholar.google.com/citations?authorid=K-6ujU4AAAAJ;Tpp9ZjoAAAAJ", "or_profile": "~Alessandro_Stolfo1;~Yonatan_Belinkov1;~MRINMAYA_SACHAN2", "aff": "ETHZ - ETH Zurich;Technion, Technion;Swiss Federal Institute of Technology", "aff_domain": "ethz.ch;technion.ac.il;ethz.ch", "position": "PhD student;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nstolfo2023a,\ntitle={A Mechanistic Interpretation of Arithmetic Reasoning in Language Models using Causal Mediation Analysis},\nauthor={Alessandro Stolfo and Yonatan Belinkov and Mrinmaya Sachan},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=aB3Hwh4UzP}\n}", "github": "", "project": "", "reviewers": "jY6n;WmZT;LE67;yvrj", "site": "https://openreview.net/forum?id=aB3Hwh4UzP", "pdf_size": 0, "rating": "4;4;4;4", "confidence": "4;3;3;3", "excitement": "4;4;4;3", "reproducibility": "5;3;5;3", "correctness": "4;3;4;4", "rating_avg": 4.0, "confidence_avg": 3.25, "excitement_avg": 3.75, "reproducibility_avg": 4.0, "correctness_avg": 3.75, "replies_avg": 14, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": "alessandrostolfo/;;", "aff_unique_index": "0;1;2", "aff_unique_norm": "ETH Zurich;Technion - Israel Institute of Technology;Swiss Federal Institute of Technology", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ethz.ch;https://www.technion.ac.il/en/;https://www.ethz.ch", "aff_unique_abbr": "ETHZ;Technion;ETH Zurich", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "Switzerland;Israel" }, { "id": "aBvwASLqMg", "title": "On the Representational Capacity of Recurrent Neural Language Models", "track": "main", "status": "Long Main", "tldr": "", "abstract": "This work investigates the computational expressivity of language models (LMs) based on recurrent neural networks (RNNs). \nSiegelmann and Sontag (1992) famously showed that RNNs with rational weights and hidden states and unbounded computation time are Turing complete. \nHowever, LMs define weightings over strings in addition to just (unweighted) language membership and the analysis of the computational power of RNN LMs (RLMs) should reflect this. \nWe extend the Turing completeness result to the probabilistic case, showing how a rationally weighted RLM with unbounded computation time can simulate any deterministic probabilistic Turing machine (PTM) with rationally weighted transitions. 
\nSince, in practice, RLMs work in real-time, processing a symbol at every time step, we treat the above result as an upper bound on the expressivity of RLMs. \nWe also provide a lower bound by showing that under the restriction to real-time computation, such models can simulate deterministic real-time rational PTMs.", "keywords": "RNN;LM;Turing machine;formal languages;probabilistic;language model", "primary_area": "", "supplementary_material": "", "author": "Franz Nowak;Anej Svete;Li Du;Ryan Cotterell", "authorids": "~Franz_Nowak1;~Anej_Svete1;~Li_Du2;~Ryan_Cotterell1", "gender": ";M;M;Not Specified", "homepage": "https://franznowak.github.io;https://anejsvete.github.io/;;https://rycolab.io/", "dblp": ";259/1164;;146/4361.html", "google_scholar": "IgJ4o30AAAAJ;https://scholar.google.com/citations?hl=en;efDU43kAAAAJ;DexOqtoAAAAJ", "or_profile": "~Franz_Nowak1;~Anej_Svete1;~Li_Du2;~Ryan_D_Cotterell1", "aff": "ETHZ - ETH Zurich;Department of Computer Science, ETHZ - ETH Zurich;Johns Hopkins University;Swiss Federal Institute of Technology", "aff_domain": "ethz.ch;inf.ethz.ch;cs.jhu.edu;ethz.ch", "position": "MS student;MS student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nnowak2023on,\ntitle={On the Representational Capacity of Recurrent Neural Language Models},\nauthor={Franz Nowak and Anej Svete and Li Du and Ryan Cotterell},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=aBvwASLqMg}\n}", "github": "", "project": "", "reviewers": "atVK;EjFc;BtV4", "site": "https://openreview.net/forum?id=aBvwASLqMg", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;4;2", "excitement": "4;3;4", "reproducibility": "4;5;3", "correctness": "4;4;4", "rating_avg": 4.0, "confidence_avg": 3.0, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.0, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;", "linkedin": ";anej-svete-95a68616a;;", "aff_unique_index": "0;0;1;2", "aff_unique_norm": "ETH Zurich;Johns Hopkins University;Swiss Federal Institute of Technology", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ethz.ch;https://www.jhu.edu;https://www.ethz.ch", "aff_unique_abbr": "ETHZ;JHU;ETH Zurich", "aff_campus_unique_index": "1", "aff_campus_unique": ";Zurich", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "Switzerland;United States" }, { "id": "aCHq10rQiH", "title": "CREATOR: Tool Creation for Disentangling Abstract and Concrete Reasoning of Large Language Models", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Large Language Models (LLMs) have made significant progress in utilizing tools, but their ability is limited by API availability and the instability of implicit reasoning, particularly when both planning and execution are involved. To overcome these limitations, we propose CREATOR, a novel framework that enables LLMs to create their own tools using documentation and code realization. CREATOR disentangles abstract tool creation and concrete decision execution, resulting in improved performance. We evaluate CREATOR on MATH and TabMWP benchmarks, respectively consisting of challenging math competition problems and diverse tabular contents. Remarkably, CREATOR outperforms existing chain-of-thought, program-of-thought, and tool-using baselines. 
Additionally, we introduce the Creation Challenge dataset, featuring 2K diverse questions, to emphasize the necessity and benefits of LLMs' tool creation ability. Further research demonstrates that leveraging LLMs as tool creators facilitates knowledge transfer, and LLMs exhibit varying levels of tool creation abilities, enabling them to adapt to diverse situations. The tool creation ability revolutionizes the LLM's problem-solving paradigm, driving us closer to the next frontier of artificial intelligence.", "keywords": "Large Language Models;Tool Creation;Model Reasoning", "primary_area": "", "supplementary_material": "", "author": "Cheng Qian;Chi Han;Yi Fung;Yujia Qin;Zhiyuan Liu;Heng Ji", "authorids": "~Cheng_Qian4;~Chi_Han1;~Yi_Fung1;~Yujia_Qin1;~Zhiyuan_Liu1;~Heng_Ji3", "gender": ";M;F;M;M;F", "homepage": ";https://glaciohound.github.io;https://mayrfung.github.io;https://yujia-qin.github.io/;http://nlp.csai.tsinghua.edu.cn/~lzy;http://blender.cs.illinois.edu/hengji.html", "dblp": ";255/6993;223/2782-1.html;126/2333;53/3245-1;", "google_scholar": ";https://scholar.google.com.sg/citations?user=DcSvbuAAAAAJ;eUae2K0AAAAJ;;dT0v5u0AAAAJ;z7GCqT4AAAAJ", "or_profile": "~Cheng_Qian4;~Chi_Han1;~Yi_Fung1;~Yujia_Qin1;~Zhiyuan_Liu1;~Heng_Ji3", "aff": ";University of Illinois, Urbana Champaign;University of Illinois, Urbana Champaign;Tsinghua University;Tsinghua University;University of Illinois, Urbana-Champaign", "aff_domain": ";illinois.edu;illinois.edu;tsinghua.edu.cn;tsinghua.edu.cn;uiuc.edu", "position": ";PhD student;PhD student;PhD student;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nqian2023creator,\ntitle={{CREATOR}: Tool Creation for Disentangling Abstract and Concrete Reasoning of Large Language Models},\nauthor={Cheng Qian and Chi Han and Yi Fung and Yujia Qin and Zhiyuan Liu and Heng Ji},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=aCHq10rQiH}\n}", "github": "", "project": "", "reviewers": "DLxW;PJDS;yYH8", "site": "https://openreview.net/forum?id=aCHq10rQiH", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;4", "excitement": "3;3;3", "reproducibility": "2;3;5", "correctness": "3;3;3", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 3.0, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0001-6235-5841;;;0000-0002-7709-2543;", "linkedin": ";chi-han-b01a93141/;;yujia-qin-672595181/;;", "aff_unique_index": "0;0;1;1;2", "aff_unique_norm": "University of Illinois Urbana-Champaign;Tsinghua University;University of Illinois", "aff_unique_dep": ";;", "aff_unique_url": "https://illinois.edu;https://www.tsinghua.edu.cn;https://illinois.edu", "aff_unique_abbr": "UIUC;THU;UIUC", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Urbana-Champaign;", "aff_country_unique_index": "0;0;1;1;0", "aff_country_unique": "United States;China" }, { "id": "aE7feUD7o7", "title": "Unified Low-Resource Sequence Labeling by Sample-Aware Dynamic Sparse Finetuning", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Unified Sequence Labeling that articulates different sequence labeling problems such as Named Entity Recognition, Relation Extraction, Semantic Role Labeling, etc. 
in a generalized sequence-to-sequence format opens up the opportunity to make the maximum utilization of large language model knowledge toward structured prediction. Unfortunately, this requires formatting them into specialized augmented format unknown to the base pretrained language model (PLMs) necessitating finetuning to the target format. This significantly bounds its usefulness in data-limited settings where finetuning large models cannot properly generalize to the target format. To address this challenge and leverage PLM knowledge effectively, we propose FISH-DIP, a sample-aware dynamic sparse finetuning strategy that selectively focuses on a fraction of parameters, informed by feedback from highly regressing examples, during the fine-tuning process. By leveraging the dynamism of sparsity, our approach mitigates the impact of well-learned samples and prioritizes underperforming instances for improvement in generalization. Across five tasks of sequence labeling, we demonstrate that FISH-DIP can smoothly optimize the model in low resource settings offering upto 40% performance improvements over full fine-tuning depending on target evaluation settings. Also, compared to in-context learning and other parameter-efficient fine-tuning approaches, FISH-DIP performs comparably or better, notably in extreme low-resource settings. The source code of FISH-DIP will be available at [this URL](https://github.com/psunlpgroup/FISH-DIP)", "keywords": "Sequence Labeling;Low Resource Learning;Sparse Finetuning", "primary_area": "", "supplementary_material": "", "author": "Sarkar Snigdha Sarathi Das;Haoran Ranran Zhang;Peng Shi;Wenpeng Yin;Rui Zhang", "authorids": "~Sarkar_Snigdha_Sarathi_Das1;~Haoran_Ranran_Zhang1;~Peng_Shi2;~Wenpeng_Yin1;~Rui_Zhang7", "gender": "M;M;M;;M", "homepage": "https://sarathismg.github.io/;https://windchimeran.github.io;;http://wenpengyin.org/;https://ryanzhumich.github.io/", "dblp": "255/4887;;;117/7310-1;60/2536-37", "google_scholar": "V7lBToMAAAAJ;aDqdjcUAAAAJ;XTbDLrkAAAAJ;mRg16LkAAAAJ;nhuB5CEAAAAJ", "or_profile": "~Sarkar_Snigdha_Sarathi_Das1;~Haoran_Ranran_Zhang1;~Peng_Shi2;~Wenpeng_Yin1;~Rui_Zhang7", "aff": "Pennsylvania State University;Pennsylvania State University;Amazon AWS;Pennsylvania State University;Pennsylvania State University", "aff_domain": "psu.edu;psu.edu;amazon.com;psu.edu;psu.edu", "position": "PhD student;PhD student;Researcher;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\ndas2023unified,\ntitle={Unified Low-Resource Sequence Labeling by Sample-Aware Dynamic Sparse Finetuning},\nauthor={Sarkar Snigdha Sarathi Das and Haoran Ranran Zhang and Peng Shi and Wenpeng Yin and Rui Zhang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=aE7feUD7o7}\n}", "github": "", "project": "", "reviewers": "XQLo;C1eS;YNan", "site": "https://openreview.net/forum?id=aE7feUD7o7", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;3", "excitement": "3;3;4", "reproducibility": "3;3;4", "correctness": "4;3;4", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.6666666666666665, "replies_avg": 10, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;", "linkedin": ";;;;", "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "Pennsylvania State University;Amazon", "aff_unique_dep": ";Amazon Web Services", 
"aff_unique_url": "https://www.psu.edu;https://aws.amazon.com", "aff_unique_abbr": "PSU;AWS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "aFIx8T43LU", "title": "Log-FGAER: Logic-Guided Fine-Grained Address Entity Recognition from Multi-Turn Spoken Dialogue", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Fine-grained address entity recognition (FGAER) from multi-turn spoken dialogues is particularly challenging. The major reason lies in that a full address is often formed through a conversation process. Different parts of an address are distributed through multiple turns of a dialogue with spoken noises. It is nontrivial to extract by turn and combine them. This challenge has not been well emphasized by main-stream entity extraction algorithms. To address this issue, we propose in this paper a logic-guided fine-grained address recognition method (Log-FGAER), where we formulate the address hierarchy relationship as the logic rule and softly apply it in a probabilistic manner to improve the accuracy of FGAER. In addition, we provide an ontology-based data augmentation methodology that employs ChatGPT to augment a spoken dialogue dataset with labeled address entities. Experiments are conducted using datasets generated by the proposed data augmentation technique and derived from real-world scenarios. The results of the experiment demonstrate the efficacy of our proposal.", "keywords": "Fine-grained address entity recognition;probabilistic soft logic;address extraction;data augmentation", "primary_area": "", "supplementary_material": "", "author": "Xue Han;Yitong Wang;Qian Hu;Pengwei Hu;Chao Deng;Junlan Feng", "authorids": "~Xue_Han3;~Yitong_Wang2;~Qian_Hu5;~Pengwei_Hu1;~Chao_Deng4;~Junlan_Feng3", "gender": "F;;;M;M;F", "homepage": ";https://github.com/Devil0817;https://github.com/sissi-lvu;;;", "dblp": ";;;190/4803;;36/3948", "google_scholar": "Rg4xqCgAAAAJ;;;;https://scholar.google.com/citations?hl=en;https://scholar.google.es/citations?user=rBjPtmQAAAAJ", "or_profile": "~Xue_Han3;~Yitong_Wang2;~Qian_Hu5;~Pengwei_Hu1;~Chao_Deng4;~Junlan_Feng3", "aff": "China Mobile Communications Company Limited Research Institute;Beijing University of Posts and Telecommunications;China Mobile Research Institute;University of Chinese Academy of Sciences;China Mobile Research Institute;China Mobile", "aff_domain": "chinamobile.com;bupt.edu.cn;chinamobile.com;ucas.ac.cn;jiutian.10086.cn;ioa.ac.cn", "position": "Researcher;MS student;Researcher;Full Professor;Researcher;Principal Researcher", "bibtex": "@inproceedings{\nhan2023logfgaer,\ntitle={Log-{FGAER}: Logic-Guided Fine-Grained Address Entity Recognition from Multi-Turn Spoken Dialogue},\nauthor={Xue Han and Yitong Wang and Qian Hu and Pengwei Hu and Chao Deng and Junlan Feng},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=aFIx8T43LU}\n}", "github": "", "project": "", "reviewers": "QkBv;ke6G;NiYR", "site": "https://openreview.net/forum?id=aFIx8T43LU", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;3", "excitement": "3;4;3", "reproducibility": "4;4;3", "correctness": "3;4;3", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 
0.0, "orcid": ";;;;0000-0003-4449-5247;0000-0001-5292-2945", "linkedin": ";;;;https://www.linkedin.cn/incareer/in/ACoAAB5sppAB_Da2tlvgSyM7NFTWl6d1DhZZe1o;junlan-feng-8968ba11/", "aff_unique_index": "0;1;2;3;2;2", "aff_unique_norm": "China Mobile Communications Group Co., Ltd.;Beijing University of Posts and Telecommunications;China Mobile;University of Chinese Academy of Sciences", "aff_unique_dep": "Research Institute;;Research Institute;", "aff_unique_url": "http://www.chinamobileltd.com/;http://www.bupt.edu.cn/;https://www.chinamobile.com/;http://www.ucas.ac.cn", "aff_unique_abbr": "CMCC;BUPT;CMRI;UCAS", "aff_campus_unique_index": "1", "aff_campus_unique": ";Beijing", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "aICXDsoH3O", "title": "Towards large language model-based personal agents in the enterprise: Current trends and open problems", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "There is an emerging trend to use large language models (LLMs) to reason about complex goals and orchestrate a set of pluggable tools or APIs to accomplish a goal. This functionality could, among other use cases, be used to build personal assistants for knowledge workers. While there are impressive demos of LLMs being used as autonomous agents or for tool composition, these solutions are not ready mission-critical enterprise settings. For example, they are brittle to input changes, and can produce inconsistent results for the same inputs. These use cases have many open problems in an exciting area of NLP research, such as trust and explainability, consistency and reproducibility, adherence to guardrails and policies, best practices for composable tool design, and the need for new metrics and benchmarks. This vision paper illustrates some examples of LLM-based autonomous agents that reason and compose tools, highlights cases where they fail, surveys some of the recent efforts in this space, and lays out the research challenges to make these solutions viable for enterprises.", "keywords": "Large language models;task-oriented;chatbots;multi-modal", "primary_area": "", "supplementary_material": "", "author": "Vinod Muthusamy;Yara Rizk;Kiran Kate;Praveen Venkateswaran;Vatche Isahagian;Ashu Gulati;Parijat Dube", "authorids": "~Vinod_Muthusamy1;~Yara_Rizk1;~Kiran_Kate1;~Praveen_Venkateswaran1;~Vatche_Isahagian1;~Ashu_Gulati1;~Parijat_Dube1", "gender": ";;F;M;;F;", "homepage": "https://researcher.watson.ibm.com/researcher/view.php?person=us-vmuthus;;;;;;https://researcher.watson.ibm.com/researcher/view.php?person=us-pdube", "dblp": "31/4489.html;;12/8321;177/7837;28/10038.html;;77/277", "google_scholar": "kNpK4kIAAAAJ;llV-0hwAAAAJ;;jJI7sRgAAAAJ;VYN4CfEAAAAJ;;bOejjQUAAAAJ", "or_profile": "~Vinod_Muthusamy1;~Yara_Rizk1;~Kiran_Kate1;~Praveen_Venkateswaran1;~Vatche_Isahagian1;~Ashu_Gulati1;~Parijat_Dube1", "aff": "International Business Machines;International Business Machines;International Business Machines;International Business Machines;International Business Machines;International Business Machines;International Business Machines", "aff_domain": "ibm.com;ibm.com;ibm.com;ibm.com;ibm.com;ibm.com;ibm.com", "position": "Principal Researcher;Researcher;Researcher;Researcher;Researcher;Researcher;Researcher", "bibtex": "@inproceedings{\nmuthusamy2023towards,\ntitle={Towards large language model-based personal agents in the enterprise: Current trends and open problems},\nauthor={Vinod Muthusamy and Yara Rizk and Kiran Kate and Praveen Venkateswaran and Vatche 
Isahagian and Ashu Gulati and Parijat Dube},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=aICXDsoH3O}\n}", "github": "", "project": "", "reviewers": "dPLF;Jf2r;AqLF", "site": "https://openreview.net/forum?id=aICXDsoH3O", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;3", "excitement": "4;4;4", "reproducibility": "4;3;3", "correctness": "4;4;4", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 4.0, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0009-0003-9688-9245;;;;", "linkedin": "vinod-muthusamy-8918aa4;https://lb.linkedin.com/in/yararizk;;;;ashu-gulati/;", "aff_unique_index": "0;0;0;0;0;0;0", "aff_unique_norm": "International Business Machines Corporation", "aff_unique_dep": "", "aff_unique_url": "https://www.ibm.com", "aff_unique_abbr": "IBM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "aIp5EZeO3f", "title": "ZGUL: Zero-shot Generalization to Unseen Languages using Multi-source Ensembling of Language Adapters", "track": "main", "status": "Long Main", "tldr": "", "abstract": "We tackle the problem of zero-shot cross-lingual transfer in NLP tasks via the use of language adapters (LAs). Most of the earlier works have explored training with adapter of a single source (often English), and testing either using the target LA or LA of another related language. Training target LA requires unlabeled data, which may not be readily available for low resource *unseen* languages: those that are neither seen by the underlying multilingual language model (e.g., mBERT), nor do we have any (labeled or unlabeled) data for them. \n\nWe posit that for more effective cross-lingual transfer, instead of just one source LA, we need to leverage LAs of multiple (linguistically or geographically related) source languages, both at train and test-time - which we investigate via our novel neural architecture, ZGUL.\nExtensive experimentation across four language groups, covering 15 unseen target languages, demonstrates improvements of up to 3.2 average F1 points over standard fine-tuning and other strong baselines on POS tagging and NER tasks. We also extend ZGUL to settings where either (1) some unlabeled data or (2) few-shot training examples are available for the target language. 
We find that ZGUL continues to outperform baselines in these settings too.", "keywords": "Multilinguality;Low Resource Languages;Parameter-Efficient Fine-Tuning (PEFT)", "primary_area": "", "supplementary_material": "", "author": "Vipul Kumar Rathore;Rajdeep Dhingra;Parag Singla;Mausam .", "authorids": "~Vipul_Kumar_Rathore1;~Rajdeep_Dhingra1;~Parag_Singla1;~Mausam_.1", "gender": "M;M;M;M", "homepage": "http://www.cse.iitd.ac.in/~vipulk;;http://www.cse.iitd.ac.in/~parags;http://www.cse.iitd.ac.in/~mausam", "dblp": "265/5917.html;;14/167;30/6391.html", "google_scholar": "GEy8HT4AAAAJ;;https://scholar.google.co.in/citations?user=V49BsgMAAAAJ;https://scholar.google.co.in/citations?hl=en", "or_profile": "~Vipul_Kumar_Rathore1;~Rajdeep_Dhingra1;~Parag_Singla1;~Mausam_Mausam2", "aff": "Indian Institute of Technology Delhi;Indian Institute of Technology, Delhi;Indian Institute of Technology, Delhi;Indian Institute of Technology Delhi", "aff_domain": "iitd.ac.in;iitd.ac.in;iitd.ac.in;iitd.ac.in", "position": "PhD student;Undergrad student;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nrathore2023zgul,\ntitle={{ZGUL}: Zero-shot Generalization to Unseen Languages using Multi-source Ensembling of Language Adapters},\nauthor={Vipul Kumar Rathore and Rajdeep Dhingra and Parag Singla and Mausam .},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=aIp5EZeO3f}\n}", "github": "", "project": "", "reviewers": "m43w;RmJP;itj7", "site": "https://openreview.net/forum?id=aIp5EZeO3f", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;3;4", "excitement": "3;4;3", "reproducibility": "5;5;5", "correctness": "4;4;4", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 5.0, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0000-0003-4088-4296", "linkedin": "vipul-rathore-7b36b488/;rajdeep-dhingra/;;", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Indian Institute of Technology Delhi", "aff_unique_dep": "", "aff_unique_url": "https://www.iitd.ac.in", "aff_unique_abbr": "IIT Delhi", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Delhi", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "India" }, { "id": "aJILUuANbs", "title": "Multi-Stage Pre-training Enhanced by ChatGPT for Multi-Scenario Multi-Domain Dialogue Summarization", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Dialogue summarization involves a wide range of scenarios and domains. However, existing methods generally only apply to specific scenarios or domains. In this study, we propose a new pre-trained model specifically designed for multi-scenario multi-domain dialogue summarization. It adopts a multi-stage pre-training strategy to reduce the gap between the pre-training objective and fine-tuning objective. Specifically, we first conduct domain-aware pre-training using large-scale multi-scenario multi-domain dialogue data to enhance the adaptability of our pre-trained model. Then, we conduct task-oriented pre-training using large-scale multi-scenario multi-domain \"dialogue-summary\" parallel data annotated by ChatGPT to enhance the dialogue summarization ability of our pre-trained model. 
Experimental results on three dialogue summarization datasets from different scenarios and domains indicate that our pre-trained model significantly outperforms previous state-of-the-art models in full fine-tuning, zero-shot, and few-shot settings.", "keywords": "Multi-Scenario Multi-Domain Dialogue Summarization;Multi-Stage Pre-training;ChatGPT", "primary_area": "", "supplementary_material": "", "author": "Weixiao Zhou;Gengyao Li;Xianfu Cheng;Xinnian Liang;Junnan Zhu;Feifei Zhai;Zhoujun Li", "authorids": "~Weixiao_Zhou1;~Gengyao_Li2;~Xianfu_Cheng1;~Xinnian_Liang1;~Junnan_Zhu1;~Feifei_Zhai1;~Zhoujun_Li1", "gender": "M;M;M;M;M;M;M", "homepage": "https://github.com/zhouweixiao;https://github.com/xxbbb1996;https://blog.csdn.net/qq_20200047?type=blog;;;;", "dblp": "359/0717;;05/10105.html;275/9970;205/8977;99/10489;76/2866-1", "google_scholar": "ET1_G9cAAAAJ;;https://scholar.google.com/citations?hl=en;q0kbdFMAAAAJ;Seamo_wAAAAJ;;", "or_profile": "~Weixiao_Zhou1;~Gengyao_Li2;~Xianfu_Cheng1;~Xinnian_Liang1;~Junnan_Zhu1;~Feifei_Zhai1;~Zhoujun_Li1", "aff": "Beihang University;University of Chinese Academy of Sciences;Beihang University;Beihang University;Institute of Automation, Chinese Academy of Sciences;Fanyu AI Research, Beijing Fanyu Technology Co., LTD. ;Beihang University", "aff_domain": "buaa.edu.cn;ucas.ac.cn;buaa.edu.cn;buaa.edu.cn;ia.ac.cn;zkfy.com;buaa.edu.cn", "position": "PhD student;MS student;PhD student;PhD student;Assistant Professor;Researcher;Full Professor", "bibtex": "@inproceedings{\nzhou2023multistage,\ntitle={Multi-Stage Pre-training Enhanced by Chat{GPT} for Multi-Scenario Multi-Domain Dialogue Summarization},\nauthor={Weixiao Zhou and Gengyao Li and Xianfu Cheng and Xinnian Liang and Junnan Zhu and Feifei Zhai and Zhoujun Li},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=aJILUuANbs}\n}", "github": "", "project": "", "reviewers": "BDoc;SZZj;k7jr", "site": "https://openreview.net/forum?id=aJILUuANbs", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;2", "excitement": "3;2;3", "reproducibility": "4;3;3", "correctness": "4;3;3", "rating_avg": 3.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 2.6666666666666665, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0009-0006-8929-0834;;0000-0003-1130-8302;0000-0002-4744-6179;0000-0002-9856-2946;;", "linkedin": ";;;;;;", "aff_unique_index": "0;1;0;0;2;3;0", "aff_unique_norm": "Beihang University;University of Chinese Academy of Sciences;Chinese Academy of Sciences;Beijing Fanyu Technology Co., LTD.", "aff_unique_dep": ";;Institute of Automation;Fanyu AI Research", "aff_unique_url": "http://www.buaa.edu.cn/;http://www.ucas.ac.cn;http://www.ia.cas.cn;", "aff_unique_abbr": "BUAA;UCAS;CAS;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "aLkknJNdl6", "title": "Towards Low-Resource Automatic Program Repair with Meta-Learning and Pretrained Language Models", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Automatic program repair (APR) has gained increasing attention as an essential technique in software development to reduce manual debugging efforts and boost developers' productivity. 
Recent advances in deep learning (DL) based models have demonstrated promising results by learning from large-scale bug-fix examples in a data-driven manner. However, in practical scenarios, software bugs have an imbalanced distribution, and the fixing knowledge learned by APR models often only capture the patterns of frequent error types, making it inapplicable to handle the rare error types. To address this limitation, we investigate a novel task of low-resource APR, and propose Meta-APR, a new meta-learning framework integrated with code pretrained language models to generate fixes for low-resource bugs with limited training samples. Our Meta-APR learns better error-specific knowledge from high-resource bugs through efficient first-order meta-learning optimization, which allows for a faster adaptation to the target low-resource bugs. Besides, while we adopt CodeT5, a pretrained code-aware encoder-decoder Transformer, as the backbone model for Meta-APR, it is a model-agnostic framework that can be integrated with any neural models. Extensive experimental results on three benchmarks in various programming languages verify the superiority of our method over existing DL-based APR approaches.", "keywords": "Low-resource APR", "primary_area": "", "supplementary_material": "", "author": "Weishi Wang;Yue Wang;Steven Hoi;Shafiq Joty", "authorids": "~Weishi_Wang2;~Yue_Wang19;~Steven_Hoi2;~Shafiq_Joty1", "gender": "M;M;M;M", "homepage": ";https://yuewang-cuhk.github.io/;http://stevenhoi.com;https://raihanjoty.github.io/", "dblp": ";60/9374-34;;62/2078", "google_scholar": "P8TGNcoAAAAJ;iyxbtcEAAAAJ;JoLjflYAAAAJ;hR249csAAAAJ", "or_profile": "~Weishi_Wang2;~Yue_Wang19;~Steven_Hoi2;~Shafiq_Joty1", "aff": "Nanyang Technological University;SalesForce.com;Singapore Management University;SalesForce.com", "aff_domain": "ntu.edu.sg;salesforce.com;smu.edu.sg;salesforce.com", "position": "PhD student;Researcher;Associate Professor;Principal Researcher", "bibtex": "@inproceedings{\nwang2023towards,\ntitle={Towards Low-Resource Automatic Program Repair with Meta-Learning and Pretrained Language Models},\nauthor={Weishi Wang and Yue Wang and Steven Hoi and Shafiq Joty},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=aLkknJNdl6}\n}", "github": "", "project": "", "reviewers": "A13G;yvFS;rer7", "site": "https://openreview.net/forum?id=aLkknJNdl6", "pdf_size": 0, "rating": "", "confidence": "4;4;3", "excitement": "4;3;4", "reproducibility": "5;4;3", "correctness": "4;3;4", "rating_avg": 0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.0, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0, "corr_rating_correctness": 0, "orcid": ";;;", "linkedin": ";yue-wang-37458795/;;", "aff_unique_index": "0;1;2;1", "aff_unique_norm": "Nanyang Technological University;Salesforce;Singapore Management University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ntu.edu.sg;https://www.salesforce.com;https://www.smu.edu.sg", "aff_unique_abbr": "NTU;Salesforce;SMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;1", "aff_country_unique": "Singapore;United States" }, { "id": "aN8zkE15Nx", "title": "An Investigation of LLMs\u2019 Inefficacy in Understanding Converse Relations", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Large Language Models (LLMs) have achieved 
remarkable success in many formal language oriented tasks, such as structural data-to-text and semantic parsing.\nHowever, current benchmarks mostly follow the data distribution of the pre-training data of LLMs.\nTherefore, a natural question arises: do LLMs really understand the structured semantics of formal languages?\nIn this paper, we investigate this problem on a special case, converse binary relation.\nWe introduce a new benchmark ConvRe focusing on converse relations, which contains 17 relations and 1240 triples extracted from popular knowledge graph completion datasets.\nOur ConvRe features two tasks, Re2Text and Text2Re, which are formulated as multi-choice question answering to evaluate LLMs' ability to determine the matching between relations and associated text.\nFor the evaluation protocol, apart from different prompting methods, we further introduce variants to the test text and few-shot example text. \nWe conduct experiments on three popular LLM families and have observed various scaling trends. \nThe results suggest that LLMs\noften resort to shortcut learning and still face challenges on our proposed benchmark.", "keywords": "Large language models;inverse scaling;converse relation", "primary_area": "", "supplementary_material": "", "author": "Chengwen Qi;Bowen Li;Binyuan Hui;Bailin Wang;Jinyang Li;Jinwang Wu;Yuanjun Laili", "authorids": "~Chengwen_Qi1;~Bowen_Li8;~Binyuan_Hui1;~Bailin_Wang3;~Jinyang_Li4;~Jinwang_Wu1;~Yuanjun_Laili1", "gender": "M;;F;M;M;F;M", "homepage": "https://github.com/Trayvon001;;https://huybery.github.io/;http://jinyang-li.me/;https://github.com/wjwpoi;https://shi.buaa.edu.cn/lailiyuanjun;https://berlino.github.io/", "dblp": ";75/10470-2;246/4699;79/572-3;;;218/7334", "google_scholar": ";RLWXNf8AAAAJ;RBb3ItMAAAAJ;https://scholar.google.com/citations?hl=en;;;", "or_profile": "~Chengwen_Qi1;~Bowen_Li8;~Binyuan_Hui1;~Jinyang_Li4;~Jinwang_Wu1;~Yuanjun_Laili1;~bailin_wang1", "aff": "Beihang University;International Innovation Center of Tsinghua University, Shanghai;Alibaba Group;The University of Hong Kong;Beihang University;Beihang University;Massachusetts Institute of Technology", "aff_domain": "buaa.edu.cn;tsinghua.edu.cn;alibaba-inc.com;hku.hk;buaa.edu.cn;buaa.edu.cn;mit.edu", "position": "MS student;Researcher;Researcher;PhD student;MS student;Associate Professor;Postdoc", "bibtex": "@inproceedings{\nqi2023an,\ntitle={An Investigation of {LLM}s{\\textquoteright} Inefficacy in Understanding Converse Relations},\nauthor={Chengwen Qi and Bowen Li and Binyuan Hui and Bailin Wang and Jinyang Li and Jinwang Wu and Yuanjun Laili},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=aN8zkE15Nx}\n}", "github": "", "project": "", "reviewers": "2Eqb;RTYu;moP5;qoc2", "site": "https://openreview.net/forum?id=aN8zkE15Nx", "pdf_size": 0, "rating": "4;4;4;4", "confidence": "3;4;4;3", "excitement": "4;3;4;3", "reproducibility": "2;3;5;3", "correctness": "3;4;5;3", "rating_avg": 4.0, "confidence_avg": 3.5, "excitement_avg": 3.5, "reproducibility_avg": 3.25, "correctness_avg": 3.75, "replies_avg": 14, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;0000-0003-3834-4602;", "linkedin": ";;;;;;", "aff_unique_index": "0;1;2;3;0;0;4", "aff_unique_norm": "Beihang University;Tsinghua University;Alibaba Group;University of Hong Kong;Massachusetts Institute of Technology", "aff_unique_dep": ";International Innovation Center;;;", 
"aff_unique_url": "http://www.buaa.edu.cn/;https://www.tsinghua.edu.cn;https://www.alibaba.com;https://www.hku.hk;https://web.mit.edu", "aff_unique_abbr": "BUAA;THU;Alibaba;HKU;MIT", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Shanghai;Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0;0;1", "aff_country_unique": "China;United States" }, { "id": "aNFWz8zubu", "title": "EtiCor: Corpus for Analyzing LLMs for Etiquettes", "track": "main", "status": "Short Main", "tldr": "", "abstract": "Etiquettes are an essential ingredient of day-to-day interactions among people. Moreover, etiquettes are region-specific, and etiquettes in one region might contradict those in other regions. In this paper, we propose EtiCor, an Etiquettes Corpus, having texts about social norms from five different regions across the globe. The corpus provides a test bed for evaluating LLMs for knowledge and understanding of region-specific etiquettes. Additionally, we propose the task of Etiquette Sensitivity. We experiment with state-of-the-art LLMs (Delphi, Falcon40B, and GPT-3.5). Initial results indicate that LLMs, mostly fail to understand etiquettes from regions from non-Western world.", "keywords": "Social Norms;Etiquette;LLMs", "primary_area": "", "supplementary_material": "", "author": "Ashutosh Dwivedi;Pradhyumna Lavania;Ashutosh Modi", "authorids": "~Ashutosh_Dwivedi1;~Pradhyumna_Lavania1;~Ashutosh_Modi1", "gender": "M;M;M", "homepage": ";;https://ashutosh-modi.github.io/", "dblp": ";;139/0873", "google_scholar": ";;AWu6f60AAAAJ", "or_profile": "~Ashutosh_Dwivedi1;~Pradhyumna_Lavania1;~Ashutosh_Modi1", "aff": "University of Calgary;Indian Institute of Technology, Kanpur;IIT Kanpur", "aff_domain": "ucalgary.ca;iitk.ac.in;iitk.ac.in", "position": "Researcher;Undergrad student;Assistant Professor", "bibtex": "@inproceedings{\ndwivedi2023eticor,\ntitle={EtiCor: Corpus for Analyzing {LLM}s for Etiquettes},\nauthor={Ashutosh Dwivedi and Pradhyumna Lavania and Ashutosh Modi},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=aNFWz8zubu}\n}", "github": "", "project": "", "reviewers": "FAuQ;j5Th;vwjx", "site": "https://openreview.net/forum?id=aNFWz8zubu", "pdf_size": 0, "rating": "5;5;5", "confidence": "3;4;4", "excitement": "4;4;3", "reproducibility": "3;3;3", "correctness": "4;4;3", "rating_avg": 5.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.0, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": "ashutosh-dwivedi-434192221/;pradhyumna-lavania-5777b61ba/;dr-ashutosh-modi-3907835/", "aff_unique_index": "0;1;1", "aff_unique_norm": "University of Calgary;Indian Institute of Technology Kanpur", "aff_unique_dep": ";", "aff_unique_url": "https://www.ucalgary.ca;https://www.iitk.ac.in", "aff_unique_abbr": "U of C;IIT Kanpur", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Kanpur", "aff_country_unique_index": "0;1;1", "aff_country_unique": "Canada;India" }, { "id": "aP5f7cgY1M", "title": "Rather a Nurse than a Physician - Contrastive Explanations under Investigation", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Contrastive explanations, where one decision is explained *in contrast to another*, are supposed to be closer to how humans explain a decision than non-contrastive explanations, where the decision is not necessarily 
referenced to an alternative. This claim has never been empirically validated. We analyze four English text-classification datasets (SST2, DynaSent, BIOS and DBpedia-Animals). We fine-tune and extract explanations from three different models (RoBERTa, GPT-2, and T5), each in three different sizes, and apply three post-hoc explainability methods (LRP, GradientxInput, GradNorm). We furthermore collect and release human rationale annotations for a subset of 100 samples from the BIOS dataset for contrastive and non-contrastive settings. A cross-comparison between model-based rationales and human annotations, both in contrastive and non-contrastive settings, yields a high agreement between the two settings for models as well as for humans. Moreover, model-based explanations computed in both settings align equally well with human rationales. Thus, we empirically find that humans do not necessarily explain in a contrastive manner.", "keywords": "explainability;contrastive explanations;human annotations", "primary_area": "", "supplementary_material": "", "author": "Oliver Eberle;Ilias Chalkidis;Laura Cabello;Stephanie Brandl", "authorids": "~Oliver_Eberle1;~Ilias_Chalkidis1;~Laura_Cabello1;~Stephanie_Brandl1", "gender": ";M;;F", "homepage": "https://www.tu.berlin/;https://iliaschalkidis.github.io;;https://stephaniebrandl.github.io", "dblp": "260/6891;199/8161;;194/9380", "google_scholar": "vZB4qw0AAAAJ;BrtAqz8AAAAJ;;eCDiVTMAAAAJ", "or_profile": "~Oliver_Eberle1;~Ilias_Chalkidis1;~Laura_Cabello1;~Stephanie_Brandl1", "aff": "Technische Universit\u00e4t Berlin;Copenhagen University;;K\u00f8benhavns Universitet", "aff_domain": "tu-berlin.de;ku.dk;;di.ku.dk", "position": "Postdoc;Postdoc;;Postdoc", "bibtex": "@inproceedings{\neberle2023rather,\ntitle={Rather a Nurse than a Physician - Contrastive Explanations under Investigation},\nauthor={Oliver Eberle and Ilias Chalkidis and Laura Cabello and Stephanie Brandl},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=aP5f7cgY1M}\n}", "github": "", "project": "", "reviewers": "i43o;vveg;beUE", "site": "https://openreview.net/forum?id=aP5f7cgY1M", "pdf_size": 0, "rating": "5;5;5", "confidence": "3;3;4", "excitement": "4;4;4", "reproducibility": "3;3;4", "correctness": "3;3;4", "rating_avg": 5.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 4.0, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-6967-9950;0000-0002-0706-7772;;", "linkedin": ";;;", "aff_unique_index": "0;1;1", "aff_unique_norm": "Technische Universit\u00e4t Berlin;University of Copenhagen", "aff_unique_dep": ";", "aff_unique_url": "https://www.tu-berlin.de;https://www.ku.dk", "aff_unique_abbr": "TU Berlin;UCPH", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1", "aff_country_unique": "Germany;Denmark" }, { "id": "aPZ7AjA5YV", "title": "Revisiting Large Language Models as Zero-shot Relation Extractors", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Relation extraction (RE) consistently involves a certain degree of labeled or unlabeled data even under the zero-shot setting. 
Recent studies have shown that large language models (LLMs) transfer well to new tasks out-of-the-box simply given a natural language prompt, which provides the possibility of extracting relations from text without any data and parameter tuning. This work focuses on the study of exploring LLMs, such as ChatGPT, as zero-shot relation extractors. On the one hand, we analyze the drawbacks of existing RE prompts and attempt to incorporate recent prompt techniques such as chain-of-thought (CoT) to improve zero-shot RE. We propose the summarize-and-ask (\\textsc{SumAsk}) prompting, a simple prompt recursively using LLMs to transform RE inputs to the effective question answering (QA) format. On the other hand, we conduct comprehensive experiments on various benchmarks and settings to investigate the capabilities of LLMs on zero-shot RE. Specifically, we have the following findings: (i) \\textsc{SumAsk} consistently and significantly improves LLMs performance on different model sizes, benchmarks and settings; (ii) Zero-shot prompting with ChatGPT achieves competitive or superior results compared with zero-shot and fully supervised methods; (iii) LLMs deliver promising performance in extracting overlapping relations; (iv) The performance varies greatly regarding different relations. Different from small language models, LLMs are effective in handling challenge none-of-the-above (NoTA) relation.", "keywords": "Relation Extraction;Large Language Models;Zero-shot Learning", "primary_area": "", "supplementary_material": "", "author": "Guozheng Li;Peng Wang;Wenjun Ke", "authorids": "~Guozheng_Li3;~Peng_Wang11;~Wenjun_Ke1", "gender": "M;;M", "homepage": ";;https://cs.seu.edu.cn/2023/1024/c23024a469536/page.htm", "dblp": ";;", "google_scholar": "https://scholar.google.com/citations?hl=en;;", "or_profile": "~Guozheng_Li3;~Peng_Wang11;~Wenjun_Ke1", "aff": "Southeast University;;Beijing Institute of Computer Technology and Application", "aff_domain": "seu.edu.cn;;ict.ac.cn", "position": "PhD student;;Researcher", "bibtex": "@inproceedings{\nli2023revisiting,\ntitle={Revisiting Large Language Models as Zero-shot Relation Extractors},\nauthor={Guozheng Li and Peng Wang and Wenjun Ke},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=aPZ7AjA5YV}\n}", "github": "", "project": "", "reviewers": "83on;PmA9;ry3f", "site": "https://openreview.net/forum?id=aPZ7AjA5YV", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;4", "excitement": "4;3;3", "reproducibility": "3;3;4", "correctness": "3;3;3", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.0, "replies_avg": 10, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-8068-3491;;0000-0001-7352-1710", "linkedin": ";;", "aff_unique_index": "0;1", "aff_unique_norm": "Southeast University;Beijing Institute of Computer Technology and Application", "aff_unique_dep": ";", "aff_unique_url": "https://www.seu.edu.cn/;", "aff_unique_abbr": "SEU;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "id": "aRlH9AkiEA", "title": "KEPLET: Knowledge-Enhanced Pretrained Language Model with Topic Entity Awareness", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "In recent years, Pre-trained Language Models (PLMs) have shown their superiority by pre-training on 
unstructured text corpus and then fine-tuning on downstream tasks. On entity-rich textual resources like Wikipedia, Knowledge-Enhanced PLMs (KEPLMs) incorporate the interactions between tokens and mentioned entities in pre-training, and are thus more effective on entity-centric tasks such as entity linking and relation classification. Although exploiting Wikipedia's rich structures to some extent, conventional KEPLMs still neglect a unique layout of the corpus where each Wikipedia page is around a topic entity (identified by the page URL and shown in the page title). In this paper, we demonstrate that KEPLMs without incorporating the topic entities will lead to insufficient entity interaction and biased (relation) word semantics. We thus propose KEPLET, a novel {K}nowledge-{\\'E}nhanced {P}re-trained {L}anguag{E} model with {T}opic entity awareness. In an end-to-end manner, KEPLET identifies where to add the topic entity's information in a Wikipedia sentence, fuses such information into token and mentioned entities representations, and supervises the network learning, through which it takes topic entities back into consideration. Experiments demonstrated the generality and superiority of KEPLET which was applied to two representative KEPLMs, achieving significant improvements on four entity-centric tasks.", "keywords": "language model;knowledge enhanced language model", "primary_area": "", "supplementary_material": "", "author": "Yichuan Li;Jialong Han;Kyumin Lee;Chengyuan Ma;Benjamin Z. Yao;Xiaohu Liu", "authorids": "~Yichuan_Li3;~Jialong_Han1;~Kyumin_Lee1;~Chengyuan_Ma1;~Benjamin_Z._Yao1;~Xiaohu_Liu1", "gender": ";;M;M;M;", "homepage": ";https://www.jialonghan.com/;https://web.cs.wpi.edu/~kmlee/;https://scholar.google.com/citations?user=gyoOBiIAAAAJ;;", "dblp": "216/7478-1.html;92/7536;https://dblp.uni-trier.de/pid/22/8024.html;62/6934.html;134/7162;18/2453", "google_scholar": "lLvYmOwAAAAJ;L5Z2lmkAAAAJ;zQKRsSEAAAAJ;gyoOBiIAAAAJ;;", "or_profile": "~Yichuan_Li3;~Jialong_Han1;~Kyumin_Lee1;~Chengyuan_Ma1;~Benjamin_Z._Yao1;~Xiaohu_Liu1", "aff": "Worcester Polytechnic Institute;Amazon;Worcester Polytechnic Institute;;Amazon;", "aff_domain": "wpi.edu;amazon.com;wpi.edu;;amazon.com;", "position": "PhD student;Senior Applied Scientist;Associate Professor;;Researcher;", "bibtex": "@inproceedings{\nli2023keplet,\ntitle={{KEPLET}: Knowledge-Enhanced Pretrained Language Model with Topic Entity Awareness},\nauthor={Yichuan Li and Jialong Han and Kyumin Lee and Chengyuan Ma and Benjamin Z. 
Yao and Xiaohu Liu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=aRlH9AkiEA}\n}", "github": "", "project": "", "reviewers": "pHNa;pRqK;5vf9", "site": "https://openreview.net/forum?id=aRlH9AkiEA", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;3;4", "excitement": "3;3;4", "reproducibility": "3;2;5", "correctness": "3;3;4", "rating_avg": 3.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0001-5285-9210;;0000-0001-5126-5883;;", "linkedin": ";jialonghan/;;;;", "aff_unique_index": "0;1;0;1", "aff_unique_norm": "Worcester Polytechnic Institute;Amazon", "aff_unique_dep": ";Amazon.com, Inc.", "aff_unique_url": "https://www.wpi.edu;https://www.amazon.com", "aff_unique_abbr": "WPI;Amazon", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "aURCCzSuhc", "title": "Taxonomy Expansion for Named Entity Recognition", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Training a Named Entity Recognition (NER) model often involves fixing a taxonomy of entity types. However, requirements evolve and we might need the NER model to recognize additional entity types. A simple approach is to re-annotate entire dataset with both existing and additional entity types and then train the model on the re-annotated dataset. However, this is an extremely laborious task. To remedy this, we propose a novel approach called Partial Label Model (PLM) that uses only partially annotated datasets. We experiment with 6 diverse datasets and show that PLM consistently performs better than most other approaches (0.5 - 2.5 F1), including in novel settings for taxonomy expansion not considered in prior work. 
The gap between PLM and all other approaches is especially large in settings where there is limited data available for the additional entity types (as much as 11 F1), thus suggesting a more cost effective approaches to taxonomy expansion.", "keywords": "named entity recognition;taxonomy", "primary_area": "", "supplementary_material": "", "author": "Karthikeyan K;Yogarshi Vyas;Jie Ma;Giovanni Paolini;Neha Anna John;Shuai Wang;Yassine Benajiba;Vittorio Castelli;Dan Roth;Miguel Ballesteros", "authorids": "~Karthikeyan_K1;~Yogarshi_Vyas1;~Jie_Ma3;~Giovanni_Paolini1;~Neha_Anna_John1;~Shuai_Wang2;~Yassine_Benajiba1;~Vittorio_Castelli1;~Dan_Roth3;~Miguel_Ballesteros1", "gender": "M;M;M;M;F;M;Not Specified;M;M;M", "homepage": ";http://www.cs.umd.edu/~yogarshi/;;http://giovannipaolini.org;;https://shuaiwang.net/;;;https://www.cis.upenn.edu/~danroth/;https://miguelballesteros.github.io/", "dblp": "255/5210;147/9150;62/5110-5.html;150/6260;331/2445.html;42/1503-20;17/6428;c/VittorioCastelli;r/DanRoth;38/8065", "google_scholar": "KACcWC4AAAAJ;k6k7i1IAAAAJ;0FSlSt4AAAAJ;https://scholar.google.it/citations?user=xGI18C0AAAAJ;7_JJaE0AAAAJ;bXAin_gAAAAJ;;d-lg1lEAAAAJ;E-bpPWgAAAAJ;lhDwr-AAAAAJ", "or_profile": "~Karthikeyan_K1;~Yogarshi_Vyas1;~Jie_Ma3;~Giovanni_Paolini1;~Neha_Anna_John1;~Shuai_Wang2;~Yassine_Benajiba1;~Vittorio_Castelli1;~Dan_Roth3;~Miguel_Ballesteros1", "aff": "Amazon;Amazon;Amazon;Amazon;Amazon;Amazon;Amazon;Amazon;Amazon;Amazon", "aff_domain": "amazon.com;amazon.com;amazon.com;amazon.com;amazon.com;amazon.com;amazon.com;amazon.com;amazon.com;amazon.com", "position": "Intern;Applied Scientist;Researcher;Applied Scientist;Researcher;Applied Scientist;Principal Researcher;Senior Science Manager;VP and Distinguished Scientist;Principal Applied Scientist", "bibtex": "@inproceedings{\nk2023taxonomy,\ntitle={Taxonomy Expansion for Named Entity Recognition},\nauthor={Karthikeyan K and Yogarshi Vyas and Jie Ma and Giovanni Paolini and Neha Anna John and Shuai Wang and Yassine Benajiba and Vittorio Castelli and Dan Roth and Miguel Ballesteros},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=aURCCzSuhc}\n}", "github": "", "project": "", "reviewers": "451n;CUSJ;nj9h;xZ9p", "site": "https://openreview.net/forum?id=aURCCzSuhc", "pdf_size": 0, "rating": "4;4;4;4", "confidence": "3;4;4;4", "excitement": "4;4;3;3", "reproducibility": "4;5;4;3", "correctness": "4;4;2;4", "rating_avg": 4.0, "confidence_avg": 3.75, "excitement_avg": 3.5, "reproducibility_avg": 4.0, "correctness_avg": 3.5, "replies_avg": 14, "authors#_avg": 10, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0000-0002-3964-9101;;;;;;", "linkedin": "karthikeyan-k-55266b124/;;jie-ma-6ab59497/;g-paolini/;nehaannajohn/;;;vittorio-castelli-3449604/;dan-roth-8667361/;", "aff_unique_index": "0;0;0;0;0;0;0;0;0;0", "aff_unique_norm": "Amazon", "aff_unique_dep": "Amazon.com, Inc.", "aff_unique_url": "https://www.amazon.com", "aff_unique_abbr": "Amazon", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "aUn1BAzo7q", "title": "Weakly Supervised Semantic Parsing with Execution-based Spurious Program Filtering", "track": "main", "status": "Long Main", "tldr": "", "abstract": "The problem of spurious programs is a longstanding challenge when training a semantic parser from weak supervision. 
To eliminate such programs that have wrong semantics but correct denotation, existing methods focus on exploiting similarities between examples based on domain-specific knowledge. In this paper, we propose a domain-agnostic filtering mechanism based on program execution results. Specifically, for each program obtained through the search process, we first construct a representation that captures the program's semantics as execution results under various inputs. Then, we run a majority vote on these representations to identify and filter out programs with significantly different semantics from the other programs. In particular, our method is orthogonal to the program search process so that it can easily augment any of the existing weakly supervised semantic parsing frameworks. Empirical evaluations on the Natural Language Visual Reasoning and WikiTableQuestions demonstrate that applying our method to the existing semantic parsers induces significantly improved performances.", "keywords": "weakly supervised semantic parsing;spurious programs", "primary_area": "", "supplementary_material": "", "author": "Kang-il Lee;Segwang Kim;Kyomin Jung", "authorids": "~Kang-il_Lee1;~Segwang_Kim1;~Kyomin_Jung1", "gender": "M;M;M", "homepage": ";https://segwangkim.github.io/;http://milab.snu.ac.kr/kjung/index.html", "dblp": "304/2031-1;220/3735;48/3867", "google_scholar": "https://scholar.google.co.kr/citations?user=-YroyxsAAAAJ;;https://scholar.google.co.kr/citations?user=u3uMl4MAAAAJ", "or_profile": "~Kang-il_Lee1;~Segwang_Kim1;~Kyomin_Jung1", "aff": "Seoul National University;;Seoul National University", "aff_domain": "snu.ac.kr;;snu.ac.kr", "position": "PhD student;;Full Professor", "bibtex": "@inproceedings{\nlee2023weakly,\ntitle={Weakly Supervised Semantic Parsing with Execution-based Spurious Program Filtering},\nauthor={Kang-il Lee and Segwang Kim and Kyomin Jung},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=aUn1BAzo7q}\n}", "github": "", "project": "", "reviewers": "ddAc;jkaF;E67k", "site": "https://openreview.net/forum?id=aUn1BAzo7q", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;2;4", "excitement": "3;3;4", "reproducibility": "4;4;4", "correctness": "4;3;3", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.0, "correctness_avg": 3.3333333333333335, "replies_avg": 10, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": ";segwang-kim-9620a2149/;", "aff_unique_index": "0;0", "aff_unique_norm": "Seoul National University", "aff_unique_dep": "", "aff_unique_url": "https://www.snu.ac.kr", "aff_unique_abbr": "SNU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "South Korea" }, { "id": "aVejMt2gYN", "title": "Inference-Time Policy Adapters (IPA): Tailoring Extreme-Scale LMs without Fine-tuning", "track": "main", "status": "Long Main", "tldr": "", "abstract": "While extreme-scale language models have demonstrated exceptional performance on a variety of language tasks, the degree of control over these language models through pure prompting can often be limited. 
Directly fine-tuning such language models can be effective for tailoring them, but it can be either extremely costly (e.g., GPT-3) or not even feasible for the broader community (e.g., GPT-4).\n\nWe propose Inference-time Policy Adapters (IPA), which efficiently tailors a language model such as GPT-3 without fine-tuning it. IPA guides a large base model during decoding time through a lightweight policy adapter trained to optimize an arbitrary user objective with reinforcement learning.\n\nOn five challenging text generation tasks, such as toxicity reduction and lexically constrained generation, IPA consistently brings significant\nimprovements over off-the-shelf language models. It outperforms competitive baseline methods, sometimes even including expensive fine-tuning. In particular, tailoring GPT-2 with IPA can outperform GPT-3, while tailoring GPT-3 with IPA brings a major performance boost over GPT-3 (and sometimes even over GPT-4). Our promising results highlight the potential of IPA as a lightweight alternative to tailoring extreme-scale language models.", "keywords": "Language Model;Text Generation;Reinforcement Learning;Inference-time Algorithm", "primary_area": "", "supplementary_material": "", "author": "Ximing Lu;Faeze Brahman;Peter West;Jaehun Jung;Khyathi Chandu;Abhilasha Ravichander;Prithviraj Ammanabrolu;Liwei Jiang;Sahana Ramnath;Nouha Dziri;Jillian Fisher;Bill Yuchen Lin;Skyler Hallinan;Lianhui Qin;Xiang Ren;Sean Welleck;Yejin Choi", "authorids": "~Ximing_Lu1;~Faeze_Brahman1;~Peter_West1;~Jaehun_Jung1;~Khyathi_Chandu1;~Abhilasha_Ravichander2;~Prithviraj_Ammanabrolu1;~Liwei_Jiang2;~Sahana_Ramnath2;~Nouha_Dziri2;~Jillian_Fisher1;~Bill_Yuchen_Lin1;~Skyler_Hallinan1;~Lianhui_Qin1;~Xiang_Ren1;~Sean_Welleck1;~Yejin_Choi1", "gender": "F;F;M;M;;;M;F;F;;F;M;M;F;M;;F", "homepage": "https://gloriaximinglu.github.io/;https://fabrahman.github.io;https://peterwestai.notion.site/;https://jaehunjung.com;;https://www.cs.cmu.edu/~aravicha/;http://prithvirajva.com;https://liweijiang.me;;;http://jfisher52.github.io;http://yuchenlin.xyz/;https://skylerhallinan.com/;https://lianhui.ucsd.edu/;https://shanzhenren.github.io/;;https://yejinc.github.io/", "dblp": "24/10879;276/6005;179/4587;192/7707;;170/4795.html;202/2351;;252/5822;;336/3238;190/4518;256/6863;184/3753;36/360-1;;89/579-1", "google_scholar": "https://scholar.google.com/citations?hl=en;viCG2ikAAAAJ;https://scholar.google.ca/citations?user=9ubCBYwAAAAJ;_bXzUGEAAAAJ;;6vLsKGsAAAAJ;2yaiWZ8AAAAJ;lcPsDgUAAAAJ;YuRzzf0AAAAJ;;Gnk0E_QAAAAJ;https://scholar.google.com/citations?hl=en;mO_tZ94AAAAJ;smd19iIAAAAJ;_moJlrIAAAAJ;;vhP-tlcAAAAJ", "or_profile": "~Ximing_Lu1;~Faeze_Brahman1;~Peter_West1;~Jaehun_Jung1;~Khyathi_Chandu1;~Abhilasha_Ravichander2;~Prithviraj_Ammanabrolu1;~Liwei_Jiang2;~Sahana_Ramnath2;~Nouha_Dziri2;~Jillian_Fisher1;~Bill_Yuchen_Lin1;~Skyler_Hallinan1;~Lianhui_Qin1;~Xiang_Ren1;~Sean_Welleck1;~Yejin_Choi1", "aff": "University of Washington;Allen Institute for AI;Allen Institute for Artificial Intelligence;University of Washington;;Allen Institute for Artificial Intelligence;Allen Institute for Artificial Intelligence;University of Washington;University of Southern California;;University of Washington;Allen Institute for Artificial Intelligence;University of Washington;University of Washington;University of Southern California;;Department of Computer Science, University of Washington", "aff_domain": 
"cs.washington.edu;allenai.org;allenai.org;uw.edu;;allenai.org;allenai.org;washington.edu;usc.edu;;uw.edu;allenai.org;uw.edu;uw.edu;usc.edu;;cs.washington.edu", "position": "Undergrad student;Postdoc;Intern;PhD student;;Postdoc;Researcher;PhD student;PhD student;;PhD student;Researcher;MS student;PhD student;Associate Professor;;Full Professor", "bibtex": "@inproceedings{\nlu2023inferencetime,\ntitle={Inference-Time Policy Adapters ({IPA}): Tailoring Extreme-Scale {LM}s without Fine-tuning},\nauthor={Ximing Lu and Faeze Brahman and Peter West and Jaehun Jung and Khyathi Chandu and Abhilasha Ravichander and Prithviraj Ammanabrolu and Liwei Jiang and Sahana Ramnath and Nouha Dziri and Jillian Fisher and Bill Yuchen Lin and Skyler Hallinan and Lianhui Qin and Xiang Ren and Sean Welleck and Yejin Choi},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=aVejMt2gYN}\n}", "github": "", "project": "", "reviewers": "pKXZ;44dF;Ge8N", "site": "https://openreview.net/forum?id=aVejMt2gYN", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;4;3", "excitement": "4;3;4", "reproducibility": "3;3;3", "correctness": "3;3;4", "rating_avg": 5.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 17, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0000-0002-0292-3074;;;;;;;;;;;;;", "linkedin": ";;;;;abhilasha-ravichander-57524958;rajammanabrolu/;;;;jillianrosefisher/;;skyler-hallinan/;;xren7;;", "aff_unique_index": "0;1;2;0;2;2;0;3;0;2;0;0;3;0", "aff_unique_norm": "University of Washington;Allen Institute for AI;Allen Institute for Artificial Intelligence;University of Southern California", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.washington.edu;https://allenai.org;https://allenai.org;https://www.usc.edu", "aff_unique_abbr": "UW;AI2;AI2;USC", "aff_campus_unique_index": "1;1;2", "aff_campus_unique": ";Los Angeles;Seattle", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "aVqGqTyky7", "title": "Contrastive Distant Supervision for Debiased and Denoised Machine Reading Comprehension", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Distant Supervision (DS) is a promising learning approach for MRC by leveraging easily-obtained question-answer pairs. Unfortunately, the heuristically annotated dataset will inevitably lead to mislabeled instances, resulting in answer bias and context noise problems. To learn debiased and denoised MRC models, this paper proposes the Contrastive Distant Supervision algorithm -- CDS, which can learn to distinguish confusing and noisy instances via confidence-aware contrastive learning. Specifically, to eliminate answer bias, CDS samples counterfactual negative instances, which ensures that MRC models must take both answer information and question-context interaction into consideration. To denoise distantly annotated contexts, CDS samples confusing negative instances to increase the margin between correct and mislabeled instances. We further propose a confidence-aware contrastive loss to model and leverage the uncertainty of all DS instances during learning. 
Experimental results show that CDS is effective and can even outperform supervised MRC models without manual annotations.", "keywords": "Distant Supervision;Machine Reading Comprehension;Contrastive Learning", "primary_area": "", "supplementary_material": "", "author": "Ning Bian;Hongyu Lin;Xianpei Han;Ben He;Le Sun", "authorids": "~Ning_Bian1;~Hongyu_Lin1;~Xianpei_Han1;~Ben_He1;~Le_Sun1", "gender": ";M;M;M;M", "homepage": ";http://linhongyu.top/;http://www.icip.org.cn/team/homepage/;http://people.ucas.ac.cn/~benhe;http://www.icip.org.cn/team/sunle/", "dblp": ";;57/2368;;78/5897-1", "google_scholar": ";mu5lLakAAAAJ;pA88bm4AAAAJ;https://scholar.google.com/citations?view_op=list_works;6bFNhtwAAAAJ", "or_profile": "~Ning_Bian1;~Hongyu_Lin1;~Xianpei_Han1;~Ben_He1;~Le_Sun1", "aff": ";Institute of Software, Chinese Academy of Sciences;Institute of Software, CAS;University of Chinese Academy of Sciences;Institute of Software, Chinese Academy of Sciences", "aff_domain": ";iscas.ac.cn;iscas.ac.cn;ucas.ac.cn;iscas.ac.cn", "position": ";Associate Professor;Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nbian2023contrastive,\ntitle={Contrastive Distant Supervision for Debiased and Denoised Machine Reading Comprehension},\nauthor={Ning Bian and Hongyu Lin and Xianpei Han and Ben He and Le Sun},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=aVqGqTyky7}\n}", "github": "", "project": "", "reviewers": "dmb8;GuVT;6c51", "site": "https://openreview.net/forum?id=aVqGqTyky7", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;3;4", "excitement": "3;4;3", "reproducibility": "3;5;4", "correctness": "3;4;3", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;", "linkedin": ";;;;", "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Chinese Academy of Sciences;University of Chinese Academy of Sciences", "aff_unique_dep": "Institute of Software;", "aff_unique_url": "http://www.ios.ac.cn;http://www.ucas.ac.cn", "aff_unique_abbr": "CAS;UCAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "aXi6UwdygV", "title": "Cognate Transformer for Automated Phonological Reconstruction and Cognate Reflex Prediction", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Phonological reconstruction is one of the central problems in historical linguistics where a proto-word of an ancestral language is determined from the observed cognate words of daughter languages. Computational approaches to historical linguistics attempt to automate the task by learning models on available linguistic data. Several ideas and techniques drawn from computational biology have been successfully applied in this area of computational historical linguistics. Following these lines, we adapt MSA Transformer, a protein language model, to the problem of automated phonological reconstruction. MSA Transformer trains on multiple sequence alignments as input and is, thus, apt for application on aligned cognate words. We, hence, name our model as Cognate Transformer. 
We also apply the model on another associated task, namely, cognate reflex prediction where a reflex word in a daughter language is predicted based on cognate words from other daughter languages. We show that our model outperforms the existing models on both the tasks, especially when it is pre-trained on masked word prediction task.", "keywords": "Computational historical linguistics;Phonological reconstruction;Cognate reflex prediction;Transformer", "primary_area": "", "supplementary_material": "", "author": "V.S.D.S.Mahesh Akavarapu;Arnab Bhattacharya", "authorids": "~V.S.D.S.Mahesh_Akavarapu1;~Arnab_Bhattacharya1", "gender": "M;M", "homepage": "https://www.cse.iitk.ac.in/users/arnabb/;https://www.cse.iitk.ac.in/users/maheshak/", "dblp": "48/2626-1;358/9551", "google_scholar": "https://scholar.google.co.in/citations?user=Sk-JV9QAAAAJ;6KmJhd0AAAAJ", "or_profile": "~Arnab_Bhattacharya1;~A_V_S_D_S_Mahesh1", "aff": "IIT Kanpur;IIT Kanpur, IIT Kanpur", "aff_domain": "iitk.ac.in;cse.iitk.ac.in", "position": "Full Professor;PhD student", "bibtex": "@inproceedings{\nakavarapu2023cognate,\ntitle={Cognate Transformer for Automated Phonological Reconstruction and Cognate Reflex Prediction},\nauthor={V.S.D.S.Mahesh Akavarapu and Arnab Bhattacharya},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=aXi6UwdygV}\n}", "github": "", "project": "", "reviewers": "LYee;8tWY;ZH7n", "site": "https://openreview.net/forum?id=aXi6UwdygV", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;3", "excitement": "4;3;2", "reproducibility": "5;5;3", "correctness": "5;3;3", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.0, "reproducibility_avg": 4.333333333333333, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0001-7331-0788;", "linkedin": ";", "aff_unique_index": "0;0", "aff_unique_norm": "Indian Institute of Technology Kanpur", "aff_unique_dep": "", "aff_unique_url": "https://www.iitk.ac.in", "aff_unique_abbr": "IITK", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Kanpur", "aff_country_unique_index": "0;0", "aff_country_unique": "India" }, { "id": "aY4avQ0ItI", "title": "A Systematic Study of Performance Disparities in Multilingual Task-Oriented Dialogue Systems", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Achieving robust language technologies that can perform well across the world's many languages is a central goal of multilingual NLP. In this work, we take stock of and empirically analyse task performance disparities that exist between multilingual task-oriented dialogue (ToD) systems. We first define new quantitative measures of absolute and relative equivalence in system performance, capturing disparities across languages and within individual languages. Through a series of controlled experiments, we demonstrate that performance disparities depend on a number of factors: the nature of the ToD task at hand, the underlying pretrained language model, the target language, and the amount of ToD annotated data. We empirically prove the existence of the adaptation and intrinsic biases in current ToD systems: e.g., ToD systems trained for Arabic or Turkish using annotated ToD data fully parallel to English ToD data still exhibit diminished ToD task performance. 
Beyond providing a series of insights into the performance disparities of ToD systems in different languages, our analyses offer practical tips on how to approach ToD data collection and system development for new languages.", "keywords": "multilingual task-oriented dialogue systems;analysis of performance disparities", "primary_area": "", "supplementary_material": "", "author": "Songbo Hu;Han Zhou;Moy Yuan;Milan Gritta;Guchun Zhang;Ignacio Iacobacci;Anna Korhonen;Ivan Vuli\u0107", "authorids": "~Songbo_Hu1;~Han_Zhou4;~Moy_Yuan1;~Milan_Gritta1;~Guchun_Zhang1;~Ignacio_Iacobacci1;~Anna_Korhonen1;~Ivan_Vuli\u01071", "gender": "M;M;;M;;;;M", "homepage": ";https://hzhou.top;;https://github.com/milangritta;;;https://sites.google.com/site/annakorhonen/;https://sites.google.com/site/ivanvulic/", "dblp": ";;;203/9368;53/8139;;14/6532;77/9768", "google_scholar": "AKjpAowAAAAJ;7pXfJVgAAAAJ;;LSyAqp4AAAAJ;;;https://scholar.google.co.uk/citations?user=SCoVoOYAAAAJ;ZX8js60AAAAJ", "or_profile": "~Songbo_Hu1;~Han_Zhou4;~Moy_Yuan1;~Milan_Gritta1;~Guchun_Zhang1;~Ignacio_Iacobacci1;~Anna_Korhonen1;~Ivan_Vuli\u01071", "aff": "Language Technology Lab, University of Cambridge;Google;;Huawei Noah's Ark Lab;Huawei Noah's Ark Lab;;University of Cambridge;PolyAI Limited", "aff_domain": "cam.ac.uk;google.com;;huawei.com;huawei.com;;cam.ac.uk;poly-ai.com", "position": "PhD student;Student Researcher;;Researcher;Researcher;;Professor;Senior Scientist", "bibtex": "@inproceedings{\nhu2023a,\ntitle={A Systematic Study of Performance Disparities in Multilingual Task-Oriented Dialogue Systems},\nauthor={Songbo Hu and Han Zhou and Moy Yuan and Milan Gritta and Guchun Zhang and Ignacio Iacobacci and Anna Korhonen and Ivan Vuli{\\'c}},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=aY4avQ0ItI}\n}", "github": "", "project": "", "reviewers": "5jxt;Hf6r;Pe2h", "site": "https://openreview.net/forum?id=aY4avQ0ItI", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;3;3", "excitement": "4;4;3", "reproducibility": "4;4;4", "correctness": "3;3;4", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 8, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0003-3778-4075;;;;;;", "linkedin": "songbohu/;hanzhou032;;https://linkedin.com/in/milangritta;;;anna-korhonen-534a9b5/;ivan-vuli%C4%87-286b4a81/", "aff_unique_index": "0;1;2;2;0;3", "aff_unique_norm": "University of Cambridge;Google;Huawei;PolyAI Limited", "aff_unique_dep": "Language Technology Lab;Google;Noah's Ark Lab;", "aff_unique_url": "https://www.cam.ac.uk;https://www.google.com;https://www.huawei.com;https://www.poly.ai", "aff_unique_abbr": "Cambridge;Google;Huawei;PolyAI", "aff_campus_unique_index": "0;1;0", "aff_campus_unique": "Cambridge;Mountain View;", "aff_country_unique_index": "0;1;2;2;0;0", "aff_country_unique": "United Kingdom;United States;China" }, { "id": "aaMwMjrDz0", "title": "Conditional Natural Language Inference", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "To properly explain sentence pairs that provide contradictory (different) information for different conditions, we introduce the task of conditional natural language inference (Cond-NLI) and focus on automatically extracting contradictory aspects and their conditions from a sentence pair. 
Cond-NLI can help to provide a full spectrum of information, such as when there are multiple answers to a question each addressing a specific condition, or reviews with different opinions for different conditions. We show that widely-used feature-attribution explanation models are not suitable for finding conditions, especially when sentences are long and are written independently. We propose a simple yet effective model for the original NLI task that can successfully extract conditions while not requiring token-level annotations. Our model enhances the interpretability of the NLI task while maintaining comparable accuracy. To evaluate models for the Cond-NLI, we build and release a token-level annotated dataset BioClaim which contains potentially contradictory claims from the biomedical domain. Our experiments show that our proposed model outperforms the full cross-encoder and other baselines in extracting conditions. It also performs on-par with GPT-3 which has an order of magnitude more parameters and trained on a huge amount of data.", "keywords": "Natural language inference;NLI;explanation;contradictory aspect;token-level explanation;interpretable model", "primary_area": "", "supplementary_material": "", "author": "Youngwoo Kim;Razieh Rahimi;James Allan", "authorids": "~Youngwoo_Kim1;~Razieh_Rahimi1;~James_Allan1", "gender": ";;M", "homepage": ";;https://cs.umass.edu/~allan/", "dblp": ";;10/4537", "google_scholar": ";;https://scholar.google.com.tw/citations?user=-bLGeg0AAAAJ", "or_profile": "~Youngwoo_Kim1;~Razieh_Rahimi1;~James_Allan1", "aff": ";;University of Massachusetts, Amherst", "aff_domain": ";;umass.edu", "position": ";;Full Professor", "bibtex": "@inproceedings{\nkim2023conditional,\ntitle={Conditional Natural Language Inference},\nauthor={Youngwoo Kim and Razieh Rahimi and James Allan},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=aaMwMjrDz0}\n}", "github": "", "project": "", "reviewers": "gcJS;496z;kinb", "site": "https://openreview.net/forum?id=aaMwMjrDz0", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;2;3", "excitement": "2;4;3", "reproducibility": "2;4;4", "correctness": "2;4;4", "rating_avg": 5.0, "confidence_avg": 3.0, "excitement_avg": 3.0, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0003-0132-5694", "linkedin": ";;jamesallan/", "aff_unique_index": "0", "aff_unique_norm": "University of Massachusetts Amherst", "aff_unique_dep": "", "aff_unique_url": "https://www.umass.edu", "aff_unique_abbr": "UMass Amherst", "aff_campus_unique_index": "0", "aff_campus_unique": "Amherst", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "id": "adIeh9ZsfC", "title": "An Empirical Study of Frame Selection for Text-to-Video Retrieval", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Text-to-video retrieval (TVR) aims to find the most relevant video in a large video gallery given a query text. The intricate and abundant context of the video challenges the performance and efficiency of TVR. To handle the serialized video contexts, existing methods typically select a subset of frames within a video to represent the video content for TVR. 
How to select the most representative frames is a crucial issue, whereby the selected frames are required to not only retain the semantic information of the video but also promote retrieval\nefficiency by excluding temporally redundant frames. In this paper, we make the first empirical study of frame selection for TVR. We\nsystemically classify existing frame selection methods into text-free and text-guided ones, under which we detailedly analyze six different frame selections in terms of effectiveness and efficiency. Among them, two frame selections are first developed in this paper. According to\nthe comprehensive analysis on multiple TVR benchmarks, we empirically conclude that the TVR with proper frame selections can significantly improve the retrieval efficiency without sacrificing the retrieval performance.", "keywords": "text-to-video retrival;frame selection", "primary_area": "", "supplementary_material": "", "author": "Mengxia Wu;Min Cao;Yang Bai;Ziyin Zeng;Chen Chen;Liqiang Nie;Min Zhang", "authorids": "~Mengxia_Wu1;~Min_Cao2;~Yang_Bai10;~Ziyin_Zeng2;~Chen_Chen9;~Liqiang_Nie2;~Min_Zhang9", "gender": "F;F;M;M;;M;M", "homepage": "https://github.com/Superhalo;;;https://github.com/zhendekeng;;https://liqiangnie.github.io/index.html;https://zhangmin-nlp-ai.github.io/", "dblp": ";;39/6825-10;;65/4423-36.html;92/8277;83/5342-5", "google_scholar": ";nhMWtZsAAAAJ;oRTnolQAAAAJ;;https://scholar.google.com/citations?hl=zh-CN;yywVMhUAAAAJ;https://scholar.google.com/citations?hl=zh-CN", "or_profile": "~Mengxia_Wu1;~Min_Cao2;~Yang_Bai10;~Ziyin_Zeng2;~Chen_Chen9;~Liqiang_Nie2;~Min_Zhang9", "aff": "Suzhou University;Soochow University;Soochow University;Suzhou University;Institute of Automation, Chinese Academy of Sciences;Shandong University;Harbin Institute of Technology, Shenzhen", "aff_domain": "suda.edu.cn;suda.edu.cn;suda.edu.cn;suda.edu.cn;ia.ac.cn;sdu.edu.cn;hit.edu.cn", "position": "MS student;Associate Professor;MS student;MS student;Associate Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nwu2023an,\ntitle={An Empirical Study of Frame Selection for Text-to-Video Retrieval},\nauthor={Mengxia Wu and Min Cao and Yang Bai and Ziyin Zeng and Chen Chen and Liqiang Nie and Min Zhang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=adIeh9ZsfC}\n}", "github": "", "project": "", "reviewers": "tb4G;FXRQ;ELKD", "site": "https://openreview.net/forum?id=adIeh9ZsfC", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;3;5", "excitement": "3;4;4", "reproducibility": "4;4;5", "correctness": "3;3;4", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.333333333333333, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;0000-0001-8297-6549;0000-0003-1476-0273;", "linkedin": ";;;;;;", "aff_unique_index": "0;1;1;0;2;3;4", "aff_unique_norm": "Suzhou University;Soochow University;Chinese Academy of Sciences;Shandong University;Harbin Institute of Technology", "aff_unique_dep": ";;Institute of Automation;;", "aff_unique_url": "https://www.suda.edu.cn;https://www.soochow.edu.cn;http://www.ia.cas.cn;http://www.sdu.edu.cn;http://en.hhit.edu.cn/", "aff_unique_abbr": "Suda;Soochow U;CAS;SDU;HIT", "aff_campus_unique_index": "1", "aff_campus_unique": ";Shenzhen", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": 
"adjZtG9bDM", "title": "Evaluation of African American Language Bias in Natural Language Generation", "track": "main", "status": "Long Main", "tldr": "", "abstract": "While biases disadvantaging African American Language (AAL) have been uncovered in models for tasks such as speech recognition and toxicity detection, there has been little investigation of these biases for language generation models like ChatGPT. We evaluate how well LLMs understand AAL in comparison to White Mainstream English (WME), the encouraged \"standard\" form of English taught in American classrooms. We measure large language model performance on two tasks: a counterpart generation task, where a model generates AAL given WME and vice versa, and a masked span prediction (MSP) task, where models predict a phrase hidden from their input. Using a novel dataset of AAL texts from a variety of regions and contexts, we present evidence of dialectal bias for six pre-trained LLMs through performance gaps on these tasks.", "keywords": "benchmarking large language models;african american language;bias and fairness;language generation", "primary_area": "", "supplementary_material": "", "author": "Nicholas Deas;Jessica A Grieser;Shana Kleiner;Desmond U. Patton;Elsbeth Turcan;Kathleen McKeown", "authorids": "~Nicholas_Deas1;~Jessica_A_Grieser1;~Shana_Kleiner1;~Desmond_U._Patton1;~Elsbeth_Turcan1;~Kathleen_McKeown1", "gender": "M;F;F;;Not Specified;F", "homepage": "https://www.cs.columbia.edu/~ndeas/;http://www.jessgrieser.com;https://www.asc.upenn.edu/people/faculty/shana-kleiner-lmsw;;http://www.cs.columbia.edu/~eturcan/;http://www.cs.columbia.edu/~kathy/", "dblp": "302/7609;;;;252/5443;m/KathleenMcKeown", "google_scholar": "hwiDX74AAAAJ;2Q-Oyl4AAAAJ;;;mBf4mJUAAAAJ;https://scholar.google.com.tw/citations?user=ujDhg2sAAAAJ", "or_profile": "~Nicholas_Deas1;~Jessica_A_Grieser1;~Shana_Kleiner1;~Desmond_U._Patton1;~Elsbeth_Turcan1;~Kathleen_McKeown1", "aff": "Columbia University;University of Michigan - Ann Arbor;University of Pennsylvania;;Computer Science Department, Columbia University;Columbia University", "aff_domain": "columbia.edu;umich.edu;upenn.edu;;cs.columbia.edu;columbia.edu", "position": "PhD student;Associate Professor;Researcher;;PhD student;Full Professor", "bibtex": "@inproceedings{\ndeas2023evaluation,\ntitle={Evaluation of African American Language Bias in Natural Language Generation},\nauthor={Nicholas Deas and Jessica A Grieser and Shana Kleiner and Desmond U. 
Patton and Elsbeth Turcan and Kathleen McKeown},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=adjZtG9bDM}\n}", "github": "", "project": "", "reviewers": "jtk5;gaMz;raHm", "site": "https://openreview.net/forum?id=adjZtG9bDM", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;3;5", "excitement": "5;2;5", "reproducibility": "4;3;4", "correctness": "4;1;4", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 4.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-2306-5101;0000-0003-1777-5243;;;0000-0003-4854-7593;", "linkedin": "ndeas/;;;;;", "aff_unique_index": "0;1;2;0;0", "aff_unique_norm": "Columbia University;University of Michigan;University of Pennsylvania", "aff_unique_dep": ";;", "aff_unique_url": "https://www.columbia.edu;https://www.umich.edu;https://www.upenn.edu", "aff_unique_abbr": "Columbia;UM;UPenn", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Ann Arbor;New York", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "ae6MmBuX6k", "title": "MCC-KD: Multi-CoT Consistent Knowledge Distillation", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Large language models (LLMs) have showcased remarkable capabilities in complex reasoning through chain of thought (CoT) prompting. Recently, there has been a growing interest in transferring these reasoning abilities from LLMs to smaller models. However, achieving both the diversity and consistency in rationales presents a challenge. In this paper, we focus on enhancing these two aspects and propose Multi-CoT Consistent Knowledge Distillation (MCC-KD) to efficiently distill the reasoning capabilities. In MCC-KD, we generate multiple rationales for each question and enforce consistency among their predictions by minimizing the bidirectional KL-divergence between the answer distributions. We conduct comprehensive experiments to investigate the effectiveness of MCC-KD with different model architectures (LLaMA/FlanT5) and various model scales (3B/7B/11B/13B) on both mathematical reasoning and commonsense reasoning benchmarks. 
The empirical results demonstrate that MCC-KD achieves superior performance on in-distribution datasets and exhibits a strong generalization ability on out-of-distribution datasets.", "keywords": "Knowledge Distillation;Chain of Thought;Reasoning", "primary_area": "", "supplementary_material": "", "author": "Hongzhan Chen;Siyue Wu;Xiaojun Quan;Rui Wang;Ming Yan;Ji Zhang", "authorids": "~Hongzhan_Chen1;~Siyue_Wu1;~Xiaojun_Quan1;~Rui_Wang16;~Ming_Yan2;~Ji_Zhang3", "gender": ";;M;M;M;", "homepage": "https://github.com/homzer/;;https://sites.google.com/site/xiaojunquan/;http://www.coli.uni-saarland.de/~rwang/;;", "dblp": ";;90/5936;w/RuiWang5;51/5332-4.html;86/1953-11", "google_scholar": ";;dRpg4t8AAAAJ;Sd6VSasAAAAJ;uIUfGxYAAAAJ;cgnuJDUAAAAJ", "or_profile": "~Hongzhan_Chen1;~Siyue_Wu1;~Xiaojun_Quan1;~Rui_Wang16;~Ming_Yan2;~Ji_Zhang3", "aff": "SUN YAT-SEN UNIVERSITY;SUN YAT-SEN UNIVERSITY;SUN YAT-SEN UNIVERSITY;Vipshop (China) Co., Ltd.;Alibaba Group;Alibaba Group", "aff_domain": "sysu.edu.cn;sysu.edu.cn;sysu.edu.cn;vipshop.com;alibaba-inc.com;alibaba-inc.com", "position": "MS student;MS student;Full Professor;NLP Director;Instructor;Senior Staff Engineer", "bibtex": "@inproceedings{\nchen2023mcckd,\ntitle={{MCC}-{KD}: Multi-CoT Consistent Knowledge Distillation},\nauthor={Hongzhan Chen and Siyue Wu and Xiaojun Quan and Rui Wang and Ming Yan and Ji Zhang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=ae6MmBuX6k}\n}", "github": "", "project": "", "reviewers": "aS7U;XvaA;7UzP", "site": "https://openreview.net/forum?id=ae6MmBuX6k", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;3", "excitement": "3;2;3", "reproducibility": "4;4;4", "correctness": "3;3;3", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 2.6666666666666665, "reproducibility_avg": 4.0, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;0000-0003-4959-8878;", "linkedin": ";\u601d\u8d8a-\u5434-193598226;;;;", "aff_unique_index": "0;0;0;1;2;2", "aff_unique_norm": "Sun Yat-sen University;Vipshop;Alibaba Group", "aff_unique_dep": ";;", "aff_unique_url": "http://www.sysu.edu.cn;https://www.vip.com;https://www.alibaba.com", "aff_unique_abbr": "SYSU;Vipshop;Alibaba", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "aeLyo8GAco", "title": "Hierarchical Catalogue Generation for Literature Review: A Benchmark", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Scientific literature review generation aims to extract and organize important information from an abundant collection of reference papers and produces corresponding reviews while lacking a clear and logical hierarchy.\nWe observe that a high-quality catalogue-guided generation process can effectively alleviate this problem.\nTherefore, we present an atomic and challenging task named Hierarchical Catalogue Generation for Literature Review as the first step for review generation, which aims to produce a hierarchical catalogue of a review paper given various references. \nWe construct a novel English Hierarchical Catalogues of Literature Reviews Dataset with 7.6k literature review catalogues and 389k reference papers. To accurately assess the model performance, we design two evaluation metrics for informativeness and similarity to ground truth from semantics and structure. 
Our extensive analyses verify the high quality of our dataset and the effectiveness of our evaluation metrics. We further benchmark diverse experiments on state-of-the-art summarization models like BART and large language models like ChatGPT to evaluate their capabilities. We further discuss potential directions for this task to motivate future research.", "keywords": "scientific document processing;multi-document summarization;datasets;metrics", "primary_area": "", "supplementary_material": "", "author": "kun Zhu;Xiaocheng Feng;Xiachong Feng;Yingsheng Wu;Bing Qin", "authorids": "~kun_Zhu2;~Xiaocheng_Feng1;~Xiachong_Feng2;~Yingsheng_Wu1;~Bing_Qin2", "gender": "F;M;M;M;", "homepage": ";http://ir.hit.edu.cn/~xcfeng/;http://xcfeng.net/;https://www.zhihu.com/people/xmssgg;http://ir.hit.edu.cn/~qinb", "dblp": "344/4587;;;;86/5934.html", "google_scholar": "https://scholar.google.com/citations?hl=en;Xu8NbhYAAAAJ;https://scholar.google.com.hk/citations?user=Wifx6goAAAAJ;;LKnCub0AAAAJ", "or_profile": "~kun_Zhu2;~Xiaocheng_Feng1;~Xiachong_Feng2;~Yingsheng_Wu1;~Bing_Qin2", "aff": "Harbin Institute of Technology;Harbin Institute of Technology;Harbin Institute of Technology;Harbin Institute of Technology;Harbin Institute of Technology", "aff_domain": "hit.edu.cn;hit.edu.cn;hit.edu.cn;ir.hit.edu;hit.edu.cn", "position": "PhD student;Associate Professor;PhD student;MS student;Full Professor", "bibtex": "@inproceedings{\nzhu2023hierarchical,\ntitle={Hierarchical Catalogue Generation for Literature Review: A Benchmark},\nauthor={kun Zhu and Xiaocheng Feng and Xiachong Feng and Yingsheng Wu and Bing Qin},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=aeLyo8GAco}\n}", "github": "", "project": "", "reviewers": "9uwx;bVQ3;SJpq", "site": "https://openreview.net/forum?id=aeLyo8GAco", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;4;3", "excitement": "3;4;3", "reproducibility": "3;4;3", "correctness": "3;4;3", "rating_avg": 3.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0009-0008-0731-2978;;;;0000-0002-2543-5604", "linkedin": ";;xiachong-feng-1646761b7/;;", "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Harbin Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "http://www.hit.edu.cn/", "aff_unique_abbr": "HIT", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Harbin", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "ahVTS392C3", "title": "JASMINE: Arabic GPT Models for Few-Shot Learning", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Scholarship on generative pretraining (GPT) remains acutely Anglocentric, leaving serious gaps in our understanding of the whole class of autoregressive models. For example, we have little knowledge about the potential of these models and their societal impacts in diverse linguistic and cultural settings. We alleviate this issue for Arabic, a wide collection of languages and dialectal varieties with more than 400 million population, by introducing JASMINE. JASMINE is a suite of powerful Arabic autoregressive Transformer language models ranging in size between 300 million-6.7 billion parameters pretrained on a large and diverse dataset (~ 235 GB of text). 
We also carefully design and release a comprehensive benchmark for both automated and human evaluation of Arabic autoregressive models, with coverage of potential social biases, harms, and toxicity. Using our novel benchmark, we evaluate JASMINE extensively showing powerful performance intrinsically as well as in few-shot learning on a wide range of NLP tasks. We aim to responsibly release our models and evaluation benchmark with interested researchers, along with code for experimenting with them.", "keywords": "Arabic;Arabic dialects varieties;GPT;Few-shot learning", "primary_area": "", "supplementary_material": "", "author": "El Moatez Billah Nagoudi;Muhammad Abdul-Mageed;AbdelRahim A. Elmadany;Alcides Alcoba Inciarte;Md Tawkat Islam Khondaker", "authorids": "~El_Moatez_Billah_Nagoudi1;~Muhammad_Abdul-Mageed2;~AbdelRahim_A._Elmadany1;~Alcides_Alcoba_Inciarte1;~Md_Tawkat_Islam_Khondaker1", "gender": ";;;M;", "homepage": ";;;;https://sites.google.com/view/tawkat", "dblp": ";;;331/5834;241/5971.html", "google_scholar": ";;;;https://scholar.google.ca/citations?user=koKhlhwAAAAJ", "or_profile": "~El_Moatez_Billah_Nagoudi1;~Muhammad_Abdul-Mageed2;~AbdelRahim_A._Elmadany1;~Alcides_Alcoba_Inciarte1;~Md_Tawkat_Islam_Khondaker1", "aff": ";;;University of British Columbia;University of British Columbia", "aff_domain": ";;;ubc.ca;ubc.ca", "position": ";;;Researcher;MS student", "bibtex": "@inproceedings{\nnagoudi2023jasmine,\ntitle={{JASMINE}: Arabic {GPT} Models for Few-Shot Learning},\nauthor={El Moatez Billah Nagoudi and Muhammad Abdul-Mageed and AbdelRahim A. Elmadany and Alcides Alcoba Inciarte and Md Tawkat Islam Khondaker},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=ahVTS392C3}\n}", "github": "", "project": "", "reviewers": "W8s1;EqWq;KASJ", "site": "https://openreview.net/forum?id=ahVTS392C3", "pdf_size": 0, "rating": "4;4;4", "confidence": "2;4;5", "excitement": "4;4;3", "reproducibility": "4;3;2", "correctness": "5;5;3", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.0, "correctness_avg": 4.333333333333333, "replies_avg": 10, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;0000-0001-5335-0723", "linkedin": ";;;alcides-alcoba/;md-tawkat-islam-khondaker-781962149", "aff_unique_index": "0;0", "aff_unique_norm": "University of British Columbia", "aff_unique_dep": "", "aff_unique_url": "https://www.ubc.ca", "aff_unique_abbr": "UBC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Canada" }, { "id": "ai6kjD6cyX", "title": "Event Causality Extraction via Implicit Cause-Effect Interactions", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Event Causality Extraction (ECE) aims to extract the cause-effect event pairs from the given text, which requires the model to possess a strong reasoning ability to capture event causalities.\nHowever, existing works have not adequately exploited the interactions between the cause and effect event that could provide crucial clues for causality reasoning.\nTo this end, we propose an Implicit Cause-Effect interaction (ICE) framework, which formulates ECE as a template-based conditional generation problem. 
The proposed method captures the implicit intra- and inter-event interactions by incorporating the privileged information (ground truth event types and arguments) for reasoning, and a knowledge distillation mechanism is introduced to alleviate the unavailability of privileged information in the test stage. \nFurthermore, to facilitate knowledge transfer from teacher to student, we design an event-level alignment strategy named Cause-Effect Optimal Transport (CEOT) to strengthen the semantic interactions of cause-effect event types and arguments. \nExperimental results indicate that ICE achieves state-of-the-art performance on the ECE-CCKS dataset.", "keywords": "Event Causality Extraction;Generative Language Models;Knowledge Distillation;Optimal Transport", "primary_area": "", "supplementary_material": "", "author": "Jintao Liu;Zequn Zhang;kaiwen wei;Zhi Guo;Xian Sun;Li Jin;Xiaoyu Li", "authorids": "~Jintao_Liu1;~Zequn_Zhang2;~kaiwen_wei1;~Zhi_Guo2;~Xian_Sun2;~Li_Jin5;~Xiaoyu_Li10", "gender": ";;M;;M;M;M", "homepage": ";;https://wkwiecas.github.io/Kaiwen1997.github.io/;http://aircas.cas.cn/sourcedb_air_cas/cn/expert/yjy/201811/t20181106_5165652.html?;https://github.com/trailsV;https://github.com/jinli331;https://github.com/LiXiaoyu0101", "dblp": ";120/9628.html;297/8721;;;42/1899-1.html;", "google_scholar": ";;;;;g7lHJYcAAAAJ;", "or_profile": "~Jintao_Liu1;~Zequn_Zhang2;~kaiwen_wei1;~Zhi_Guo2;~Xian_Sun2;~Li_Jin5;~Xiaoyu_Li10", "aff": "University of Chinese Academy of Sciences;Aerospace Information Research Institute, Chinese Academy of Science;University of Chinese Academy of Sciences;Associate Professor at Aerospace Information Innovation Institute, Chinese Academy of Science;, Chinese Academy of Sciences;Aerospace Information Research Institute, Chinese Academy of Sciences;Aerospace Information Innovation Institute, Chinese Academy of Science", "aff_domain": "ucas.ac.cn;aircas.ac.cn;ucas.edu.cn;mail.ie.ac.cn;ucas.ac.cn;aircas.ac.cn;aircas.ac.cn", "position": "PhD student;Associate Professor;PhD student;Researcher;Full Professor;Associate Professor;Principal Researcher", "bibtex": "@inproceedings{\nliu2023event,\ntitle={Event Causality Extraction via Implicit Cause-Effect Interactions},\nauthor={Jintao Liu and Zequn Zhang and kaiwen wei and Zhi Guo and Xian Sun and Li Jin and Xiaoyu Li},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=ai6kjD6cyX}\n}", "github": "", "project": "", "reviewers": "ttgs;dU5i;BTRu", "site": "https://openreview.net/forum?id=ai6kjD6cyX", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;4;3", "excitement": "3;4;4", "reproducibility": "4;3;3", "correctness": "4;3;4", "rating_avg": 5.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0009-0006-1816-0848;;0000-0002-5830-0802;;;0000-0001-8833-4862;0000-0003-0286-6660", "linkedin": ";;;;;;", "aff_unique_index": "0;1;0;1;1;1;1", "aff_unique_norm": "University of Chinese Academy of Sciences;Chinese Academy of Sciences", "aff_unique_dep": ";Aerospace Information Research Institute", "aff_unique_url": "http://www.ucas.ac.cn;http://www.cas.cn", "aff_unique_abbr": "UCAS;CAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": 
"aipbZ5obaz", "title": "Comparing Styles across Languages", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Understanding how styles differ across languages is advantageous for training both humans and computers to generate culturally appropriate text. We introduce an explanation framework to extract stylistic differences from multilingual LMs and compare styles across languages. Our framework (1) generates comprehensive style lexica in any language and (2) consolidates feature importances from LMs into comparable lexical categories. We apply this framework to compare politeness, creating the first holistic multilingual politeness dataset and exploring how politeness varies across four languages. Our approach enables an effective evaluation of how distinct linguistic categories contribute to stylistic variations and provides interpretable insights into how people communicate differently around the world.", "keywords": "NLP;Style;Cross-Cultural;Multilingual;Explainability;Lexica", "primary_area": "", "supplementary_material": "", "author": "Shreya Havaldar;Matthew Pressimone;Eric Wong;Lyle Ungar", "authorids": "~Shreya_Havaldar1;~Matthew_Pressimone1;~Eric_Wong1;~Lyle_Ungar1", "gender": "F;M;M;M", "homepage": "https://shreyahavaldar.com;https://www.linkedin.com/in/matthew-pressimone;http://riceric22.github.io/;http://www.cis.upenn.edu/~ungar/", "dblp": ";;64/1811-1.html;u/LyleHUngar", "google_scholar": "h2tzi9MAAAAJ;;pWnTMRkAAAAJ;https://scholar.google.com.tw/citations?user=KCiDjbkAAAAJ", "or_profile": "~Shreya_Havaldar1;~Matthew_Pressimone1;~Eric_Wong1;~Lyle_Ungar1", "aff": "University of Pennsylvania;University of Pennsylvania;University of Pennsylvania;University of Pennsylvania", "aff_domain": "upenn.edu;upenn.edu;upenn.edu;upenn.edu", "position": "PhD student;MS student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nhavaldar2023comparing,\ntitle={Comparing Styles across Languages},\nauthor={Shreya Havaldar and Matthew Pressimone and Eric Wong and Lyle Ungar},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=aipbZ5obaz}\n}", "github": "", "project": "", "reviewers": "a7sm;hCmg;QT67", "site": "https://openreview.net/forum?id=aipbZ5obaz", "pdf_size": 0, "rating": "4;4;4", "confidence": "2;3;3", "excitement": "4;4;4", "reproducibility": "5;4;4", "correctness": "4;4;3", "rating_avg": 4.0, "confidence_avg": 2.6666666666666665, "excitement_avg": 4.0, "reproducibility_avg": 4.333333333333333, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;", "linkedin": "shreya-havaldar-0a2664155/;matthew-pressimone;;", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Pennsylvania", "aff_unique_dep": "", "aff_unique_url": "https://www.upenn.edu", "aff_unique_abbr": "UPenn", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "ajzFrKT3U7", "title": "Leveraging Structured Information for Explainable Multi-hop Question Answering and Reasoning", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Neural models, including large language models (LLMs), achieve superior performance on multi-hop question-answering. 
To elicit reasoning capabilities from LLMs, recent works propose using the chain-of-thought (CoT) mechanism to generate both the reasoning chain and the answer, which enhances the model's capabilities in conducting multi-hop reasoning. However, several challenges still remain: such as struggling with inaccurate reasoning, hallucinations, and lack of interpretability. On the other hand, information extraction (IE) identifies entities, relations, and events grounded to the text. The extracted structured information can be easily interpreted by humans and machines (Grishman, 2019). In this work, we investigate constructing and leveraging extracted semantic structures (graphs) for multi-hop question answering, especially the reasoning process. Empirical results and human evaluations show that our framework: generates more faithful reasoning chains and substantially improves the QA performance on two benchmark datasets. Moreover, the extracted structures themselves naturally provide grounded explanations that are preferred by humans, as compared to the generated reasoning chains and saliency-based explanations.", "keywords": "Natural Language Processing;Multi-hop Question Answering", "primary_area": "", "supplementary_material": "", "author": "Ruosen Li;Xinya Du", "authorids": "~Ruosen_Li1;~Xinya_Du1", "gender": "M;M", "homepage": ";https://xinyadu.github.io", "dblp": "351/0775;200/8114", "google_scholar": "tN-RVAkAAAAJ;R-lKQqkAAAAJ", "or_profile": "~Ruosen_Li1;~Xinya_Du1", "aff": "University of Texas at Dallas;University of Texas at Dallas", "aff_domain": "utd.edu;utdallas.edu", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\nli2023leveraging,\ntitle={Leveraging Structured Information for Explainable Multi-hop Question Answering and Reasoning},\nauthor={Ruosen Li and Xinya Du},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=ajzFrKT3U7}\n}", "github": "", "project": "", "reviewers": "3aEg;ke1q;ns5s", "site": "https://openreview.net/forum?id=ajzFrKT3U7", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;3;4", "excitement": "3;4;4", "reproducibility": "3;3;3", "correctness": "3;4;3", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";", "linkedin": "ruosenli/?locale=en_US;", "aff_unique_index": "0;0", "aff_unique_norm": "University of Texas at Dallas", "aff_unique_dep": "", "aff_unique_url": "https://www.utdallas.edu", "aff_unique_abbr": "UT Dallas", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Dallas", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "ak6PQPmmEK", "title": "The student becomes the master: Outperforming GPT3 on Scientific Factual Error Correction", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Due to the prohibitively high cost of creating\nerror correction datasets, most Factual Claim\nCorrection methods rely on a powerful verification model to guide the correction process.\nThis leads to a significant drop in performance\nin domains like Scientific Claim Correction,\nwhere good verification models do not always\nexist. 
In this work we introduce SciFix, a\nclaim correction system that does not require\na verifier but is able to outperform existing\nmethods by a considerable margin \u2014 achieving correction accuracy of 84% on the SciFact\ndataset, 77% on SciFact-Open and 72.75% on\nthe CovidFact dataset, compared to next best\naccuracies of 7.6%, 5% and 15% on the same\ndatasets respectively. Our method leverages the\npower of prompting with LLMs during training to create a richly annotated dataset that can\nbe used for fully supervised training and regularization. We additionally use a claim-aware\ndecoding procedure to improve the quality of\ncorrected claims. Our method outperforms the\nvery LLM that was used to generate the annotated dataset \u2014 with FewShot Prompting on\nGPT3.5 achieving 58%, 61% and 64% on the\nrespective datasets, a consistently lower correction accuracy, despite using nearly 800 times\nas many parameters as our model.", "keywords": "Factual Error Correction;GPT;Domain Adaptation;Distribution Shift", "primary_area": "", "supplementary_material": "", "author": "Dhananjay Ashok;Atharva Kulkarni;Hai Pham;Barnabas Poczos", "authorids": "~Dhananjay_Ashok1;~Atharva_Kulkarni1;~Hai_Pham2;~Barnabas_Poczos1", "gender": ";M;M;", "homepage": "https://dhananjayashok.github.io/;https://athrvkk.github.io;https://www.cs.cmu.edu/~htpham/;http://www.cs.cmu.edu/~bapoczos/", "dblp": "277/0892;261/0205;;15/4829", "google_scholar": "dxKCrsYAAAAJ;tQTAiXwAAAAJ;lA6P3zYAAAAJ;https://scholar.google.com/", "or_profile": "~Dhananjay_Ashok1;~Atharva_Kulkarni1;~Hai_Pham2;~Barnab\u00e1s_P\u00f3czos1", "aff": "School of Computer Science, Carnegie Mellon University;School of Computer Science, Carnegie Mellon University;Carnegie Mellon University;School of Computer Science", "aff_domain": "cs.cmu.edu;cs.cmu.edu;cmu.edu;cs.cmu.edu", "position": "MS student;MS student;PhD student;Associate Professor", "bibtex": "@inproceedings{\nashok2023the,\ntitle={The student becomes the master: Outperforming {GPT}3 on Scientific Factual Error Correction},\nauthor={Dhananjay Ashok and Atharva Kulkarni and Hai Pham and Barnabas Poczos},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=ak6PQPmmEK}\n}", "github": "", "project": "", "reviewers": "eYY6;hZmU;LL4j;RUuW", "site": "https://openreview.net/forum?id=ak6PQPmmEK", "pdf_size": 0, "rating": "3;3;3;3", "confidence": "4;4;3;3", "excitement": "4;3;3;3", "reproducibility": "4;4;4;4", "correctness": "4;3;4;4", "rating_avg": 3.0, "confidence_avg": 3.5, "excitement_avg": 3.25, "reproducibility_avg": 4.0, "correctness_avg": 3.75, "replies_avg": 14, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;", "linkedin": "dhananjay-ashok-576342142/;;;", "aff_unique_index": "0;0;0;1", "aff_unique_norm": "Carnegie Mellon University;School of Computer Science", "aff_unique_dep": "School of Computer Science;Computer Science", "aff_unique_url": "https://www.cmu.edu;", "aff_unique_abbr": "CMU;", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Pittsburgh;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States;" }, { "id": "akJUrevmwI", "title": "Do Language Models Have a Common Sense regarding Time? 
Revisiting Temporal Commonsense Reasoning in the Era of Large Language Models", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Temporal reasoning represents a vital component of human communication and understanding, yet remains an underexplored area within the context of Large Language Models (LLMs). Despite LLMs demonstrating significant proficiency in a range of tasks, a comprehensive, large-scale analysis of their temporal reasoning capabilities is missing. Our paper addresses this gap, presenting the first extensive benchmarking of LLMs on temporal reasoning tasks. We critically evaluate 8 different LLMs across 6 datasets using 3 distinct prompting strategies. Additionally, we broaden the scope of our evaluation by including in our analysis 2 Code Generation LMs. Beyond broad benchmarking of models and prompts, we also conduct a fine-grained investigation of performance across different categories of temporal tasks. We further analyze the LLMs on varying temporal aspects, offering insights into their proficiency in understanding and predicting the continuity, sequence, and progression of events over time. Our findings reveal a nuanced depiction of the capabilities and limitations of the models within temporal reasoning, offering a comprehensive reference for future research in this pivotal domain.", "keywords": "Benchmarking;Temporal Reasoning;LLMs", "primary_area": "", "supplementary_material": "", "author": "Raghav Jain;Daivik Sojitra;Arkadeep Acharya;Sriparna Saha;Adam Jatowt;Sandipan Dandapat", "authorids": "~Raghav_Jain1;~Daivik_Sojitra1;~Arkadeep_Acharya1;~Sriparna_Saha1;~Adam_Jatowt2;~Sandipan_Dandapat2", "gender": "M;M;M;F;M;M", "homepage": ";;;http://www.iitp.ac.in/~sriparna;;https://ds-informatik.uibk.ac.at/doku.php?id=homepage", "dblp": ";;356/7885;27/1664-1;;", "google_scholar": ";;jJfLH7sAAAAJ;https://scholar.google.co.in/citations?user=Fj7jA_AAAAAJ;https://scholar.google.co.in/citations?user=DWD_FiQAAAAJ;l2vn9GoAAAAJ", "or_profile": "~Raghav_Jain1;~Daivik_Sojitra1;~Arkadeep_Acharya1;~Sriparna_Saha1;~Sandipan_Dandapat2;~Adam_Wladyslaw_Jatowt1", "aff": "Indian Institute of Technology, Patna.;Indian Institute of Technology, Patna;Indian Institute of Technology, Patna;Indian Institute of Technology Patna, India;Microsoft;Universit\u00e4t Innsbruck", "aff_domain": "iitp.ac.in;iitp.ac.in;iitp.ac.in;iitp.ac.in;microsoft.com;uibk.ac.at", "position": "Researcher;MS student;Undergrad student;Associate Professor;Principal Applied Researcher;Full Professor", "bibtex": "@inproceedings{\njain2023do,\ntitle={Do Language Models Have a Common Sense regarding Time? 
Revisiting Temporal Commonsense Reasoning in the Era of Large Language Models},\nauthor={Raghav Jain and Daivik Sojitra and Arkadeep Acharya and Sriparna Saha and Adam Jatowt and Sandipan Dandapat},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=akJUrevmwI}\n}", "github": "", "project": "", "reviewers": "Kfj1;topD;izhz", "site": "https://openreview.net/forum?id=akJUrevmwI", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;4;4", "excitement": "3;3;5", "reproducibility": "4;3;3", "correctness": "3;4;4", "rating_avg": 5.0, "confidence_avg": 4.0, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0009-0002-3018-3541;;;", "linkedin": "raghav-jain-3a8076214;daivik-sojitra-929554159/;arkadeep-acharya-404b41226/;sriparna-saha-1a1338161/;;", "aff_unique_index": "0;0;0;0;1;2", "aff_unique_norm": "Indian Institute of Technology Patna;Microsoft;University of Innsbruck", "aff_unique_dep": ";Microsoft Corporation;", "aff_unique_url": "https://www.iitp.ac.in;https://www.microsoft.com;https://www.uibk.ac.at", "aff_unique_abbr": "IIT Patna;Microsoft;UIBK", "aff_campus_unique_index": "0;0;0;0;2", "aff_campus_unique": "Patna;;Innsbruck", "aff_country_unique_index": "0;0;0;0;1;2", "aff_country_unique": "India;United States;Austria" }, { "id": "alxWMBcNVN", "title": "Personalized Distillation: Empowering Open-Sourced LLMs with Adaptive Learning for Code Generation", "track": "main", "status": "Long Main", "tldr": "", "abstract": "With the rise of powerful closed-sourced LLMs (ChatGPT, GPT-4), there are increasing interests in distilling the capabilies of close-sourced LLMs to smaller open-sourced LLMs. Previous distillation methods usually prompt ChatGPT to generate a set of instructions and answers, for the student model to learn. However, such standard distillation approach neglects the merits and conditions of the student model. Inspired by modern teaching principles, we design a personalised distillation process, in which the student attempts to solve a task first, then the teacher provides an adaptive refinement for the student to improve. Instead of feeding the student with teacher's prior, personalised distillation enables personalised learning for the student model, as it only learns on examples it makes mistakes upon and learns to improve its own solution. On code generation, personalised distillation consistently outperforms standard distillation with only one third of the data. 
With only 2.5-3K personalised examples that incur a data-collection cost of 4-6\\$, we boost CodeGen-mono-16B by 7\\% to achieve 36.4\\% pass@1 and StarCoder by 12.2\\% to achieve 45.8\\% pass@1 on HumanEval.\\footnote{We will release our models and codes upon acceptance}", "keywords": "Distillation;code generation;adaptive learning", "primary_area": "", "supplementary_material": "", "author": "Hailin Chen;Amrita Saha;Steven Hoi;Shafiq Joty", "authorids": "~Hailin_Chen1;~Amrita_Saha2;~Steven_Hoi2;~Shafiq_Joty1", "gender": ";M;M;F", "homepage": ";http://stevenhoi.com;https://raihanjoty.github.io/;", "dblp": "36/8249;;62/2078;72/7720.html", "google_scholar": "oE4KrU0AAAAJ;JoLjflYAAAAJ;hR249csAAAAJ;https://scholar.google.co.uk/citations?user=3Zb5Y2YAAAAJ", "or_profile": "~Hailin_Chen1;~Steven_Hoi2;~Shafiq_Joty1;~Amrita_Saha1", "aff": "National Technological University;Singapore Management University;SalesForce.com;SalesForce.com", "aff_domain": "ntu.edu;smu.edu.sg;salesforce.com;salesforce.com", "position": "PhD student;Associate Professor;Principal Researcher;Researcher", "bibtex": "@inproceedings{\nchen2023personalized,\ntitle={Personalized Distillation: Empowering Open-Sourced {LLM}s with Adaptive Learning for Code Generation},\nauthor={Hailin Chen and Amrita Saha and Steven Hoi and Shafiq Joty},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=alxWMBcNVN}\n}", "github": "", "project": "", "reviewers": "ksiw;Y76m;cUGP", "site": "https://openreview.net/forum?id=alxWMBcNVN", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;2;5", "excitement": "4;3;3", "reproducibility": "4;3;3", "correctness": "4;3;3", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.3333333333333335, "replies_avg": 10, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;", "linkedin": "chenhailin/;;;amrita-saha87/", "aff_unique_index": "0;1;2;2", "aff_unique_norm": "National Technological University;Singapore Management University;Salesforce", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ntu.edu;https://www.smu.edu.sg;https://www.salesforce.com", "aff_unique_abbr": "NTU;SMU;Salesforce", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "United States;Singapore" }, { "id": "aolJqJ50ZA", "title": "Explore the Way: Exploring Reasoning Path by Bridging Entities for Effective Cross-Document Relation Extraction", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "Cross-document relation extraction (CodRED) task aims to infer the relation between two entities mentioned in different documents within a reasoning path. Previous studies have concentrated on merely capturing implicit relations between the entities. However, humans usually utilize explicit information chains such as hyperlinks or additional searches to find the relations between two entities. Inspired by this, we propose Path wIth expLOraTion (PILOT) that provides the enhanced reasoning path by exploring the explicit clue information within the documents. PILOT finds the bridging entities which directly guide the paths between the entities and then employs them as stepstones to navigate desirable paths. We show that models with PILOT outperform the baselines in the CodRED task. 
Furthermore, we offer a variety of analyses to verify the validity of the reasoning paths constructed through PILOT, including evaluations using large language models such as ChatGPT.", "keywords": "Cross-document relation extraction;Document relation extraction;Reasoning Path Construction", "primary_area": "", "supplementary_material": "", "author": "Junyoung Son;Jinsung Kim;Jungwoo Lim;Yoonna Jang;Heuiseok Lim", "authorids": "~Junyoung_Son1;~Jinsung_Kim2;~Jungwoo_Lim1;~Yoonna_Jang1;~Heuiseok_Lim1", "gender": "M;M;F;F;M", "homepage": "https://rgop13.github.io/;https://jin62304.github.io;https://dlawjddn803.github.io/;https://yoonnajang.github.io/;http://nlp.korea.ac.kr", "dblp": "243/9058;;277/9191;277/9316;127/4881", "google_scholar": "d6b5a34AAAAJ;au6e9uUAAAAJ;ubIxtk8AAAAJ;https://scholar.google.com/citations?hl=ko;HMTkz7oAAAAJ", "or_profile": "~Junyoung_Son1;~Jinsung_Kim2;~Jungwoo_Lim1;~Yoonna_Jang1;~Heuiseok_Lim1", "aff": "Korea University;Korea University;Korea University;Korea University;Korea University", "aff_domain": "korea.ac.kr;korea.ac.kr;korea.ac.kr;korea.ac.kr;korea.ac.kr", "position": "MS student;PhD student;PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nson2023explore,\ntitle={Explore the Way: Exploring Reasoning Path by Bridging Entities for Effective Cross-Document Relation Extraction},\nauthor={Junyoung Son and Jinsung Kim and Jungwoo Lim and Yoonna Jang and Heuiseok Lim},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=aolJqJ50ZA}\n}", "github": "", "project": "", "reviewers": "F5va;18D4;shPs", "site": "https://openreview.net/forum?id=aolJqJ50ZA", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;4;4", "excitement": "4;3;3", "reproducibility": "4;4;5", "correctness": "3;3;3", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.333333333333333, "correctness_avg": 3.0, "replies_avg": 9, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-4142-6927;0000-0002-1587-0389;0000-0001-8988-2270;;", "linkedin": "junyoung-son-2836a2183/;jinsung-kim-703195178/;jungwoo-lim-3a5124202/;;", "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Korea University", "aff_unique_dep": "", "aff_unique_url": "https://www.korea.ac.kr", "aff_unique_abbr": "KU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "South Korea" }, { "id": "asYObzj0IT", "title": "Comparing Prompt-Based and Standard Fine-Tuning for Urdu Text Classification", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "Recent advancements in natural language processing have demonstrated the efficacy of pre-trained language models for various downstream tasks through prompt-based fine-tuning. In contrast to standard fine-tuning, which relies solely on labeled examples, prompt-based fine-tuning combines a few labeled examples (few shot) with guidance through prompts tailored for the specific language and task. For low-resource languages, where labeled examples are limited, prompt-based fine-tuning appears to be a promising alternative. In this paper, we compare prompt-based and standard fine-tuning for the popular task of text classification in Urdu and Roman Urdu languages. We conduct experiments using five datasets, covering different domains, and pre-trained multilingual transformers. 
The results reveal that significant improvement of up to 13% in accuracy is achieved by prompt-based fine-tuning over standard fine-tuning approaches. This suggests the potential of prompt-based fine-tuning as a valuable approach for low-resource languages with limited labeled data.", "keywords": "Prompt-based Fine-tuning;Urdu Text Classification;Pre-trained Language Models", "primary_area": "", "supplementary_material": "", "author": "Faizad Ullah;Ubaid Azam;Ali Faheem;Faisal Kamiran;Asim Karim", "authorids": "~Faizad_Ullah1;~Ubaid_Azam2;~Ali_Faheem1;~Faisal_Kamiran1;~Asim_Karim1", "gender": "M;M;M;M;M", "homepage": ";;;http://itu.edu.pk/faculty-itu/dr-faisal-kamiran/;https://web.lums.edu.pk/~akarim", "dblp": ";;;07/7790;63/6093", "google_scholar": "3WIfNOgAAAAJ;N8YWidwAAAAJ;;yfyugf8AAAAJ;NXFekJ4AAAAJ", "or_profile": "~Faizad_Ullah1;~Ubaid_Azam2;~Ali_Faheem1;~Faisal_Kamiran1;~Asim_Karim1", "aff": "Lahore University of Management Sciences;Lahore University of Management Sciences;Lahore University of Management Sciences;Information Technology University, Lahore;Lahore University of Management Sciences", "aff_domain": "lums.edu.pk;lums.edu.pk;lums.edu.pk;itu.edu.pk;lums.edu.pk", "position": "PhD student;Researcher;PhD student;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nullah2023comparing,\ntitle={Comparing Prompt-Based and Standard Fine-Tuning for Urdu Text Classification},\nauthor={Faizad Ullah and Ubaid Azam and Ali Faheem and Faisal Kamiran and Asim Karim},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=asYObzj0IT}\n}", "github": "", "project": "", "reviewers": "pxrY;18m6;XVEi", "site": "https://openreview.net/forum?id=asYObzj0IT", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;3;4", "excitement": "2;2;2", "reproducibility": "3;3;3", "correctness": "4;3;3", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 2.0, "reproducibility_avg": 3.0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-9981-2847;;0000-0003-0487-8016;;0000-0002-9872-5020", "linkedin": ";;;faisalkamiran/;https://www.linkedin.com/feed/", "aff_unique_index": "0;0;0;1;0", "aff_unique_norm": "Lahore University of Management Sciences;Information Technology University", "aff_unique_dep": ";", "aff_unique_url": "https://lums.edu.pk;https://www.itu.edu.pk", "aff_unique_abbr": "LUMS;ITU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Lahore", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "Pakistan" }, { "id": "ayoGdkXi4V", "title": "Lazy-k Decoding: Constrained Decoding for Information Extraction", "track": "main", "status": "Long Main", "tldr": "", "abstract": "We explore the possibility of improving probabilistic models in structured prediction. Specifically, we combine the models with constrained decoding approaches in the context of token classification for information extraction. The decoding methods search for constraint-satisfying label-assignments while maximizing the total probability. To do this, we evaluate several existing approaches, as well as propose a novel decoding method called Lazy-$k$. Our findings demonstrate that constrained decoding approaches can significantly improve the models' performances, especially when using smaller models. The Lazy-$k$ approach allows for more flexibility between decoding time and accuracy. 
The code for using Lazy-$k$ decoding can be found at https://github.com/ArthurDevNL/lazyk.", "keywords": "structured prediction;constrained decoding;token classification;information extraction;beam search", "primary_area": "", "supplementary_material": "", "author": "Arthur Hemmer;Mickael Coustaty;Nicola Bartolo;Jerome Brachat;Jean-marc Ogier", "authorids": "~Arthur_Hemmer1;~Mickael_Coustaty1;~Nicola_Bartolo1;~Jerome_Brachat1;~Jean-marc_Ogier1", "gender": "M;M;M;M;", "homepage": "https://arthurhemmer.com/;;https://nicolabartolo.mystrikingly.com/;https://www.linkedin.com/in/jerome-brachat-608a6647;", "dblp": ";https://dblp.uni-trier.de/pid/16/905.html;;;", "google_scholar": ";https://scholar.google.com/citations?view_op=list_works;https://scholar.google.fr/citations?user=__tEFJYAAAAJ;;", "or_profile": "~Arthur_Hemmer1;~Mickael_Coustaty1;~Nicola_Bartolo1;~Jerome_Brachat1;~Jean-marc_Ogier1", "aff": "Universit\u00e9 de La Rochelle;La Rochelle University;Shift Technology;Shift Technology ;", "aff_domain": "univ-lr.fr;univ-larochelle.fr;shift-technology.com;shift-technology.com;", "position": "PhD student;Associate Professor;Researcher;Researcher;", "bibtex": "@inproceedings{\nhemmer2023lazyk,\ntitle={Lazy-k Decoding: Constrained Decoding for Information Extraction},\nauthor={Arthur Hemmer and Mickael Coustaty and Nicola Bartolo and Jerome Brachat and Jean-marc Ogier},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=ayoGdkXi4V}\n}", "github": "", "project": "", "reviewers": "NKjo;fWdW;AmBy", "site": "https://openreview.net/forum?id=ayoGdkXi4V", "pdf_size": 0, "rating": "3;3;3", "confidence": "2;3;3", "excitement": "2;4;3", "reproducibility": "3;4;4", "correctness": "3;4;3", "rating_avg": 3.0, "confidence_avg": 2.6666666666666665, "excitement_avg": 3.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;", "linkedin": ";;nicolabartolo/;;", "aff_unique_index": "0;1;2;2", "aff_unique_norm": "Universit\u00e9 de La Rochelle;La Rochelle University;Shift Technology", "aff_unique_dep": ";;", "aff_unique_url": "https://www.univ-larochelle.fr;https://www.univ-larochelle.fr;https://www.shift.com", "aff_unique_abbr": "UdLR;LRU;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;1", "aff_country_unique": "France;United States" }, { "id": "ayzVnzaUzB", "title": "When the Majority is Wrong: Modeling Annotator Disagreement for Subjective Tasks", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Though majority vote among annotators is typically used for ground truth labels in machine learning, annotator disagreement in tasks such as hate speech detection may reflect systematic differences in opinion across groups, not noise. Thus, a crucial problem in hate speech detection is determining if a statement is offensive to the demographic group that it targets, when that group may be a small fraction of the annotator pool. We construct a model that predicts individual annotator ratings on potentially offensive text and combines this information with the predicted target group of the text to predict the ratings of target group members. 
We show gains across a range of metrics, including raising performance over the baseline by 22% at predicting individual annotators' ratings and by 33% at predicting variance among annotators, which provides a metric for model uncertainty downstream. We find that annotators' ratings can be predicted using their demographic information as well as opinions on online content, and that non-invasive questions on annotators' online experiences minimize the need to collect demographic information when predicting annotators' opinions.", "keywords": "annotator disagreement;hate speech;toxicity detection;offensive content detection;AI fairness", "primary_area": "", "supplementary_material": "", "author": "Eve Fleisig;Rediet Abebe;Dan Klein", "authorids": "~Eve_Fleisig1;~Rediet_Abebe2;~Dan_Klein1", "gender": "F;;", "homepage": "https://www.efleisig.com;;http://people.eecs.berkeley.edu/~klein/", "dblp": "276/0223;;", "google_scholar": "NHlxXzwAAAAJ;;", "or_profile": "~Eve_Fleisig1;~Rediet_Abebe2;~Dan_Klein1", "aff": "University of California, Berkeley;;University of California, Berkeley", "aff_domain": "berkeley.edu;;berkeley.edu", "position": "PhD student;;Full Professor", "bibtex": "@inproceedings{\nfleisig2023when,\ntitle={When the Majority is Wrong: Modeling Annotator Disagreement for Subjective Tasks},\nauthor={Eve Fleisig and Rediet Abebe and Dan Klein},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=ayzVnzaUzB}\n}", "github": "", "project": "", "reviewers": "aGsZ;7dom;Muv1", "site": "https://openreview.net/forum?id=ayzVnzaUzB", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;2;4", "excitement": "4;4;4", "reproducibility": "4;4;5", "correctness": "4;4;4", "rating_avg": 5.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 4.0, "reproducibility_avg": 4.333333333333333, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": "eve-fleisig/;;dan-klein/", "aff_unique_index": "0;0", "aff_unique_norm": "University of California, Berkeley", "aff_unique_dep": "", "aff_unique_url": "https://www.berkeley.edu", "aff_unique_abbr": "UC Berkeley", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Berkeley", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "b07c10sXzN", "title": "Lifelong Sequence Generation with Dynamic Module Expansion and Adaptation", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Lifelong sequence generation (LSG), a problem in continual learning, aims to continually train a model on a sequence of generation tasks to learn constantly emerging new generation patterns while avoiding the forgetting of previous knowledge. Existing LSG methods mainly focus on maintaining old knowledge while paying little attention to knowledge transfer across tasks. In contrast, humans can better learn new tasks by leveraging previously acquired knowledge from similar tasks. Inspired by the learning paradigm of humans, we propose Dynamic Module Expansion and Adaptation (DMEA), which enables the model to dynamically determine the architecture for acquiring new knowledge based on task correlation and select the most similar previous tasks to facilitate adaptation to new tasks. 
In addition, as the learning process can easily be biased towards the current task which might cause more severe forgetting of previously learned knowledge, we propose dynamic gradient scaling to balance the learning of the current task and replayed tasks. With extensive experiments, we demonstrate that DMEA can consistently outperform existing methods in different LSG settings.", "keywords": "lifelong learning;sequence generation;dynamic module;forward knowledge transfer", "primary_area": "", "supplementary_material": "", "author": "Chengwei Qin;CHEN CHEN;Shafiq Joty", "authorids": "~Chengwei_Qin1;~CHEN_CHEN37;~Shafiq_Joty1", "gender": "M;M;M", "homepage": ";;https://raihanjoty.github.io/", "dblp": "195/2732;65/4423;62/2078", "google_scholar": ";uUmSp1QAAAAJ;hR249csAAAAJ", "or_profile": "~Chengwei_Qin1;~CHEN_CHEN37;~Shafiq_Joty1", "aff": "Nanyang Technological University;Nanyang Technological University;SalesForce.com", "aff_domain": "ntu.edu.sg;ntu.edu;salesforce.com", "position": "PhD student;PhD student;Principal Researcher", "bibtex": "@inproceedings{\nqin2023lifelong,\ntitle={Lifelong Sequence Generation with Dynamic Module Expansion and Adaptation},\nauthor={Chengwei Qin and CHEN CHEN and Shafiq Joty},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=b07c10sXzN}\n}", "github": "", "project": "", "reviewers": "kDLs;X4NL;9rh6;5YgP", "site": "https://openreview.net/forum?id=b07c10sXzN", "pdf_size": 0, "rating": "5;5;5;5", "confidence": "4;3;4;5", "excitement": "3;4;2;4", "reproducibility": "4;4;4;3", "correctness": "4;4;3;4", "rating_avg": 5.0, "confidence_avg": 4.0, "excitement_avg": 3.25, "reproducibility_avg": 3.75, "correctness_avg": 3.75, "replies_avg": 14, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": "chengwei-qin-3401a1107/;;", "aff_unique_index": "0;0;1", "aff_unique_norm": "Nanyang Technological University;Salesforce", "aff_unique_dep": ";", "aff_unique_url": "https://www.ntu.edu.sg;https://www.salesforce.com", "aff_unique_abbr": "NTU;Salesforce", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1", "aff_country_unique": "Singapore;United States" }, { "id": "b1J3WplfgM", "title": "SKD-NER: Continual Named Entity Recognition via Span-based Knowledge Distillation with Reinforcement Learning", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Continual learning for named entity recognition (CL-NER) aims to enable models to continuously learn new entity types while retaining the ability to recognize previously learned ones. However, the current strategies fall short of effectively addressing the catastrophic forgetting of previously learned entity types. To tackle this issue, we propose the SKD-NER model, an efficient continual learning NER model based on the span-based approach, which innovatively incorporates reinforcement learning strategies to enhance the model's ability against catastrophic forgetting. Specifically, we leverage knowledge distillation (KD) to retain memory and employ reinforcement learning strategies during the KD process to optimize the soft labeling and distillation losses generated by the teacher model to effectively prevent catastrophic forgetting during continual learning. 
This approach effectively prevents or mitigates catastrophic forgetting during continuous learning, allowing the model to retain previously learned knowledge while acquiring new knowledge. Our experiments on two benchmark datasets demonstrate that our model significantly improves the performance of the CL-NER task, outperforming state-of-the-art methods.", "keywords": "Continual learning;Named Entity Recognition;Knowledge Distillation;Reinforcement Learning", "primary_area": "", "supplementary_material": "", "author": "Yi Chen;Liang He", "authorids": "~Yi_Chen20;~Liang_He7", "gender": "M;M", "homepage": "http://www.stephenyc.top;http://web.ee.tsinghua.edu.cn/heliang/en/index.htm", "dblp": ";", "google_scholar": ";", "or_profile": "~Yi_Chen20;~Liang_He7", "aff": "Xinjiang University;Tsinghua University", "aff_domain": "xju.edu.cn;tsinghua.edu.cn", "position": "MS student;Full Professor", "bibtex": "@inproceedings{\nchen2023skdner,\ntitle={{SKD}-{NER}: Continual Named Entity Recognition via Span-based Knowledge Distillation with Reinforcement Learning},\nauthor={Yi Chen and Liang He},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=b1J3WplfgM}\n}", "github": "", "project": "", "reviewers": "8unw;gUVE;JuEv", "site": "https://openreview.net/forum?id=b1J3WplfgM", "pdf_size": 0, "rating": "3;3;3", "confidence": "5;4;4", "excitement": "3;3;3", "reproducibility": "3;3;3", "correctness": "3;2;4", "rating_avg": 3.0, "confidence_avg": 4.333333333333333, "excitement_avg": 3.0, "reproducibility_avg": 3.0, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";", "linkedin": ";", "aff_unique_index": "0;1", "aff_unique_norm": "Xinjiang University;Tsinghua University", "aff_unique_dep": ";", "aff_unique_url": "http://www.xju.edu.cn;https://www.tsinghua.edu.cn", "aff_unique_abbr": "XJU;THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "id": "b1XS87j323", "title": "A Multi-Task Dataset for Assessing Discourse Coherence in Chinese Essays: Structure, Theme, and Logic Analysis", "track": "main", "status": "Long Main", "tldr": "", "abstract": "This paper introduces the \\textbf{C}hinese \\textbf{E}ssay \\textbf{D}iscourse \\textbf{C}oherence \\textbf{C}orpus (\\textbf{CEDCC}), a multi-task dataset for assessing discourse coherence. Existing research tends to focus on isolated dimensions of discourse coherence, a gap which the CEDCC addresses by integrating coherence grading, topical continuity, and discourse relations. This approach, alongside detailed annotations, captures the subtleties of real-world texts and stimulates progress in Chinese discourse coherence analysis. Our contributions include the development of the CEDCC, the establishment of baselines for further research, and the demonstration of the impact of coherence on discourse relation recognition and automated essay scoring. 
The dataset and related code are available at \\url{https://github.com/cubenlp/CEDCC_corpus}.", "keywords": "discourse coherence assessment", "primary_area": "", "supplementary_material": "", "author": "Hongyi Wu;Xinshu Shen;Man Lan;Shaoguang Mao;Xiaopeng Bai;Yuanbin Wu", "authorids": "~Hongyi_Wu3;~Xinshu_Shen1;~Man_Lan1;~Shaoguang_Mao1;~Xiaopeng_Bai1;~Yuanbin_Wu1", "gender": "F;F;F;M;M;", "homepage": "https://github.com/qinaidedede0319/;;https://faculty.ecnu.edu.cn/_s16/lm2/main.psp;https://www.linkedin.com/in/shaoguang-mao-929733120/;https://faculty.ecnu.edu.cn/_s5/bxp/main.psp;", "dblp": "78/1033;;01/800;214/0365;;17/7186", "google_scholar": ";;48RFUQgAAAAJ;S6XnZsQAAAAJ;;", "or_profile": "~Hongyi_Wu3;~Xinshu_Shen1;~Man_Lan1;~Shaoguang_Mao1;~Xiaopeng_Bai1;~Yuanbin_Wu1", "aff": "East China Normal University;East China Normal University;East China Normal University;Microsoft;East China Normal University;East China Normal University", "aff_domain": "ecnu.edu.cn;ecnu.edu.cn;ecnu.edu.cn;microsoft.com;ecnu.edu.cn;ecnu.edu.cn", "position": "MS student;MS student;Full Professor;Researcher;Associate Professor;Associate Professor", "bibtex": "@inproceedings{\nwu2023a,\ntitle={A Multi-Task Dataset for Assessing Discourse Coherence in Chinese Essays: Structure, Theme, and Logic Analysis},\nauthor={Hongyi Wu and Xinshu Shen and Man Lan and Shaoguang Mao and Xiaopeng Bai and Yuanbin Wu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=b1XS87j323}\n}", "github": "", "project": "", "reviewers": "i1co;FdLx;cwRY", "site": "https://openreview.net/forum?id=b1XS87j323", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;4;4", "excitement": "3;2;3", "reproducibility": "3;2;3", "correctness": "3;2;3", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 2.6666666666666665, "reproducibility_avg": 2.6666666666666665, "correctness_avg": 2.6666666666666665, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0002-9114-4629;;;", "linkedin": ";;;;;", "aff_unique_index": "0;0;0;1;0;0", "aff_unique_norm": "East China Normal University;Microsoft", "aff_unique_dep": ";Microsoft Corporation", "aff_unique_url": "http://www.ecnu.edu.cn;https://www.microsoft.com", "aff_unique_abbr": "ECNU;Microsoft", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;0;0", "aff_country_unique": "China;United States" }, { "id": "b3lGS64ZZK", "title": "A Fair and In-Depth Evaluation of Existing End-to-End Entity Linking Systems", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Existing evaluations of entity linking systems often say little about how the system is going to perform for a particular application. 
There are two fundamental reasons for this.\nOne is that many evaluations only use aggregate measures (like precision, recall, and F1 score), without a detailed error analysis or a closer look at the results.\nThe other is that all of the widely used benchmarks have strong biases and artifacts, in particular:\na strong focus on named entities, an unclear or missing specification of what else counts as an entity mention,\npoor handling of ambiguities, and an over- or underrepresentation of certain kinds of entities.\n\nWe provide a more meaningful and fair in-depth evaluation of a variety of existing end-to-end entity linkers.\nWe characterize their strengths and weaknesses and also report on reproducibility aspects.\nThe detailed results of our evaluation can be inspected under https://elevant.cs.uni-freiburg.de/emnlp2023.\nOur evaluation is based on several widely used benchmarks, which exhibit the problems mentioned above to various degrees,\nas well as on two new benchmarks, which address the problems mentioned above.\nThe new benchmarks can be found under https://github.com/ad-freiburg/fair-entity-linking-benchmarks.", "keywords": "entity linking;entity linking evaluation;entity linking benchmarks", "primary_area": "", "supplementary_material": "", "author": "Hannah Bast;Matthias Hertel;Natalie Prange", "authorids": "~Hannah_Bast1;~Matthias_Hertel1;~Natalie_Prange1", "gender": "F;M;F", "homepage": "https://ad.informatik.uni-freiburg.de/staff/bast;;", "dblp": "b/HannahBast;173/4726;326/5950", "google_scholar": "hqSjLE8AAAAJ;zP-dN6cAAAAJ;", "or_profile": "~Hannah_Bast1;~Matthias_Hertel1;~Natalie_Prange1", "aff": "Universit\u00e4t Freiburg;Karlsruher Institut f\u00fcr Technologie;University of Freiburg, Albert-Ludwigs-Universit\u00e4t Freiburg", "aff_domain": "uni-freiburg.de;kit.edu;cs.uni-freiburg.de", "position": "Full Professor;PhD student;PhD student", "bibtex": "@inproceedings{\nbast2023a,\ntitle={A Fair and In-Depth Evaluation of Existing End-to-End Entity Linking Systems},\nauthor={Hannah Bast and Matthias Hertel and Natalie Prange},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=b3lGS64ZZK}\n}", "github": "", "project": "", "reviewers": "wuuS;JFFU;PBJb", "site": "https://openreview.net/forum?id=b3lGS64ZZK", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;5;4", "excitement": "4;4;3", "reproducibility": "4;4;5", "correctness": "4;4;4", "rating_avg": 5.0, "confidence_avg": 4.333333333333333, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.333333333333333, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0003-1213-6776;0000-0002-0814-766X;0000-0002-2999-4542", "linkedin": "hannahbast;;", "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Freiburg;Karlsruher Institut f\u00fcr Technologie", "aff_unique_dep": ";", "aff_unique_url": "https://www.uni-freiburg.de;https://www.kit.edu", "aff_unique_abbr": "Uni Freiburg;KIT", "aff_campus_unique_index": "1", "aff_campus_unique": ";Freiburg", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Germany" }, { "id": "b5pbHYNJnX", "title": "Rethinking Model Selection and Decoding for Keyphrase Generation with Pre-trained Sequence-to-Sequence Models", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Keyphrase Generation (KPG) is a longstanding task in NLP with widespread applications. 
The advent of sequence-to-sequence (seq2seq) pre-trained language models (PLMs) has ushered in a transformative era for KPG, yielding promising performance improvements. However, many design decisions remain unexplored and are often made arbitrarily. This paper undertakes a systematic analysis of the influence of model selection and decoding strategies on PLM-based KPG. We begin by elucidating why seq2seq PLMs are apt for KPG, anchored by an attention-driven hypothesis. We then establish that conventional wisdom for selecting seq2seq PLMs lacks depth: (1) merely increasing model size or performing task-specific adaptation is not parameter-efficient; (2) although combining in-domain pre-training with task adaptation benefits KPG, it does partially hinder generalization. Regarding decoding, we demonstrate that while greedy search achieves strong F1 scores, it lags in recall compared with sampling-based methods. Based on these insights, we propose DeSel, a likelihood-based decode-select algorithm for seq2seq PLMs. DeSel improves greedy search by an average of 4.7% semantic F1 across five datasets. Our collective findings pave the way for deeper future investigations into PLM-based KPG.", "keywords": "Keyphrase Generation;Pre-trained Language Models;Text Generation Decoding;Performance Evaluation", "primary_area": "", "supplementary_material": "", "author": "Di Wu;Wasi Uddin Ahmad;Kai-Wei Chang", "authorids": "~Di_Wu14;~Wasi_Uddin_Ahmad1;~Kai-Wei_Chang1", "gender": "Not Specified;M;M", "homepage": "https://xiaowu0162.github.io/;http://wasiahmad.github.io/;http://kwchang.net", "dblp": "52/328-54.html;183/0576;18/2428", "google_scholar": "vu1pDZgAAAAJ;YCHJZOMAAAAJ;fqDBtzYAAAAJ", "or_profile": "~Di_Wu14;~Wasi_Uddin_Ahmad1;~Kai-Wei_Chang1", "aff": "University of California, Los Angeles;Amazon;Amazon", "aff_domain": "cs.ucla.edu;amazon.com;amazon.com", "position": "PhD student;Applied Scientist;Researcher", "bibtex": "@inproceedings{\nwu2023rethinking,\ntitle={Rethinking Model Selection and Decoding for Keyphrase Generation with Pre-trained Sequence-to-Sequence Models},\nauthor={Di Wu and Wasi Uddin Ahmad and Kai-Wei Chang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=b5pbHYNJnX}\n}", "github": "", "project": "", "reviewers": "vDDj;3t8S;TvwR;FavJ", "site": "https://openreview.net/forum?id=b5pbHYNJnX", "pdf_size": 0, "rating": "3;3;3;3", "confidence": "4;4;3;5", "excitement": "3;3;3;4", "reproducibility": "3;3;3;4", "correctness": "4;4;3;4", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 3.25, "reproducibility_avg": 3.25, "correctness_avg": 3.75, "replies_avg": 14, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0001-5365-0072", "linkedin": ";ahmadwasi/;kai-wei-chang-41239040", "aff_unique_index": "0;1;1", "aff_unique_norm": "University of California, Los Angeles;Amazon", "aff_unique_dep": ";Amazon.com, Inc.", "aff_unique_url": "https://www.ucla.edu;https://www.amazon.com", "aff_unique_abbr": "UCLA;Amazon", "aff_campus_unique_index": "0", "aff_campus_unique": "Los Angeles;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "b6JnUJxOpN", "title": "Temporal Extrapolation and Knowledge Transfer for Lifelong Temporal Knowledge Graph Reasoning", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Real-world Temporal Knowledge Graphs keep growing with time and new entities and facts emerge 
continually, necessitating a model that can extrapolate to future timestamps and transfer knowledge for new components. Therefore, our work first dives into this more realistic issue, lifelong TKG reasoning, where existing methods can only address part of the challenges. Specifically, we formulate lifelong TKG reasoning as a temporal-path-based reinforcement learning (RL) framework. Then, we add temporal displacement into the action space of RL to extrapolate for the future and further propose a temporal-rule-based reward shaping to guide the training. To transfer and update knowledge, we design a new edge-aware message passing module, where the embeddings of new entities and edges are inductive. We conduct extensive experiments on three newly constructed benchmarks for lifelong TKG reasoning. Experimental results show the outperforming effectiveness of our model against all well-adapted baselines.", "keywords": "Temporal Knowledge Graph;Lifelong Reasoning;Temporal Extrapolation;Knowledge Transfer.", "primary_area": "", "supplementary_material": "", "author": "Zhongwu Chen;Chengjin Xu;Fenglong Su;Zhen Huang;Yong Dou", "authorids": "~Zhongwu_Chen1;~Chengjin_Xu1;~Fenglong_Su1;~Zhen_Huang3;~Yong_Dou1", "gender": ";M;M;M;M", "homepage": ";https://soledad921.github.io/chengjin_xu/;;;", "dblp": ";247/6268.html;205/0212.html;22/3870-6;76/305", "google_scholar": ";https://scholar.google.de/citations?user=sIts5VgAAAAJ;;;", "or_profile": "~Zhongwu_Chen1;~Chengjin_Xu1;~Fenglong_Su1;~Zhen_Huang3;~Yong_Dou1", "aff": ";University of Bonn;;National University of Defense Technology;National University of Defense Technology", "aff_domain": ";uni-bonn.de;;nudt.edu.cn;nudt.edu.cn", "position": ";PhD student;;Full Professor;Full Professor", "bibtex": "@inproceedings{\nchen2023temporal,\ntitle={Temporal Extrapolation and Knowledge Transfer for Lifelong Temporal Knowledge Graph Reasoning},\nauthor={Zhongwu Chen and Chengjin Xu and Fenglong Su and Zhen Huang and Yong Dou},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=b6JnUJxOpN}\n}", "github": "", "project": "", "reviewers": "kB2c;dRYt;NDHZ", "site": "https://openreview.net/forum?id=b6JnUJxOpN", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;4", "excitement": "4;3;3", "reproducibility": "4;3;4", "correctness": "4;3;3", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0000-0003-4819-373X;", "linkedin": ";;;;", "aff_unique_index": "0;1;1", "aff_unique_norm": "University of Bonn;National University of Defense Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.uni-bonn.de/;http://www.nudt.edu.cn/", "aff_unique_abbr": "UBonn;NUDT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1", "aff_country_unique": "Germany;China" }, { "id": "b6e1wV03hy", "title": "Retrieval-Augmented Few-shot Text Classification", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Retrieval-augmented methods are successful in the standard scenario where the retrieval space is sufficient; whereas in the few-shot scenario with limited retrieval space, this paper shows it is non-trivial to put them into practice. 
First, it is impossible to retrieve semantically similar examples by using an off-the-shelf metric and it is crucial to learn a task-specific retrieval metric; Second, our preliminary experiments demonstrate that it is difficult to optimize a plausible metric by minimizing the standard cross-entropy loss.\nThe in-depth analyses quantitatively show minimizing cross-entropy loss suffers from the weak supervision signals and the severe gradient vanishing issue during the optimization. To address these issues, we introduce two novel training objectives, namely EM-L and R-L, which provide more task-specific guidance to the retrieval metric by the EM algorithm and a ranking-based loss, respectively. Extensive experiments on $10$ datasets prove the superiority of the proposed retrieval augmented methods on the performance.", "keywords": "Retrieval-augmented methods;Few-shot text classification", "primary_area": "", "supplementary_material": "", "author": "Guoxin Yu;Lemao Liu;Haiyun Jiang;Shuming Shi;Xiang Ao", "authorids": "~Guoxin_Yu1;~Lemao_Liu3;~Haiyun_Jiang1;~Shuming_Shi1;~Xiang_Ao2", "gender": "F;M;M;M;M", "homepage": "https://github.com/CatYu98/;;;https://aoxaustin.github.io/;https://lemaoliu.github.io/homepage/", "dblp": "https://dblp.uni-trier.de/pid/60/6415;;s/ShumingShi;71/1982-1;41/10887.html", "google_scholar": "6FRu3tAAAAAJ;fk684xEAAAAJ;Lg31AKMAAAAJ;W8wrWfMAAAAJ;", "or_profile": "~Guoxin_Yu1;~Haiyun_Jiang1;~Shuming_Shi1;~Xiang_Ao2;~lemao_liu1", "aff": "Institute of Computing Technology (ICT), CAS;Tencent AI Lab;Tencent AI Lab;Institute of Computing Technology, Chinese Academy of Sciences;Tencent", "aff_domain": "ict.ac.cn;tencent.com;tencent.com;ict.ac.cn;tencent.com", "position": "PhD student;Researcher;Principal Researcher;Associate Professor;Researcher", "bibtex": "@inproceedings{\nyu2023retrievalaugmented,\ntitle={Retrieval-Augmented Few-shot Text Classification},\nauthor={Guoxin Yu and Lemao Liu and Haiyun Jiang and Shuming Shi and Xiang Ao},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=b6e1wV03hy}\n}", "github": "", "project": "", "reviewers": "VkCo;U8Tr;uG9t", "site": "https://openreview.net/forum?id=b6e1wV03hy", "pdf_size": 0, "rating": "2;2;2", "confidence": "3;3;3", "excitement": "4;4;4", "reproducibility": "5;4;3", "correctness": "4;4;3", "rating_avg": 2.0, "confidence_avg": 3.0, "excitement_avg": 4.0, "reproducibility_avg": 4.0, "correctness_avg": 3.6666666666666665, "replies_avg": 10, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0000-0001-9633-8361;", "linkedin": ";;;;", "aff_unique_index": "0;1;1;0;1", "aff_unique_norm": "Chinese Academy of Sciences;Tencent", "aff_unique_dep": "Institute of Computing Technology;Tencent AI Lab", "aff_unique_url": "http://www.ict.cas.cn;https://ai.tencent.com", "aff_unique_abbr": "CAS;Tencent AI Lab", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "b7ZJcAkjC3", "title": "Enhancing Structured Evidence Extraction for Fact Verification", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Open-domain fact verification is the task of verifying claims in natural language texts against extracted evidence. FEVEROUS is a benchmark that requires extracting and integrating both unstructured and structured evidence to verify a given claim. 
Previous models suffer from low recall of structured evidence extraction, i.e., table extraction and cell selection. In this paper, we propose a simple but effective method to enhance the extraction of structured evidence by leveraging the row and column semantics of tables. Our method comprises two components: (i) a coarse-grained table extraction module that selects tables based on rows and columns relevant to the claim and (ii) a fine-grained cell selection graph that combines both formats of evidence and enables multi-hop and numerical reasoning. We evaluate our method on FEVEROUS and achieve an evidence recall of $60.01\\%$ on the test set, which is $6.14\\%$ higher than the previous state-of-the-art performance. Our results demonstrate that our method can extract tables and select cells effectively, and provide better evidence sets for verdict prediction. Our code is released at https://github.com/WilliamZR/see-st", "keywords": "fact verification; evidence extraction", "primary_area": "", "supplementary_material": "", "author": "Zirui Wu;Nan Hu;Yansong Feng", "authorids": "~Zirui_Wu1;~Nan_Hu3;~Yansong_Feng1", "gender": "M;F;M", "homepage": "https://williamzr.github.io/;;https://yansongfeng.github.io/", "dblp": "276/2418;;25/2643-2.html", "google_scholar": "lafBqa8AAAAJ;20fNgw8AAAAJ;https://scholar.google.com.tw/citations?user=67qAw_wAAAAJ", "or_profile": "~Zirui_Wu1;~Nan_Hu3;~Yansong_Feng1", "aff": "Yuanpei College, Peking University;Peking University;Peking University", "aff_domain": "pku.edu.cn;pku.edu.cn;pku.edu.cn", "position": "Undergrad student;MS student;Associate Professor", "bibtex": "@inproceedings{\nwu2023enhancing,\ntitle={Enhancing Structured Evidence Extraction for Fact Verification},\nauthor={Zirui Wu and Nan Hu and Yansong Feng},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=b7ZJcAkjC3}\n}", "github": "", "project": "", "reviewers": "cEAj;W4G6;CvBB", "site": "https://openreview.net/forum?id=b7ZJcAkjC3", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;3", "excitement": "4;3;3", "reproducibility": "4;2;4", "correctness": "4;4;3", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": ";;", "aff_unique_index": "0;0;0", "aff_unique_norm": "Peking University", "aff_unique_dep": "Yuanpei College", "aff_unique_url": "http://www.pku.edu.cn", "aff_unique_abbr": "PKU", "aff_campus_unique_index": "0", "aff_campus_unique": "Beijing;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "b7gtyaaM2y", "title": "Towards General Error Diagnosis via Behavioral Testing in Machine Translation", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Behavioral testing offers a crucial means of diagnosing linguistic errors and assessing capabilities of NLP models. However, applying behavioral testing to machine translation (MT) systems is challenging as it generally requires human efforts to craft references for evaluating the translation quality of such systems on newly generated test cases. 
Existing works in behavioral testing of MT systems circumvent this by evaluating translation quality without references, but this restricts diagnosis to specific types of errors, such as incorrect translation of single numeric or currency words. In order to diagnose general errors, this paper proposes a new Bilingual Translation Pair Generation based Behavior Testing (BTPGBT) framework for conducting behavioral testing of MT systems. The core idea of BTPGBT is to employ a novel bilingual translation pair generation (BTPG) approach that automates the construction of high-quality test cases and their pseudoreferences. Experimental results on various MT systems demonstrate that BTPGBT could provide comprehensive and accurate behavioral testing results for general error diagnosis, which further leads to several insightful findings. Our code and data are available at https://github.com/wujunjie1998/BTPGBT.", "keywords": "behavioral testing;machine translation", "primary_area": "", "supplementary_material": "", "author": "Junjie Wu;Lemao Liu;Dit-Yan Yeung", "authorids": "~Junjie_Wu2;~Lemao_Liu3;~Dit-Yan_Yeung2", "gender": ";;M", "homepage": ";;https://cse.hkust.edu.hk/faculty/dyyeung/", "dblp": ";;41/5668", "google_scholar": ";;nEsOOx8AAAAJ", "or_profile": "~Junjie_Wu2;~Lemao_Liu3;~Dit-Yan_Yeung2", "aff": ";;Hong Kong University of Science and Technology", "aff_domain": ";;ust.hk", "position": ";;Chair Professor", "bibtex": "@inproceedings{\nwu2023towards,\ntitle={Towards General Error Diagnosis via Behavioral Testing in Machine Translation},\nauthor={Junjie Wu and Lemao Liu and Dit-Yan Yeung},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=b7gtyaaM2y}\n}", "github": "", "project": "", "reviewers": "Qgqn;59vL;5gEN", "site": "https://openreview.net/forum?id=b7gtyaaM2y", "pdf_size": 0, "rating": "2;2;2", "confidence": "4;2;3", "excitement": "4;3;3", "reproducibility": "1;3;3", "correctness": "4;3;3", "rating_avg": 2.0, "confidence_avg": 3.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 2.3333333333333335, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0003-3716-8125", "linkedin": ";;", "aff_unique_index": "0", "aff_unique_norm": "Hong Kong University of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.ust.hk", "aff_unique_abbr": "HKUST", "aff_campus_unique_index": "0", "aff_campus_unique": "Hong Kong SAR", "aff_country_unique_index": "0", "aff_country_unique": "China" }, { "id": "bB32QLrpu4", "title": "Granularity Matters: Pathological Graph-driven Cross-modal Alignment for Brain CT Report Generation", "track": "main", "status": "Long Main", "tldr": "", "abstract": "The automatic Brain CT report generation can improve the efficiency and accuracy of diagnosing cranial diseases. However, current methods are limited by 1) coarse-grained supervision: the training data in image-text format lacks detailed supervision for recognizing subtle abnormalities, and 2) coupled cross-modal alignment: visual-textual alignment may be inevitably coupled in a coarse-grained manner, resulting in tangled feature representation for report generation. In this paper, we propose a novel Pathological Graph-driven Cross-modal Alignment (PGCA) model for accurate and robust Brain CT report generation. 
Our approach effectively decouples the cross-modal alignment by constructing a Pathological Graph to learn fine-grained visual cues and align them with textual words. This graph comprises heterogeneous nodes representing essential pathological attributes (i.e., tissue and lesion) connected by intra- and inter-attribute edges with prior domain knowledge. Through carefully designed graph embedding and updating modules, our model refines the visual features of subtle tissues and lesions and aligns them with textual words using contrastive learning. Extensive experimental results confirm the viability of our method. We believe that our PGCA model holds the potential to greatly enhance the automatic generation of Brain CT reports and ultimately contribute to improved cranial disease diagnosis.", "keywords": "Medical Report Generation;Contrastive Learning;Knowledge Graph;Brain CT", "primary_area": "", "supplementary_material": "", "author": "Yanzhao Shi;Junzhong Ji;Xiaodan Zhang;Liangqiong Qu;Ying Liu", "authorids": "~Yanzhao_Shi1;~Junzhong_Ji1;~Xiaodan_Zhang1;~Liangqiong_Qu2;~Ying_Liu20", "gender": "M;M;F;F;F", "homepage": "https://yanzhaoshi.github.io/;https://xxxb.bjut.edu.cn/info/1409/2196.htm;https://zhangxiaodan-bjut.github.io/;https://liangqiong.github.io/;", "dblp": "299/4620;52/1893;29/2631-3;149/2634;", "google_scholar": "pRARRDQAAAAJ;;https://scholar.google.com/citations?view_op=list_works;ruKpgzwAAAAJ;", "or_profile": "~Yanzhao_Shi1;~Junzhong_Ji1;~Xiaodan_Zhang1;~Liangqiong_Qu2;~Ying_Liu20", "aff": "Beijing University of Technology ;Beijing University of Technology;Beijing University of Technology;University of Hong Kong;Peking University Third Hospital", "aff_domain": "bjut.edu.cn;bjut.edu.cn;bjut.edu.cn;hku.hk;bjmu.edu.cn", "position": "MS student;Full Professor;Associate Professor;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nshi2023granularity,\ntitle={Granularity Matters: Pathological Graph-driven Cross-modal Alignment for Brain {CT} Report Generation},\nauthor={Yanzhao Shi and Junzhong Ji and Xiaodan Zhang and Liangqiong Qu and Ying Liu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=bB32QLrpu4}\n}", "github": "", "project": "", "reviewers": "kj9V;gRk7;nJDR", "site": "https://openreview.net/forum?id=bB32QLrpu4", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;4;2", "excitement": "4;4;4", "reproducibility": "4;4;3", "correctness": "4;4;3", "rating_avg": 4.0, "confidence_avg": 3.0, "excitement_avg": 4.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0001-5653-7630;;0000-0001-7002-5447;0000-0001-8235-7852;0000-0002-0007-9519", "linkedin": "YanzhaoShi/;;;;", "aff_unique_index": "0;0;0;1;2", "aff_unique_norm": "Beijing University of Technology;University of Hong Kong;Peking University Third Hospital", "aff_unique_dep": ";;", "aff_unique_url": "http://www.bjut.edu.cn;https://www.hku.hk;http://www.puh3.net.cn", "aff_unique_abbr": "BJUT;HKU;", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "bNeDLx5O6w", "title": "MAF: Multi-Aspect Feedback for Improving Reasoning in Large Language Models", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Language Models (LMs) have shown impressive performance in 
various natural language tasks. However, when it comes to natural language reasoning, LMs still face challenges such as hallucination, generating incorrect intermediate reasoning steps, and making mathematical errors. Recent research has focused on enhancing LMs through *self-improvement* using feedback. Nevertheless, existing approaches relying on a single generic feedback source fail to address the diverse error types found in LM-generated reasoning chains. In this work, we propose **Multi-Aspect Feedback**, an iterative refinement framework that integrates multiple feedback modules, including frozen LMs and external tools, each focusing on a specific error category. Our experimental results demonstrate the efficacy of our approach to addressing several errors in the LM-generated reasoning chain and thus improving the overall performance of an LM in several reasoning tasks. We see an improvement of up to 20% in Mathematical Reasoning and up to 18% in Logical Entailment.", "keywords": "large language models;feedback generation;reasoning;self-correction;in-context learning;prompting;prompt engineering;error correction", "primary_area": "", "supplementary_material": "", "author": "Deepak Nathani;David Wang;Liangming Pan;William Yang Wang", "authorids": "~Deepak_Nathani2;~David_Wang4;~Liangming_Pan1;~William_Yang_Wang2", "gender": "M;M;M;M", "homepage": "https://www.dnathani.net;;https://liangmingpan.bio;https://www.cs.ucsb.edu/~william/", "dblp": "222/9844;;186/9707;08/9282", "google_scholar": "ItTsP6IAAAAJ;;JcjjOTUAAAAJ;gf8Ms_8AAAAJ", "or_profile": "~Deepak_Nathani2;~David_Wang4;~Liangming_Pan1;~William_Wang1", "aff": "Amazon;University of California, Santa Barbara;University of California, Santa Barbara;UC Santa Barbara", "aff_domain": "amazon.com;ucsb.edu;ucsb.edu;ucsb.edu", "position": "Intern;Undergrad student;Postdoc;Full Professor", "bibtex": "@inproceedings{\nnathani2023maf,\ntitle={{MAF}: Multi-Aspect Feedback for Improving Reasoning in Large Language Models},\nauthor={Deepak Nathani and David Wang and Liangming Pan and William Yang Wang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=bNeDLx5O6w}\n}", "github": "", "project": "", "reviewers": "J3st;Bg9c;zRzu", "site": "https://openreview.net/forum?id=bNeDLx5O6w", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;4", "excitement": "3;4;5", "reproducibility": "3;3;5", "correctness": "3;3;4", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 4.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;", "linkedin": "deepak-nathani/;dayvidwang/;;", "aff_unique_index": "0;1;1;1", "aff_unique_norm": "Amazon;University of California, Santa Barbara", "aff_unique_dep": "Amazon.com, Inc.;", "aff_unique_url": "https://www.amazon.com;https://www.ucsb.edu", "aff_unique_abbr": "Amazon;UCSB", "aff_campus_unique_index": "1;1;1", "aff_campus_unique": ";Santa Barbara", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "bQLMv4v0Gc", "title": "Construction Artifacts in Metaphor Identification Datasets", "track": "main", "status": "Short Main", "tldr": "", "abstract": "Metaphor identification aims at understanding whether a given expression is used figuratively in context. 
However, in this paper we show how existing metaphor identification datasets can be gamed by fully ignoring the potential metaphorical expression or the context in which it occurs. We test this hypothesis in a variety of datasets and settings, and show that metaphor identification systems based on language models without complete information can be competitive with those using the full context. This is due to the construction procedures to build such datasets, which introduce unwanted biases for positive and negative classes. Finally, we test the same hypothesis on datasets that are carefully sampled from natural corpora and where this bias is not present, making these datasets more challenging and reliable.", "keywords": "metaphors;resources;bias", "primary_area": "", "supplementary_material": "", "author": "Joanne Boisson;Luis Espinosa-Anke;Jose Camacho-Collados", "authorids": "~Joanne_Boisson2;~Luis_Espinosa-Anke1;~Jose_Camacho-Collados1", "gender": "F;M;M", "homepage": ";http://www.luisespinosa.net;http://www.josecamachocollados.com", "dblp": "136/8721;140/3490.html;165/0790", "google_scholar": "o4N5Tk4AAAAJ;;NP4KdQQAAAAJ", "or_profile": "~Joanne_Boisson2;~Luis_Espinosa-Anke1;~Jose_Camacho-Collados1", "aff": "Cardiff University;AMPLYFI;Cardiff University", "aff_domain": "cardiff.ac.uk;amplyfi.com;cardiff.ac.uk", "position": "PhD student;Principal Researcher;Full Professor", "bibtex": "@inproceedings{\nboisson2023construction,\ntitle={Construction Artifacts in Metaphor Identification Datasets},\nauthor={Joanne Boisson and Luis Espinosa-Anke and Jose Camacho-Collados},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=bQLMv4v0Gc}\n}", "github": "", "project": "", "reviewers": "JUSd;CchZ;PbYa", "site": "https://openreview.net/forum?id=bQLMv4v0Gc", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;3;5", "excitement": "3;4;3", "reproducibility": "4;4;4", "correctness": "3;4;3", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.0, "correctness_avg": 3.3333333333333335, "replies_avg": 10, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": "joanne-boisson-b5b2638b/;;", "aff_unique_index": "0;1;0", "aff_unique_norm": "Cardiff University;AMPLYFI", "aff_unique_dep": ";", "aff_unique_url": "https://www.cardiff.ac.uk;", "aff_unique_abbr": "Cardiff;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United Kingdom;" }, { "id": "bSfBgrmabV", "title": "Dual-Feedback Knowledge Retrieval for Task-Oriented Dialogue Systems", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Efficient knowledge retrieval plays a pivotal role in ensuring the success of end-to-end task-oriented dialogue systems by facilitating the selection of relevant information necessary to fulfill user requests. However, current approaches generally integrate knowledge retrieval and response generation, which poses scalability challenges when dealing with extensive knowledge bases. Taking inspiration from open-domain question answering, we propose a retriever-generator architecture that harnesses a retriever to retrieve pertinent knowledge and a generator to generate system responses. Due to the lack of retriever training labels, we propose relying on feedback from the generator as pseudo-labels to train the retriever. 
To achieve this, we introduce a dual-feedback mechanism that generates both positive and negative feedback based on the output of the generator. Our method demonstrates superior performance in task-oriented dialogue tasks, as evidenced by experimental results on three benchmark datasets.", "keywords": "End-to-End Task-Oriented Dialogue System;Knowledge Retrieval;Retriever training", "primary_area": "", "supplementary_material": "", "author": "Tianyuan Shi;Liangzhi Li;Zijian Lin;Tao Yang;Xiaojun Quan;Qifan Wang", "authorids": "~Tianyuan_Shi1;~Liangzhi_Li1;~Zijian_Lin1;~Tao_Yang13;~Xiaojun_Quan1;~Qifan_Wang2", "gender": "M;M;M;M;M;M", "homepage": "https://www.sysu.edu.cn/;;;https://taoyang225.github.io/;https://sites.google.com/site/xiaojunquan/;https://wqfcr.github.io/", "dblp": "341/4890;169/4123;;;90/5936;33/8610", "google_scholar": ";JIRw_tMAAAAJ;;i3to2x8AAAAJ;dRpg4t8AAAAJ;LrSyLosAAAAJ", "or_profile": "~Tianyuan_Shi1;~Liangzhi_Li1;~Zijian_Lin1;~Tao_Yang13;~Xiaojun_Quan1;~Qifan_Wang2", "aff": "SUN YAT-SEN UNIVERSITY;Osaka University;SUN YAT-SEN UNIVERSITY;SUN YAT-SEN UNIVERSITY;SUN YAT-SEN UNIVERSITY;Meta AI", "aff_domain": "sysu.edu.cn;osaka-u.ac.jp;sysu.edu.cn;sysu.edu.cn;sysu.edu.cn;fb.com", "position": "PhD student;Assistant Professor;Undergrad student;PhD student;Full Professor;Principal Researcher", "bibtex": "@inproceedings{\nshi2023dualfeedback,\ntitle={Dual-Feedback Knowledge Retrieval for Task-Oriented Dialogue Systems},\nauthor={Tianyuan Shi and Liangzhi Li and Zijian Lin and Tao Yang and Xiaojun Quan and Qifan Wang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=bSfBgrmabV}\n}", "github": "", "project": "", "reviewers": "gvAr;iQGR;ctRL", "site": "https://openreview.net/forum?id=bSfBgrmabV", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;4;3", "excitement": "3;3;4", "reproducibility": "4;4;4", "correctness": "4;4;4", "rating_avg": 5.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.0, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0001-5086-4657;;;0000-0002-7570-5756", "linkedin": ";;;;;", "aff_unique_index": "0;1;0;0;0;2", "aff_unique_norm": "Sun Yat-sen University;Osaka University;Meta", "aff_unique_dep": ";;Meta AI", "aff_unique_url": "http://www.sysu.edu.cn;https://www.osaka-u.ac.jp;https://meta.com", "aff_unique_abbr": "SYSU;Osaka U;Meta", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;0;2", "aff_country_unique": "China;Japan;United States" }, { "id": "bVO1sWgnTx", "title": "Efficient Classification of Long Documents via State-Space Models", "track": "main", "status": "Short Main", "tldr": "", "abstract": "Transformer-based models have achieved state-of-the-art performance on numerous NLP applications. However, long documents which are prevalent in real-world scenarios cannot be efficiently processed by transformers with the vanilla self-attention module due to their quadratic computation complexity and limited length extrapolation ability. Instead of tackling the computation difficulty for self-attention with sparse or hierarchical structures, in this paper, we investigate the use of State-Space Models (SSMs) for long document classification tasks. 
\nWe conducted extensive experiments on six long document classification datasets, including binary, multi-class, and multi-label classification, comparing SSMs (with and without pre-training) to self-attention-based models. We also introduce the SSM-pooler model and demonstrate that it achieves comparable performance while being on average 36\\% more efficient. Additionally, our method exhibits higher robustness to input noise, even in the extreme scenario of 40\\% noise.", "keywords": "Long Document Classification;State Space Models;Efficient NLP", "primary_area": "", "supplementary_material": "", "author": "Peng Lu;Suyuchen Wang;Mehdi Rezagholizadeh;Bang Liu;Ivan Kobyzev", "authorids": "~Peng_Lu6;~Suyuchen_Wang1;~Mehdi_Rezagholizadeh1;~Bang_Liu1;~Ivan_Kobyzev1", "gender": "M;M;M;M;", "homepage": ";https://suyuchen.wang/;;http://www-labs.iro.umontreal.ca/~liubang/;", "dblp": ";264/2706;;;", "google_scholar": "c4xAa8gAAAAJ;fiy_i68AAAAJ;MvXlF6kAAAAJ;lmfAnP4AAAAJ;", "or_profile": "~Peng_Lu6;~Suyuchen_Wang1;~Mehdi_Rezagholizadeh1;~Bang_Liu1;~Ivan_Kobyzev1", "aff": "University of Montreal;Mila - Quebec Artificial Intelligence Institute;Huawei Technologies Ltd.;University of Montreal;", "aff_domain": "umontreal.ca;mila.quebec;huawei.com;umontreal.ca;", "position": "PhD student;PhD student;Principal Researcher;Assistant Professor;", "bibtex": "@inproceedings{\nlu2023efficient,\ntitle={Efficient Classification of Long Documents via State-Space Models},\nauthor={Peng Lu and Suyuchen Wang and Mehdi Rezagholizadeh and Bang Liu and Ivan Kobyzev},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=bVO1sWgnTx}\n}", "github": "", "project": "", "reviewers": "VUdY;FpWq;Y6PV", "site": "https://openreview.net/forum?id=bVO1sWgnTx", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;3;4", "excitement": "3;3;3", "reproducibility": "4;3;3", "correctness": "4;4;3", "rating_avg": 3.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.0, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0000-0002-9483-8984;", "linkedin": "peng-lu-211b7617a/;suyuchenwang/;;bang-liu-12b66789/?originalSubdomain=ca;", "aff_unique_index": "0;1;2;0", "aff_unique_norm": "University of Montreal;Quebec Artificial Intelligence Institute;Huawei", "aff_unique_dep": ";Artificial Intelligence;Huawei Technologies", "aff_unique_url": "https://wwwumontreal.ca;https://mila.quebec;https://www.huawei.com", "aff_unique_abbr": "UM;Mila;Huawei", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "Canada;China" }, { "id": "bWXIut4pNM", "title": "INGENIOUS: Using Informative Data Subsets for Efficient Pre-Training of Language Models", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "A salient characteristic of pre-trained language models (PTLMs) is a remarkable improvement in their generalization capability and emergence of new capabilities with increasing model capacity and pre-training dataset size. Consequently, we are witnessing the development of enormous models pushing the state-of-the-art. It is, however, imperative to realize that this inevitably leads to prohibitively long training times, extortionate computing costs, and a detrimental environmental impact. 
Significant efforts are underway to make PTLM training more efficient through innovations in model architectures, training pipelines, and loss function design, with scant attention being paid to optimizing the utility of training data. The key question that we ask is whether it is possible to train PTLMs by employing only highly informative subsets of the training data while maintaining downstream performance? Building upon the recent progress in informative data subset selection, we show how we can employ submodular optimization to select highly representative subsets of the training corpora and demonstrate that the proposed framework can be applied to efficiently train multiple PTLMs (BERT, BioBERT, GPT-2) using only a fraction of data. Further, we perform a rigorous empirical evaluation to show that the resulting models achieve up to $\\sim99\\%$ of the performance of the fully-trained models. We made our framework publicly available at \\url{https://github.com/Efficient-AI/ingenious}.", "keywords": "Efficient training;Language Models;Data selection", "primary_area": "", "supplementary_material": "", "author": "H S V N S Kowndinya Renduchintala;Krishnateja Killamsetty;Sumit Bhatia;Milan Aggarwal;Ganesh Ramakrishnan;Rishabh K Iyer;Balaji Krishnamurthy", "authorids": "~H_S_V_N_S_Kowndinya_Renduchintala1;~Krishnateja_Killamsetty1;~Sumit_Bhatia1;~Milan_Aggarwal2;~Ganesh_Ramakrishnan1;~Rishabh_K_Iyer2;~Balaji_Krishnamurthy1", "gender": "M;M;;M;M;M;M", "homepage": "https://kowndinya-renduchintala.github.io/;https://krishnatejakillamsetty.me;http://sumitbhatia.net/;https://www.cse.iitb.ac.in/~ganesh/;https://www.rishiyer.com;;", "dblp": ";https://dblp.uni-trier.de/pid/273/3972;52/7536;r/GaneshRamakrishnan;37/10544.html;79/1076;206/6244.html", "google_scholar": "ekUSmaEAAAAJ;cHDE-2YAAAAJ;8HVTWNkAAAAJ;https://scholar.google.com/scholar?hl=hi;l_XxJ1kAAAAJ;n8iUBg8AAAAJ;YiMNG_QAAAAJ", "or_profile": "~H_S_V_N_S_Kowndinya_Renduchintala1;~Krishnateja_Killamsetty1;~Sumit_Bhatia1;~Ganesh_Ramakrishnan1;~Rishabh_K_Iyer2;~Balaji_Krishnamurthy1;~Milan_Aggarwal1", "aff": "Indian Institute of Technology Bombay;University of Texas, Dallas;Adobe Systems;Indian Institute of Technology Bombay, Indian Institute of Technology Bombay;Microsoft;Adobe Systems;Adobe Systems", "aff_domain": "iitb.ac.in;utdallas.edu;adobe.com;cse.iitb.ac.in;microsoft.com;adobe.com;adobe.com", "position": "Undergrad student;PhD student;Senior ML Scientist;Full Professor;Research Scientist;Principal Scientist;Researcher", "bibtex": "@inproceedings{\nrenduchintala2023ingenious,\ntitle={{INGENIOUS}: Using Informative Data Subsets for Efficient Pre-Training of Language Models},\nauthor={H S V N S Kowndinya Renduchintala and Krishnateja Killamsetty and Sumit Bhatia and Milan Aggarwal and Ganesh Ramakrishnan and Rishabh K Iyer and Balaji Krishnamurthy},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=bWXIut4pNM}\n}", "github": "", "project": "", "reviewers": "NYV4;m6xK;Ex9w;ETAv", "site": "https://openreview.net/forum?id=bWXIut4pNM", "pdf_size": 0, "rating": "3;3;3;3", "confidence": "2;4;3;4", "excitement": "3;3;3;4", "reproducibility": "5;4;3;4", "correctness": "2;2;2;4", "rating_avg": 3.0, "confidence_avg": 3.25, "excitement_avg": 3.25, "reproducibility_avg": 4.0, "correctness_avg": 2.5, "replies_avg": 14, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;0000-0002-0366-2427;", "linkedin": 
"kowndinya-renduchintala/;krishnateja-killamsetty/;;;rishabh-iyer-36893717/;balaji-krishnamurthy-4241695/;milan-aggarwal-31a954b5/", "aff_unique_index": "0;1;2;0;3;2;2", "aff_unique_norm": "Indian Institute of Technology Bombay;University of Texas at Dallas;Adobe;Microsoft", "aff_unique_dep": ";;Adobe Systems Incorporated;Microsoft Corporation", "aff_unique_url": "https://www.iitb.ac.in;https://www.utdallas.edu;https://www.adobe.com;https://www.microsoft.com", "aff_unique_abbr": "IIT Bombay;UT Dallas;Adobe;Microsoft", "aff_campus_unique_index": "0;1;0", "aff_campus_unique": "Bombay;Dallas;", "aff_country_unique_index": "0;1;1;0;1;1;1", "aff_country_unique": "India;United States" }, { "id": "bZel7wM6fN", "title": "Understanding the Inner-workings of Language Models Through Representation Dissimilarity", "track": "main", "status": "Short Main", "tldr": "", "abstract": "As language models are applied to an increasing number of real-world applications, understanding their inner workings has become an important issue in model trust, interpretability, and transparency. In this work we show that representation dissimilarity measures, which are functions that measure the extent to which two model's internal representations differ, can be a valuable tool for gaining insight into the mechanics of language models. Among our insights are: (i) an apparent asymmetry in the internal representations of model using SoLU and GeLU activation functions, (ii) evidence that dissimilarity measures can identify and locate generalization properties of models that are invisible via in-distribution test set performance, and (iii) new evaluations of how language model features vary as width and depth are increased. Our results suggest that dissimilarity measures are a promising set of tools for shedding light on the inner workings of language models.", "keywords": "language models;interpretability;stitching", "primary_area": "", "supplementary_material": "", "author": "Davis Brown;Charles Godfrey;Nicholas Konz;Jonathan Tu;Henry Kvinge", "authorids": "~Davis_Brown1;~Charles_Godfrey1;~Nicholas_Konz1;~Jonathan_Tu1;~Henry_Kvinge1", "gender": ";M;M;M;", "homepage": "https://davisrbrown.com/;https://godfrey-cw.github.io/;https://nickk124.github.io/;;https://hkvinge.github.io/", "dblp": "304/3144;317/0066;307/2984;;223/4356", "google_scholar": "https://scholar.google.com/citations?hl=en;yfT92d4AAAAJ;https://scholar.google.com/citations?hl=en;;vfFn_QsAAAAJ", "or_profile": "~Davis_Brown1;~Charles_Godfrey1;~Nicholas_Konz1;~Jonathan_Tu1;~Henry_Kvinge1", "aff": "Pacific Northwest National Laboratory;Pacific Northwest National Laboratory;Pacific Northwest National Laboratory;;Pacific Northwest National Laboratory", "aff_domain": "pnnl.gov;pnnl.gov;pnnl.gov;;pnnl.gov", "position": "Researcher;Postdoc;Intern;;Principal Researcher", "bibtex": "@inproceedings{\nbrown2023understanding,\ntitle={Understanding the Inner-workings of Language Models Through Representation Dissimilarity},\nauthor={Davis Brown and Charles Godfrey and Nicholas Konz and Jonathan Tu and Henry Kvinge},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=bZel7wM6fN}\n}", "github": "", "project": "", "reviewers": "ur3B;Dvu9;Mj3F;EMMQ;rr9F", "site": "https://openreview.net/forum?id=bZel7wM6fN", "pdf_size": 0, "rating": "5;5;5;5;5", "confidence": "3;3;3;1;5", "excitement": "4;4;4;4;4", "reproducibility": "4;3;3;3;5", "correctness": "4;4;4;3;4", "rating_avg": 5.0, "confidence_avg": 
3.0, "excitement_avg": 4.0, "reproducibility_avg": 3.6, "correctness_avg": 3.8, "replies_avg": 17, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0003-1698-2718;0000-0003-0230-1598;;", "linkedin": ";godfrey-cw/;nick-konz-247988168;jonathan-tu/;", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Pacific Northwest National Laboratory", "aff_unique_dep": "", "aff_unique_url": "https://www.pnnl.gov", "aff_unique_abbr": "PNNL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "bc2xgl7oGf", "title": "The Iron(ic) Melting Pot: Reviewing Human Evaluation in Humour, Irony and Sarcasm Generation", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Human evaluation is often considered to be the gold standard method of evaluating a Natural Language Generation system. However, whilst its importance is accepted by the community at large, the quality of its execution is often brought into question. In this position paper, we argue that the generation of more esoteric forms of language - humour, irony and sarcasm - constitutes a subdomain where the characteristics of selected evaluator panels are of utmost importance, and every effort should be made to report demographic characteristics wherever possible, in the interest of transparency and replicability. We support these claims with an overview of each language form and an analysis of examples in terms of how their interpretation is affected by different participant variables. We additionally perform a critical survey of recent works in NLG to assess how well evaluation procedures are reported in this subdomain, and note a severe lack of open reporting of evaluator demographic information, and a significant reliance on crowdsourcing platforms for recruitment.", "keywords": "human evaluation;humour;sarcasm;irony;natural language generation;position paper;critical survey;sociology", "primary_area": "", "supplementary_material": "", "author": "Tyler Loakman;Aaron Maladry;Chenghua Lin", "authorids": "~Tyler_Loakman1;~Aaron_Maladry1;~Chenghua_Lin1", "gender": "M;M;", "homepage": ";https://www.lt3.ugent.be/people/aaron-maladry/;", "dblp": "331/6135;321/0584;", "google_scholar": "EIdFfNAAAAAJ;https://scholar.google.com/citations?hl=en;", "or_profile": "~Tyler_Loakman1;~Aaron_Maladry1;~Chenghua_Lin1", "aff": "University of Sheffield;Universiteit Gent;", "aff_domain": "shef.ac.uk;ugent.be;", "position": "PhD student;PhD student;", "bibtex": "@inproceedings{\nloakman2023the,\ntitle={The Iron(ic) Melting Pot: Reviewing Human Evaluation in Humour, Irony and Sarcasm Generation},\nauthor={Tyler Loakman and Aaron Maladry and Chenghua Lin},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=bc2xgl7oGf}\n}", "github": "", "project": "", "reviewers": "bfpd;Txig;4dTM", "site": "https://openreview.net/forum?id=bc2xgl7oGf", "pdf_size": 0, "rating": "4;4;4", "confidence": "5;5;4", "excitement": "4;4;2", "reproducibility": "4;5;4", "correctness": "3;4;2", "rating_avg": 4.0, "confidence_avg": 4.666666666666667, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.333333333333333, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0001-5333-7780;0000-0003-2857-5132;", "linkedin": "tyler-loakman/?originalSubdomain=uk;;", 
"aff_unique_index": "0;1", "aff_unique_norm": "University of Sheffield;University of Ghent", "aff_unique_dep": ";", "aff_unique_url": "https://www.sheffield.ac.uk;https://www.ugent.be/en", "aff_unique_abbr": "Sheffield;UGent", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "United Kingdom;Belgium" }, { "id": "bdgUPZhF9b", "title": "InstructoR: Instructing Unsupervised Conversational Dense Retrieval with Large Language Models", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Compared to traditional single-turn ad-hoc retrieval, conversational retrieval needs to handle the multi-turn conversation and understand the user\u2019s real query intent. However, most existing methods simply fine-tune the pre-trained ad-hoc retriever on limited supervised data, making it challenging for the retriever to fully grasp the entirety of the conversation. In this paper, we find that large language models (LLMs) can accurately discover the user\u2019s query intent from the complex conversation context and provide the supervised signal to instruct the retriever in an unsupervised manner. Therefore, we propose a novel method termed InstructoR to Instruct unsupervised conversational dense Retrieval with LLMs. We design an unsupervised training framework that employs LLMs to estimate the session-passage relevance score as the soft label to guide the retriever's training. Specially, we devise three instructing strategies from context, query and response perspectives to calculate the relevance score more precisely, including conversational retrieval as conversation generation, question rewrite as latent variable and question response as posterior guide. Experimental results show InstructoR can bring significant improvements across various ad-hoc retrievers, even surpassing the current supervised state-of-the-art method. We also demonstrate the effectiveness of our method under low-resource and zero-shot settings. 
Our code is publicly available at https://github.com/jinzhuoran/InstructoR/.", "keywords": "Conversational Dense Retrieval;Unsupervised Information Retrieval;Large Language Models", "primary_area": "", "supplementary_material": "", "author": "Zhuoran Jin;Pengfei Cao;Yubo Chen;Kang Liu;Jun Zhao", "authorids": "~Zhuoran_Jin1;~Pengfei_Cao1;~Yubo_Chen1;~Kang_Liu1;~Jun_Zhao4", "gender": "M;;M;M;M", "homepage": "https://scholar.google.com/citations?user=Am8WsCkAAAAJ;https://cpf-nlpr.github.io/;http://www.nlpr.ia.ac.cn/cip/yubochen/index.html;http://www.nlpr.ia.ac.cn/cip/~liukang/index.html;http://nlpr-web.ia.ac.cn/cip/english/~junzhao/index.html", "dblp": "320/9888;182/7941;https://dblp.uni-trier.de/pid/90/7879.html;42/4903.html;https://dblp.uni-trier.de/pid/47/2026-1.html", "google_scholar": "Am8WsCkAAAAJ;lP5_LJIAAAAJ;https://scholar.google.com.hk/citations?user=9z7GPxIAAAAJ;DtZCfl0AAAAJ;https://scholar.google.com.hk/citations?user=HljRttwAAAAJ", "or_profile": "~Zhuoran_Jin1;~Pengfei_Cao1;~Yubo_Chen1;~Kang_Liu1;~Jun_Zhao4", "aff": "Institute of Automation, Chinese Academy of Sciences;Institute of Automation, Chinese Academy of Sciences;Institute of automation, Chinese academy of science;Institute of Automation, Chinese Academy of Sciences;Institute of automation, Chinese academy of science", "aff_domain": "nlpr.ia.ac.cn;ia.ac.cn;nlpr.ia.ac.cn;ia.ac.cn;nlpr.ia.ac.cn", "position": "PhD student;PhD student;Associate Professor;Professor;Full Professor", "bibtex": "@inproceedings{\njin2023instructor,\ntitle={InstructoR: Instructing Unsupervised Conversational Dense Retrieval with Large Language Models},\nauthor={Zhuoran Jin and Pengfei Cao and Yubo Chen and Kang Liu and Jun Zhao},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=bdgUPZhF9b}\n}", "github": "", "project": "", "reviewers": "3sLe;nZaB;UCrL;WnM3", "site": "https://openreview.net/forum?id=bdgUPZhF9b", "pdf_size": 0, "rating": "3;3;3;3", "confidence": "3;4;4;4", "excitement": "3;3;3;3", "reproducibility": "3;4;3;4", "correctness": "4;3;3;3", "rating_avg": 3.0, "confidence_avg": 3.75, "excitement_avg": 3.0, "reproducibility_avg": 3.5, "correctness_avg": 3.25, "replies_avg": 14, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;", "linkedin": ";;;;", "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Chinese Academy of Sciences", "aff_unique_dep": "Institute of Automation", "aff_unique_url": "http://www.ia.cas.cn", "aff_unique_abbr": "CAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "bflAMCWJh8", "title": "PerturbScore: Connecting Discrete and Continuous Perturbations in NLP", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "With the rapid development of neural network applications in NLP, the model robustness problem is gaining more attention. Different from computer vision, the discrete nature of texts makes it more challenging to explore robustness in NLP. Therefore, in this paper, we aim to connect discrete perturbations with continuous perturbations, so that we can use such connections as a bridge to help understand discrete perturbations in NLP models. Specifically, we first explore how to connect and measure the correlation between discrete perturbations and continuous perturbations. Then we design a regression task as a PerturbScore to learn the correlation automatically. 
Through experimental results, we find that we can build a connection between discrete and continuous perturbations and use the proposed PerturbScore to learn such correlation, surpassing previous methods used in discrete perturbation measuring. Further, the proposed PerturbScore can be well generalized to different datasets, perturbation methods, indicating that we can use it as a powerful tool to study model robustness in NLP.", "keywords": "perturbation in NLP;robustness in NLP", "primary_area": "", "supplementary_material": "", "author": "Linyang Li;Ke Ren;Yunfan Shao;Pengyu Wang;Xipeng Qiu", "authorids": "~Linyang_Li1;~Ke_Ren2;~Yunfan_Shao1;~Pengyu_Wang2;~Xipeng_Qiu1", "gender": "M;M;M;M;M", "homepage": "https://github.com/LinyangLee;https://github.com/renke999;;;https://xpqiu.github.io/", "dblp": "228/8051;;236/5806;14/3832-6;69/1395", "google_scholar": "T6eEqcMAAAAJ;https://scholar.google.com/citations?view_op=list_works;pw5QEtoAAAAJ;https://scholar.google.co.jp/citations?user=NGniJS0AAAAJ;Pq4Yp_kAAAAJ", "or_profile": "~Linyang_Li1;~Ke_Ren2;~Yunfan_Shao1;~Pengyu_Wang2;~Xipeng_Qiu1", "aff": "Fudan University;Fudan University;Fudan University;Fudan University;Fudan University", "aff_domain": "fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn", "position": "PhD student;MS student;PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nli2023perturbscore,\ntitle={PerturbScore: Connecting Discrete and Continuous Perturbations in {NLP}},\nauthor={Linyang Li and Ke Ren and Yunfan Shao and Pengyu Wang and Xipeng Qiu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=bflAMCWJh8}\n}", "github": "", "project": "", "reviewers": "Rjy5;WvbR;pixE", "site": "https://openreview.net/forum?id=bflAMCWJh8", "pdf_size": 0, "rating": "1;1;1", "confidence": "3;3;3", "excitement": "5;4;4", "reproducibility": "4;4;4", "correctness": "4;3;3", "rating_avg": 1.0, "confidence_avg": 3.0, "excitement_avg": 4.333333333333333, "reproducibility_avg": 4.0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;0000-0001-7163-5247", "linkedin": ";;;;", "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Fudan University", "aff_unique_dep": "", "aff_unique_url": "https://www.fudan.edu.cn", "aff_unique_abbr": "Fudan", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "bgskDuMqcz", "title": "Adapt in Contexts: Retrieval-Augmented Domain Adaptation via In-Context Learning", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Large language models (LLMs) have showcased their capability with few-shot inference known as in-context learning. However, in-domain demonstrations are not always readily available in real scenarios, leading to cross-domain in-context learning. Besides, LLMs are still facing challenges in long-tail knowledge in unseen and unfamiliar domains. The above limitations demonstrate the necessity of Unsupervised Domain Adaptation (UDA). In this paper, we study the UDA problem under an in-context learning setting to adapt language models from the source domain to the target domain without any target labels. 
The core idea is to retrieve a subset of cross-domain elements that are the most similar to the query, and elicit language model to adapt in an in-context manner by learning both target domain distribution and the discriminative task signal simultaneously with the augmented cross-domain in-context examples. We devise different prompting and training strategies, accounting for different LM architectures to learn the target distribution via language modeling. With extensive experiments on Sentiment Analysis (SA) and Named Entity Recognition (NER) tasks, we thoroughly study the effectiveness of ICL for domain transfer and demonstrate significant improvements over baseline models.", "keywords": "Domain Adaptation;in-context learning;Retrieval Augmentation", "primary_area": "", "supplementary_material": "", "author": "Quanyu Long;Wenya Wang;Sinno Jialin Pan", "authorids": "~Quanyu_Long1;~Wenya_Wang1;~Sinno_Jialin_Pan1", "gender": "M;F;M", "homepage": "http://quanyulong.net;https://personal.ntu.edu.sg/wangwy/;http://www.cse.cuhk.edu.hk/~sinnopan/", "dblp": "259/0397;;80/5412", "google_scholar": "https://scholar.google.com/citations?hl=en;https://scholar.google.com.sg/citations?user=eOKISncAAAAJ;https://scholar.google.com/citations?hl=en", "or_profile": "~Quanyu_Long1;~Wenya_Wang1;~Sinno_Pan1", "aff": "Nanyang Technological University;University of Washington;Nanyang Technological University", "aff_domain": "ntu.edu.sg;cs.washington.edu;ntu.edu.sg", "position": "PhD student;Postdoc;Full Professor", "bibtex": "@inproceedings{\nlong2023adapt,\ntitle={Adapt in Contexts: Retrieval-Augmented Domain Adaptation via In-Context Learning},\nauthor={Quanyu Long and Wenya Wang and Sinno Jialin Pan},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=bgskDuMqcz}\n}", "github": "", "project": "", "reviewers": "PvYw;gjmy;cMth;9Foj", "site": "https://openreview.net/forum?id=bgskDuMqcz", "pdf_size": 0, "rating": "4;4;4;4", "confidence": "4;4;2;4", "excitement": "2;4;4;3", "reproducibility": "3;4;3;4", "correctness": "2;4;3;3", "rating_avg": 4.0, "confidence_avg": 3.5, "excitement_avg": 3.25, "reproducibility_avg": 3.5, "correctness_avg": 3.0, "replies_avg": 13, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0001-5612-7818;", "linkedin": ";;", "aff_unique_index": "0;1;0", "aff_unique_norm": "Nanyang Technological University;University of Washington", "aff_unique_dep": ";", "aff_unique_url": "https://www.ntu.edu.sg;https://www.washington.edu", "aff_unique_abbr": "NTU;UW", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "Singapore;United States" }, { "id": "bkGVmCE3UJ", "title": "Counting the Bugs in ChatGPT's Wugs: A Multilingual Investigation into the Morphological Capabilities of a Large Language Model", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Large language models (LLMs) have recently reached an impressive level of linguistic capability, prompting comparisons with human language skills. However, there have been relatively few systematic inquiries into the linguistic capabilities of the latest generation of LLMs, and those studies that do exist (i) ignore the remarkable ability of humans to generalize, (ii) focus only on English, and (iii) investigate syntax or semantics and overlook other capabilities that lie at the heart of human language, like morphology. 
Here, we close these gaps by conducting the first rigorous analysis of the morphological capabilities of ChatGPT in four typologically varied languages (specifically, English, German, Tamil, and Turkish). We apply a version of Berko's (1958) wug test to ChatGPT, using novel, uncontaminated datasets for the four examined languages. We find that ChatGPT massively underperforms purpose-built systems, particularly in English. Overall, our results---through the lens of morphology---cast a new light on the linguistic capabilities of ChatGPT, suggesting that claims of human-like language skills are premature and misleading.", "keywords": "wug-testing;morphology;chatGPT;morphological generalization", "primary_area": "", "supplementary_material": "", "author": "Leonie Weissweiler;Valentin Hofmann;Anjali Kantharuban;Anna Cai;Ritam Dutt;Amey Hengle;Anubha Kabra;Atharva Kulkarni;Abhishek Vijayakumar;Haofei Yu;Hinrich Schuetze;Kemal Oflazer;David R Mortensen", "authorids": "~Leonie_Weissweiler1;~Valentin_Hofmann1;~Anjali_Kantharuban1;~Anna_Cai1;~Ritam_Dutt1;~Amey_Hengle1;~Anubha_Kabra1;~Atharva_Kulkarni1;~Abhishek_Vijayakumar1;~Haofei_Yu1;~Hinrich_Schuetze3;~Kemal_Oflazer1;~David_R_Mortensen1", "gender": ";;F;F;M;M;F;M;M;M;M;M;M", "homepage": "https://www.cis.lmu.de/~weissweiler/;https://valentinhofmann.github.io/;http://www.anjaliruban.com;https://www.andrew.cmu.edu/user/annacai/;;https://ameyhengle.github.io/;https://sites.google.com/view/anubha-kabra;https://athrvkk.github.io;;https://www.haofeiyu.me;https://www.cis.uni-muenchen.de/schuetze/;https://www.andrew.cmu.edu/user/ko/;http://www.cs.cmu.edu/~dmortens/", "dblp": "212/0281;264/4665;;295/9845;213/7740;;266/2823;261/0205;;156/1412;s/HinrichSchutze;95/1344;180/5443", "google_scholar": "o4fK4n4AAAAJ;bbHOPKwAAAAJ;;;https://scholar.google.com/citations?hl=en;hzNuZEoAAAAJ;bMie1tIAAAAJ;tQTAiXwAAAAJ;;EL-QbZ4AAAAJ;;https://scholar.google.com.tw/citations?user=JLk0hs8AAAAJ;https://scholar.google.com/citations?authuser=1", "or_profile": "~Leonie_Weissweiler1;~Valentin_Hofmann1;~Anjali_Kantharuban1;~Anna_Cai1;~Ritam_Dutt1;~Amey_Hengle1;~Anubha_Kabra1;~Atharva_Kulkarni1;~Abhishek_Vijayakumar1;~Haofei_Yu1;~Hinrich_Schuetze3;~Kemal_Oflazer1;~David_R_Mortensen1", "aff": "LMU Munich;University of Oxford;University of Cambridge;Carnegie Mellon University;Carnegie Mellon University;Indian Institute of Technology, Delhi;Carnegie Mellon University;School of Computer Science, Carnegie Mellon University;School of Computer Science, Carnegie Mellon University;Apple;Center for Information and Language Processing;Carnegie Mellon University;Carnegie Mellon University", "aff_domain": "lmu.de;ox.ac.uk;cam.ac.uk;cmu.edu;cmu.edu;iitd.ac.in;cmu.edu;cs.cmu.edu;cs.cmu.edu;apple.com;lmu.de;cmu.edu;cmu.edu", "position": "PhD student;PhD student;MS student;MS student;PhD student;Predoctoral Researcher;MS student;MS student;MS student;Intern;Full Professor;Professor;Systems Scientist", "bibtex": "@inproceedings{\nweissweiler2023counting,\ntitle={Counting the Bugs in Chat{GPT}'s Wugs: A Multilingual Investigation into the Morphological Capabilities of a Large Language Model},\nauthor={Leonie Weissweiler and Valentin Hofmann and Anjali Kantharuban and Anna Cai and Ritam Dutt and Amey Hengle and Anubha Kabra and Atharva Kulkarni and Abhishek Vijayakumar and Haofei Yu and Hinrich Schuetze and Kemal Oflazer and David R Mortensen},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=bkGVmCE3UJ}\n}", 
"github": "", "project": "", "reviewers": "hkP1;hyZR;oAs1", "site": "https://openreview.net/forum?id=bkGVmCE3UJ", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;3;4", "excitement": "3;3;3", "reproducibility": "1;3;3", "correctness": "3;4;4", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.0, "reproducibility_avg": 2.3333333333333335, "correctness_avg": 3.6666666666666665, "replies_avg": 10, "authors#_avg": 13, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;;;;;;;;0000-0002-3927-6851", "linkedin": ";;;;ritam-dutt/;amey-hengle;anubha-kabra/;;abhishek-vijayakumar/;%E6%98%8A%E9%A3%9E-%E4%BA%8E-a04247188/;;;davidrmortensen/", "aff_unique_index": "0;1;2;3;3;4;3;3;3;5;6;3;3", "aff_unique_norm": "Ludwig Maximilian University of Munich;University of Oxford;University of Cambridge;Carnegie Mellon University;Indian Institute of Technology Delhi;Apple;Center for Information and Language Processing", "aff_unique_dep": ";;;;;Apple Inc.;", "aff_unique_url": "https://www.lmu.de;https://www.ox.ac.uk;https://www.cam.ac.uk;https://www.cmu.edu;https://www.iitdelhi.ac.in;https://www.apple.com;", "aff_unique_abbr": "LMU;Oxford;Cambridge;CMU;IIT Delhi;Apple;", "aff_campus_unique_index": "0;2;3;4;4", "aff_campus_unique": "Munich;;Cambridge;Delhi;Pittsburgh", "aff_country_unique_index": "0;1;1;2;2;3;2;2;2;2;2;2", "aff_country_unique": "Germany;United Kingdom;United States;India;" }, { "id": "bmeKrAzRqz", "title": "Pre-Trained Language Models Augmented with Synthetic Scanpaths for Natural Language Understanding", "track": "main", "status": "Short Main", "tldr": "", "abstract": "Human gaze data offer cognitive information that reflects natural language comprehension. Indeed, augmenting language models with human scanpaths has proven beneficial for a range of NLP tasks, including language understanding. However, the applicability of this approach is hampered because the abundance of text corpora is contrasted by a scarcity of gaze data. Although models for the generation of human-like scanpaths during reading have been developed, the potential of synthetic gaze data across NLP tasks remains largely unexplored. We develop a model that integrates synthetic scanpath generation with a scanpath-augmented language model, eliminating the need for human gaze data. Since the model's error gradient can be propagated throughout all parts of the model, the scanpath generator can be fine-tuned to downstream tasks. We find that the proposed model not only outperforms the underlying language model, but achieves a performance that is comparable to a language model augmented with real human gaze data. 
Our code is publicly available.", "keywords": "Human gaze data;synthetic scanpaths;gaze-augmented language model;natural language understanding;transformer;deep neural networks", "primary_area": "", "supplementary_material": "", "author": "Shuwen Deng;Paul Prasse;David Robert Reich;Tobias Scheffer;Lena Ann J\u00e4ger", "authorids": "~Shuwen_Deng1;~Paul_Prasse1;~David_Robert_Reich1;~Tobias_Scheffer1;~Lena_Ann_J\u00e4ger1", "gender": "F;;M;;F", "homepage": "https://www.uni-potsdam.de/de/cs-ml/staff/shuwen-deng;https://www.uni-potsdam.de/de/cs-ml/staff/phd/prasse;https://david.reich.ai;https://www.uni-potsdam.de/en/cs-ml/staff/contacts/scheffer;https://www.cl.uzh.ch/en/research-groups/digital-linguistics.html", "dblp": ";116/3028;321/1783.html;s/TobiasScheffer;198/0994.html", "google_scholar": "https://scholar.google.de/citations?user=O0JlpeEAAAAJ;https://scholar.google.de/citations?user=qAbXPJQAAAAJ;Tc-NKJgAAAAJ;UjV0M9QAAAAJ;3vfyy40AAAAJ", "or_profile": "~Shuwen_Deng1;~Paul_Prasse1;~David_Robert_Reich1;~Tobias_Scheffer1;~Lena_Ann_J\u00e4ger1", "aff": "Universit\u00e4t Potsdam;Universit\u00e4t Potsdam;Universit\u00e4t Potsdam;Universit\u00e4t Potsdam;Universit\u00e4t Potsdam", "aff_domain": "uni-potsdam.de;uni-potsdam.de;uni-potsdam.de;uni-potsdam.de;uni-potsdam.de", "position": "PhD student;Postdoc;PhD student;Full Professor;Principal Researcher", "bibtex": "@inproceedings{\ndeng2023pretrained,\ntitle={Pre-Trained Language Models Augmented with Synthetic Scanpaths for Natural Language Understanding},\nauthor={Shuwen Deng and Paul Prasse and David Robert Reich and Tobias Scheffer and Lena Ann J{\\\"a}ger},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=bmeKrAzRqz}\n}", "github": "", "project": "", "reviewers": "biQN;ER3y;3vtZ", "site": "https://openreview.net/forum?id=bmeKrAzRqz", "pdf_size": 0, "rating": "5;5;5", "confidence": "3;4;4", "excitement": "4;3;3", "reproducibility": "4;4;4", "correctness": "4;4;3", "rating_avg": 5.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.0, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-0185-2825;0000-0003-1842-3645;0000-0002-3524-3788;0000-0003-4405-7925;0000-0001-9018-9713", "linkedin": ";;;tobiasscheffer/;https://ch.linkedin.com/company/digital-linguistics-uzh", "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "University of Potsdam", "aff_unique_dep": "", "aff_unique_url": "https://www.uni-potsdam.de", "aff_unique_abbr": "UP", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "Germany" }, { "id": "bpArUWbkUF", "title": "Argue with Me Tersely: Towards Sentence-Level Counter-Argument Generation", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Counter-argument generation\u2014a captivating area in computational linguistics\u2014seeks to craft statements that offer opposing views. While most research has ventured into paragraph-level generation, sentence-level counter-argument generation beckons with its unique constraints and brevity-focused challenges. Furthermore, the diverse nature of counter-arguments poses challenges for evaluating model performance solely based on n-gram-based metrics. 
In this paper, we present the ArgTersely benchmark for sentence-level counter-argument generation, drawing from a manually annotated dataset from the ChangeMyView debate forum. We also propose Arg-LlaMA for generating high-quality counter-argument. For better evaluation, we trained a BERT-based evaluator Arg-Judge with human preference data. We conducted comparative experiments involving various baselines such as LlaMA, Alpaca, GPT-3, and others. The results show the competitiveness of our proposed framework and evaluator in counter-argument generation tasks. Code and data are available at https://github.com/amazingljy1206/ArgTersely.", "keywords": "Counter-Argument Generation;ArgTersely;Arg-LLaMA;Arg-Judge", "primary_area": "", "supplementary_material": "", "author": "Jiayu Lin;Rong Ye;Meng Han;Qi Zhang;Ruofei Lai;Xinyu Zhang;Zhao Cao;Xuanjing Huang;zhongyu wei", "authorids": "~Jiayu_Lin2;~Rong_Ye1;~Meng_Han5;~Qi_Zhang8;~Ruofei_Lai1;~Xinyu_Zhang6;~Zhao_Cao1;~Xuanjing_Huang1;~zhongyu_wei1", "gender": "M;F;F;M;M;M;M;F;M", "homepage": "https://github.com/amazingljy1206;https://reneeye.github.io/;;http://qizhang.info;;https://scholar.google.com/citations?hl=en&user=W_WZEQEAAAAJ;http://caozhao.hw;https://xuanjing-huang.github.io/;http://www.sdspeople.fudan.edu.cn/zywei/", "dblp": ";84/5795.html;;52/323-1;301/9182;https://dblp.uni-trier.de/pid/58/4582;69/8078;05/6735-1;31/10489", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;UV4u5UQAAAAJ;https://scholar.google.com/citations?hl=zh-CN;XfqR3yYAAAAJ;;https://scholar.google.com/citations?hl=en;aJmTPaoAAAAJ;RGsMgZA4H78C;AjLDxxgAAAAJ", "or_profile": "~Jiayu_Lin2;~Rong_Ye1;~Meng_Han5;~Qi_Zhang8;~Ruofei_Lai1;~Xinyu_Zhang6;~Zhao_Cao1;~Xuanjing_Huang1;~zhongyu_wei1", "aff": "Fudan University;ByteDance;;Fudan University;;Huawei Technologies Ltd.;Huawei Technologies Ltd.;Fudan University;Fudan University", "aff_domain": "fudan.edu.cn;bytedance.com;;fudan.edu.cn;;huawei.com;huawei.com;fudan.edu.cn;fudan.edu.cn", "position": "MS student;Researcher;;Full Professor;;Principal Researcher;Principal Researcher;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nlin2023argue,\ntitle={Argue with Me Tersely: Towards Sentence-Level Counter-Argument Generation},\nauthor={Jiayu Lin and Rong Ye and Meng Han and Qi Zhang and Ruofei Lai and Xinyu Zhang and Zhao Cao and Xuanjing Huang and zhongyu wei},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=bpArUWbkUF}\n}", "github": "", "project": "", "reviewers": "C3k2;mjoU;ccTv", "site": "https://openreview.net/forum?id=bpArUWbkUF", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;4", "excitement": "4;4;4", "reproducibility": "4;3;4", "correctness": "4;4;3", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 4.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 9, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;0000-0002-6829-4522;0000-0002-4214-7858;0000-0001-9197-9426;", "linkedin": ";;;;;;;;", "aff_unique_index": "0;1;0;2;2;0;0", "aff_unique_norm": "Fudan University;ByteDance;Huawei", "aff_unique_dep": ";;Huawei Technologies", "aff_unique_url": "https://www.fudan.edu.cn;https://www.bytedance.com;https://www.huawei.com", "aff_unique_abbr": "Fudan;ByteDance;Huawei", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { 
"id": "bqaW5sGZOq", "title": "Revisiting Machine Translation for Cross-lingual Classification", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Machine Translation (MT) has been widely used for cross-lingual classification, either by translating the test set into English and running inference with a monolingual model (translate-test), or translating the training set into the target languages and finetuning a multilingual model (translate-train). However, most research in the area focuses on the multilingual models rather than the MT component. We show that, by using a stronger MT system and mitigating the mismatch between training on original text and running inference on machine translated text, translate-test can do substantially better than previously assumed. The optimal approach, however, is highly task dependent, as we identify various sources of cross-lingual transfer gap that affect different tasks and approaches differently. Our work calls into question the dominance of multilingual models for cross-lingual classification, and prompts to pay more attention to MT-based baselines.", "keywords": "multilinguality;cross-lingual classification;machine translation", "primary_area": "", "supplementary_material": "", "author": "Mikel Artetxe;Vedanuj Goswami;Shruti Bhosale;Angela Fan;Luke Zettlemoyer", "authorids": "~Mikel_Artetxe1;~Vedanuj_Goswami1;~Shruti_Bhosale1;~Angela_Fan2;~Luke_Zettlemoyer1", "gender": "M;M;;;M", "homepage": "http://www.mikelartetxe.com;https://vedanuj.github.io/;https://ai.facebook.com/people/shruti-bhosale/;;https://www.cs.washington.edu/people/faculty/lsz/", "dblp": "168/0354;156/5885;136/9081;192/1872;21/6793", "google_scholar": "N5InzP8AAAAJ;bh08FeIAAAAJ;69JJbWoAAAAJ;TLZR9zgAAAAJ;https://scholar.google.com.tw/citations?user=UjpbO6IAAAAJ", "or_profile": "~Mikel_Artetxe1;~Vedanuj_Goswami1;~Shruti_Bhosale1;~Angela_Fan2;~Luke_Zettlemoyer1", "aff": "Facebook AI Research;;Meta Facebook;Meta Facebook;Meta", "aff_domain": "fb.com;;fb.com;facebook.com;meta.com", "position": "Research Scientist;;Research Engineer;Research Engineer;Researcher", "bibtex": "@inproceedings{\nartetxe2023revisiting,\ntitle={Revisiting Machine Translation for Cross-lingual Classification},\nauthor={Mikel Artetxe and Vedanuj Goswami and Shruti Bhosale and Angela Fan and Luke Zettlemoyer},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=bqaW5sGZOq}\n}", "github": "", "project": "", "reviewers": "chth;S3YL;p9fP", "site": "https://openreview.net/forum?id=bqaW5sGZOq", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;4;3", "excitement": "4;4;4", "reproducibility": "5;4;4", "correctness": "4;5;4", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 4.0, "reproducibility_avg": 4.333333333333333, "correctness_avg": 4.333333333333333, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;", "linkedin": "artetxem;;shrutibhosale/;;luke-zettlemoyer-a0109b226/", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Meta", "aff_unique_dep": "Facebook AI Research", "aff_unique_url": "https://research.facebook.com", "aff_unique_abbr": "FAIR", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "bt9Ho2FMxd", "title": "Unmasking the Hidden Meaning: Bridging Implicit and Explicit Hate Speech Embedding Representations", "track": "main", 
"status": "Short Findings", "tldr": "", "abstract": "Research on automatic hate speech (HS) detection has mainly focused on identifying explicit forms of hateful expressions on user-generated content. Recently, a few works have started to investigate methods to address more implicit and subtle abusive content. However, despite these efforts, automated systems still struggle to correctly recognize implicit and more veiled forms of HS. As these systems heavily rely on proper textual representations for classification, it is crucial to investigate the differences in embedding implicit and explicit messages. Our contribution to address this challenging task is fourfold. First, we present a comparative analysis of transformer-based models, evaluating their performance across five datasets containing implicit HS messages. Second, we examine the embedding representations of implicit messages across different targets, gaining insight into how veiled cases are encoded. Third, we compare and link explicit and implicit hateful messages across these datasets through their targets, enforcing the relation between explicitness and implicitness and obtaining more meaningful embedding representations. Lastly, we show how these newer representation maintains high performance on HS labels, while improving classification in borderline cases.", "keywords": "Hate Speech Detection;Implicit Hate Embeddings", "primary_area": "", "supplementary_material": "", "author": "Nicol\u00e1s Benjam\u00edn Ocampo;Elena Cabrio;Serena Villata", "authorids": "~Nicol\u00e1s_Benjam\u00edn_Ocampo1;~Elena_Cabrio1;~Serena_Villata1", "gender": "M;F;F", "homepage": "https://www.nicolasbenjaminocampo.com/;https://www-sop.inria.fr/members/Elena.Cabrio/;http://www.i3s.unice.fr/~villata/", "dblp": "346/1892;35/7561;84/5009", "google_scholar": "9kMJiKcAAAAJ;hEP0YzwAAAAJ;", "or_profile": "~Nicol\u00e1s_Benjam\u00edn_Ocampo1;~Elena_Cabrio1;~Serena_Villata1", "aff": "Universit\u00e9 C\u00f4te d'Azur;Universit\u00e9 C\u00f4te d'Azur;CNRS", "aff_domain": "univ-cotedazur.fr;univ-cotedazur.fr;cnrs.fr", "position": "PhD student;Full Professor;Researcher", "bibtex": "@inproceedings{\nocampo2023unmasking,\ntitle={Unmasking the Hidden Meaning: Bridging Implicit and Explicit Hate Speech Embedding Representations},\nauthor={Nicol\u00e1s Benjam\u00edn Ocampo and Elena Cabrio and Serena Villata},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=bt9Ho2FMxd}\n}", "github": "", "project": "", "reviewers": "QeTe;UB9Q;TtZW", "site": "https://openreview.net/forum?id=bt9Ho2FMxd", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;5;4", "excitement": "3;3;3", "reproducibility": "3;4;4", "correctness": "3;4;3", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 3.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0009-0001-0077-4626;0000-0001-7124-8300;", "linkedin": "nicolasbenjaminocampo/;;", "aff_unique_index": "0;0;1", "aff_unique_norm": "Universit\u00e9 C\u00f4te d'Azur;Centre National de la Recherche Scientifique", "aff_unique_dep": ";", "aff_unique_url": "https://www.univ-cotedazur.fr;https://www.cnrs.fr", "aff_unique_abbr": "UCA;CNRS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "France" }, { "id": "bvl3p6JUlv", "title": "Mitigating Biases in Hate 
Speech Detection from A Causal Perspective", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Nowadays, many hate speech detectors are built to automatically detect hateful content. However, their training sets are sometimes skewed towards certain stereotypes (e.g., race or religion-related). As a result, the detectors are prone to depend on some shortcuts for predictions. Previous works mainly focus on token-level analysis and heavily rely on human experts' annotations to identify spurious correlations, which is not only costly but also incapable of discovering higher-level artifacts. In this work, we use grammar induction to find grammar patterns for hate speech and analyze this phenomenon from a causal perspective. Concretely, we categorize and verify different biases based on their spuriousness and influence on the model prediction. Then, we propose two mitigation approaches including Multi-Task Intervention and Data-Specific Intervention based on these confounders. Experiments conducted on 9 hate speech datasets demonstrate the effectiveness of our approaches.", "keywords": "Hate speech detection;Causal inference;Bias mitigation", "primary_area": "", "supplementary_material": "", "author": "Zhehao Zhang;Jiaao Chen;Diyi Yang", "authorids": "~Zhehao_Zhang1;~Jiaao_Chen2;~Diyi_Yang2", "gender": "M;M;F", "homepage": "https://zzh-sjtu.github.io/zhehaozhang.github.io/;https://cs.stanford.edu/people/jiaaoc/;https://cs.stanford.edu/~diyiy/", "dblp": "223/7963.html;230/3663;70/11145", "google_scholar": "QG-BAGwAAAAJ;Pi9IVvUAAAAJ;j9jhYqQAAAAJ", "or_profile": "~Zhehao_Zhang1;~Jiaao_Chen2;~Diyi_Yang2", "aff": "Shanghai Jiaotong University;Georgia Institute of Technology;Stanford University", "aff_domain": "sjtu.edu.cn;gatech.edu;stanford.edu", "position": "Undergrad student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nzhang2023mitigating,\ntitle={Mitigating Biases in Hate Speech Detection from A Causal Perspective},\nauthor={Zhehao Zhang and Jiaao Chen and Diyi Yang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=bvl3p6JUlv}\n}", "github": "", "project": "", "reviewers": "CfXf;W9Qi;5jqy", "site": "https://openreview.net/forum?id=bvl3p6JUlv", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;3", "excitement": "4;4;4", "reproducibility": "4;5;4", "correctness": "3;3;3", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 4.0, "reproducibility_avg": 4.333333333333333, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": ";;", "aff_unique_index": "0;1;2", "aff_unique_norm": "Shanghai Jiao Tong University;Georgia Institute of Technology;Stanford University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.sjtu.edu.cn;https://www.gatech.edu;https://www.stanford.edu", "aff_unique_abbr": "SJTU;Georgia Tech;Stanford", "aff_campus_unique_index": "1", "aff_campus_unique": ";Stanford", "aff_country_unique_index": "0;1;1", "aff_country_unique": "China;United States" }, { "id": "bxFwIn0wZ0", "title": "Enabling Large Language Models to Generate Text with Citations", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Large language models (LLMs) have emerged as a widely-used tool for information seeking, but their generated outputs are prone to hallucination. 
In this work, our aim is to allow LLMs to generate text with citations, improving their factual correctness and verifiability. Existing work mainly relies on commercial search engines and human evaluation, making it challenging to reproduce and compare different modeling approaches. We propose ALCE, the first benchmark for Automatic LLMs\u2019 Citation Evaluation. ALCE collects a diverse set of questions and retrieval corpora and requires building end-to-end systems to retrieve supporting evidence and generate answers with citations. We develop automatic metrics along three dimensions\u2014fluency, correctness, and citation quality\u2014and demonstrate their strong correlation with human judgements. Our experiments with state-of-the-art LLMs and novel prompting strategies show that current systems have considerable room for improvement\u2014For example, on the ELI5 dataset, even the best models lack complete citation support 50% of the time. Our analyses further highlight promising future directions, including developing better retrievers, advancing long-context LLMs, and improving the ability to synthesize information from multiple sources.", "keywords": "large language model;llm;citation;attribution;qa;evaluation;benchmark", "primary_area": "", "supplementary_material": "", "author": "Tianyu Gao;Howard Yen;Jiatong Yu;Danqi Chen", "authorids": "~Tianyu_Gao1;~Howard_Yen1;~Jiatong_Yu1;~Danqi_Chen1", "gender": "M;M;F;F", "homepage": "https://gaotianyu.xyz/about/;https://howard-yen.github.io;https://www.cs.princeton.edu/~jiatongy/;https://www.cs.princeton.edu/~danqic/", "dblp": "207/8893-1.html;348/5988.html;;87/7949", "google_scholar": "il-F8YYAAAAJ;8rJOrBEAAAAJ;;sVR8ktkAAAAJ", "or_profile": "~Tianyu_Gao1;~Howard_Yen1;~Jiatong_Yu1;~Danqi_Chen1", "aff": "Princeton University;Princeton University;Princeton University;Princeton University", "aff_domain": "princeton.edu;princeton.edu;princeton.edu;cs.princeton.edu", "position": "PhD student;Undergrad student;Undergrad student;Assistant Professor", "bibtex": "@inproceedings{\ngao2023enabling,\ntitle={Enabling Large Language Models to Generate Text with Citations},\nauthor={Tianyu Gao and Howard Yen and Jiatong Yu and Danqi Chen},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=bxFwIn0wZ0}\n}", "github": "", "project": "", "reviewers": "jGRf;D8zW;CvfW", "site": "https://openreview.net/forum?id=bxFwIn0wZ0", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;3;3", "excitement": "4;4;3", "reproducibility": "4;3;4", "correctness": "4;4;3", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.6666666666666665, "replies_avg": 9, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-5178-0866;;;", "linkedin": ";;;", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Princeton University", "aff_unique_dep": "", "aff_unique_url": "https://www.princeton.edu", "aff_unique_abbr": "Princeton", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "bxltAqTJe2", "title": "$\\textit{From Chaos to Clarity}$: Claim Normalization to Empower Fact-Checking", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "With the rise of social media, users are exposed to many misleading claims. 
However, the pervasive noise inherent in these posts presents a challenge in identifying precise and prominent claims that require verification. Extracting the important claims from such posts is arduous and time-consuming, yet it is an underexplored problem. Here, we aim to bridge this gap. We introduce a novel task, Claim Normalization (aka ClaimNorm), which aims to decompose complex and noisy social media posts into more straightforward and understandable forms, termed normalized claims. We propose CACN, a pioneering approach that leverages chain-of-thought and claim check-worthiness estimation, mimicking human reasoning processes, to comprehend intricate claims. Moreover, we capitalize on the in-context learning capabilities of large language models to provide guidance and to improve claim normalization. To evaluate the effectiveness of our proposed model, we meticulously compile a comprehensive real-world dataset, CLAN, comprising more than 6k instances of social media posts alongside their respective normalized claims. Our experiments demonstrate that CACN outperforms several baselines across various evaluation measures. Finally, our rigorous error analysis validates CACN\u2019s capabilities and pitfalls.", "keywords": "Claim Normalization;Social Media;Claims;Misinformation", "primary_area": "", "supplementary_material": "", "author": "Megha Sundriyal;Tanmoy Chakraborty;Preslav Nakov", "authorids": "~Megha_Sundriyal1;~Tanmoy_Chakraborty2;~Preslav_Nakov2", "gender": "F;M;M", "homepage": ";http://tanmoychak.com;https://mbzuai.ac.ae/study/faculty/preslav-nakov/", "dblp": "284/1031;65/2136-2.html;https://dblp.uni-trier.de/pid/19/1947", "google_scholar": "vbmdVSAAAAAJ;https://scholar.google.co.in/citations?user=C5S9JnIAAAAJ;DfXsKZ4AAAAJ", "or_profile": "~Megha_Sundriyal1;~Tanmoy_Chakraborty2;~Preslav_Nakov2", "aff": "Indraprastha Institute of Information Technology, Delhi;Indian Institute of Technology, Delhi;Mohamed bin Zayed University of Artificial Intelligence", "aff_domain": "iiitd.ac.in;iitd.ac.in;mbzuai.ac.ae", "position": "PhD student;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nsundriyal2023textitfrom,\ntitle={\\${\\textbackslash}textit\\{From Chaos to Clarity\\}\\$: Claim Normalization to Empower Fact-Checking},\nauthor={Megha Sundriyal and Tanmoy Chakraborty and Preslav Nakov},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=bxltAqTJe2}\n}", "github": "", "project": "", "reviewers": "H3iM;uYfo;GSfB", "site": "https://openreview.net/forum?id=bxltAqTJe2", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;4;4", "excitement": "4;2;4", "reproducibility": "4;3;5", "correctness": "4;3;4", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.0, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-2268-0137;0000-0002-0210-0369;0000-0002-3600-1510", "linkedin": "sundriyalmegha/;tanmoy-chakraborty-89553324/;preslavnakov/", "aff_unique_index": "0;1;2", "aff_unique_norm": "Indraprastha Institute of Information Technology;Indian Institute of Technology Delhi;Mohamed bin Zayed University of Artificial Intelligence", "aff_unique_dep": ";;", "aff_unique_url": "http://www.iiitd.ac.in;https://www.iitdelhi.ac.in;https://mbzuai.ac.ae", "aff_unique_abbr": "IIIT-D;IIT Delhi;MBZUAI", "aff_campus_unique_index": "0;0", 
"aff_campus_unique": "Delhi;", "aff_country_unique_index": "0;0;1", "aff_country_unique": "India;United Arab Emirates" }, { "id": "bxsrykzSnq", "title": "HaluEval: A Large-Scale Hallucination Evaluation Benchmark for Large Language Models", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Large language models (LLMs), such as ChatGPT, are prone to generate hallucinations, i.e., content that conflicts with the source or cannot be verified by the factual knowledge. To understand what types of content and to which extent LLMs are apt to hallucinate, we introduce the Hallucination Evaluation for Large Language Models (HaluEval) benchmark, a large collection of \ngenerated and human-annotated hallucinated samples for evaluating the performance of LLMs in recognizing hallucination. To generate these samples, we propose a ChatGPT-based two-step framework, i.e., sampling-then-filtering. \nBesides, we also hire some human labelers to annotate the hallucinations in ChatGPT responses. The empirical results suggest that ChatGPT is likely to generate hallucinated content in specific topics by fabricating unverifiable information (i.e., about $19.5\\%$ user queries).\nMoreover, existing LLMs face great challenges in recognizing the hallucinations in texts. While, our experiments also prove that the hallucination recognition can be improved by providing external knowledge or adding reasoning steps.", "keywords": "Large Language Models;Hallucination", "primary_area": "", "supplementary_material": "", "author": "Junyi Li;Xiaoxue Cheng;Xin Zhao;Jian-Yun Nie;Ji-Rong Wen", "authorids": "~Junyi_Li4;~Xiaoxue_Cheng1;~Xin_Zhao10;~Jian-Yun_Nie1;~Ji-Rong_Wen1", "gender": "M;;M;M;M", "homepage": "http://lijunyi.tech;https://xiaoxue-xx.github.io/;https://gsai.ruc.edu.cn/addons/teacher/index/info.html?user_id=5&ruccode=20140041&ln=cn;http://rali.iro.umontreal.ca/nie-site/jian-yun-nie-en/;https://gsai.ruc.edu.cn/english/jrwen", "dblp": ";138/4231;https://dblp.uni-trier.de/pid/52/8700.html;n/JianYunNie;w/JRWen", "google_scholar": "zeWrn-4AAAAJ;;JNhNacoAAAAJ;W7uYg0UAAAAJ;tbxCHJgAAAAJ", "or_profile": "~Junyi_Li4;~Xiaoxue_Cheng1;~Xin_Zhao10;~Jian-Yun_Nie1;~Ji-Rong_Wen1", "aff": "Renmin University of China;Tianjin University;Renmin University of China;University of Montreal;Renmin University of China", "aff_domain": "ruc.edu.cn;tju.edu.cn;ruc.edu.cn;umontreal.ca;ruc.edu.cn", "position": "PhD student;Undergrad student;Full Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nli2023halueval,\ntitle={HaluEval: A Large-Scale Hallucination Evaluation Benchmark for Large Language Models},\nauthor={Junyi Li and Xiaoxue Cheng and Xin Zhao and Jian-Yun Nie and Ji-Rong Wen},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=bxsrykzSnq}\n}", "github": "", "project": "", "reviewers": "ANev;XvWz;WrMd", "site": "https://openreview.net/forum?id=bxsrykzSnq", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;4;3", "excitement": "3;4;3", "reproducibility": "4;4;3", "correctness": "4;5;4", "rating_avg": 5.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 4.333333333333333, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0002-8333-6196;;0000-0002-9777-9676", "linkedin": ";;;;", "aff_unique_index": "0;1;0;2;0", "aff_unique_norm": "Renmin University of China;Tianjin 
University;University of Montreal", "aff_unique_dep": ";;", "aff_unique_url": "http://www.ruc.edu.cn;http://www.tju.edu.cn;https://www.umontreal.ca", "aff_unique_abbr": "RUC;TJU;UM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;0", "aff_country_unique": "China;Canada" }, { "id": "c0utj9Q4YY", "title": "Toward Joint Language Modeling for Speech Units and Text", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Speech and text are two major forms of human language. The research community has been focusing on mapping speech to text or vice versa for many years. However, in the field of language modeling, very little effort has been made to model them jointly. In light of this, we explore joint language modeling for speech units and text. Specifically, we compare different speech tokenizers to transform continuous speech signals into discrete units and use different methods to construct mixed speech-text data. We introduce automatic metrics to evaluate how well the joint LM mixes speech and text. We also fine-tune the LM on downstream spoken language understanding (SLU) tasks with different modalities (speech or text) and test its performance to assess the model's learning of shared representations. Our results show that by mixing speech units and text with our proposed mixing techniques, the joint LM improves over a speech-only baseline on SLU tasks and shows zero-shot cross-modal transferability.", "keywords": "Language modeling;Speech processing;Spoken Language Understanding", "primary_area": "", "supplementary_material": "", "author": "Ju-Chieh Chou;Chung-Ming Chien;Wei-Ning Hsu;Karen Livescu;Arun Babu;Alexis Conneau;Alexei Baevski;Michael Auli", "authorids": "~Ju-Chieh_Chou1;~Chung-Ming_Chien1;~Wei-Ning_Hsu2;~Karen_Livescu1;~Arun_Babu1;~Alexis_Conneau1;~Alexei_Baevski1;~Michael_Auli1", "gender": "M;M;;;M;;;", "homepage": "https://home.ttic.edu/~jcchou/;https://cmchien.ttic.edu/;;;https://scholar.google.co.uk/citations?user=oJfoTakAAAAJ&hl=en;;;", "dblp": ";277/6333;;;;;;", "google_scholar": "Ip8Z8uMAAAAJ;R3Wh6vUAAAAJ;;;https://scholar.google.co.uk/citations?user=oJfoTakAAAAJ;;;", "or_profile": "~Ju-Chieh_Chou1;~Chung-Ming_Chien1;~Wei-Ning_Hsu2;~Karen_Livescu1;~Arun_Babu1;~Alexis_Conneau1;~Alexei_Baevski1;~Michael_Auli1", "aff": "Toyota Technological Institute at Chicago;Meta Facebook;;;;;;", "aff_domain": "ttic.edu;meta.com;;;;;;", "position": "PhD student;Intern;;;;;;", "bibtex": "@inproceedings{\nchou2023toward,\ntitle={Toward Joint Language Modeling for Speech Units and Text},\nauthor={Ju-Chieh Chou and Chung-Ming Chien and Wei-Ning Hsu and Karen Livescu and Arun Babu and Alexis Conneau and Alexei Baevski and Michael Auli},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=c0utj9Q4YY}\n}", "github": "", "project": "", "reviewers": "oo6z;HZX6;rsNT", "site": "https://openreview.net/forum?id=c0utj9Q4YY", "pdf_size": 0, "rating": "3;3;3", "confidence": "2;3;5", "excitement": "3;2;2", "reproducibility": "4;4;4", "correctness": "3;3;3", "rating_avg": 3.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 2.3333333333333335, "reproducibility_avg": 4.0, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 8, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;;;", "linkedin": ";chungmingchien;;;;;;", "aff_unique_index": "0;1", "aff_unique_norm": "Toyota Technological Institute at Chicago;Meta", 
"aff_unique_dep": ";Meta Platforms, Inc.", "aff_unique_url": "https://www.tti-chicago.org;https://meta.com", "aff_unique_abbr": "TTI Chicago;Meta", "aff_campus_unique_index": "0", "aff_campus_unique": "Chicago;", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "c27QqxALfo", "title": "MM-Reasoner: A Multi-Modal Knowledge-Aware Framework for Knowledge-Based Visual Question Answering", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Thanks to the strong reasoning capabilities of Large Language Models (LLMs), recent approaches to knowledge-based visual question answering (KVQA) utilize LLMs with a global caption of an input image to answer a question. However, these approaches may miss key visual information that is not captured by the caption. Moreover, they cannot fully utilize the visual information required to answer the question. To address these issues, we introduce a new framework called Multi-Modal Knowledge-Aware Reasoner (MM-Reasoner) for KVQA. MM-Reasoner first utilizes a set of vision APIs, such as dense captioners, object detectors, and OCR, to extract detailed information from the image in textual format. Then, it prompts an LLM to extract query-specific knowledge from the extracted textual information to provide a rich representation that contains external knowledge, commonsense, explicit supporting facts, and rationales required for reasoning. Finally, the knowledge, query, and visual input are used to fine-tune a Vision-Language Model (VLM). At test time, MM-Reasoner uses the potential answers predicted by the VLM to iteratively update and optimize the prompt, refining its answer. Empirical studies show that MM-Reasoner achieves state-of-the-art performance on several KVQA datasets.", "keywords": "Vision-Language Models;Large Language Models;Visual Question Answering", "primary_area": "", "supplementary_material": "", "author": "MAHMOUD KHADEMI;Ziyi Yang;Felipe Vieira Frujeri;Chenguang Zhu", "authorids": "~MAHMOUD_KHADEMI2;~Ziyi_Yang1;~Felipe_Vieira_Frujeri1;~Chenguang_Zhu1", "gender": "M;M;;M", "homepage": "https://www.microsoft.com/en-us/research/people/mkhademi/;;;", "dblp": ";;;48/7536-1.html", "google_scholar": "x7Ddt3oAAAAJ;JkyLIM0AAAAJ;wy0FA1cAAAAJ;1b2kKWoAAAAJ", "or_profile": "~MAHMOUD_KHADEMI2;~Ziyi_Yang1;~Felipe_Vieira_Frujeri1;~Chenguang_Zhu1", "aff": "University of British Columbia;Microsoft;;Zoom", "aff_domain": "ubc.ca;microsoft.com;;zoom.us", "position": "MS student;Principal Researcher;;Principal Researcher", "bibtex": "@inproceedings{\nkhademi2023mmreasoner,\ntitle={{MM}-Reasoner: A Multi-Modal Knowledge-Aware Framework for Knowledge-Based Visual Question Answering},\nauthor={MAHMOUD KHADEMI and Ziyi Yang and Felipe Vieira Frujeri and Chenguang Zhu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=c27QqxALfo}\n}", "github": "", "project": "", "reviewers": "tUjL;XP8j;wKib", "site": "https://openreview.net/forum?id=c27QqxALfo", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;3;4", "excitement": "4;4;4", "reproducibility": "3;3;3", "correctness": "3;4;3", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 4.0, "reproducibility_avg": 3.0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;", "linkedin": ";ziyi-yang;;", "aff_unique_index": "0;1;2", "aff_unique_norm": "University of British 
Columbia;Microsoft;Zoom Video Communications Inc.", "aff_unique_dep": ";Microsoft Corporation;", "aff_unique_url": "https://www.ubc.ca;https://www.microsoft.com;https://zoom.us", "aff_unique_abbr": "UBC;Microsoft;Zoom", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1", "aff_country_unique": "Canada;United States" }, { "id": "c2xBtTNceS", "title": "Inverse Reinforcement Learning for Text Summarization", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "We introduce inverse reinforcement learning (IRL) as an effective paradigm for training abstractive summarization models, imitating human summarization behaviors. Our IRL model estimates the reward function using a suite of important sub-rewards for summarization and concurrently optimizes the policy network. Experimental results across datasets in different domains (CNN/DailyMail and WikiHow) and various model sizes (BART-base and BART-large) demonstrate the superiority of our proposed IRL model for summarization over MLE and RL baselines. The resulting summaries exhibit greater similarity to human-crafted gold references, outperforming MLE and RL baselines on metrics such as ROUGE, coverage, novelty, compression ratio, factuality, and human evaluations.", "keywords": "Abstractive Summarization;Inverse Reinforcement Learning;Reward Function Optimization", "primary_area": "", "supplementary_material": "", "author": "Yu Fu;Deyi Xiong;Yue Dong", "authorids": "~Yu_Fu2;~Deyi_Xiong2;~Yue_Dong2", "gender": "M;M;F", "homepage": "https://github.com/FYYFU;https://dyxiong.github.io;https://yuedong.us/", "dblp": ";55/6548;84/486", "google_scholar": "hhnq8CUAAAAJ;QPLO3myO5PkC;https://scholar.google.ca/citations?user=WYkn4loAAAAJ", "or_profile": "~Yu_Fu2;~Deyi_Xiong2;~Yue_Dong2", "aff": "Tianjin University;Tianjin University;McGill University", "aff_domain": "tju.edu.cn;tju.edu.cn;mcgill.ca", "position": "MS student;Full Professor;PhD student", "bibtex": "@inproceedings{\nfu2023inverse,\ntitle={Inverse Reinforcement Learning for Text Summarization},\nauthor={Yu Fu and Deyi Xiong and Yue Dong},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=c2xBtTNceS}\n}", "github": "", "project": "", "reviewers": "D2vy;k2JP;B2Fc", "site": "https://openreview.net/forum?id=c2xBtTNceS", "pdf_size": 0, "rating": "2;2;2", "confidence": "4;4;3", "excitement": "3;2;3", "reproducibility": "3;4;3", "correctness": "3;2;3", "rating_avg": 2.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 2.6666666666666665, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 2.6666666666666665, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-2353-5038;", "linkedin": ";;", "aff_unique_index": "0;0;1", "aff_unique_norm": "Tianjin University;McGill University", "aff_unique_dep": ";", "aff_unique_url": "http://www.tju.edu.cn;https://www.mcgill.ca", "aff_unique_abbr": "TJU;McGill", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1", "aff_country_unique": "China;Canada" }, { "id": "cBhzqp8WlV", "title": "Advancements in Arabic Grammatical Error Detection and Correction: An Empirical Investigation", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Grammatical error correction (GEC) is a well-explored problem in English with many existing models and datasets. 
However, research on GEC in morphologically rich languages has been limited due to challenges such as data scarcity and language complexity.\nIn this paper, we present the first results on Arabic GEC using two newly developed Transformer-based pretrained sequence-to-sequence models. We also define the task of multi-class Arabic grammatical error detection (GED) and present the first results on multi-class Arabic GED. We show that using GED information as auxiliary input in GEC models improves GEC performance across three datasets spanning different genres. Moreover, we also investigate the use of contextual morphological preprocessing in aiding GEC systems.\nOur models achieve SOTA results on two Arabic GEC shared task datasets and establish a strong benchmark on a recently created dataset. We make our code, data, and pretrained models publicly available.", "keywords": "Grammatical Error Correction;Grammatical Error Detection;Arabic", "primary_area": "", "supplementary_material": "", "author": "Bashar Alhafni;Go Inoue;Christian Khairallah;Nizar Habash", "authorids": "~Bashar_Alhafni1;~Go_Inoue1;~Christian_Khairallah1;~Nizar_Habash1", "gender": "M;;M;M", "homepage": "https://basharalhafni.com/;https://go-inoue.github.io/;;https://www.nizarhabash.com/", "dblp": "234/6160.html;204/1153.html;;34/1998", "google_scholar": "DFkVNJwAAAAJ;h7prKx0AAAAJ;9ICGiFQAAAAJ;DGb-sBwAAAAJ", "or_profile": "~Bashar_Alhafni1;~Go_Inoue1;~Christian_Khairallah1;~Nizar_Habash1", "aff": "New York University;Mohamed bin Zayed University of Artificial Intelligence;New York University;New York University Abu Dhabi", "aff_domain": "nyu.edu;mbzuai.ac.ae;nyu.edu;nyu.edu", "position": "PhD student;PhD student;Researcher;Full Professor", "bibtex": "@inproceedings{\nalhafni2023advancements,\ntitle={Advancements in Arabic Grammatical Error Detection and Correction: An Empirical Investigation},\nauthor={Bashar Alhafni and Go Inoue and Christian Khairallah and Nizar Habash},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=cBhzqp8WlV}\n}", "github": "", "project": "", "reviewers": "XEzF;2Jvn;51Ai", "site": "https://openreview.net/forum?id=cBhzqp8WlV", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;5;4", "excitement": "4;4;3", "reproducibility": "5;3;3", "correctness": "4;4;3", "rating_avg": 5.0, "confidence_avg": 4.333333333333333, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0000-0002-1831-3457", "linkedin": "balhafni;;;nizar-habash-5792a46/", "aff_unique_index": "0;1;0;0", "aff_unique_norm": "New York University;Mohamed bin Zayed University of Artificial Intelligence", "aff_unique_dep": ";", "aff_unique_url": "https://www.nyu.edu;https://mbzuai.ac.ae", "aff_unique_abbr": "NYU;MBZUAI", "aff_campus_unique_index": "1", "aff_campus_unique": ";Abu Dhabi", "aff_country_unique_index": "0;1;0;1", "aff_country_unique": "United States;United Arab Emirates" }, { "id": "cCJGuKJYG8", "title": "Towards Reliable Misinformation Mitigation: Generalization, Uncertainty, and GPT-4", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Misinformation poses a critical societal challenge, and current approaches have yet to produce an effective solution. 
We propose focusing on generalization, uncertainty, and how to leverage recent large language models, in order to create more practical tools to evaluate information veracity in contexts where perfect classification is impossible. We first demonstrate that GPT-4 can outperform prior methods in multiple settings and languages. Next, we explore generalization, revealing that GPT-4 and RoBERTa-large exhibit differences in failure modes. Third, we propose techniques to handle uncertainty that can detect impossible examples and strongly improve outcomes. We also discuss results on other language models, temperature, prompting, versioning, explainability, and web retrieval, each one providing practical insights and directions for future research. Finally, we publish the LIAR-New dataset with novel paired English and French misinformation data and Possibility labels that indicate if there is sufficient context for veracity evaluation. Overall, this research lays the groundwork for future tools that can drive real-world progress to combat misinformation.", "keywords": "misinformation;LLM;GPT-4;uncertainty quantification;generalization", "primary_area": "", "supplementary_material": "", "author": "Kellin Pelrine;Anne Imouza;Camille Thibault;Meilina Reksoprodjo;Caleb Alexander Gupta;Joel Christoph;Jean-Fran\u00e7ois Godbout;Reihaneh Rabbany", "authorids": "~Kellin_Pelrine1;~Anne_Imouza1;~Camille_Thibault1;~Meilina_Reksoprodjo1;~Caleb_Alexander_Gupta1;~Joel_Christoph1;~Jean-Fran\u00e7ois_Godbout1;~Reihaneh_Rabbany1", "gender": ";F;F;F;M;M;M;F", "homepage": "https://kellinpelrine.github.io/;;;https://meilinar.github.io/;https://caleb.guptafamilyri.com/;https://www.eui.eu/people?id=joel-christoph-1;https://jf-godbout.github.io/;http://www.reirab.com/", "dblp": "281/0602;;;;;;213/9094;94/9024", "google_scholar": "_s2HT_0AAAAJ;;r0mfGz0AAAAJ;y18A1xcAAAAJ;;;SsVx064AAAAJ;https://scholar.google.ca/citations?user=Foh_c-QAAAAJ", "or_profile": "~Kellin_Pelrine1;~Anne_Imouza1;~Camille_Thibault1;~Meilina_Reksoprodjo1;~Caleb_Alexander_Gupta1;~Joel_Christoph1;~Jean-Fran\u00e7ois_Godbout1;~Reihaneh_Rabbany1", "aff": "McGill University;McGill University, McGill University;Universit\u00e9 de Montr\u00e9al;Eindhoven University of Technology;University of Pennsylvania;European University Institute;Universit\u00e9 de Montr\u00e9al;Montreal Institute for Learning Algorithms, University of Montreal, University of Montreal", "aff_domain": "mcgill.ca;mail.mcgill.ca;umontreal.ca;tue.nl;upenn.edu;eui.eu;umontreal.ca;mila.umontreal.ca", "position": "PhD student;PhD student;Undergrad student;MS student;Undergrad student;PhD student;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\npelrine2023towards,\ntitle={Towards Reliable Misinformation Mitigation: Generalization, Uncertainty, and {GPT}-4},\nauthor={Kellin Pelrine and Anne Imouza and Camille Thibault and Meilina Reksoprodjo and Caleb Alexander Gupta and Joel Christoph and Jean-Fran{\\c{c}}ois Godbout and Reihaneh Rabbany},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=cCJGuKJYG8}\n}", "github": "", "project": "", "reviewers": "FWg4;H7JA;nBWy", "site": "https://openreview.net/forum?id=cCJGuKJYG8", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;5;3", "excitement": "3;5;4", "reproducibility": "5;3;5", "correctness": "2;5;4", "rating_avg": 5.0, "confidence_avg": 4.0, "excitement_avg": 4.0, "reproducibility_avg": 4.333333333333333, "correctness_avg": 3.6666666666666665, 
"replies_avg": 11, "authors#_avg": 8, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0000-0003-4445-4074;;;;", "linkedin": "kellin-pelrine/;anne-imz-a7a21a1a5/;camille-thibault-2b9929220?lipi=urn%3Ali%3Apage%3Ad_flagship3_profile_view_base_contact_details%3Bfq5FubvwTQKmytoHAUVB7A%3D%3D;meilina/;caleb-gupta/;joelchristoph/;;", "aff_unique_index": "0;0;1;2;3;4;1;5", "aff_unique_norm": "McGill University;Universit\u00e9 de Montr\u00e9al;Eindhoven University of Technology;University of Pennsylvania;European University Institute;University of Montreal", "aff_unique_dep": ";;;;;Montreal Institute for Learning Algorithms", "aff_unique_url": "https://www.mcgill.ca;https://www.umontreal.ca;https://www.tue.nl;https://www.upenn.edu;https://www.eui.europa.eu;https://www.umontreal.ca", "aff_unique_abbr": "McGill;UdeM;TU/e;UPenn;EUI;UM", "aff_campus_unique_index": "1", "aff_campus_unique": ";Montreal", "aff_country_unique_index": "0;0;0;1;2;3;0;0", "aff_country_unique": "Canada;Netherlands;United States;Italy" }, { "id": "cD9blNBYF2", "title": "DialogQAE: N-to-N Question Answer Pair Extraction from Customer Service Chatlog", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Harvesting question-answer (QA) pairs from customer service chatlog in the wild is an efficient way to enrich the knowledge base for customer service chatbots in the cold start or continuous integration scenarios. Prior work attempts to obtain 1-to-1 QA pairs from growing customer service chatlog, which fails to integrate the incomplete utterances from the dialog context for composite QA retrieval. In this paper, we propose N-to-N QA extraction task in which the derived questions and corresponding answers might be separated across different utterances. We introduce a suite of generative/discriminative tagging based methods with end-to-end and two-stage variants that perform well on 5 customer service datasets and for the first time setup a benchmark for N-to-N DialogQAE with utterance and session level evaluation metrics. With a deep dive into extracted QA pairs, we find that the relations between and inside the QA pairs can be indicators to analyze the dialogue structure, e.g. information seeking, clarification, barge-in and elaboration. 
We also show that the proposed models can adapt to different domains and languages, and reduce the labor cost of knowledge accumulation in the real-world product dialogue platform.", "keywords": "Dialogue QA Extraction", "primary_area": "", "supplementary_material": "", "author": "Xin Zheng;Tianyu Liu;Haoran Meng;Xu Wang;Yufan Jiang;Mengliang Rao;Binghuai Lin;Yunbo Cao;Zhifang Sui", "authorids": "~Xin_Zheng6;~Tianyu_Liu3;~Haoran_Meng1;~Xu_Wang25;~Yufan_Jiang1;~Mengliang_Rao1;~Binghuai_Lin1;~Yunbo_Cao3;~Zhifang_Sui1", "gender": "M;M;M;F;M;M;;M;F", "homepage": ";;https://scholar.google.com/citations?hl=en&pli=1&user=L4VAZVEAAAAJ;;;https://scholar.google.com/citations?user=_qX_eRUAAAAJ&hl=en;;;http://eecs.pku.edu.cn/EN/People/Faculty/Detail/?ID=6024", "dblp": ";134/1099-1;;;;;146/2946;33/4066.html;", "google_scholar": "jTn0fiAAAAAJ;https://scholar.google.com.hk/citations?user=6hHbBwwAAAAJ;L4VAZVEAAAAJ;;95ABG3wAAAAJ;;;nNVDLb4AAAAJ;", "or_profile": "~Xin_Zheng6;~Tianyu_Liu3;~Haoran_Meng1;~Xu_Wang25;~Yufan_Jiang1;~Mengliang_Rao1;~Binghuai_Lin1;~Yunbo_Cao3;~Zhifang_Sui1", "aff": "University of Chinese Academy of Sciences;Tencent Cloud AI (LLM);Peking University;Tencent AI Lab;Tencent Cloud Xiaowei;;Tencent;Tencent;Peking University", "aff_domain": "ucas.ac.cn;tencent.com;pku.edu.cn;tencent.com;tencent.com;;tencent.com;tencent.com;pku.edu.cn", "position": "PhD student;Senior Researcher;MS student;Researcher;Researcher;;Principal Researcher;Principal Researcher;Full Professor", "bibtex": "@inproceedings{\nzheng2023dialogqae,\ntitle={Dialog{QAE}: N-to-N Question Answer Pair Extraction from Customer Service Chatlog},\nauthor={Xin Zheng and Tianyu Liu and Haoran Meng and Xu Wang and Yufan Jiang and Mengliang Rao and Binghuai Lin and Yunbo Cao and Zhifang Sui},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=cD9blNBYF2}\n}", "github": "", "project": "", "reviewers": "psJC;AycG;VBi6", "site": "https://openreview.net/forum?id=cD9blNBYF2", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;3;3", "excitement": "2;3;4", "reproducibility": "3;2;4", "correctness": "3;3;4", "rating_avg": 3.0, "confidence_avg": 3.0, "excitement_avg": 3.0, "reproducibility_avg": 3.0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 9, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;;;;", "linkedin": ";;;;;;;;", "aff_unique_index": "0;1;2;1;1;1;1;2", "aff_unique_norm": "University of Chinese Academy of Sciences;Tencent;Peking University", "aff_unique_dep": ";LLM;", "aff_unique_url": "http://www.ucas.ac.cn;https://cloud.tencent.com;http://www.pku.edu.cn", "aff_unique_abbr": "UCAS;Tencent AI;Peking U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "cFXHe1mW7V", "title": "Can You Follow Me? Testing Situational Understanding for ChatGPT", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Understanding sentence meanings and updating information states appropriately across time---what we call ``situational understanding'' (SU)---is a critical ability for human-like AI agents. SU is essential in particular for chat models, such as ChatGPT, to enable consistent, coherent, and effective dialogue between humans and AI. 
Previous works have identified certain SU limitations in non-chatbot Large Language models (LLMs), but the extent and causes of these limitations are not well understood, and capabilities of current chat-based models in this domain have not been explored. In this work we tackle these questions, proposing a novel synthetic environment for SU testing which allows us to do controlled and systematic testing of SU in chat-oriented models, through assessment of models' ability to track and enumerate environment states. Our environment also allows for close analysis of dynamics of model performance, to better understand underlying causes for performance patterns. We apply our test to ChatGPT, the state-of-the-art chatbot, and find that despite the fundamental simplicity of the task, the model's performance reflects an inability to retain correct environment states across time. Our follow-up analyses suggest that performance degradation is largely because ChatGPT has non-persistent in-context memory (although it can access the full dialogue history) and it is susceptible to hallucinated updates---including updates that artificially inflate accuracies. Our findings suggest overall that ChatGPT is not currently equipped for robust tracking of situation states, and that trust in the impressive dialogue performance of ChatGPT comes with risks. We release the codebase for reproducing our test environment, as well as all prompts and API responses from ChatGPT, at https://github.com/yangalan123/SituationalTesting.", "keywords": "Situational Understanding;Analysis of Models;ChatGPT", "primary_area": "", "supplementary_material": "", "author": "Chenghao Yang;Allyson Ettinger", "authorids": "~Chenghao_Yang1;~Allyson_Ettinger1", "gender": "M;F", "homepage": "https://yangalan123.github.io/;https://aetting.github.io", "dblp": "229/4179;165/0758", "google_scholar": "B28fiOAAAAAJ;", "or_profile": "~Chenghao_Yang1;~Allyson_Ettinger1", "aff": "Google;University of Chicago", "aff_domain": "google.com;uchicago.edu", "position": "Student Researcher;Assistant Professor", "bibtex": "@inproceedings{\nyang2023can,\ntitle={Can You Follow Me? 
Testing Situational Understanding for Chat{GPT}},\nauthor={Chenghao Yang and Allyson Ettinger},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=cFXHe1mW7V}\n}", "github": "", "project": "", "reviewers": "q8HG;rJkr;xaGB", "site": "https://openreview.net/forum?id=cFXHe1mW7V", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;4;4", "excitement": "4;3;3", "reproducibility": "4;2;5", "correctness": "5;4;4", "rating_avg": 5.0, "confidence_avg": 4.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 4.333333333333333, "replies_avg": 11, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";", "linkedin": "chenghao-yang-857b51178/;", "aff_unique_index": "0;1", "aff_unique_norm": "Google;University of Chicago", "aff_unique_dep": "Google;", "aff_unique_url": "https://www.google.com;https://www.uchicago.edu", "aff_unique_abbr": "Google;UChicago", "aff_campus_unique_index": "0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "cFsfgaEMlw", "title": "4 and 7-bit Labeling for Projective and Non-Projective Dependency Trees", "track": "main", "status": "Short Main", "tldr": "", "abstract": "We introduce an encoding for parsing as sequence labeling that can represent any projective dependency tree as a sequence of 4-bit labels, one per word. The bits in each word's label represent (1) whether it is a right or left dependent, (2) whether it is the outermost (left/right) dependent of its parent, (3) whether it has any left children and (4) whether it has any right children. We show that this provides an injective mapping from trees to labels that can be encoded and decoded in linear time. We then define a 7-bit extension that represents an extra plane of arcs, extending the coverage to almost full non-projectivity (over 99.9% empirical arc coverage). 
Results on a set of diverse treebanks show that our 7-bit encoding obtains substantial accuracy gains over the previously best-performing sequence labeling encodings.", "keywords": "parsing;dependency parsing;sequence labeling;parsing as sequence labeling;encoding", "primary_area": "", "supplementary_material": "", "author": "Carlos G\u00f3mez-Rodr\u00edguez;Diego Roca;David Vilares", "authorids": "~Carlos_G\u00f3mez-Rodr\u00edguez1;~Diego_Roca1;~David_Vilares1", "gender": "M;M;M", "homepage": "http://www.grupolys.org/~cgomezr;https://github.com/Polifack;https://www.grupolys.org/~david.vilares/", "dblp": "95/3319;;128/2835", "google_scholar": "BeNhySQAAAAJ;;https://scholar.google.es/citations?user=3KHyYsMAAAAJ", "or_profile": "~Carlos_G\u00f3mez-Rodr\u00edguez1;~Diego_Roca1;~David_Vilares1", "aff": "Universidade da Coru\u00f1a;Universidad de La Coru\u00f1a;Universidade da Coru\u00f1a", "aff_domain": "udc.es;udc.es;udc.es", "position": "Full Professor;MS student;Assistant Professor", "bibtex": "@inproceedings{\ng{\\'o}mez-rodr{\\'\\i}guez2023,\ntitle={4 and 7-bit Labeling for Projective and Non-Projective Dependency Trees},\nauthor={Carlos G{\\'o}mez-Rodr{\\'\\i}guez and Diego Roca and David Vilares},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=cFsfgaEMlw}\n}", "github": "", "project": "", "reviewers": "JxDe;U1jy;SmyL", "site": "https://openreview.net/forum?id=cFsfgaEMlw", "pdf_size": 0, "rating": "5;5;5", "confidence": "3;4;4", "excitement": "4;4;4", "reproducibility": "3;5;4", "correctness": "4;4;5", "rating_avg": 5.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 4.0, "reproducibility_avg": 4.0, "correctness_avg": 4.333333333333333, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0003-0752-8812;;0000-0002-1295-3840", "linkedin": ";;", "aff_unique_index": "0;0;0", "aff_unique_norm": "University of A Coru\u00f1a", "aff_unique_dep": "", "aff_unique_url": "https://www.udc.es", "aff_unique_abbr": "UDC", "aff_campus_unique_index": "0;0", "aff_campus_unique": "A Coru\u00f1a;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Spain" }, { "id": "cI6oe7i5mj", "title": "GPT Deciphering Fedspeak: Quantifying Dissent Among Hawks and Doves", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "Markets and policymakers around the world hang on the consequential monetary policy decisions made by the Federal Open Market Committee (FOMC). Publicly available textual documentation of their meetings provides insight into members\u2019 attitudes about the economy. We use GPT-4 to quantify dissent among members on the topic of inflation. We find that transcripts and minutes reflect the diversity of member views about the macroeconomic outlook in a way that is lost or omitted from the public statements. In fact, diverging opinions that shed light upon the committee\u2019s \"true\" attitudes are almost entirely omitted from the final statements. Hence, we argue that forecasting FOMC sentiment based solely on statements will not sufficiently reflect dissent among the hawks and doves.", "keywords": "FOMC;Fed;GPT;LLM;dissent", "primary_area": "", "supplementary_material": "", "author": "Denis Peskoff;Adam Visokay;Sander V Schulhoff;Benjamin Wachspress;alan blinder;Brandon M. 
Stewart", "authorids": "~Denis_Peskoff1;~Adam_Visokay1;~Sander_V_Schulhoff1;~Benjamin_Wachspress1;~alan_blinder1;~Brandon_M._Stewart2", "gender": "M;M;;;M;", "homepage": "https://avisokay.github.io/;https://trigaten.github.io;;;https://scholar.princeton.edu/bstewart;https://denis.ai", "dblp": ";;;;136/8689;203/9242", "google_scholar": ";;;;miPgny8AAAAJ;MzAxJT8AAAAJ", "or_profile": "~Adam_Visokay1;~Sander_V_Schulhoff1;~Benjamin_Wachspress1;~alan_blinder1;~Brandon_Stewart1;~Denis_Peskov1", "aff": "University of Washington;University of Maryland, College Park;Princeton University;;Princeton University;Princeton University", "aff_domain": "u.washington.edu;umd.edu;princeton.edu;;princeton.edu;princeton.edu", "position": "PhD student;Undergrad student;Undergrad student;;Associate Professor;Postdoc", "bibtex": "@inproceedings{\npeskoff2023gpt,\ntitle={{GPT} Deciphering Fedspeak: Quantifying Dissent Among Hawks and Doves},\nauthor={Denis Peskoff and Adam Visokay and Sander V Schulhoff and Benjamin Wachspress and alan blinder and Brandon M. Stewart},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=cI6oe7i5mj}\n}", "github": "", "project": "", "reviewers": "gKYa;TSKC;dn37", "site": "https://openreview.net/forum?id=cI6oe7i5mj", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;3", "excitement": "4;4;3", "reproducibility": "4;4;2", "correctness": "3;4;2", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.0, "replies_avg": 10, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;0000-0002-7657-3089;", "linkedin": "avisokay/;;benwachspress/;;;", "aff_unique_index": "0;1;2;2;2", "aff_unique_norm": "University of Washington;University of Maryland;Princeton University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.washington.edu;https://www/umd.edu;https://www.princeton.edu", "aff_unique_abbr": "UW;UMD;Princeton", "aff_campus_unique_index": "1", "aff_campus_unique": ";College Park", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "cMMxJxzYkZ", "title": "Harnessing the Power of Large Language Models for Empathetic Response Generation: Empirical Investigations and Improvements", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Empathetic dialogue is an indispensable part of building harmonious social relationships and contributes to the development of a helpful AI. Previous approaches are mainly based on fine small-scale language models. With the advent of ChatGPT, the application effect of large language models (LLMs) in this field has attracted great attention. This work empirically investigates the performance of LLMs in generating empathetic responses and proposes three improvement methods of semantically similar in-context learning, two-stage interactive generation, and combination with the knowledge base. Extensive experiments show that LLMs can significantly benefit from our proposed methods and is able to achieve state-of-the-art performance in both automatic and human evaluations. 
Additionally, we explore the possibility of GPT-4 simulating human evaluators.", "keywords": "Empathetic Response Generation;Large Language Models;ChatGPT", "primary_area": "", "supplementary_material": "", "author": "Yushan Qian;Weinan Zhang;Ting Liu", "authorids": "~Yushan_Qian1;~Weinan_Zhang4;~Ting_Liu2", "gender": ";M;M", "homepage": ";https://homepage.hit.edu.cn/zhangweinan;", "dblp": ";28/10261-3;52/5150-1", "google_scholar": ";DBLdEf4AAAAJ;zyMJ1V0AAAAJ", "or_profile": "~Yushan_Qian1;~Weinan_Zhang4;~Ting_Liu2", "aff": ";Harbin Institute of Technology;Harbin Institute of Technology", "aff_domain": ";hit.edu.cn;hit.edu.cn", "position": ";Full Professor;Full Professor", "bibtex": "@inproceedings{\nqian2023harnessing,\ntitle={Harnessing the Power of Large Language Models for Empathetic Response Generation: Empirical Investigations and Improvements},\nauthor={Yushan Qian and Weinan Zhang and Ting Liu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=cMMxJxzYkZ}\n}", "github": "", "project": "", "reviewers": "UTmi;emhy;h8tQ;LJBU;5Xfj", "site": "https://openreview.net/forum?id=cMMxJxzYkZ", "pdf_size": 0, "rating": "5;5;5;5;5", "confidence": "4;3;4;5;4", "excitement": "3;3;3;4;3", "reproducibility": "4;4;5;4;4", "correctness": "4;4;4;4;3", "rating_avg": 5.0, "confidence_avg": 4.0, "excitement_avg": 3.2, "reproducibility_avg": 4.2, "correctness_avg": 3.8, "replies_avg": 16, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": ";;", "aff_unique_index": "0;0", "aff_unique_norm": "Harbin Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "http://www.hit.edu.cn/", "aff_unique_abbr": "HIT", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Harbin", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "id": "cOxL1tlSQw", "title": "Dynamic Stance: Modeling Discussions by Labeling the Interactions", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Stance detection is an increasingly popular task that has been mainly modeled as a static task, by assigning the expressed attitude of a text toward a given topic. Such a framing presents limitations, with trained systems showing poor generalization capabilities and being strongly topic-dependent. In this work, we propose modeling stance as a dynamic task, by focusing on the interactions between a message and their replies. For this purpose, we present a new annotation scheme that enables the categorization of all kinds of textual interactions. As a result, we have created a new corpus, the Dynamic Stance Corpus (DySC), consisting of three datasets in two middle-resourced languages: Catalan and Dutch. Our data analysis further supports our modeling decisions, empirically showing differences between the annotation of stance in static and dynamic contexts. 
We fine-tuned a series of monolingual and multilingual models on DySC, showing portability across topics and languages.", "keywords": "stance;corpus;multi-lingual;cross-topic", "primary_area": "", "supplementary_material": "", "author": "Blanca Calvo Figueras;Irene Baucells;Tommaso Caselli", "authorids": "~Blanca_Calvo_Figueras1;~Irene_Baucells1;~Tommaso_Caselli1", "gender": "F;F;Not Specified", "homepage": ";;https://research.rug.nl/en/persons/tommaso-caselli", "dblp": "270/2029.html;356/8512;85/7943", "google_scholar": "https://scholar.google.es/citations?user=lTEWac0AAAAJ;OsyhV-wAAAAJ;fxQvP_QAAAAJ", "or_profile": "~Blanca_Calvo_Figueras1;~Irene_Baucells1;~Tommaso_Caselli1", "aff": "Barcelona Supercomputing Center;Universidad del Pa\u00eds Vasco;University of Groningen", "aff_domain": "bsc.es;ehu.es;rug.nl", "position": "Researcher;MS student;Assistant Professor", "bibtex": "@inproceedings{\nfigueras2023dynamic,\ntitle={Dynamic Stance: Modeling Discussions by Labeling the Interactions},\nauthor={Blanca Calvo Figueras and Irene Baucells and Tommaso Caselli},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=cOxL1tlSQw}\n}", "github": "", "project": "", "reviewers": "m8Kb;Q591;yDAp", "site": "https://openreview.net/forum?id=cOxL1tlSQw", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;4;4", "excitement": "4;3;3", "reproducibility": "4;5;3", "correctness": "4;3;3", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.0, "correctness_avg": 3.3333333333333335, "replies_avg": 10, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0001-6939-3576;0009-0007-9069-6548;0000-0003-2936-0256", "linkedin": ";irene-baucells-de-la-pe%C3%B1a-967289238/;", "aff_unique_index": "0;1;2", "aff_unique_norm": "Barcelona Supercomputing Center;Universidad del Pa\u00eds Vasco;University of Groningen", "aff_unique_dep": ";;", "aff_unique_url": "https://www.bsc.es;https://www.ehu.eus/en;https://www.rug.nl", "aff_unique_abbr": "BSC;UPV/EHU;RUG", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1", "aff_country_unique": "Spain;Netherlands" }, { "id": "cVAHzYRVUO", "title": "Inductive Relation Inference of Knowledge Graph Enhanced by Ontology Information", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "The inductive inference of the knowledge graph aims to complete the potential relations between the new unknown entities in the graph. Most existing methods are based on entity-independent features such as graph structure information and relationship information to inference. However, the neighborhood of these new entities is often too sparse to obtain enough information to build these features effectively. In this work, we propose a knowledge graph inductive inference method that fuses ontology information. Based on the enclosing subgraph, we bring in feature embeddings of concepts corresponding to entities to learn the semantic information implicit in the ontology. Considering that the ontology information of entities may be missing, we build a type constraint regular loss to explicitly model the semantic connections between entities and concepts, and thus capture the missing concepts of entities. 
Experimental results show that our approach significantly outperforms large language models like ChatGPT on two benchmark datasets, YAGO21K-610 and DB45K-165, and improves the MRR metrics by 15.4% and 44.1%, respectively, when compared with the state-of-the-art methods.", "keywords": "Ontology Information;Inductive Relation Inference;Knowledge Graph", "primary_area": "", "supplementary_material": "", "author": "Wentao Zhou;Jun Zhao;Tao Gui;Qi Zhang;Xuanjing Huang", "authorids": "~Wentao_Zhou1;~Jun_Zhao5;~Tao_Gui1;~Qi_Zhang8;~Xuanjing_Huang1", "gender": ";M;M;M;F", "homepage": "https://chasers-of-qs.github.io/WentaoZhou.github.io/;;;http://qizhang.info;https://xuanjing-huang.github.io/", "dblp": ";;135/6973;52/323-1;05/6735-1", "google_scholar": ";https://scholar.google.com/citations?hl=zh-CN;;XfqR3yYAAAAJ;RGsMgZA4H78C", "or_profile": "~Wentao_Zhou1;~Jun_Zhao5;~Tao_Gui1;~Qi_Zhang8;~Xuanjing_Huang1", "aff": "Fudan University;Fudan University;Fudan University;Fudan University;Fudan University", "aff_domain": "fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn", "position": "MS student;PhD student;Assistant Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nzhou2023inductive,\ntitle={Inductive Relation Inference of Knowledge Graph Enhanced by Ontology Information},\nauthor={Wentao Zhou and Jun Zhao and Tao Gui and Qi Zhang and Xuanjing Huang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=cVAHzYRVUO}\n}", "github": "", "project": "", "reviewers": "6Xgf;pao7;TJtS", "site": "https://openreview.net/forum?id=cVAHzYRVUO", "pdf_size": 0, "rating": "2;2;2", "confidence": "4;4;4", "excitement": "4;3;3", "reproducibility": "3;3;3", "correctness": "4;3;4", "rating_avg": 2.0, "confidence_avg": 4.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.0, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;0000-0001-9197-9426", "linkedin": ";;;;", "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Fudan University", "aff_unique_dep": "", "aff_unique_url": "https://www.fudan.edu.cn", "aff_unique_abbr": "Fudan", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "cWw5FfVhvl", "title": "MoT: Memory-of-Thought Enables ChatGPT to Self-Improve", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Large Language Models (LLMs) have shown impressive abilities on various tasks. However, fundamentally improving them depends on high-quality datasets or computationally expensive fine-tuning. On the contrary, humans can easily improve themselves by self-thinking and memory, without external resources. In this paper, we propose a framework, **MoT**, to let the LLM self-improve through **M**emory **o**f **T**houghts, without annotated datasets and parameter updates. Specifically, MoT is divided into two stages: 1. before the test stage, the LLM pre-thinks on the unlabeled dataset and saves the high-confidence thoughts as external memory; 2. During the test stage, given a test question, the LLM recalls relevant memory to help itself reason and answer it. Experimental results show that MoT can help ChatGPT significantly improve its abilities in arithmetic reasoning, commonsense reasoning, factual reasoning, and natural language inference. 
Further analyses show that each component contributes critically to the improvements and MoT can lead to consistent improvements across various CoT methods and LLMs.", "keywords": "LLM;ChatGPT;Self-Improve;Large Language Model;Memory", "primary_area": "", "supplementary_material": "", "author": "Xiaonan Li;Xipeng Qiu", "authorids": "~Xiaonan_Li1;~Xipeng_Qiu1", "gender": "M;M", "homepage": ";https://xpqiu.github.io/", "dblp": "84/6885;69/1395", "google_scholar": "ldEcEjEAAAAJ;Pq4Yp_kAAAAJ", "or_profile": "~Xiaonan_Li1;~Xipeng_Qiu1", "aff": "Fudan University;Fudan University", "aff_domain": "fudan.edu;fudan.edu.cn", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\nli2023mot,\ntitle={MoT: Memory-of-Thought Enables Chat{GPT} to Self-Improve},\nauthor={Xiaonan Li and Xipeng Qiu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=cWw5FfVhvl}\n}", "github": "", "project": "", "reviewers": "E9wd;qXWz;v4Dz", "site": "https://openreview.net/forum?id=cWw5FfVhvl", "pdf_size": 0, "rating": "4;4;4", "confidence": "2;3;4", "excitement": "3;4;4", "reproducibility": "4;2;5", "correctness": "4;4;4", "rating_avg": 4.0, "confidence_avg": 3.0, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0001-7163-5247", "linkedin": ";", "aff_unique_index": "0;0", "aff_unique_norm": "Fudan University", "aff_unique_dep": "", "aff_unique_url": "https://www.fudan.edu.cn", "aff_unique_abbr": "Fudan", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "id": "cgmlfA1sPl", "title": "Late Fusion of Transformers for Sentiment Analysis of Code-Switched Data", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "Code-switching is a common phenomenon in multilingual communities and is often used on social media. However, sentiment analysis of code-switched data is a challenging yet less explored area of research. This paper aims to develop a sentiment analysis system for code-switched data. In this paper, we present a novel approach combining two transformers using logits of their output and feeding them to a neural network for classification. We show the efficacy of our approach using two benchmark datasets, viz., English-Hindi (En-Hi), and English-Spanish (En-Es) availed by Microsoft GLUECoS. 
Our approach results in an F1 score of 73.66% for En-Es and 61.24% for En-Hi, significantly higher than the best model reported for the GLUECoS benchmark dataset.", "keywords": "Code-switched;Transformer;Sentiment Analysis;GLUECoS benchmark dataset;Late Fusion;Neural Network", "primary_area": "", "supplementary_material": "", "author": "Gagan Sharma;R Chinmay;Raksha Sharma", "authorids": "~Gagan_Sharma1;~R_Chinmay1;~Raksha_Sharma1", "gender": "M;M;F", "homepage": "https://gagansh7171.github.io/#/;;https://www.iitr.ac.in/~CSE/Raksha_Sharma", "dblp": ";;46/7472.html", "google_scholar": ";;https://scholar.google.co.in/citations?user=V9oafzsAAAAJ", "or_profile": "~Gagan_Sharma1;~R_Chinmay1;~Raksha_Sharma1", "aff": "Indian Institute of Technology, Roorkee;Indian Institute of Technology, Roorkee;", "aff_domain": "iitr.ac.in;iitr.ac.in;", "position": "Undergrad student;Undergrad student;", "bibtex": "@inproceedings{\nsharma2023late,\ntitle={Late Fusion of Transformers for Sentiment Analysis of Code-Switched Data},\nauthor={Gagan Sharma and R Chinmay and Raksha Sharma},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=cgmlfA1sPl}\n}", "github": "", "project": "", "reviewers": "L6it;FBU8;ZSXs", "site": "https://openreview.net/forum?id=cgmlfA1sPl", "pdf_size": 0, "rating": "3;3;3", "confidence": "5;4;4", "excitement": "4;3;2", "reproducibility": "4;4;4", "correctness": "4;3;2", "rating_avg": 3.0, "confidence_avg": 4.333333333333333, "excitement_avg": 3.0, "reproducibility_avg": 4.0, "correctness_avg": 3.0, "replies_avg": 10, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": "gagan-sharma-206303/;r-chinmay-b79a60192;raksha-sharma-1a216410/", "aff_unique_index": "0;0", "aff_unique_norm": "Indian Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.iitr.ac.in", "aff_unique_abbr": "IIT Roorkee", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Roorkee", "aff_country_unique_index": "0;0", "aff_country_unique": "India" }, { "id": "chCrhE2kl4", "title": "TK-KNN: A Balanced Distance-Based Pseudo Labeling Approach for Semi-Supervised Intent Classification", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "The ability to detect intent in dialogue systems has become increasingly important in modern technology.\nThese systems often generate a large amount of unlabeled data, and manually labeling this data requires substantial human effort.\nSemi-supervised methods attempt to remedy this cost by using a model trained on a few labeled examples and then by assigning pseudo-labels to further a subset of unlabeled examples that has a model prediction confidence higher than a certain threshold. However, one particularly perilous consequence of these methods is the risk of picking an imbalanced set of examples across classes, which could lead to poor labels. In the present work, we describe Top-K K-Nearest Neighbor (TK-KNN), which uses a more robust pseudo-labeling approach based on distance in the embedding space while maintaining a balanced set of pseudo-labeled examples across classes through a ranking-based approach. 
Experiments on several datasets show that TK-KNN outperforms existing models, particularly when labeled data is scarce on popular datasets such as CLINC150 and Banking77.", "keywords": "semi-supervised learning;intent classification;contrastive learning", "primary_area": "", "supplementary_material": "", "author": "Nicholas Botzer;David Vazquez;Tim Weninger;Issam H. Laradji", "authorids": "~Nicholas_Botzer1;~David_Vazquez1;~Tim_Weninger1;~Issam_H._Laradji1", "gender": "M;M;M;M", "homepage": ";http://www.david-vazquez.com;https://www3.nd.edu/~tweninge/;https://issamlaradji.github.io/", "dblp": ";94/8653;73/2015;142/0043", "google_scholar": "5rS0yCoAAAAJ;1jHvtfsAAAAJ;V1js0MUAAAAJ;https://scholar.google.ca/citations?user=8vRS7F0AAAAJ", "or_profile": "~Nicholas_Botzer1;~David_Vazquez1;~Tim_Weninger1;~Issam_H._Laradji1", "aff": "University of Notre Dame;ServiceNow research;University of Notre Dame;ServiceNow", "aff_domain": "nd.edu;servicenow.com;nd.edu;servicenow.com", "position": "PhD student;Researcher;Associate Professor;Researcher", "bibtex": "@inproceedings{\nbotzer2023tkknn,\ntitle={{TK}-{KNN}: A Balanced Distance-Based Pseudo Labeling Approach for Semi-Supervised Intent Classification},\nauthor={Nicholas Botzer and David Vazquez and Tim Weninger and Issam H. Laradji},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=chCrhE2kl4}\n}", "github": "", "project": "", "reviewers": "67QQ;Rv94;Z4is", "site": "https://openreview.net/forum?id=chCrhE2kl4", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;4;4", "excitement": "4;3;3", "reproducibility": "3;4;4", "correctness": "3;2;3", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 2.6666666666666665, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-2845-8158;0000-0003-3164-2615;", "linkedin": ";https://www.linkedin.com/company/david-vazquez/;tim-weninger-b462277b/;issam-laradji-67ba1a99/", "aff_unique_index": "0;1;0;1", "aff_unique_norm": "University of Notre Dame;ServiceNow", "aff_unique_dep": ";research", "aff_unique_url": "https://www.nd.edu;https://www.servicenow.com", "aff_unique_abbr": "Notre Dame;ServiceNow", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "ci6cexmrmD", "title": "DepWiGNN: A Depth-wise Graph Neural Network for Multi-hop Spatial Reasoning in Text", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Spatial reasoning in text plays a crucial role in various real-world applications. Existing approaches for spatial reasoning typically infer spatial relations from pure text, which overlook the gap between natural language and symbolic structures. Graph neural networks (GNNs) have showcased exceptional proficiency in inducing and aggregating symbolic structures. However, classical GNNs face challenges in handling multi-hop spatial reasoning due to the over-smoothing issue, i.e., the performance decreases substantially as the number of graph layers increases. To cope with these challenges, we propose a novel Depth-Wise Graph Neural Network (DepWiGNN). 
Specifically, we design a novel node memory scheme and aggregate the information over the depth dimension instead of the breadth dimension of the graph, which empowers the ability to collect long dependencies without stacking multiple layers. Experimental results on two challenging multi-hop spatial reasoning datasets show that DepWiGNN outperforms existing spatial reasoning methods. The comparisons with the other three GNNs further demonstrate its superiority in capturing long dependency in the graph.", "keywords": "Spatial Reasoning;Tensor Product Representation;Graph Neural Network", "primary_area": "", "supplementary_material": "", "author": "Shuaiyi Li;Yang Deng;Wai Lam", "authorids": "~Shuaiyi_Li1;~Yang_Deng4;~Wai_Lam1", "gender": "M;M;M", "homepage": "https://github.com/Syon-Li;https://dengyang17.github.io/;http://www.se.cuhk.edu.hk/~textmine", "dblp": "293/6684;115/6282-2;48/1707", "google_scholar": "https://scholar.google.com/citations?hl=en;https://scholar.google.com.hk/citations?user=OshWT3UAAAAJ;ewA4NAcAAAAJ", "or_profile": "~Shuaiyi_Li1;~Yang_Deng4;~Wai_Lam1", "aff": "Chinese University of Hong Kong, The Chinese University of Hong Kong;The Chinese University of Hong Kong;The Chinese University of Hong Kong", "aff_domain": "se.cuhk.edu.hk;cuhk.edu.hk;cuhk.edu.hk", "position": "PhD student;PhD student;Professor", "bibtex": "@inproceedings{\nli2023depwignn,\ntitle={DepWi{GNN}: A Depth-wise Graph Neural Network for Multi-hop Spatial Reasoning in Text},\nauthor={Shuaiyi Li and Yang Deng and Wai Lam},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=ci6cexmrmD}\n}", "github": "", "project": "", "reviewers": "b7C1;YL2U;jQ72", "site": "https://openreview.net/forum?id=ci6cexmrmD", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;4", "excitement": "4;3;3", "reproducibility": "4;4;4", "correctness": "4;3;3", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.0, "correctness_avg": 3.3333333333333335, "replies_avg": 10, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0009-0001-7014-5251;;", "linkedin": ";;", "aff_unique_index": "0;0;0", "aff_unique_norm": "Chinese University of Hong Kong", "aff_unique_dep": "", "aff_unique_url": "https://www.cuhk.edu.hk", "aff_unique_abbr": "CUHK", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Hong Kong SAR", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "cjbdRN8Yxy", "title": "Compressing Context to Enhance Inference Efficiency of Large Language Models", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Large language models (LLMs) achieved remarkable performance across various tasks. However, they face challenges in managing long documents and extended conversations, due to significantly increased computational requirements, both in memory and inference time, and potential context truncation when the input exceeds the LLM's fixed context length. This paper proposes a method called \\textit{Selective Context} that enhances the inference efficiency of LLMs by identifying and pruning redundancy in the input context to make the input more compact. We test our approach using common data sources requiring long context processing: arXiv papers, news articles, and long conversations, on tasks of summarisation, question answering, and response generation. 
Experimental results show that Selective Context significantly reduces memory cost and decreases generation latency while maintaining comparable performance compared to that achieved when full context is used. Specifically, we achieve a 50\\% reduction in context cost, resulting in a 36\\% reduction in inference memory usage and a 32\\% reduction in inference time, while observing only a minor drop of .023 in BERTscore and .038 in faithfulness on four downstream applications, indicating that our method strikes a good balance between efficiency and performance.", "keywords": "Large Language Model;Input Compression", "primary_area": "", "supplementary_material": "", "author": "YUCHENG LI;BO DONG;Frank Guerin;Chenghua Lin", "authorids": "~YUCHENG_LI2;~BO_DONG13;~Frank_Guerin3;~Chenghua_Lin1", "gender": "M;M;M;", "homepage": ";https://github.com/JacksonDongg;https://www.surrey.ac.uk/people/frank-guerin;", "dblp": "72/7816-1;;66/1072.html;", "google_scholar": ";;https://scholar.google.co.uk/citations?user=OK8EYAoAAAAJ;", "or_profile": "~YUCHENG_LI2;~BO_DONG13;~Frank_Guerin3;~Chenghua_Lin1", "aff": "University of Surrey;University of Surrey;University of Surrey;", "aff_domain": "surrey.ac.uk;surrey.ac.uk;surrey.ac.uk;", "position": "PhD student;MS student;Lecturer;", "bibtex": "@inproceedings{\nli2023compressing,\ntitle={Compressing Context to Enhance Inference Efficiency of Large Language Models},\nauthor={YUCHENG LI and BO DONG and Frank Guerin and Chenghua Lin},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=cjbdRN8Yxy}\n}", "github": "", "project": "", "reviewers": "6QAt;eKVf;PgsA", "site": "https://openreview.net/forum?id=cjbdRN8Yxy", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;4", "excitement": "3;3;2", "reproducibility": "4;5;4", "correctness": "4;4;2", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 2.6666666666666665, "reproducibility_avg": 4.333333333333333, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;", "linkedin": "yuchengli09/;;;", "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Surrey", "aff_unique_dep": "", "aff_unique_url": "https://www.surrey.ac.uk", "aff_unique_abbr": "Surrey", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United Kingdom" }, { "id": "ckKuQDW2RZ", "title": "Conic10K: A Challenging Math Problem Understanding and Reasoning Dataset", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Mathematical understanding and reasoning are crucial tasks for assessing the capabilities of artificial intelligence (AI). However, existing benchmarks either require just a few steps of reasoning, or only contain a small amount of data in one specific topic, making it hard to analyse AI's behaviour with reference to different problems within a specific topic in detail. In this work, we propose Conic10K, a challenging math problem dataset on conic sections in Chinese senior high school education. Our dataset contains various problems with different reasoning depths, while only the knowledge from conic sections is required. Since the dataset only involves a narrow range of knowledge, it is easy to separately analyse the knowledge a model possesses and the reasoning ability it has. 
For each problem, we provide a high-quality formal representation, the reasoning steps, and the final solution. Experiments show that existing large language models, including GPT-4, exhibit weak performance on complex reasoning. We hope that our findings could inspire more advanced techniques for precise natural language understanding and reasoning. Our dataset and codes are available at https://github.com/whyNLP/Conic10K.", "keywords": "math problem;reasoning", "primary_area": "", "supplementary_material": "", "author": "Haoyi Wu;Wenyang Hui;Yezeng Chen;Weiqi Wu;Kewei Tu;Yi Zhou", "authorids": "~Haoyi_Wu1;~Wenyang_Hui1;~Yezeng_Chen3;~Weiqi_Wu1;~Kewei_Tu1;~Yi_Zhou23", "gender": ";M;M;F;M;", "homepage": "https://github.com/whyNLP;;https://github.com/cyzhh;https://vickywu1022.github.io;https://faculty.sist.shanghaitech.edu.cn/faculty/tukw/;https://eeis.ustc.edu.cn/_t780/2022/0607/c2648a557568/page.htm", "dblp": "158/6931;;;235/2808;22/918;", "google_scholar": "6uxDTuAAAAAJ;;https://scholar.google.com/citations?view_op=list_works;2MhAt7EAAAAJ;5gi3Pm0AAAAJ;", "or_profile": "~Haoyi_Wu1;~Wenyang_Hui1;~Yezeng_Chen3;~Weiqi_Wu1;~Kewei_Tu1;~Yi_Zhou23", "aff": "ShanghaiTech University;ShanghaiTech University;ShanghaiTech University;ShanghaiTech University;ShanghaiTech University;University of Science and Technology of China", "aff_domain": "shanghaitech.edu.cn;shanghaitech.edu.cn;shanghaitech.edu.cn;shanghaitech.edu.cn;shanghaitech.edu.cn;ustc.edu.cn", "position": "PhD student;MS student;MS student;Undergrad student;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nwu2023conick,\ntitle={Conic10K: A Challenging Math Problem Understanding and Reasoning Dataset},\nauthor={Haoyi Wu and Wenyang Hui and Yezeng Chen and Weiqi Wu and Kewei Tu and Yi Zhou},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=ckKuQDW2RZ}\n}", "github": "", "project": "", "reviewers": "4LaT;As6G;WDny", "site": "https://openreview.net/forum?id=ckKuQDW2RZ", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;4", "excitement": "3;3;4", "reproducibility": "4;4;2", "correctness": "3;4;4", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0009-0004-3532-968X;0009-0009-1051-448X;;", "linkedin": ";;;;;", "aff_unique_index": "0;0;0;0;0;1", "aff_unique_norm": "ShanghaiTech University;University of Science and Technology of China", "aff_unique_dep": ";", "aff_unique_url": "https://www.shanghaitech.edu.cn;http://www.ustc.edu.cn", "aff_unique_abbr": "ShanghaiTech;USTC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "clTPP37Rpu", "title": "Beyond Factuality: A Comprehensive Evaluation of Large Language Models as Knowledge Generators", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Large language models (LLMs) outperform information retrieval techniques for downstream knowledge-intensive tasks when being prompted to generate world knowledge. However, community concerns abound regarding the factuality and potential implications of using this uncensored knowledge. 
In light of this, we introduce CONNER, a COmpreheNsive kNowledge Evaluation fRamework, designed to systematically and automatically evaluate generated knowledge from six important perspectives -- Factuality, Relevance, Coherence, Informativeness, Helpfulness and Validity. We conduct an extensive empirical analysis of the generated knowledge from three different types of LLMs on two widely studied knowledge-intensive tasks, i.e., open-domain question answering and knowledge-grounded dialogue. Surprisingly, our study reveals that the factuality of generated knowledge, even if lower, does not significantly hinder downstream tasks. Instead, the relevance and coherence of the outputs are more important than small factual mistakes. Further, we show how to use CONNER to improve knowledge-intensive tasks by designing two strategies: Prompt Engineering and Knowledge Selection. Our evaluation code and LLM-generated knowledge with human annotations will be released to facilitate future research.", "keywords": "Evaluation framework;Knowledge generation;Large language model;", "primary_area": "", "supplementary_material": "", "author": "Liang CHEN;Yang Deng;Yatao Bian;Zeyu Qin;Bingzhe Wu;Tat-Seng Chua;Kam-Fai Wong", "authorids": "~Liang_CHEN15;~Yang_Deng4;~Yatao_Bian1;~Zeyu_Qin1;~Bingzhe_Wu1;~Tat-Seng_Chua2;~Kam-Fai_Wong2", "gender": "M;M;M;M;M;M;M", "homepage": "https://chanliang.github.io/;https://dengyang17.github.io/;https://alan-qin.github.io/;;http://www.se.cuhk.edu.hk/~kfwong;http://www.comp.nus.edu.sg/~chuats/;https://yataobian.com", "dblp": ";115/6282-2;271/5778;207/4843;w/KamFaiWong;;222/2694", "google_scholar": "0iatxnIAAAAJ;https://scholar.google.com.hk/citations?user=OshWT3UAAAAJ;3LXI4-MAAAAJ;_3hgtf8AAAAJ;;https://scholar.google.com.tw/citations?user=Z9DWCBEAAAAJ;oZBTlBkAAAAJ", "or_profile": "~Liang_CHEN15;~Yang_Deng4;~Zeyu_Qin1;~Bingzhe_Wu1;~Kam-Fai_Wong2;~Tat-seng_Chua1;~An_Bian1", "aff": "Chinese University of Hong Kong, The Chinese University of Hong Kong;The Chinese University of Hong Kong;Hong Kong University of Science and Technology;Tencent AI Lab;The Chinese University of Hong Kong;National University of Singapore;Tencent AI Lab", "aff_domain": "se.cuhk.edu.hk;cuhk.edu.hk;ust.hk;tencent.com;cuhk.edu.hk;nus.edu.sg;tencent.com", "position": "PhD student;PhD student;PhD student;Researcher;Full Professor;Full Professor;Senior researcher ", "bibtex": "@inproceedings{\nchen2023beyond,\ntitle={Beyond Factuality: A Comprehensive Evaluation of Large Language Models as Knowledge Generators},\nauthor={Liang CHEN and Yang Deng and Yatao Bian and Zeyu Qin and Bingzhe Wu and Tat-Seng Chua and Kam-Fai Wong},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=clTPP37Rpu}\n}", "github": "", "project": "", "reviewers": "QUWs;2CNk;rAA7", "site": "https://openreview.net/forum?id=clTPP37Rpu", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;4;4", "excitement": "4;4;3", "reproducibility": "4;4;3", "correctness": "4;4;4", "rating_avg": 5.0, "confidence_avg": 4.0, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 4.0, "replies_avg": 10, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0003-1733-7892;;0000-0002-9427-5659;0000-0001-6097-7807;0000-0002-2368-4084", "linkedin": ";;zeyu-qin-546398179/;;;;", "aff_unique_index": "0;0;1;2;0;3;2", "aff_unique_norm": "Chinese University of Hong Kong;Hong Kong University of 
Science and Technology;Tencent;National University of Singapore", "aff_unique_dep": ";;Tencent AI Lab;", "aff_unique_url": "https://www.cuhk.edu.hk;https://www.ust.hk;https://ai.tencent.com;https://www.nus.edu.sg", "aff_unique_abbr": "CUHK;HKUST;Tencent AI Lab;NUS", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;0;0;0;1;0", "aff_country_unique": "China;Singapore" }, { "id": "clxLDVanxO", "title": "ReTAG: Reasoning Aware Table to Analytic Text Generation", "track": "main", "status": "Long Main", "tldr": "", "abstract": "The task of table summarization involves generating text that both succinctly and accurately represents the table or a specific set of highlighted cells within a table. While significant progress has been made in table to text generation techniques, models still mostly generate descriptive summaries, which reiterates the information contained within the table in sentences. Through analysis of popular table to text benchmarks (ToTTo (Parikh et al., 2020 and InfoTabs (Gupta et al., 2020) we observe that in order to generate the ideal summary, multiple types of reasoning is needed coupled with access to knowledge beyond the scope of the table. To address this gap, we propose ReTAG, a table and reasoning aware model that uses vector-quantization to infuse different types of analytical reasoning into the output. ReTAG achieves 2.2%, 2.9% improvement on the PARENT metric in the relevant slice of ToTTo and InfoTabs for the table to text generation task over state of the art baselines. Through human evaluation, we observe that output from ReTAG is upto 12% more faithful and analytical compared to a strong table-aware model. To the best of our knowledge, ReTAG is the first model that can controllably use multiple reasoning methods within a structure-aware sequence to sequence model to surpass state of the art performance in multiple table to text tasks. 
We extend (and open source 35.6K analytical, 55.9k descriptive instances) the ToTTo, InfoTabs datasets with the reasoning categories used in each reference sentences.", "keywords": "Table to Text Generation;Table Understanding;Structured Reasoning", "primary_area": "", "supplementary_material": "", "author": "Deepanway Ghosal;Preksha Nema;Aravindan Raghuveer", "authorids": "~Deepanway_Ghosal1;~Preksha_Nema1;~Aravindan_Raghuveer1", "gender": ";F;M", "homepage": ";;", "dblp": "203/9407;;20/1664", "google_scholar": "https://scholar.google.co.in/citations?user=95YiIWUAAAAJ;https://scholar.google.co.in/citations?user=hmoy8ssAAAAJ;", "or_profile": "~Deepanway_Ghosal1;~Preksha_Nema1;~Aravindan_Raghuveer1", "aff": "Singapore University of Technology and Design;Google;Google", "aff_domain": "sutd.edu.sg;google.com;google.com", "position": "PhD student;Researcher;Principal Researcher", "bibtex": "@inproceedings{\nghosal2023retag,\ntitle={Re{TAG}: Reasoning Aware Table to Analytic Text Generation},\nauthor={Deepanway Ghosal and Preksha Nema and Aravindan Raghuveer},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=clxLDVanxO}\n}", "github": "", "project": "", "reviewers": "fnkP;5oq8;CsWy", "site": "https://openreview.net/forum?id=clxLDVanxO", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;3;4", "excitement": "3;3;4", "reproducibility": "3;3;4", "correctness": "4;4;4", "rating_avg": 3.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": ";;", "aff_unique_index": "0;1;1", "aff_unique_norm": "Singapore University of Technology and Design;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.sutd.edu.sg;https://www.google.com", "aff_unique_abbr": "SUTD;Google", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;1;1", "aff_country_unique": "Singapore;United States" }, { "id": "cmQj1FdsOJ", "title": "Evaluating the Knowledge Base Completion Potential of GPT", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "Structured knowledge bases (KBs) are an asset for search engines and other applications but are inevitably incomplete. \nLanguage models (LMs) have been proposed for unsupervised knowledge base completion (KBC), yet, their ability to do this at scale and with high accuracy remains an open question. \nPrior experimental studies mostly fall short because they only evaluate on popular subjects, or sample already existing facts from KBs. \nIn this work, we perform a careful evaluation of GPT's \npotential to complete the largest public KB: Wikidata. \nWe find that, despite their \nsize and capabilities, \nmodels like GPT-3, ChatGPT and GPT-4 do\nnot achieve fully convincing results on this task. \nNonetheless, it provides solid improvements over earlier approaches with smaller LMs. 
\nIn particular, we show that it is feasible \nto extend Wikidata by 27M facts at 90% precision.", "keywords": "knowledge base completion;knowledge graphs;probing language models;evaluation", "primary_area": "", "supplementary_material": "", "author": "Blerta Veseli;Simon Razniewski;Jan-Christoph Kalo;Gerhard Weikum", "authorids": "~Blerta_Veseli1;~Simon_Razniewski1;~Jan-Christoph_Kalo1;~Gerhard_Weikum1", "gender": "F;;M;M", "homepage": "https://www.mpi-sws.org/people/bveseli/;http://simonrazniewski.com;;http://www.mpi-inf.mpg.de/~weikum/", "dblp": ";48/10142;146/2827;w/GerhardWeikum", "google_scholar": ";;https://scholar.google.de/citations?user=2EE-YUsAAAAJ;https://scholar.google.com.tw/citations?user=vNAD0mAAAAAJ", "or_profile": "~Blerta_Veseli1;~Simon_Razniewski1;~Jan-Christoph_Kalo1;~Gerhard_Weikum1", "aff": "Saarland Informatics Campus, Max-Planck Institute;Bosch Center for AI;VU Amsterdam;Max Planck Institute", "aff_domain": "mpi-inf.mpg.de;bosch.com;vu.nl;", "position": "MS student;Researcher;Postdoc;Full Professor", "bibtex": "@inproceedings{\nveseli2023evaluating,\ntitle={Evaluating the Knowledge Base Completion Potential of {GPT}},\nauthor={Blerta Veseli and Simon Razniewski and Jan-Christoph Kalo and Gerhard Weikum},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=cmQj1FdsOJ}\n}", "github": "", "project": "", "reviewers": "Fktf;5ndf;QRC9", "site": "https://openreview.net/forum?id=cmQj1FdsOJ", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;5;4", "excitement": "3;4;4", "reproducibility": "2;4;4", "correctness": "3;4;4", "rating_avg": 4.0, "confidence_avg": 4.333333333333333, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;", "linkedin": ";;;", "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Max-Planck Institute;Bosch Center for AI;Vrije Universiteit Amsterdam;Max Planck Institute", "aff_unique_dep": "Informatics;Center for AI;;", "aff_unique_url": "https://www.mpi-sws.org;https://www.bosch-ai.com;https://www.vu.nl;https://www.mpiwg-berlin.mpg.de", "aff_unique_abbr": "MPI-SWS;BCAI;VU;MPI", "aff_campus_unique_index": "0;2", "aff_campus_unique": "Saarland;;Amsterdam", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "Germany;Netherlands" }, { "id": "cooAE3hYUC", "title": "BasahaCorpus: An Expanded Linguistic Resource for Readability Assessment in Central Philippine Languages", "track": "main", "status": "Short Main", "tldr": "", "abstract": "Current research on automatic readability assessment (ARA) has focused on improving the performance of models in high-resource languages such as English. In this work, we introduce and release BasahaCorpus as part of an initiative aimed at expanding available corpora and baseline models for readability assessment in lower resource languages in the Philippines. We compiled a corpus of short fictional narratives written in Hiligaynon, Minasbate, Karay-a, and Rinconada\u2014languages belonging to the Central Philippine family tree subgroup\u2014to train ARA models using surface-level, syllable-pattern, and n-gram overlap features. We also propose a new hierarchical cross-lingual modeling approach that takes advantage of a language's placement in the family tree to increase the amount of available training data. 
Our study yields encouraging results that support previous work showcasing the efficacy of cross-lingual models in low-resource settings, as well as similarities in highly informative linguistic features for mutually intelligible languages.", "keywords": "readability assessment;corpus;linguistic resource;cross-lingual;Philippine languages;low-resource NLP", "primary_area": "", "supplementary_material": "", "author": "Joseph Marvin Imperial;Ekaterina Kochmar", "authorids": "~Joseph_Marvin_Imperial1;~Ekaterina_Kochmar2", "gender": "M;", "homepage": "https://www.josephimperial.com;https://ekochmar.github.io/about/", "dblp": "246/4647;140/3465.html", "google_scholar": "irs_5ekAAAAJ;https://scholar.google.co.uk/citations?user=e2HTYnkAAAAJ", "or_profile": "~Joseph_Marvin_Imperial1;~Ekaterina_Kochmar2", "aff": "University of Bath;Mohamed bin Zayed University of Artificial Intelligence", "aff_domain": "bath.ac.uk;mbzuai.ac.ae", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\nimperial2023basahacorpus,\ntitle={BasahaCorpus: An Expanded Linguistic Resource for Readability Assessment in Central Philippine Languages},\nauthor={Joseph Marvin Imperial and Ekaterina Kochmar},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=cooAE3hYUC}\n}", "github": "", "project": "", "reviewers": "PKp6;KDgi;v7Sy", "site": "https://openreview.net/forum?id=cooAE3hYUC", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;4", "excitement": "3;3;3", "reproducibility": "5;3;3", "correctness": "4;3;4", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0003-1073-6129;0000-0003-3328-1374", "linkedin": "joseph-marvin-imperial-9382b9a7/;ekaterina-kochmar-0a655b14/", "aff_unique_index": "0;1", "aff_unique_norm": "University of Bath;Mohamed bin Zayed University of Artificial Intelligence", "aff_unique_dep": ";", "aff_unique_url": "https://www.bath.ac.uk;https://mbzuai.ac.ae", "aff_unique_abbr": "Bath;MBZUAI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "United Kingdom;United Arab Emirates" }, { "id": "crfQrbxWAK", "title": "Schema-adaptable Knowledge Graph Construction", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Conventional Knowledge Graph Construction (KGC) approaches typically follow the static information extraction paradigm with a closed set of pre-defined schema. As a result, such approaches fall short when applied to dynamic scenarios or domains, whereas a new type of knowledge emerges. This necessitates a system that can handle evolving schema automatically to extract information for KGC. To address this need, we propose a new task called schema-adaptable KGC, which aims to continually extract entity, relation, and event based on a dynamically changing schema graph without re-training. We first split and convert existing datasets based on three principles to build a benchmark, i.e., horizontal schema expansion, vertical schema expansion, and hybrid schema expansion; then investigate the schema-adaptable performance of several well-known approaches such as Text2Event, TANL, UIE and GPT-3.5. 
We further propose a simple yet effective baseline dubbed AdaKGC, which contains schema-enriched prefix instructor and schema-conditioned dynamic decoding to better handle evolving schema. Comprehensive experimental results illustrate that AdaKGC can outperform baselines but still have room for improvement. We hope the proposed work can deliver benefits to the community.", "keywords": "Knowledge Graph Construction;Information Extraction", "primary_area": "", "supplementary_material": "", "author": "Hongbin Ye;Honghao Gui;Xin Xu;Xi Chen;Huajun Chen;Ningyu Zhang", "authorids": "~Hongbin_Ye1;~Honghao_Gui1;~Xin_Xu8;~Xi_Chen21;~Huajun_Chen1;~Ningyu_Zhang1", "gender": "M;M;F;M;M;M", "homepage": ";;https://xxupiano.github.io/;;;https://person.zju.edu.cn/en/ningyu", "dblp": "274/3132.html;;66/3874-10;;94/5089;139/4181-1.html", "google_scholar": "IcpPEoQAAAAJ;ekxyQTYAAAAJ;KBdTqoEAAAAJ;https://scholar.google.com/citations?hl=zh-CN;;xQDOPvsAAAAJ", "or_profile": "~Hongbin_Ye1;~Honghao_Gui1;~Xin_Xu8;~Xi_Chen21;~Huajun_Chen1;~Ningyu_Zhang1", "aff": "Zhejiang University of Technology;Zhejiang University;Zhejiang University;Tencent Content and Platform Group;Zhejiang University;Zhejiang University", "aff_domain": "zjut.edu.cn;cn.edu;zju.edu.cn;tencent.com;zju.edu.cn;zju.edu.cn", "position": "PhD student;MS student;MS student;Researcher;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nye2023schemaadaptable,\ntitle={Schema-adaptable Knowledge Graph Construction},\nauthor={Hongbin Ye and Honghao Gui and Xin Xu and Xi Chen and Huajun Chen and Ningyu Zhang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=crfQrbxWAK}\n}", "github": "", "project": "", "reviewers": "n6r2;rg33;hsKx", "site": "https://openreview.net/forum?id=crfQrbxWAK", "pdf_size": 0, "rating": "4;4;4", "confidence": "2;4;3", "excitement": "3;3;4", "reproducibility": "5;4;4", "correctness": "3;3;4", "rating_avg": 4.0, "confidence_avg": 3.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.333333333333333, "correctness_avg": 3.3333333333333335, "replies_avg": 10, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0001-5238-0955;;;0000-0002-1970-0678", "linkedin": ";;xin-xu-800886176/;;;ningyuzhang/", "aff_unique_index": "0;1;1;2;1;1", "aff_unique_norm": "Zhejiang University of Technology;Zhejiang University;Tencent", "aff_unique_dep": ";;Content and Platform Group", "aff_unique_url": "https://www.zjut.edu.cn;https://www.zju.edu.cn;https://www.tencent.com", "aff_unique_abbr": "ZJUT;ZJU;Tencent", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "csBtifBXKo", "title": "Adaptive End-to-End Metric Learning for Zero-Shot Cross-Domain Slot Filling", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Recently slot filling has witnessed great development thanks to deep learning and the availability of large-scale annotated data. However, it poses a critical challenge to handle a novel domain whose samples are never seen during training. The recognition performance might be greatly degraded due to severe domain shifts. Most prior works deal with this problem in a two-pass pipeline manner based on metric learning. 
In practice, these dominant pipeline models may be limited in computational efficiency and generalization capacity because of non-parallel inference and context-free discrete label embeddings. To this end, we re-examine the typical metric-based methods, and propose a new adaptive end-to-end metric learning scheme for the challenging zero-shot slot filling. Considering simplicity, efficiency and generalizability, we present a cascade-style joint learning framework coupled with context-aware soft label representations and slot-level contrastive representation learning to mitigate the data and label shift problems effectively. Extensive experiments on public benchmarks demonstrate the superiority of the proposed approach over a series of competitive baselines.", "keywords": "end-to-end;zero-shot learning;metric learning;slot filling", "primary_area": "", "supplementary_material": "", "author": "Yuanjun Shi;Linzhi Wu;Minglai Shao", "authorids": "~Yuanjun_Shi1;~Linzhi_Wu1;~Minglai_Shao2", "gender": "M;M;M", "homepage": ";;", "dblp": ";;", "google_scholar": "IGWjhDgAAAAJ;es-q3Y8AAAAJ;https://scholar.google.com.sg/citations?user=2FDgNl0AAAAJ", "or_profile": "~Yuanjun_Shi1;~Linzhi_Wu1;~Minglai_Shao2", "aff": "Tianjin University;University of Electronic Science and Technology of China;Tianjin University", "aff_domain": "tju.edu.cn;uestc.edu.cn;tju.edu.cn", "position": "MS student;PhD student;Associate Professor", "bibtex": "@inproceedings{\nshi2023adaptive,\ntitle={Adaptive End-to-End Metric Learning for Zero-Shot Cross-Domain Slot Filling},\nauthor={Yuanjun Shi and Linzhi Wu and Minglai Shao},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=csBtifBXKo}\n}", "github": "", "project": "", "reviewers": "yN6g;MsTt;aDbx", "site": "https://openreview.net/forum?id=csBtifBXKo", "pdf_size": 0, "rating": "5;5;5", "confidence": "2;3;2", "excitement": "4;4;3", "reproducibility": "3;4;4", "correctness": "3;4;4", "rating_avg": 5.0, "confidence_avg": 2.3333333333333335, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.6666666666666665, "replies_avg": 10, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-1496-1756;0000-0003-1830-9797", "linkedin": ";;", "aff_unique_index": "0;1;0", "aff_unique_norm": "Tianjin University;University of Electronic Science and Technology of China", "aff_unique_dep": ";", "aff_unique_url": "http://www.tju.edu.cn;https://www.uestc.edu.cn", "aff_unique_abbr": "TJU;UESTC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "cvRvFj3Pyv", "title": "Empathy Intent Drives Empathy Detection", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Empathy plays an important role in the human dialogue. Detecting the empathetic direction expressed by the user is necessary for empathetic dialogue systems because it is highly relevant to understanding the user's needs. Several studies have shown that empathy intent information improves the ability to response capacity of empathetic dialogue. However, the interaction between empathy detection and empathy intent recognition has not been explored. To this end, we invite 3 experts to manually annotate the healthy empathy detection datasets IEMPATHIZE and TwittEmp with 8 empathy intent labels, and perform joint training for the two tasks. 
Empirical study has shown that the introduction of empathy intent recognition task can improve the accuracy of empathy detection task, and we analyze possible reasons for this improvement. To make joint training of the two tasks more challenging, we propose a novel framework, Cascaded Label Signal Network, which uses the cascaded interactive attention module and the label signal enhancement module to capture feature exchange information between empathy and empathy intent representations. Experimental results show that our framework outperforms all baselines under both settings on the two datasets.", "keywords": "Empathy detection;Empathy intent recognition;Cascaded interactive attention;Label signal enhancement", "primary_area": "", "supplementary_material": "", "author": "Liting Jiang;Di Wu;Bohui Mao;Yanbing Li;Wushour Slamu", "authorids": "~Liting_Jiang1;~Di_Wu29;~Bohui_Mao1;~Yanbing_Li1;~Wushour_Slamu1", "gender": "F;M;M;M;M", "homepage": "https://scholar.google.com/citations?user=4Rj4GHMAAAAJ&hl=zh-CN;https://scholar.google.com/citations?user=cafr9soAAAAJ&hl=en;;https://ieeexplore.ieee.org/author/37089788033;https://ieeexplore.ieee.org/author/37397678900", "dblp": "279/2428;;362/8100;75/4343.html;129/9382", "google_scholar": "4Rj4GHMAAAAJ;cafr9soAAAAJ;;;", "or_profile": "~Liting_Jiang1;~Di_Wu29;~Bohui_Mao1;~Yanbing_Li1;~Wushour_Slamu1", "aff": "Xinjiang University;Xinjiang University;Xinjiang University;Xinjiang University;Xinjiang University", "aff_domain": "xju.edu.cn;xju.edu.cn;xju.edu.cn;xju.edu.cn;xju.edu.cn", "position": "PhD student;PhD student;MS student;Instructor;Full Professor", "bibtex": "@inproceedings{\njiang2023empathy,\ntitle={Empathy Intent Drives Empathy Detection},\nauthor={Liting Jiang and Di Wu and Bohui Mao and Yanbing Li and Wushour Slamu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=cvRvFj3Pyv}\n}", "github": "", "project": "", "reviewers": "v6LG;Xn3k;vC87;D1Wg", "site": "https://openreview.net/forum?id=cvRvFj3Pyv", "pdf_size": 0, "rating": "3;3;3;3", "confidence": "5;4;3;3", "excitement": "4;4;3;4", "reproducibility": "4;3;3;3", "correctness": "4;3;3;4", "rating_avg": 3.0, "confidence_avg": 3.75, "excitement_avg": 3.75, "reproducibility_avg": 3.25, "correctness_avg": 3.5, "replies_avg": 14, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0001-7086-921X;0000-0002-1914-1634;0000-0003-4780-966X;0000-0001-5368-6921;0009-0006-7944-1889", "linkedin": ";;;;", "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Xinjiang University", "aff_unique_dep": "", "aff_unique_url": "http://www.xju.edu.cn", "aff_unique_abbr": "XJU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "cw6v58yo6s", "title": "Baize: An Open-Source Chat Model with Parameter-Efficient Tuning on Self-Chat Data", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Chat models, such as ChatGPT, have shown impressive capabilities and have been rapidly adopted across numerous domains. However, these models are only accessible through a restricted API, creating barriers for new research and progress in the field. We propose a pipeline that can automatically generate a high-quality multi-turn chat corpus by leveraging ChatGPT to engage in a conversation with itself. 
Subsequently, we employ parameter-efficient tuning to enhance LLaMA, an open-source large language model. The resulting model, named Baize, demonstrates good performance in multi-turn dialogues with guardrails that minimize potential risks. Additionally, we propose a new technique called Self-Distill with Feedback, to further improve the performance of the Baize models with feedback from ChatGPT.", "keywords": "large language model;knowledge disillation;data generation;chatbot;chat model;text generation", "primary_area": "", "supplementary_material": "", "author": "Canwen Xu;Daya Guo;Nan Duan;Julian McAuley", "authorids": "~Canwen_Xu1;~Daya_Guo2;~Nan_Duan1;~Julian_McAuley1", "gender": ";M;M;M", "homepage": ";https://guoday.github.io/;https://nanduan.github.io/;http://cseweb.ucsd.edu/~jmcauley/", "dblp": ";225/5494.html;;29/3483", "google_scholar": ";gCG4cPYAAAAJ;Qaa6OxIAAAAJ;icbo4M0AAAAJ", "or_profile": "~Canwen_Xu1;~Daya_Guo2;~Nan_Duan1;~Julian_McAuley1", "aff": ";SUN YAT-SEN UNIVERSITY, Tsinghua University;Microsoft Research Asia;University of California, San Diego, University of California, San Diego", "aff_domain": ";sysu.edu.cn;microsoft.com;eng.ucsd.edu", "position": ";PhD student;Principal Researcher;Full Professor", "bibtex": "@inproceedings{\nxu2023baize,\ntitle={Baize: An Open-Source Chat Model with Parameter-Efficient Tuning on Self-Chat Data},\nauthor={Canwen Xu and Daya Guo and Nan Duan and Julian McAuley},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=cw6v58yo6s}\n}", "github": "", "project": "", "reviewers": "Guni;iPyR;ftdW", "site": "https://openreview.net/forum?id=cw6v58yo6s", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;3;4", "excitement": "2;3;4", "reproducibility": "4;3;5", "correctness": "2;3;3", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.0, "reproducibility_avg": 4.0, "correctness_avg": 2.6666666666666665, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0000-0003-0955-7588", "linkedin": ";;;", "aff_unique_index": "0;1;2", "aff_unique_norm": "Sun Yat-sen University;Microsoft;University of California, San Diego", "aff_unique_dep": ";Research;", "aff_unique_url": "http://www.sysu.edu.cn;https://www.microsoft.com/en-us/research/group/asia;https://www.ucsd.edu", "aff_unique_abbr": "SYSU;MSR Asia;UCSD", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Asia;San Diego", "aff_country_unique_index": "0;0;1", "aff_country_unique": "China;United States" }, { "id": "czxX6jjpVJ", "title": "Discovering Highly Influential Shortcut Reasoning: An Automated Template-Free Approach", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "Shortcut reasoning is an irrational process of inference, which degrades the robustness of an NLP model.\nWhile a number of previous work has tackled the identification of shortcut reasoning, there are still two major limitations: (i) a method for quantifying the severity of the discovered shortcut reasoning is not provided; (ii) certain types of shortcut reasoning may be missed.\nTo address these issues, we propose a novel method for identifying shortcut reasoning.\nThe proposed method quantifies the severity of the shortcut reasoning by leveraging out-of-distribution data and does not make any assumptions about the type of tokens triggering the shortcut reasoning.\nOur experiments on Natural Language Inference and Sentiment Analysis 
demonstrate that our framework successfully discovers known and unknown shortcut reasoning in the previous work.", "keywords": "Shortcut Reasoning;Inference;Robustness", "primary_area": "", "supplementary_material": "", "author": "Daichi Haraguchi;Kiyoaki Shirai;Naoya Inoue;Natthawut Kertkeidkachorn", "authorids": "~Daichi_Haraguchi2;~Kiyoaki_Shirai1;~Naoya_Inoue1;~Natthawut_Kertkeidkachorn1", "gender": "M;M;M;M", "homepage": "https://homoscribens.github.io/;;https://naoya-i.info/;", "dblp": ";35/6954;48/4618;124/9246", "google_scholar": ";;https://scholar.google.co.jp/citations?hl=en;", "or_profile": "~Daichi_Haraguchi2;~Kiyoaki_Shirai1;~Naoya_Inoue1;~Natthawut_Kertkeidkachorn1", "aff": "Japan Advanced Institute of Science and Technology;Japan Advanced Institute of Science and Technology;Japan Advanced Institute of Science and Technology;Japan Advanced Institute of Science and Technology, Tokyo Institute of Technology", "aff_domain": "jaist.ac.jp;jaist.ac.jp;jaist.ac.jp;jaist.ac.jp", "position": "MS student;Associate Professor;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\nharaguchi2023discovering,\ntitle={Discovering Highly Influential Shortcut Reasoning: An Automated Template-Free Approach},\nauthor={Daichi Haraguchi and Kiyoaki Shirai and Naoya Inoue and Natthawut Kertkeidkachorn},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=czxX6jjpVJ}\n}", "github": "", "project": "", "reviewers": "xMKL;X4Zv;iQTs", "site": "https://openreview.net/forum?id=czxX6jjpVJ", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;4;5", "excitement": "3;3;3", "reproducibility": "3;4;4", "correctness": "3;3;3", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 3.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;", "linkedin": "daichi-haraguchi-35bb461a4/;;;", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Japan Advanced Institute of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.jaist.ac.jp", "aff_unique_abbr": "JAIST", "aff_campus_unique_index": "1", "aff_campus_unique": ";Tokyo", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Japan" }, { "id": "d00kbjbYv2", "title": "How to Train Your Dragon: Diverse Augmentation Towards Generalizable Dense Retrieval", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Various techniques have been developed in recent years to improve dense retrieval (DR), such as unsupervised contrastive learning and pseudo-query generation. 
\nExisting DRs, however, often suffer from effectiveness tradeoffs between supervised and zero-shot retrieval, which some argue was due to the limited model capacity.\nWe contradict this hypothesis and show that a generalizable DR can be trained to achieve high accuracy in both supervised and zero-shot retrieval without increasing model size.\nIn particular, we systematically examine the contrastive learning of DRs, under the framework of Data Augmentation (DA).\nOur study shows that common DA practices such as query augmentation with generative models and pseudo-relevance label creation using a cross-encoder, are often inefficient and sub-optimal.\nWe hence propose a new DA approach with diverse queries and sources of supervision to progressively train a generalizable DR.\nAs a result, DRAGON, our Dense Retriever trained with diverse AuGmentatiON, is the first BERT-base-sized DR to achieve state-of-the-art effectiveness in both supervised and zero-shot evaluations and even competes with models using more complex late interaction.", "keywords": "Generalizable Dense Retrieval;Data Augmentation;Progressive Training", "primary_area": "", "supplementary_material": "", "author": "Sheng-Chieh Lin;Akari Asai;Minghan Li;Barlas Oguz;Jimmy Lin;Yashar Mehdad;Wen-tau Yih;Xilun Chen", "authorids": "~Sheng-Chieh_Lin1;~Akari_Asai2;~Minghan_Li4;~Barlas_Oguz1;~Jimmy_Lin2;~Yashar_Mehdad2;~Wen-tau_Yih1;~Xilun_Chen1", "gender": "M;F;;;;;M;", "homepage": "https://jacklin64.github.io/about_me/;https://akariasai.github.io/;https://alexlimh.github.io/;;https://cs.uwaterloo.ca/~jimmylin/;;http://scottyih.org;https://xilunchen.com", "dblp": "61/10361;;;https://dblp.org/pers/hd/o/Oguz:Barlas;00/7739;;07/7129;96/10207-2.html", "google_scholar": "https://scholar.google.com/citations?hl=en;gqB4u_wAAAAJ;;iPmTQZMAAAAJ;;;8rDNIMsAAAAJ;eUk_hy8AAAAJ", "or_profile": "~Sheng-Chieh_Lin1;~Akari_Asai2;~Minghan_Li4;~Barlas_Oguz1;~Jimmy_Lin2;~Yashar_Mehdad2;~Wen-tau_Yih1;~Xilun_Chen1", "aff": "Meta Platforms, Inc.;Paul G. 
Allen School of Computer Science & Engineering, University of Washington;University of Waterloo;Meta;University of Waterloo;;Meta Platforms, Inc.;Meta FAIR", "aff_domain": "meta.com;cs.washington.edu;uwaterloo.ca;meta.com;waterloo.ca;;meta.com;meta.com", "position": "Intern;PhD student;PhD student;Research Scientist;Full Professor;;Research Scientist;Research Scientist", "bibtex": "@inproceedings{\nlin2023how,\ntitle={How to Train Your Dragon: Diverse Augmentation Towards Generalizable Dense Retrieval},\nauthor={Sheng-Chieh Lin and Akari Asai and Minghan Li and Barlas Oguz and Jimmy Lin and Yashar Mehdad and Wen-tau Yih and Xilun Chen},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=d00kbjbYv2}\n}", "github": "", "project": "", "reviewers": "a2hW;1UUD;CLak;VbrU", "site": "https://openreview.net/forum?id=d00kbjbYv2", "pdf_size": 0, "rating": "4;4;4;4", "confidence": "2;4;3;4", "excitement": "3;4;4;4", "reproducibility": "2;4;4;4", "correctness": "2;4;3;3", "rating_avg": 4.0, "confidence_avg": 3.25, "excitement_avg": 3.75, "reproducibility_avg": 3.5, "correctness_avg": 3.0, "replies_avg": 13, "authors#_avg": 8, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-7989-9703;;0009-0007-8972-7714;;;;0000-0003-4263-395X;", "linkedin": "jack-lin-716a61127/;;;barlas-o%C4%9Fuz-25465050;;;scottyih/;", "aff_unique_index": "0;1;2;0;2;0;0", "aff_unique_norm": "Meta;University of Washington;University of Waterloo", "aff_unique_dep": "Meta Platforms, Inc.;Paul G. Allen School of Computer Science & Engineering;", "aff_unique_url": "https://www.meta.com;https://www.washington.edu;https://uwaterloo.ca", "aff_unique_abbr": "Meta;UW;UW", "aff_campus_unique_index": "1", "aff_campus_unique": ";Seattle", "aff_country_unique_index": "0;0;1;0;1;0;0", "aff_country_unique": "United States;Canada" }, { "id": "d0qmGnKfXa", "title": "From Relevance to Utility: Evidence Retrieval with Feedback for Fact Verification", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "Retrieval-enhanced methods have become a primary approach in fact verification (FV); it requires reasoning over multiple retrieved pieces of evidence to verify the integrity of a claim. To retrieve evidence, existing work often employs off-the-shelf retrieval models whose design is based on the probability ranking principle. We argue that, rather than relevance, for FV we need to focus on the utility that a claim verifier derives from the retrieved evidence. We introduce the $\\textbf{feedback-based evidence retriever} (FER)$ that optimizes the evidence retrieval process by incorporating feedback from the claim verifier. As a feedback signal we use the divergence in utility between how effectively the verifier utilizes the retrieved evidence and the ground-truth evidence to produce the final claim label. 
Empirical studies demonstrate the superiority of FER over prevailing baselines.", "keywords": "Utility; Evidence Retrieval; Fact Verification", "primary_area": "", "supplementary_material": "", "author": "Hengran Zhang;Ruqing Zhang;Jiafeng Guo;Maarten de Rijke;Yixing Fan;Xueqi Cheng", "authorids": "~Hengran_Zhang1;~Ruqing_Zhang3;~Jiafeng_Guo1;~Maarten_de_Rijke1;~Yixing_Fan2;~Xueqi_Cheng1", "gender": "F;M;;M;M;F", "homepage": "https://daqingchong.github.io/;http://www.bigdatalab.ac.cn/gjf/;https://staff.fnwi.uva.nl/m.derijke/;https://faneshion.github.io/;https://people.ucas.ac.cn/~cxq?language=en;https://hengran.github.io/", "dblp": ";02/146;r/MdRijke;;44/912;339/3052", "google_scholar": "qwdqaO4AAAAJ;https://scholar.google.com/citations?view_op=list_works;https://scholar.google.com/citations?hl=en;w5kGcUsAAAAJ;hY8aLqAAAAAJ;https://scholar.google.com.hk/citations?user=ysrrJqsAAAAJ", "or_profile": "~Ruqing_Zhang3;~Jiafeng_Guo1;~Maarten_de_Rijke1;~Yixing_Fan2;~Xueqi_Cheng1;~Zhang_Heng_Ran1", "aff": "Institute of Computing Technology, Chinese Academy of Sciences;Institute of Computing Technolgy, Chinese Academy of Sciences;University of Amsterdam;, Chinese Academy of Sciences;Institute of Computing Technology, Chinese Academy;Institute of Computing Technology, Chinese Academy of Sciences", "aff_domain": "ict.ac.cn;ict.ac.cn;uva.nl;ict.ac.cn;ict.ac.cn;ict.ac.cn", "position": "Associate Professor;Researcher;Full Professor;Associate Professor;Full Professor;PhD student", "bibtex": "@inproceedings{\nzhang2023from,\ntitle={From Relevance to Utility: Evidence Retrieval with Feedback for Fact Verification},\nauthor={Hengran Zhang and Ruqing Zhang and Jiafeng Guo and Maarten de Rijke and Yixing Fan and Xueqi Cheng},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=d0qmGnKfXa}\n}", "github": "", "project": "", "reviewers": "MWUn;uHs1;tnjn", "site": "https://openreview.net/forum?id=d0qmGnKfXa", "pdf_size": 0, "rating": "4;4;4", "confidence": "5;3;4", "excitement": "4;2;3", "reproducibility": "3;3;4", "correctness": "3;4;3", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.0, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0002-1086-0202;;;", "linkedin": ";;;;;", "aff_unique_index": "0;0;1;0;0;0", "aff_unique_norm": "Chinese Academy of Sciences;University of Amsterdam", "aff_unique_dep": "Institute of Computing Technology;", "aff_unique_url": "http://www.ict.ac.cn;https://www.uva.nl", "aff_unique_abbr": "CAS;UvA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0;0", "aff_country_unique": "China;Netherlands" }, { "id": "d0zla3M3LI", "title": "Tree Prompting: Efficient Task Adaptation without Fine-Tuning", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Prompting language models (LMs) is the main interface for applying them to new tasks. However, for smaller LMs, prompting provides low accuracy compared to gradient-based fine-tuning. Tree Prompting is an approach to prompting which builds a decision tree of prompts, linking multiple prompt-LM calls together to solve a task. At inference time, each call to the LM is determined by efficiently routing the outcome of the previous call using the tree. 
Experiments on classification datasets show that Tree Prompting improves accuracy over competing methods and is competitive with fine-tuning. We also show that variants of Tree Prompting allow inspection of a model's decision-making process.", "keywords": "Decision tree;large language model;chain prompting;prompt engineering", "primary_area": "", "supplementary_material": "", "author": "Chandan Singh;John Xavier Morris;Alexander M Rush;Jianfeng Gao;Yuntian Deng", "authorids": "~Chandan_Singh1;~John_Xavier_Morris1;~Alexander_M_Rush1;~Jianfeng_Gao1;~Yuntian_Deng2", "gender": "M;M;M;M;", "homepage": "https://csinva.io/;http://jxmo.io;http://rush.seas.harvard.edu/;https://www.microsoft.com/en-us/research/people/jfgao/;https://yuntiandeng.com", "dblp": "38/2317;263/9958.html;http://dblp.uni-trier.de/pers/hd/r/Rush:Alexander_M=;92/5339;166/1720", "google_scholar": "https://scholar.google.com/citations?hl=en;Utsbve4AAAAJ;LIjnUGgAAAAJ;https://scholar.google.com/citations?hl=en;tk0e5lYAAAAJ", "or_profile": "~Chandan_Singh1;~John_Xavier_Morris1;~Alexander_M_Rush1;~Jianfeng_Gao1;~Yuntian_Deng2", "aff": "Microsoft Research;Cornell University;School of Engineering and Applied Sciences, Harvard University;Microsoft Research;Harvard University", "aff_domain": "microsoft.com;cornell.edu;seas.harvard.edu;microsoft.com;harvard.edu", "position": "Researcher;PhD student;Assistant Professor;Principal Researcher;PhD student", "bibtex": "@inproceedings{\nsingh2023tree,\ntitle={Tree Prompting: Efficient Task Adaptation without Fine-Tuning},\nauthor={Chandan Singh and John Xavier Morris and Alexander M Rush and Jianfeng Gao and Yuntian Deng},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=d0zla3M3LI}\n}", "github": "", "project": "", "reviewers": "VnSE;8DXL;kxdC", "site": "https://openreview.net/forum?id=d0zla3M3LI", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;3;4", "excitement": "4;4;4", "reproducibility": "4;4;3", "correctness": "4;4;4", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 4.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0003-0318-2340;;0000-0002-9900-1606;;", "linkedin": "csinva/;;sasha-rush-a69b6917/;;", "aff_unique_index": "0;1;2;0;2", "aff_unique_norm": "Microsoft;Cornell University;Harvard University", "aff_unique_dep": "Microsoft Research;;School of Engineering and Applied Sciences", "aff_unique_url": "https://www.microsoft.com/en-us/research;https://www.cornell.edu;https://www.harvard.edu", "aff_unique_abbr": "MSR;Cornell;Harvard", "aff_campus_unique_index": "1", "aff_campus_unique": ";Cambridge", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "d94iPelgSD", "title": "Intra-Event and Inter-Event Dependency-Aware Graph Network for Event Argument Extraction", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Event argument extraction is critical to various natural language processing tasks for providing structured information. Existing works usually extract the event arguments one by one, and mostly neglect to build dependency information among event argument roles, especially from the perspective of event structure. Such an approach hinders the model from learning the interactions between different roles. 
In this paper, we raise our research question: How to adequately model dependencies between different roles for better performance? To this end, we propose an intra-event and inter-event dependency-aware graph network, which uses the event structure as the fundamental unit to construct dependencies between roles. Specifically, we first utilize the dense intra-event graph to construct role dependencies within events, and then construct dependencies between events by retrieving similar events of the current event through the retrieval module. To further optimize dependency information and event representation, we propose a dependency interaction module and two auxiliary tasks to improve the extraction ability of the model in different scenarios. Experimental results on the ACE05, RAMS, and WikiEvents datasets show the great advantages of our proposed approach.", "keywords": "event argument extraction;intra-event dependency;inter-event dependency;dependency-aware graph network", "primary_area": "", "supplementary_material": "", "author": "Hao Li;Yanan Cao;Yubing Ren;Fang Fang;Lanxue Zhang;Yingjie Li;Shi Wang", "authorids": "~Hao_Li45;~Yanan_Cao1;~Yubing_Ren1;~Fang_Fang6;~Lanxue_Zhang1;~Yingjie_Li3;~Shi_Wang2", "gender": "M;F;;;F;M;", "homepage": "https://github.com/hlee-top;;https://lilice-r.github.io/;;;https://github.com/lyj963;https://ictkc.github.io", "dblp": ";97/5152-1;331/1171;;;;", "google_scholar": "gFxTgcMAAAAJ;;https://scholar.google.com/citations?hl=zh-CN;;https://scholar.google.com.sg/citations?user=y6qJ0D8AAAAJ;;", "or_profile": "~Hao_Li45;~Yanan_Cao1;~Yubing_Ren1;~Fang_Fang6;~Lanxue_Zhang1;~Yingjie_Li3;~Shi_Wang2", "aff": ";Institute of Information Engineering, Chinese Academy of Sciences;Institute of Information Engineering, Chinese Academy of Sciences;;Institute of Information Engineering, Chinese Academy of Sciences;Institute of Information Engineering, Chinese Academy of Sciences;Chinese Academy of Sciences", "aff_domain": ";iie.ac.cn;iie.ac.cn;;iie.ac.cn;iie.ac.cn;ict.ac.cn", "position": ";Full Professor;PhD student;;PhD student;PhD student;Associate Professor", "bibtex": "@inproceedings{\nli2023intraevent,\ntitle={Intra-Event and Inter-Event Dependency-Aware Graph Network for Event Argument Extraction},\nauthor={Hao Li and Yanan Cao and Yubing Ren and Fang Fang and Lanxue Zhang and Yingjie Li and Shi Wang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=d94iPelgSD}\n}", "github": "", "project": "", "reviewers": "C7eU;Y77w;N1pm", "site": "https://openreview.net/forum?id=d94iPelgSD", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;4;5", "excitement": "3;3;4", "reproducibility": "2;4;4", "correctness": "2;3;3", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 2.6666666666666665, "replies_avg": 11, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0003-3534-1094;0000-0002-0815-3998;;;;0000-0002-1329-2415", "linkedin": ";;;;;;", "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Chinese Academy of Sciences", "aff_unique_dep": "Institute of Information Engineering", "aff_unique_url": "http://www.cas.cn", "aff_unique_abbr": "CAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "dABNxI5c1X", "title": "Modeling Empathic Similarity in Personal Narratives", 
"track": "main", "status": "Long Main", "tldr": "", "abstract": "The most meaningful connections between people are often fostered through expression of shared vulnerability and emotional experiences in personal narratives. We introduce a new task of identifying similarity in personal stories based on empathic resonance, i.e., the extent to which two people empathize with each others' experiences, as opposed to raw semantic or lexical similarity, as has predominantly been studied in NLP. Using insights from social psychology, we craft a framework that operationalizes empathic similarity in terms of three key features of stories: main events, emotional trajectories, and overall morals or takeaways. We create EmpathicStories, a dataset of 1,500 personal stories annotated with our empathic similarity features, and 2,000 pairs of stories annotated with empathic similarity scores. Using our dataset, we fine-tune a model to compute empathic similarity of story pairs, and show that this outperforms semantic similarity models on automated correlation and retrieval metrics. Through a user study with 150 participants, we also assess the effect our model has on retrieving stories that users empathize with, compared to naive semantic similarity-based retrieval, and find that participants empathized significantly more with stories retrieved by our model. Our work has strong implications for the use of empathy-aware models to foster human connection and empathy between people.", "keywords": "empathy;semantic similarity;personal narratives", "primary_area": "", "supplementary_material": "", "author": "Jocelyn J Shen;Maarten Sap;Pedro Colon-Hernandez;Hae Won Park;Cynthia Breazeal", "authorids": "~Jocelyn_J_Shen1;~Maarten_Sap1;~Pedro_Colon-Hernandez1;~Hae_Won_Park1;~Cynthia_Breazeal1", "gender": "F;M;M;F;F", "homepage": "https://jocelynshen.com/;http://maartensap.com;https://www.media.mit.edu/people/pe25171/overview/;;https://www.media.mit.edu/people/cynthiab/overview/", "dblp": "294/2293;153/9519;191/0560;307/5014-1;65/5630.html", "google_scholar": "03QyipEAAAAJ;gFN4QUYAAAAJ;91YQpegAAAAJ;kJoNMc8AAAAJ;qb3jyP4AAAAJ", "or_profile": "~Jocelyn_J_Shen1;~Maarten_Sap1;~Pedro_Colon-Hernandez1;~Hae_Won_Park1;~Cynthia_Breazeal1", "aff": "Massachusetts Institute of Technology;Carnegie Mellon University;Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology", "aff_domain": "mit.edu;cmu.edu;mit.edu;mit.edu;mit.edu", "position": "MS student;Assistant Professor;PhD student;Principal Researcher;Full Professor", "bibtex": "@inproceedings{\nshen2023modeling,\ntitle={Modeling Empathic Similarity in Personal Narratives},\nauthor={Jocelyn J Shen and Maarten Sap and Pedro Colon-Hernandez and Hae Won Park and Cynthia Breazeal},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=dABNxI5c1X}\n}", "github": "", "project": "", "reviewers": "Pwhy;RNMN;rDq9;g6sS", "site": "https://openreview.net/forum?id=dABNxI5c1X", "pdf_size": 0, "rating": "5;5;5;5", "confidence": "3;3;3;4", "excitement": "4;4;4;4", "reproducibility": "5;4;4;3", "correctness": "4;4;4;4", "rating_avg": 5.0, "confidence_avg": 3.25, "excitement_avg": 4.0, "reproducibility_avg": 4.0, "correctness_avg": 4.0, "replies_avg": 14, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0001-7809-5474;;;0000-0001-9638-1722;0000-0002-0587-2065", "linkedin": 
"jocelyn-shen/;;;;cynthia-breazeal-1792317/", "aff_unique_index": "0;1;0;0;0", "aff_unique_norm": "Massachusetts Institute of Technology;Carnegie Mellon University", "aff_unique_dep": ";", "aff_unique_url": "https://web.mit.edu;https://www.cmu.edu", "aff_unique_abbr": "MIT;CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "dFlGP1l65l", "title": "When it Rains, it Pours: Modeling Media Storms and the News Ecosystem", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Most events in the world receive at most brief coverage by the news media. Occasionally, however, an event will trigger a media storm, with voluminous and widespread coverage lasting for weeks instead of days. In this work, we develop and apply a pairwise article similarity model, allowing us to identify story clusters in corpora covering local and national online news, and thereby create a comprehensive corpus of media storms over a nearly two year period. Using this corpus, we investigate media storms at a new level of granularity, allowing us to validate claims about storm evolution and topical distribution, and provide empirical support for previously hypothesized patterns of influence of storms on media coverage and intermedia agenda setting.", "keywords": "political communication;news media;political science", "primary_area": "", "supplementary_material": "", "author": "Benjamin Roger Litterer;David Jurgens;Dallas Card", "authorids": "~Benjamin_Roger_Litterer1;~David_Jurgens1;~Dallas_Card1", "gender": "M;M;", "homepage": ";http://jurgens.people.si.umich.edu;https://dallascard.github.io", "dblp": ";48/4613.html;125/5045", "google_scholar": "kXhY6pkAAAAJ;https://scholar.google.com/citations?hl=en;qH-rJV8AAAAJ", "or_profile": "~Benjamin_Roger_Litterer1;~David_Jurgens1;~Dallas_Card1", "aff": ";University of Michigan - Ann Arbor;University of Michigan - Ann Arbor", "aff_domain": ";umich.edu;umich.edu", "position": ";Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nlitterer2023when,\ntitle={When it Rains, it Pours: Modeling Media Storms and the News Ecosystem},\nauthor={Benjamin Roger Litterer and David Jurgens and Dallas Card},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=dFlGP1l65l}\n}", "github": "", "project": "", "reviewers": "6B36;36nG;iVoD", "site": "https://openreview.net/forum?id=dFlGP1l65l", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;3;4", "excitement": "3;3;3", "reproducibility": "4;2;3", "correctness": "4;4;3", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.0, "reproducibility_avg": 3.0, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-2135-9878;0000-0001-5573-8836", "linkedin": "ben-litterer-a53a24170/;;", "aff_unique_index": "0;0", "aff_unique_norm": "University of Michigan", "aff_unique_dep": "", "aff_unique_url": "https://www.umich.edu", "aff_unique_abbr": "UM", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Ann Arbor", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "dFvwxdSj0B", "title": "Retrieval-Augmented Parsing for Complex Graphs by Exploiting Structure and Uncertainty", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Retrieval augmentation enhances generative 
language models by retrieving informative exemplars relevant for output prediction.\nHowever, in realistic graph parsing problems where the output space is large and complex, classic retrieval methods based on input-sentence similarity can fail to identify the most informative exemplars that target graph elements the model is most struggling about, leading to suboptimal retrieval and compromised prediction under limited retrieval budget.\nIn this work, we improve retrieval-augmented parsing for complex graph problems by exploiting two unique sources of information: (1) structural similarity and (2) model uncertainty.\nWe propose $\\textit{\\textbf{S}tructure-aware and \\textbf{U}ncertainty-\\textbf{G}uided \\textbf{A}daptive \\textbf{R}etrieval} \\textbf{(SUGAR)} $ that first quantifies the model uncertainty in graph prediction and identifies its most uncertain subgraphs, and then retrieves exemplars based on their structural similarity with the identified uncertain subgraphs.\nOn a suite of real-world parsing benchmarks with non-trivial graph structure (SMCalflow and E-commerce), SUGAR exhibits a strong advantage over its classic counterparts that do not leverage structure or model uncertainty.", "keywords": "Uncertainty Quantification;Retrieval;Semantic Parsing", "primary_area": "", "supplementary_material": "", "author": "Zi Lin;Quan Yuan;Panupong Pasupat;Jeremiah Zhe Liu;Jingbo Shang", "authorids": "~Zi_Lin1;~Quan_Yuan8;~Panupong_Pasupat1;~Jeremiah_Zhe_Liu1;~Jingbo_Shang2", "gender": "F;;M;M;M", "homepage": "https://zi-lin.com/;https://github.com/yq911122;https://ppasupat.github.io/;;https://shangjingbo1226.github.io/", "dblp": "81/2999;;124/9178;199/2301;151/3145.html", "google_scholar": "kgZYttUAAAAJ;;BqKXIA8AAAAJ;9jrmcG4AAAAJ;0SkFI4MAAAAJ", "or_profile": "~Zi_Lin1;~Quan_Yuan8;~Panupong_Pasupat1;~Jeremiah_Zhe_Liu1;~Jingbo_Shang2", "aff": "University of California, San Diego;Google;Google;Google DeepMind;University of California, San Diego", "aff_domain": "ucsd.edu;google.com;google.com;google.com;ucsd.edu", "position": "Graduate student;Researcher;Employee;Research Scientist;Assistant Professor", "bibtex": "@inproceedings{\nlin2023retrievalaugmented,\ntitle={Retrieval-Augmented Parsing for Complex Graphs by Exploiting Structure and Uncertainty},\nauthor={Zi Lin and Quan Yuan and Panupong Pasupat and Jeremiah Zhe Liu and Jingbo Shang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=dFvwxdSj0B}\n}", "github": "", "project": "", "reviewers": "Gxdf;GY72;yeis", "site": "https://openreview.net/forum?id=dFvwxdSj0B", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;3;3", "excitement": "3;3;4", "reproducibility": "4;3;3", "correctness": "3;3;3", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.0, "replies_avg": 10, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;", "linkedin": "zi-lin/;;;;", "aff_unique_index": "0;1;1;1;0", "aff_unique_norm": "University of California, San Diego;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.ucsd.edu;https://www.google.com", "aff_unique_abbr": "UCSD;Google", "aff_campus_unique_index": "0;1;1;0", "aff_campus_unique": "San Diego;Mountain View;", "aff_country_unique_index": "0;0;0;1;0", "aff_country_unique": "United States;United Kingdom" }, { "id": "dHHumVX2XV", "title": "Efficient Continue Training 
of Temporal Language Model with Structural Information", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Current language models are mainly trained on \nsnap-shots of data gathered at a particular time, which decreases their capability to generalize over time and model language change.\nTo model the \\textit{time} variable, existing works have explored temporal language models (e.g., TempoBERT) by directly incorporating the timestamp into the training process. While effective to some extent, these methods are limited by the superficial temporal information brought by timestamps, which fails to learn the inherent changes of linguistic components. In this paper, we empirically confirm that the performance of pre-trained language models (PLMs) is closely affiliated with syntactically changed tokens. Based on this observation, we propose a simple yet effective method named \\textit{\\textbf{S}yntax-\\textbf{G}uided \\textbf{T}emporal \\textbf{L}anguage \\textbf{M}odel}~(SG-TLM), which could learn the inherent language changes by capturing an intrinsic relationship between the \\textit{time} prefix and the tokens with salient syntactic change.\nExperiments on two datasets and three tasks demonstrate that our model outperforms existing temporal language models in both memorization and generalization capabilities.\nExtensive results further confirm the effectiveness of our approach across different model frameworks, including both encoder-only and decoder-only models (e.g., LLaMA).\nOur code is available at \\url{https://github.com/zhaochen0110/TempoLM}.", "keywords": "Temporal Generalization;Syntactic Change;Temporal Language Model;Pre-trained Language Model", "primary_area": "", "supplementary_material": "", "author": "Zhaochen Su;Juntao Li;Zikang Zhang;Zihan Zhou;Min Zhang", "authorids": "~Zhaochen_Su1;~Juntao_Li2;~Zikang_Zhang1;~Zihan_Zhou9;~Min_Zhang9", "gender": "M;M;M;M;M", "homepage": "https://zhaochen0110.github.io/;https://lijuntaopku.github.io/;;;https://zhangmin-nlp-ai.github.io/", "dblp": ";;;;83/5342-5", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;sZSygsYAAAAJ;;none;https://scholar.google.com/citations?hl=zh-CN", "or_profile": "~Zhaochen_Su1;~Juntao_Li2;~Zikang_Zhang1;~Zihan_Zhou9;~Min_Zhang9", "aff": "Soochow University;Soochow University, China;Suzhou University;Peking University;Harbin Institute of Technology, Shenzhen", "aff_domain": "suda.edu.cn;suda.edu.cn;suda.edu.cn;pku.edu.cn;hit.edu.cn", "position": "MS student;Associate Professor;MS student;PhD student;Full Professor", "bibtex": "@inproceedings{\nsu2023efficient,\ntitle={Efficient Continue Training of Temporal Language Model with Structural Information},\nauthor={Zhaochen Su and Juntao Li and Zikang Zhang and Zihan Zhou and Min Zhang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=dHHumVX2XV}\n}", "github": "", "project": "", "reviewers": "shTU;PDRu;uyuP", "site": "https://openreview.net/forum?id=dHHumVX2XV", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;4", "excitement": "3;4;3", "reproducibility": "4;4;4", "correctness": "4;4;4", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.0, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-6286-7529;0009-0001-6219-1469;;", "linkedin": "%E8%82%87%E8%BE%B0-%E8%8B%8F-4a103b295/;;;;", 
"aff_unique_index": "0;0;1;2;3", "aff_unique_norm": "Soochow University;Suzhou University;Peking University;Harbin Institute of Technology", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.soochow.edu.cn;https://www.suda.edu.cn;http://www.pku.edu.cn;http://en.hhit.edu.cn/", "aff_unique_abbr": "Soochow U;Suda;Peking U;HIT", "aff_campus_unique_index": "1", "aff_campus_unique": ";Shenzhen", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "dJ5yzTX4rZ", "title": "Solving Hard Analogy Questions with Relation Embedding Chains", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Modelling how concepts are related is a central topic in Lexical Semantics. A common strategy is to rely on knowledge graphs (KGs) such as ConceptNet, and to model the relation between two concepts as a set of paths. However, KGs are limited to a fixed set of relation types, and they are incomplete and often noisy. Another strategy is to distill relation embeddings from a fine-tuned language model. However, this is less suitable for words that are only indirectly related and it does not readily allow us to incorporate structured domain knowledge. In this paper, we aim to combine the best of both worlds. We model relations as paths but associate their edges with relation embeddings. The paths are obtained by first identifying suitable intermediate words and then selecting those words for which informative relation embeddings can be obtained. We empirically show that our proposed representations are useful for solving hard analogy questions.", "keywords": "relation embedding;analogy questions;ConceptNet;knowledge graphs", "primary_area": "", "supplementary_material": "", "author": "Nitesh Kumar;Steven Schockaert", "authorids": "~Nitesh_Kumar1;~Steven_Schockaert2", "gender": "M;M", "homepage": "https://sites.google.com/view/niteshroyal;https://www.cardiff.ac.uk/people/view/133772-schockaert-steven", "dblp": ";29/3972.html", "google_scholar": "nq4neTsAAAAJ;https://scholar.google.co.uk/citations?user=hNCN09AAAAAJ", "or_profile": "~Nitesh_Kumar1;~Steven_Schockaert1", "aff": "Cardiff University;Cardiff University", "aff_domain": "cardiff.ac.uk;cardiff.ac.uk", "position": "Postdoc;Full Professor", "bibtex": "@inproceedings{\nkumar2023solving,\ntitle={Solving Hard Analogy Questions with Relation Embedding Chains},\nauthor={Nitesh Kumar and Steven Schockaert},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=dJ5yzTX4rZ}\n}", "github": "", "project": "", "reviewers": "S1H7;yTYV;zL7T", "site": "https://openreview.net/forum?id=dJ5yzTX4rZ", "pdf_size": 0, "rating": "4;4;4", "confidence": "5;4;2", "excitement": "3;4;3", "reproducibility": "4;5;3", "correctness": "3;4;4", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.0, "correctness_avg": 3.6666666666666665, "replies_avg": 10, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-9301-3876;", "linkedin": "nitesh-kumar-5264a115/;", "aff_unique_index": "0;0", "aff_unique_norm": "Cardiff University", "aff_unique_dep": "", "aff_unique_url": "https://www.cardiff.ac.uk", "aff_unique_abbr": "Cardiff", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United Kingdom" }, { "id": "dQxLtay1M3", "title": "HyperNetwork-based Decoupling to Improve Model Generalization for 
Few-Shot Relation Extraction", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Few-shot relation extraction (FSRE) aims to train a model that can deal with new relations using only a few labeled examples. Most existing studies employ Prototypical Networks for FSRE, which usually overfits the relation classes in the training set and cannot generalize well to unseen relations. By investigating the class separation of an FSRE model, we find that model upper layers are prone to learn relation-specific knowledge. Therefore, in this paper, we propose a HyperNetwork-based Decoupling approach to improve the generalization of FSRE models. Specifically, our model consists of an encoder, a network generator (for producing relation classifiers) and the produced-then-finetuned classifiers for every N-way-K-shot episode. Meanwhile, we design a two-step training framework along with a class-agnostic aligner, in which the generated classifiers focus on acquiring relation-specific knowledge and the encoder is encouraged to learn more general relation knowledge. In this way, the roles of upper and lower layers in an FSRE model are explicitly decoupled, thus enhancing its generalizing capability during testing. Experiments on two public datasets demonstrate the effectiveness of our method.", "keywords": "Relation Extraction;Few-Shot Relation Extraction", "primary_area": "", "supplementary_material": "", "author": "Liang Zhang;Chulun Zhou;Fandong Meng;Jinsong Su;Yidong Chen;Jie Zhou", "authorids": "~Liang_Zhang9;~Chulun_Zhou1;~Fandong_Meng3;~Jinsong_Su1;~Yidong_Chen2;~Jie_Zhou8", "gender": "M;M;M;M;M;M", "homepage": ";;http://fandongmeng.github.io/;https://cdmc.xmu.edu.cn/info/1010/1054.htm;http://nlp.xmu.edu.cn/teachers/ydchen/index_en.html;", "dblp": ";246/2903;117/4056.html;05/9013;11/1492;00/5012-16", "google_scholar": "MSCCJiMAAAAJ;c3IPW_8AAAAJ;sA8U4S0AAAAJ;;;https://scholar.google.com.hk/citations?user=OijxQCMAAAAJ", "or_profile": "~Liang_Zhang9;~Chulun_Zhou1;~Fandong_Meng3;~Jinsong_Su1;~Yidong_Chen2;~Jie_Zhou8", "aff": "XMU;Wechat AI;WeChat AI, Tencent Inc.;Xiamen University;Xiamen University;WeChat AI, Tencent Inc.", "aff_domain": "xmu.edu;tencent.com;tencent.com;xmu.edu.cn;xmu.edu.cn;tencent.com", "position": "PhD student;Researcher;Principal Researcher;Researcher;Associate Professor;Principal Researcher", "bibtex": "@inproceedings{\nzhang2023hypernetworkbased,\ntitle={HyperNetwork-based Decoupling to Improve Model Generalization for Few-Shot Relation Extraction},\nauthor={Liang Zhang and Chulun Zhou and Fandong Meng and Jinsong Su and Yidong Chen and Jie Zhou},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=dQxLtay1M3}\n}", "github": "", "project": "", "reviewers": "ePtg;bDys;TfK4", "site": "https://openreview.net/forum?id=dQxLtay1M3", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;3;3", "excitement": "4;4;3", "reproducibility": "5;4;3", "correctness": "4;4;3", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.0, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-5518-923X;;0000-0002-8158-2377;;;0000-0002-5899-5165", "linkedin": ";;;;;", "aff_unique_index": "0;1;2;0;0;2", "aff_unique_norm": "Xiamen University;WeChat;Tencent", "aff_unique_dep": ";WeChat AI;WeChat AI", "aff_unique_url": 
"https://www.xmu.edu.cn;https://www.wechat.com;https://www.tencent.com", "aff_unique_abbr": "XMU;WeChat AI;Tencent", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "dRlYuG3bj7", "title": "SentiStream: A Co-Training Framework for Adaptive Online Sentiment Analysis in Evolving Data Streams", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Online sentiment analysis has emerged as a crucial component in numerous data-driven applications, including social media monitoring, customer feedback analysis, and online reputation management. Despite their importance, current methodologies falter in effectively managing the continuously evolving nature of data streams, largely due to their reliance on substantial, pre-existing labelled datasets. This paper presents $\\textbf{sentistream}$, a novel co-training framework specifically designed for efficient sentiment analysis within dynamic data streams. Comprising unsupervised, semi-supervised, and stream merge modules, $\\textbf{ sentistream}$ guarantees constant adaptability to evolving data landscapes. This research delves into the continuous adaptation of language models for online sentiment analysis, focusing on real-world applications. Experimental evaluations using data streams derived from three benchmark sentiment analysis datasets confirm that our proposed methodology surpasses existing approaches in terms of both accuracy and computational efficiency.", "keywords": "Online Sentiment analysis;Streaming learning", "primary_area": "", "supplementary_material": "", "author": "Yuhao Wu;Karthick Sharma;Chun Wei Seah;Shuhao Zhang", "authorids": "~Yuhao_Wu5;~Karthick_Sharma1;~Chun_Wei_Seah2;~Shuhao_Zhang4", "gender": "M;M;;M", "homepage": ";https://karthick47v2.github.io;;https://shuhaozhangtony.github.io/", "dblp": ";;;135/4657", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;;https://scholar.google.com.sg/citations?user=nRPnPEAAAAAJ;Xwn7lCEAAAAJ", "or_profile": "~Yuhao_Wu5;~Karthick_Sharma1;~Chun_Wei_Seah2;~Shuhao_Zhang4", "aff": "Singapore University of Technology and Design;Singapore University of Technology and Design;DSO;Singapore University of Technology and Design", "aff_domain": "sutd.edu.sg;sutd.edu.sg;dso.org.sg;sutd.edu.sg", "position": "PhD student;Intern;Researcher;Assistant Professor", "bibtex": "@inproceedings{\nwu2023sentistream,\ntitle={SentiStream: A Co-Training Framework for Adaptive Online Sentiment Analysis in Evolving Data Streams},\nauthor={Yuhao Wu and Karthick Sharma and Chun Wei Seah and Shuhao Zhang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=dRlYuG3bj7}\n}", "github": "", "project": "", "reviewers": "55iG;o7C8;11Lv", "site": "https://openreview.net/forum?id=dRlYuG3bj7", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;4;4", "excitement": "2;4;4", "reproducibility": "2;4;4", "correctness": "1;3;3", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 2.3333333333333335, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0000-0002-9927-6925", "linkedin": ";Karthick47/;;", "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Singapore University of Technology and Design;DSO", "aff_unique_dep": ";", "aff_unique_url": "https://www.sutd.edu.sg;", 
"aff_unique_abbr": "SUTD;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Singapore;" }, { "id": "dVOXsyVcik", "title": "Dynamic Top-k Estimation Consolidates Disagreement between Feature Attribution Methods", "track": "main", "status": "Short Main", "tldr": "", "abstract": "Feature attribution scores are used for explaining the prediction of a text classifier to users by highlighting a k number of tokens. In this work, we propose a way to determine the number of optimal k tokens that should be displayed from sequential properties of the attribution scores. Our approach is dynamic across sentences, method-agnostic, and deals with sentence length bias. We compare agreement between multiple methods and humans on an NLI task, using fixed k and dynamic k. We find that perturbation-based methods and Vanilla Gradient exhibit highest agreement on most method--method and method--human agreement metrics with a static k. Their advantage over other methods disappears with dynamic ks which mainly improve Integrated Gradient and GradientXInput. To our knowledge, this is the first evidence that sequential properties of attribution scores are informative for consolidating attribution signals for human interpretation.", "keywords": "interpretability;explainability;top-k;ranking;agreement;feature attribution", "primary_area": "", "supplementary_material": "", "author": "Jonathan Kamp;Lisa Beinborn;Antske Fokkens", "authorids": "~Jonathan_Kamp1;~Lisa_Beinborn1;~Antske_Fokkens1", "gender": ";F;F", "homepage": "https://jbkamp.github.io/;https://beinborn.eu/;", "dblp": "330/3211;https://dblp.uni-trier.de/pid/154/8216;41/9013", "google_scholar": "https://scholar.google.nl/citations?user=guVj14MAAAAJ;https://scholar.google.de/citations?user=Mh5y8L0AAAAJ;El5nmZUAAAAJ", "or_profile": "~Jonathan_Kamp1;~Lisa_Beinborn1;~Antske_Fokkens1", "aff": "Vrije Universiteit Amsterdam;Vrije Universiteit Amsterdam;VU University Amsterdam", "aff_domain": "vu.nl;vu.nl;vu.nl", "position": "PhD student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nkamp2023dynamic,\ntitle={Dynamic Top-k Estimation Consolidates Disagreement between Feature Attribution Methods},\nauthor={Jonathan Kamp and Lisa Beinborn and Antske Fokkens},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=dVOXsyVcik}\n}", "github": "", "project": "", "reviewers": "EnSP;Cz9e;HqLk;Mndj", "site": "https://openreview.net/forum?id=dVOXsyVcik", "pdf_size": 0, "rating": "5;5;5;5", "confidence": "3;3;2;2", "excitement": "4;4;4;3", "reproducibility": "4;4;3;4", "correctness": "3;3;4;4", "rating_avg": 5.0, "confidence_avg": 2.5, "excitement_avg": 3.75, "reproducibility_avg": 3.75, "correctness_avg": 3.5, "replies_avg": 14, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0003-0916-7459;;0000-0002-6628-6916", "linkedin": "jonathan-kamp/;;", "aff_unique_index": "0;0;1", "aff_unique_norm": "Vrije Universiteit Amsterdam;VU University Amsterdam", "aff_unique_dep": ";", "aff_unique_url": "https://www.vu.nl;https://www.vu.nl", "aff_unique_abbr": "VU Amsterdam;VU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Amsterdam", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Netherlands" }, { "id": "dVfeS1pp2e", "title": "Strong and Efficient Baselines for Open Domain Conversational Question Answering", "track": "main", "status": "Short Findings", 
"tldr": "", "abstract": "Unlike the Open Domain Question Answering (ODQA) setting, the conversational (ODConvQA) domain has received limited attention when it comes to reevaluating baselines for both efficiency and effectiveness. In this paper, we study the State-of-the-Art (SotA) Dense Passage Retrieval (DPR) retriever and Fusion-in-Decoder (FiD) reader pipeline, and show that it significantly underperforms when applied to ODConvQA tasks due to various limitations. We then propose and evaluate strong yet simple and efficient baselines, by introducing a fast reranking component between the retriever and the reader, and by performing targeted finetuning steps. Experiments on two ODConvQA tasks, namely TopiOCQA and OR-QuAC, show that our method improves the SotA results, while reducing reader's latency by 60%. Finally, we provide new and valuable insights into the development of challenging baselines that serve as a reference for future, more intricate approaches, including those that leverage Large Language Models (LLMs).", "keywords": "Open Domain Conversational Question Answering", "primary_area": "", "supplementary_material": "", "author": "Andrei Catalin Coman;Gianni Barlacchi;Adri\u00e0 de Gispert", "authorids": "~Andrei_Catalin_Coman1;~Gianni_Barlacchi1;~Adri\u00e0_de_Gispert1", "gender": "M;;M", "homepage": "https://www.idiap.ch/~acoman/;;", "dblp": "229/9083.html;127/1249;36/3696", "google_scholar": "LZLaA4cAAAAJ;https://scholar.google.it/citations?user=vh3CkJ0AAAAJ;https://scholar.google.com.ph/citations?user=zgrlytAAAAAJ", "or_profile": "~Andrei_Catalin_Coman1;~Gianni_Barlacchi1;~Adri\u00e0_de_Gispert1", "aff": "Idiap Research Institute;Amazon;Amazon", "aff_domain": "idiap.ch;amazon.com;amazon.com", "position": "PhD student;Researcher;Principal Scientist", "bibtex": "@inproceedings{\ncoman2023strong,\ntitle={Strong and Efficient Baselines for Open Domain Conversational Question Answering},\nauthor={Andrei Catalin Coman and Gianni Barlacchi and Adri{\\`a} de Gispert},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=dVfeS1pp2e}\n}", "github": "", "project": "", "reviewers": "tKUu;3qL2;UNK4", "site": "https://openreview.net/forum?id=dVfeS1pp2e", "pdf_size": 0, "rating": "3;3;3", "confidence": "2;3;3", "excitement": "2;3;3", "reproducibility": "4;4;3", "correctness": "4;3;3", "rating_avg": 3.0, "confidence_avg": 2.6666666666666665, "excitement_avg": 2.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": "andrei-c%C4%83t%C4%83lin-coman-226026137/;;", "aff_unique_index": "0;1;1", "aff_unique_norm": "Idiap Research Institute;Amazon", "aff_unique_dep": ";Amazon.com, Inc.", "aff_unique_url": "https://www.idiap.ch;https://www.amazon.com", "aff_unique_abbr": "Idiap;Amazon", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1", "aff_country_unique": "Switzerland;United States" }, { "id": "dZWiI6A09u", "title": "Explainable Claim Verification via Knowledge-Grounded Reasoning with Large Language Models", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Claim verification plays a crucial role in combating misinformation. 
While existing works on claim verification have shown promising results, a crucial piece of the puzzle that remains unsolved is to understand how to verify claims without relying on human-annotated data, which is expensive to create at a large scale. Additionally, it is important for models to provide comprehensive explanations that can justify their decisions and assist human fact-checkers. This paper presents First-Order-Logic-Guided Knowledge-Grounded (FOLK) Reasoning that can verify complex claims and generate explanations without the need for annotated evidence using Large Language Models (LLMs). FOLK leverages the in-context learning ability of LLMs to translate the claim into a First-Order-Logic (FOL) clause consisting of predicates, each corresponding to a sub-claim that needs to be verified. Then, FOLK performs FOL-Guided reasoning over a set of knowledge-grounded question-and-answer pairs to make veracity predictions and generate explanations to justify its decision-making process. This process makes our model highly explanatory, providing clear explanations of its reasoning process in human-readable form. Our experiment results indicate that FOLK outperforms strong baselines on three datasets encompassing various claim verification challenges. Our code and data are available.", "keywords": "Claim Verification;Natural Language Reasoning;Large Language Model", "primary_area": "", "supplementary_material": "", "author": "Haoran Wang;Kai Shu", "authorids": "~Haoran_Wang12;~Kai_Shu1", "gender": "M;", "homepage": "https://haoranwang18.github.io/;https://www.cs.emory.edu/~kshu5/", "dblp": ";153/5265", "google_scholar": "aEuLcokAAAAJ;-6bAV2cAAAAJ", "or_profile": "~Haoran_Wang12;~Kai_Shu1", "aff": "Illinois Institute of Technology;", "aff_domain": "iit.edu;", "position": "PhD student;", "bibtex": "@inproceedings{\nwang2023explainable,\ntitle={Explainable Claim Verification via Knowledge-Grounded Reasoning with Large Language Models},\nauthor={Haoran Wang and Kai Shu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=dZWiI6A09u}\n}", "github": "", "project": "", "reviewers": "ajs8;UJsT;6KM3", "site": "https://openreview.net/forum?id=dZWiI6A09u", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;4", "excitement": "3;3;3", "reproducibility": "3;3;4", "correctness": "4;2;4", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 3.0, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-5787-3131;", "linkedin": "haoran-wang-165236209/;", "aff_unique_index": "0", "aff_unique_norm": "Illinois Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.iit.edu", "aff_unique_abbr": "IIT", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "id": "daGbpBMkoy", "title": "Narrative Order Aware Story Generation via Bidirectional Pretraining Model with Optimal Transport Reward", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "To create a captivating story, a writer often plans a sequence of logically coherent events and ingeniously manipulates the narrative order to generate flashback in place. 
However, existing storytelling systems suffer from both insufficient understanding of event correlations and inadequate awareness of event temporal order (e.g., go to hospital get ill), making it challenging to generate high-quality events that balance the logic and narrative order of story. In this paper, we propose a narrative order aware framework BPOT (Bidirectional Pretraining Model with Optimal Transport Reward) for story generation, which presents a bidirectional pretrained model to encode event correlations and pairwise event order. We also design a reinforcement learning algorithm with novel optimal transport reward to further improve the quality of generated events in the fine-tuning stage. Specifically, a narrative order aware event sequence model is pretrained with the joint learning objectives of event blank infilling and pairwise order prediction. Then, reinforcement learning with novel optimal transport reward is designed to further improve the generated event quality in the fine-tuning stage. The novel optimal transport reward captures the mappings between the generated events and the sentences in the story, effectively measuring the quality of generated events. Both automatic and manual evaluation results demonstrate the superiority of our framework in generating logically coherent stories with flashbacks.", "keywords": "story generation;narrative order;optimal transport", "primary_area": "", "supplementary_material": "", "author": "Zhicong Lu;Li Jin;Guangluan Xu;Linmei Hu;Nayu Liu;Xiaoyu Li;Xian Sun;Zequn Zhang;kaiwen wei", "authorids": "~Zhicong_Lu6;~Li_Jin5;~Guangluan_Xu2;~Linmei_Hu1;~Nayu_Liu1;~Xiaoyu_Li10;~Xian_Sun2;~Zequn_Zhang2;~kaiwen_wei1", "gender": "M;M;M;F;M;M;M;;M", "homepage": "https://github.com/lzc2017;https://github.com/jinli331;https://people.ucas.edu.cn/~xugl?language=en;;;https://github.com/LiXiaoyu0101;https://github.com/trailsV;;https://wkwiecas.github.io/Kaiwen1997.github.io/", "dblp": ";42/1899-1.html;;141/4440;278/8050;;;120/9628.html;297/8721", "google_scholar": ";g7lHJYcAAAAJ;;;;;;;", "or_profile": "~Zhicong_Lu6;~Li_Jin5;~Guangluan_Xu2;~Linmei_Hu1;~Nayu_Liu1;~Xiaoyu_Li10;~Xian_Sun2;~Zequn_Zhang2;~kaiwen_wei1", "aff": "University of Chinese Academy of Sciences;Aerospace Information Research Institute, Chinese Academy of Sciences;University of Chinese Academy of Sciences;Beijing Institute of Technology;University of Chinese Academy of Sciences;Aerospace Information Innovation Institute, Chinese Academy of Science;, Chinese Academy of Sciences;Aerospace Information Research Institute, Chinese Academy of Science;University of Chinese Academy of Sciences", "aff_domain": "ucas.ac.cn;aircas.ac.cn;ucas.ac.cn;bit.edu.cn;ucas.ac.cn;aircas.ac.cn;ucas.ac.cn;aircas.ac.cn;ucas.edu.cn", "position": "PhD student;Associate Professor;Full Professor;Associate Professor;PhD student;Principal Researcher;Full Professor;Associate Professor;PhD student", "bibtex": "@inproceedings{\nlu2023narrative,\ntitle={Narrative Order Aware Story Generation via Bidirectional Pretraining Model with Optimal Transport Reward},\nauthor={Zhicong Lu and Li Jin and Guangluan Xu and Linmei Hu and Nayu Liu and Xiaoyu Li and Xian Sun and Zequn Zhang and kaiwen wei},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=daGbpBMkoy}\n}", "github": "", "project": "", "reviewers": "tdcS;bja9;qVqz", "site": "https://openreview.net/forum?id=daGbpBMkoy", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;3;4", 
"excitement": "3;3;3", "reproducibility": "3;3;3", "correctness": "4;3;3", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.0, "reproducibility_avg": 3.0, "correctness_avg": 3.3333333333333335, "replies_avg": 10, "authors#_avg": 9, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0009-0005-7520-7799;0000-0001-8833-4862;;;0000-0002-7664-9856;0000-0003-0286-6660;;;0000-0002-5830-0802", "linkedin": ";;;;;;;;", "aff_unique_index": "0;1;0;2;0;1;1;1;0", "aff_unique_norm": "University of Chinese Academy of Sciences;Chinese Academy of Sciences;Beijing Institute of Technology", "aff_unique_dep": ";Aerospace Information Research Institute;", "aff_unique_url": "http://www.ucas.ac.cn;http://www.cas.ac.cn;http://www.bit.edu.cn/", "aff_unique_abbr": "UCAS;CAS;BIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "dbRZyDxYlL", "title": "Improving Speech Translation by Fusing Speech and Text", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "In speech translation, leveraging multimodal data to improve model performance and address limitations of individual modalities has shown significant effectiveness. In this paper, we harness the complementary strengths of speech and text to improve speech translation. However, speech and text are disparate modalities, we observe three aspects of modality gap that impede their integration in a speech translation model. To tackle these gaps, we propose **Fuse**-**S**peech-**T**ext (**FuseST**), a cross-modal model which supports three distinct input modalities for translation: speech, text and fused speech-text. We leverage multiple techniques for cross-modal alignment and conduct a comprehensive analysis to assess its impact on speech translation, machine translation and fused speech-text translation. We evaluate FuseST on MuST-C, GigaST and newstest benchmark. Experiments show that the proposed FuseST achieves an average 34.0 BLEU on MuST-C En$\\rightarrow$De/Es/Fr (vs SOTA +1.1 BLEU). Further experiments demonstrate that FuseST does not degrade on MT task, as observed in previous works. Instead, it yields an average improvement of 3.2 BLEU over the pre-trained MT model. 
Code is available at https://github.com/WenbiaoYin/FuseST.", "keywords": "speech translation;multimodal", "primary_area": "", "supplementary_material": "", "author": "Wenbiao Yin;Zhicheng Liu;Chengqi Zhao;Tao Wang;Jian Tong;Rong Ye", "authorids": "~Wenbiao_Yin1;~Zhicheng_Liu4;~Chengqi_Zhao1;~Tao_Wang12;~Jian_Tong2;~Rong_Ye1", "gender": ";M;M;M;;F", "homepage": ";https://github.com/xinshoulzc;;;https://dl.acm.org/profile/99659260055;https://reneeye.github.io/", "dblp": "330/7482;;183/0931;;;84/5795.html", "google_scholar": ";;https://scholar.google.com/citations?hl=zh-CN;S6LsK6cAAAAJ;;UV4u5UQAAAAJ", "or_profile": "~Wenbiao_Yin1;~Zhicheng_Liu4;~Chengqi_Zhao1;~Tao_Wang12;~Jian_Tong2;~Rong_Ye1", "aff": "Nanjing University;;;;;ByteDance", "aff_domain": "nju.edu.cn;;;;;bytedance.com", "position": "Researcher;;;;;Researcher", "bibtex": "@inproceedings{\nyin2023improving,\ntitle={Improving Speech Translation by Fusing Speech and Text},\nauthor={Wenbiao Yin and Zhicheng Liu and Chengqi Zhao and Tao Wang and Jian Tong and Rong Ye},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=dbRZyDxYlL}\n}", "github": "", "project": "", "reviewers": "VGMC;dtrK;xQGj", "site": "https://openreview.net/forum?id=dbRZyDxYlL", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;3;4", "excitement": "3;3;3", "reproducibility": "3;5;4", "correctness": "3;4;4", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.0, "reproducibility_avg": 4.0, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;", "linkedin": ";;;;;", "aff_unique_index": "0;1", "aff_unique_norm": "Nanjing University;ByteDance", "aff_unique_dep": ";", "aff_unique_url": "https://www.nju.edu.cn;https://www.bytedance.com", "aff_unique_abbr": "Nanjing U;ByteDance", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "id": "dcYt9ByOOK", "title": "Responsible AI Considerations in Text Summarization Research: A Review of Current Practices", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "AI and NLP publication venues have increasingly encouraged researchers to reflect on possible ethical considerations, adverse impacts, and other responsible AI issues their work might engender. However, for specific NLP tasks our understanding of how prevalent such issues are, or when and why these issues are likely to arise, remains limited. Focusing on text summarization\u2014a common NLP task largely overlooked by the responsible AI community\u2014we examine research and reporting practices in the current literature. We conduct a multi-round qualitative analysis of 333 summarization papers from the ACL Anthology published between 2020\u20132022. We focus on how, which, and when responsible AI issues are covered, which relevant stakeholders are considered, and mismatches between stated and realized research goals. We also discuss current evaluation practices and consider how authors discuss the limitations of both prior work and their own work. Overall, we find that relatively few papers engage with possible stakeholders or contexts of use, which limits their consideration of potential downstream adverse impacts or other responsible AI issues. 
Based on our findings, we make recommendations on concrete practices and research directions.", "keywords": "responsible AI;automatic summarization", "primary_area": "", "supplementary_material": "", "author": "Yu Lu Liu;Meng Cao;Su Lin Blodgett;Jackie CK Cheung;Alexandra Olteanu;Adam Trischler", "authorids": "~Yu_Lu_Liu2;~Meng_Cao3;~Su_Lin_Blodgett2;~Jackie_CK_Cheung1;~Alexandra_Olteanu1;~Adam_Trischler1", "gender": "F;M;F;M;;M", "homepage": "https://yululiu.github.io/;https://mcao516.github.io/;https://sblodgett.github.io/;http://cs.mcgill.ca/~jcheung/;;https://www.microsoft.com/en-us/research/people/adtrisch/", "dblp": ";;182/2034;00/9012;;177/9137", "google_scholar": ";https://scholar.google.ca/citations?user=CvHeFv8AAAAJ;8jbAkOUAAAAJ;https://scholar.google.com.tw/citations?user=Um-wmYQAAAAJ;;https://scholar.google.ca/citations?user=EvUM6UUAAAAJ", "or_profile": "~Yu_Lu_Liu2;~Meng_Cao3;~Su_Lin_Blodgett2;~Jackie_CK_Cheung1;~Alexandra_Olteanu1;~Adam_Trischler1", "aff": "McGill University, McGill University;McGill University;Microsoft;Microsoft;;", "aff_domain": "mail.mcgill.ca;mcgill.ca;microsoft.com;microsoft.com;;", "position": "MS student;PhD student;Researcher;Consulting Researcher;;", "bibtex": "@inproceedings{\nliu2023responsible,\ntitle={Responsible {AI} Considerations in Text Summarization Research: A Review of Current Practices},\nauthor={Yu Lu Liu and Meng Cao and Su Lin Blodgett and Jackie CK Cheung and Alexandra Olteanu and Adam Trischler},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=dcYt9ByOOK}\n}", "github": "", "project": "", "reviewers": "8kcV;ao4U;vqro", "site": "https://openreview.net/forum?id=dcYt9ByOOK", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;5;4", "excitement": "3;3;4", "reproducibility": "0;4;4", "correctness": "3;2;4", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 2.6666666666666665, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0009-0003-4869-8460;;;;;", "linkedin": ";;;;;", "aff_unique_index": "0;0;1;1", "aff_unique_norm": "McGill University;Microsoft", "aff_unique_dep": ";Microsoft Corporation", "aff_unique_url": "https://www.mcgill.ca;https://www.microsoft.com", "aff_unique_abbr": "McGill;Microsoft", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;1", "aff_country_unique": "Canada;United States" }, { "id": "ddldNozhnM", "title": "CLEME: Debiasing Multi-reference Evaluation for Grammatical Error Correction", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Evaluating the performance of Grammatical Error Correction (GEC) systems is a challenging task due to its subjectivity. Designing an evaluation metric that is as objective as possible is crucial to the development of GEC task. However, mainstream evaluation metrics, i.e., reference-based metrics, introduce bias into the multi-reference evaluation by extracting edits without considering the presence of multiple references. To overcome this issue, we propose Chunk-LE Multi-reference Evaluation (CLEME), designed to evaluate GEC systems in the multi-reference evaluation setting. CLEME builds chunk sequences with consistent boundaries for the source, the hypothesis and references, thus eliminating the bias caused by inconsistent edit boundaries. 
Furthermore, we observe the consistent boundary could also act as the boundary of grammatical errors, based on which the F$_{0.5}$ score is then computed following the correction independence assumption. We conduct experiments on six English reference sets based on the CoNLL-2014 shared task. Extensive experiments and detailed analyses demonstrate the correctness of our discovery and the effectiveness of CLEME. Further analysis reveals that CLEME is robust to evaluate GEC systems across reference sets with varying numbers of references and annotation styles. All the source codes of CLEME are released at https://github.com/THUKElab/CLEME.", "keywords": "natural language processing;grammatical error correction;evaluation metric", "primary_area": "", "supplementary_material": "", "author": "Jingheng Ye;Yinghui Li;Qingyu Zhou;Yangning Li;Shirong Ma;Hai-Tao Zheng;Ying Shen", "authorids": "~Jingheng_Ye1;~Yinghui_Li1;~Qingyu_Zhou1;~Yangning_Li1;~Shirong_Ma1;~Hai-Tao_Zheng2;~Ying_Shen3", "gender": "M;M;M;M;;M;F", "homepage": "https://github.com/yejh123;https://github.com/geekjuruo;https://res.qyzhou.me/;https://github.com/HUSTLyn;;https://www.sigs.tsinghua.edu.cn/fg3/105069.jhtml;http://ise.sysu.edu.cn/teacher/teacher02/1371452.htm", "dblp": "331/8438.html;243/8822.html;199/2091;315/0403;;20/134-2;", "google_scholar": "Zm_L_EUAAAAJ;xTM9pKsAAAAJ;buLOsq0AAAAJ;https://scholar.google.com.hk/citations?user=BmX7lQkAAAAJ;;https://scholar.google.com.hk/citations?user=7VPeORoAAAAJ;rVpl7SIAAAAJ", "or_profile": "~Jingheng_Ye1;~Yinghui_Li1;~Qingyu_Zhou1;~Yangning_Li1;~Shirong_Ma1;~Hai-Tao_Zheng2;~Ying_Shen3", "aff": "Tsinghua University;Tsinghua University;OPPO Research Institute;Tsinghua University;Tsinghua University;Tsinghua University;SUN YAT-SEN UNIVERSITY", "aff_domain": "mail.tsinghua.edu.cn;tsinghua.edu.cn;oppo.com;tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;sysu.edu.cn", "position": "MS student;PhD student;Researcher;PhD student;MS student;Associate Professor;Associate Professor", "bibtex": "@inproceedings{\nye2023cleme,\ntitle={{CLEME}: Debiasing Multi-reference Evaluation for Grammatical Error Correction},\nauthor={Jingheng Ye and Yinghui Li and Qingyu Zhou and Yangning Li and Shirong Ma and Hai-Tao Zheng and Ying Shen},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=ddldNozhnM}\n}", "github": "", "project": "", "reviewers": "oXwi;WBcg;XNd5", "site": "https://openreview.net/forum?id=ddldNozhnM", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;3;4", "excitement": "4;4;4", "reproducibility": "5;4;4", "correctness": "4;4;4", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 4.0, "reproducibility_avg": 4.333333333333333, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0009-0005-9366-4985;;0000-0002-4389-1582;;0009-0008-1686-407X;0000-0001-5128-5649;0000-0002-3220-904X", "linkedin": ";;;;;;", "aff_unique_index": "0;0;1;0;0;0;2", "aff_unique_norm": "Tsinghua University;OPPO Research Institute;Sun Yat-sen University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.oppo.com/en;http://www.sysu.edu.cn", "aff_unique_abbr": "THU;OPPO RI;SYSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "di1Foopybz", "title": "CoSyn: Detecting Implicit Hate Speech in Online Conversations 
Using a Context Synergized Hyperbolic Network", "track": "main", "status": "Long Main", "tldr": "", "abstract": "The tremendous growth of social media users interacting in online conversations has led to significant growth in hate speech affecting people from various demographics. Most of the prior works focus on detecting explicit hate speech, which is overt and leverages hateful phrases, with very little work focusing on detecting hate speech that is implicit or denotes hatred through indirect or coded language. In this paper, we present CoSyn, a context synergized neural network that explicitly incorporates user- and conversational-context for detecting implicit hate speech in online conversations. CoSyn introduces novel ways to encode these external contexts and employs a novel context interaction mechanism that clearly captures the interplay between them, making independent assessments of the amounts of information to be retrieved from these noisy contexts. Additionally, it carries out all these operations in the hyperbolic space to account for the scale-free dynamics of social media. We demonstrate the effectiveness of CoSyn on 6 hate speech datasets and show that CoSyn outperforms all our baselines in detecting implicit hate speech with absolute improvements in the range of 1.24% - 57.8%. We make our code available.", "keywords": "hate-speech;hyperbolic;social-good;implicit", "primary_area": "", "supplementary_material": "", "author": "Sreyan Ghosh;Manan Suri;Purva Chiniya;Utkarsh Tyagi;Sonal Kumar;Dinesh Manocha", "authorids": "~Sreyan_Ghosh1;~Manan_Suri1;~Purva_Chiniya2;~Utkarsh_Tyagi1;~Sonal_Kumar1;~Dinesh_Manocha3", "gender": "M;M;F;M;M;M", "homepage": "https://sreyan88.github.io/;https://www.manansuri.com;;https://utkarsh4430.github.io;https://sonalkum.github.io;https://www.cs.umd.edu/people/dmanocha", "dblp": "173/5626;;;286/2046;;m/DineshManocha", "google_scholar": "5HKZJHAAAAAJ;KbVluf4AAAAJ;c9aJlxcAAAAJ;https://scholar.google.co.in/citations?user=RLjKaTwAAAAJ;jiJ2DcEAAAAJ;X08l_4IAAAAJ", "or_profile": "~Sreyan_Ghosh1;~Manan_Suri1;~Purva_Chiniya2;~Utkarsh_Tyagi1;~Sonal_Kumar1;~Dinesh_Manocha3", "aff": "University of Maryland, College Park;Netaji Subhas Institute of Technology;University of Maryland, College Park;University of Maryland, College Park;University of Maryland, College Park;University of Maryland, College Park", "aff_domain": "umd.edu;nsit.ac.in;umd.edu;umd.edu;umd.edu;umd.edu", "position": "PhD student;Undergrad student;MS student;MS student;PhD student;Professor", "bibtex": "@inproceedings{\nghosh2023cosyn,\ntitle={CoSyn: Detecting Implicit Hate Speech in Online Conversations Using a Context Synergized Hyperbolic Network},\nauthor={Sreyan Ghosh and Manan Suri and Purva Chiniya and Utkarsh Tyagi and Sonal Kumar and Dinesh Manocha},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=di1Foopybz}\n}", "github": "", "project": "", "reviewers": "cZN1;4YGb;ArYr", "site": "https://openreview.net/forum?id=di1Foopybz", "pdf_size": 0, "rating": "5;5;5", "confidence": "5;5;3", "excitement": "3;4;4", "reproducibility": "3;4;4", "correctness": "4;4;4", "rating_avg": 5.0, "confidence_avg": 4.333333333333333, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;0000-0001-7047-9801", "linkedin": 
";manansuri27/;purva-chiniya-667470153/;utkarsh4430/;realsonalkumar/;dinesh-manocha-2311846", "aff_unique_index": "0;1;0;0;0;0", "aff_unique_norm": "University of Maryland;Netaji Subhas Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www/umd.edu;https://www.nsit.ac.in", "aff_unique_abbr": "UMD;NSIT", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "College Park;", "aff_country_unique_index": "0;1;0;0;0;0", "aff_country_unique": "United States;India" }, { "id": "diItUQ1idA", "title": "Abstractive Open Information Extraction", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Open Information Extraction (OpenIE) is a traditional NLP task that extracts structured information from unstructured text to be used for other downstream applications. Traditionally, OpenIE focuses on extracting the surface forms of relations as they appear in the raw text, which we term extractive OpenIE. One of the main drawbacks of this approach is that implicit semantic relations (inferred relations) can not be extracted, compromising the performance of downstream applications. In this paper, we broaden the scope of OpenIE relations from merely the surface form of relations to include inferred relations, which we term abstractive OpenIE. This new task calls for the development of a new abstractive OpenIE training dataset and a baseline neural model that can extract those inferred relations. We also demonstrate the necessity for a new semantics-based metric for evaluating abstractive OpenIE extractions. Via a case study on Complex QA, we demonstrate the effectiveness of abstractive OpenIE.", "keywords": "Open Information Extraction;Relation Extraction;Natural Language Generation", "primary_area": "", "supplementary_material": "", "author": "Kevin Song Pei;Ishan Jindal;Kevin Chang", "authorids": "~Kevin_Song_Pei1;~Ishan_Jindal1;~Kevin_Chang1", "gender": "M;M;M", "homepage": ";https://ijindal.github.io/;https://siebelschool.illinois.edu/about/people/faculty/kcchang", "dblp": "212/4054;159/1866;c/KCCChang", "google_scholar": ";https://scholar.google.ca/citations?user=TNrWFecAAAAJ;https://scholar.google.com.tw/citations?user=sugWZ6MAAAAJ", "or_profile": "~Kevin_Song_Pei1;~Ishan_Jindal1;~Kevin_Chang1", "aff": "University of Illinois, Urbana Champaign;IBM Research;University of Illinois, Urbana Champaign", "aff_domain": "illinois.edu;ibm.com;illinois.edu", "position": "PhD student;Researcher;Full Professor", "bibtex": "@inproceedings{\npei2023abstractive,\ntitle={Abstractive Open Information Extraction},\nauthor={Kevin Song Pei and Ishan Jindal and Kevin Chang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=diItUQ1idA}\n}", "github": "", "project": "", "reviewers": "TQSQ;GwEy;8EaF", "site": "https://openreview.net/forum?id=diItUQ1idA", "pdf_size": 0, "rating": "4;4;4", "confidence": "5;4;4", "excitement": "3;3;3", "reproducibility": "3;3;3", "correctness": "3;4;4", "rating_avg": 4.0, "confidence_avg": 4.333333333333333, "excitement_avg": 3.0, "reproducibility_avg": 3.0, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0003-0997-6803", "linkedin": ";ishan-jindal/;", "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Illinois Urbana-Champaign;IBM", "aff_unique_dep": ";IBM Research", "aff_unique_url": "https://illinois.edu;https://www.ibm.com/research", 
"aff_unique_abbr": "UIUC;IBM", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Urbana-Champaign;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "diUHb3jt3j", "title": "Uncovering the Root of Hate Speech: A Dataset for Identifying Hate Instigating Speech", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "While many prior studies have applied computational approaches, such as machine learning, to detect and moderate hate speech, only scant attention has been paid to the task of identifying the underlying cause of hate speech. In this study, we introduce the concept of hate instigating speech, which refers to a specific type of textual posts on online platforms that stimulate or provoke others to engage in hate speech. The identification of hate instigating speech carries substantial practical implications for effective hate speech moderation. Rather than targeting individual instances of hate speech, by focusing on their roots, i.e., hate instigating speech, it becomes possible to significantly reduce the volume of content that requires review for moderation. Additionally, targeting hate instigating speech enables early prevention of the spread and propagation of hate speech, further enhancing the effectiveness of moderation efforts. However, several challenges hinder researchers from addressing the identification of hate instigating speech. First, there is a lack of comprehensive datasets specifically annotated for hate instigation, making it difficult to train and evaluate computational models effectively. Second, the subtle and nuanced nature of hate instigating speech (e.g., seemingly non-offensive texts serve as catalysts for triggering hate speech) makes it difficult to apply off-the-shelf machine learning models to the problem. To address these challenges, in this study, we have developed and released a multilingual dataset specifically designed for the task of identifying hate instigating speech. Specifically, it encompasses both English and Korean, allowing for a comprehensive examination of hate instigating speech across different linguistic contexts. We have applied existing machine learning models to our dataset and the results demonstrate that the extant models alone are insufficient for effectively detecting hate instigating speech. This finding highlights the need for further attention from the academic community to address this specific challenge. 
We expect our study and dataset to inspire researchers to explore innovative methods that can enhance the accuracy of hate instigating speech detection, ultimately contributing to more effective moderation and prevention of hate speech propagation online.", "keywords": "hate speech;hate instigating speech;machine learning;natural language processing", "primary_area": "", "supplementary_material": "", "author": "Hyoungjun Park;Ho Sung Shim;Kyuhan Lee", "authorids": "~Hyoungjun_Park1;~Ho_Sung_Shim1;~Kyuhan_Lee2", "gender": "M;M;M", "homepage": ";;https://biz.korea.ac.kr/eng/professor/professor_view.html?major=608&no=257&refer=%2Fprofessor%2Fmis.html", "dblp": ";;", "google_scholar": ";;4evSQKYAAAAJ", "or_profile": "~Hyoungjun_Park1;~Ho_Sung_Shim1;~Kyuhan_Lee2", "aff": "Korea University;Korea University;Arizona State University", "aff_domain": "korea.ac.kr;korea.ac.kr;asu.edu", "position": "MS student;MS student;Assistant Professor", "bibtex": "@inproceedings{\npark2023uncovering,\ntitle={Uncovering the Root of Hate Speech: A Dataset for Identifying Hate Instigating Speech},\nauthor={Hyoungjun Park and Ho Sung Shim and Kyuhan Lee},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=diUHb3jt3j}\n}", "github": "", "project": "", "reviewers": "wEu5;eite;9nDB", "site": "https://openreview.net/forum?id=diUHb3jt3j", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;4", "excitement": "1;2;2", "reproducibility": "3;3;3", "correctness": "1;2;2", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 1.6666666666666667, "reproducibility_avg": 3.0, "correctness_avg": 1.6666666666666667, "replies_avg": 10, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": "hyoungjunpark/;ho-sung-shim-58693b187/;", "aff_unique_index": "0;0;1", "aff_unique_norm": "Korea University;Arizona State University", "aff_unique_dep": ";", "aff_unique_url": "https://www.korea.ac.kr;https://www.asu.edu", "aff_unique_abbr": "KU;ASU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1", "aff_country_unique": "South Korea;United States" }, { "id": "djmjglxOZ7", "title": "Finding Support Examples for In-Context Learning", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "In-context learning is a new learning paradigm where a language model observes a few examples and directly outputs the test input's prediction. Previous works have shown that it is sensitive to the provided examples and randomly sampled examples probably cause inferior performance. In this paper, we propose finding ``support examples'' for in-context learning: Given a training dataset, it aims to select one permutation of a few examples, which can well characterize the task for in-context learning and thus lead to superior performance. \nAlthough for traditional gradient-based training, there are extensive methods to find a coreset from the entire dataset, they struggle to find important in-context examples, because in-context learning occurs in the language model's forward process without gradients or parameter updates and thus has a significant gap with traditional training. \nAdditionally, the strong dependence among in-context examples makes it an NP-hard combinatorial optimization problem and enumerating all permutations is infeasible. 
Hence we propose **LENS**, a fi**L**ter-th**EN**-**S**earch method to tackle this challenge in two stages:\nFirst we filter the dataset to obtain individually informative in-context examples. Specifically, we propose a novel metric, InfoScore, to evaluate the example's in-context informativeness based on the language model's feedback, and further propose a progressive filtering process to filter out uninformative examples.\nThen we propose diversity-guided example search which iteratively refines and evaluates the selected example permutations, to find examples that fully depict the task. \nThe experimental results show that LENS significantly outperforms a wide range of baselines and further analyses show that each component contributes critically to the improvements and shed light on the principles of supporting examples and in-context learning.", "keywords": "In-Context Learning;ICL;Language Model", "primary_area": "", "supplementary_material": "", "author": "Xiaonan Li;Xipeng Qiu", "authorids": "~Xiaonan_Li1;~Xipeng_Qiu1", "gender": "M;M", "homepage": ";https://xpqiu.github.io/", "dblp": "84/6885;69/1395", "google_scholar": "ldEcEjEAAAAJ;Pq4Yp_kAAAAJ", "or_profile": "~Xiaonan_Li1;~Xipeng_Qiu1", "aff": "Fudan University;Fudan University", "aff_domain": "fudan.edu;fudan.edu.cn", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\nli2023finding,\ntitle={Finding Support Examples for In-Context Learning},\nauthor={Xiaonan Li and Xipeng Qiu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=djmjglxOZ7}\n}", "github": "", "project": "", "reviewers": "Hn2j;wMx9;NXgr", "site": "https://openreview.net/forum?id=djmjglxOZ7", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;4;5", "excitement": "4;3;3", "reproducibility": "4;4;4", "correctness": "3;3;3", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.0, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0001-7163-5247", "linkedin": ";", "aff_unique_index": "0;0", "aff_unique_norm": "Fudan University", "aff_unique_dep": "", "aff_unique_url": "https://www.fudan.edu.cn", "aff_unique_abbr": "Fudan", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "id": "dl4e3EBz5j", "title": "GlotLID: Language Identification for Low-Resource Languages", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Several recent papers have published good solutions for language identification (LID) for about 300 high-resource and medium-resource languages. However, there is no LID available that (i) covers a wide range of low-resource languages, (ii) is rigorously evaluated and reliable and (iii) efficient and easy to use. Here, we publish GlotLID-M, an LID model that satisfies the desiderata of wide coverage, reliability and efficiency. It identifies 1665 languages, a large increase in coverage compared to prior work. In our experiments, GlotLID-M outperforms four baselines (CLD3, FT176, OpenLID and NLLB) when balancing F1 and false positive rate (FPR). We analyze the unique challenges that low-resource LID poses: incorrect corpus metadata, leakage from high-resource languages, difficulty separating closely related languages, handling of macrolanguage vs varieties and in general noisy data. 
We hope that integrating GlotLID-M into dataset creation pipelines will improve quality and enhance accessibility of NLP technology for low-resource languages and cultures. GlotLID-M model, code, and list of data sources are available: https://github.com/cisnlp/GlotLID.", "keywords": "Language Identification;Low-Resource Languages", "primary_area": "", "supplementary_material": "", "author": "Amir Hossein Kargaran;Ayyoob Imani;Fran\u00e7ois Yvon;Hinrich Schuetze", "authorids": "~Amir_Hossein_Kargaran1;~Ayyoob_Imani1;~Fran\u00e7ois_Yvon2;~Hinrich_Schuetze3", "gender": "M;M;M;M", "homepage": "https://kargaranamir.github.io/;https://ayyoobimani.github.io/;http://cv.archives-ouvertes.fr/francois-yvon;https://www.cis.uni-muenchen.de/schuetze/", "dblp": "261/9248;230/3947;05/2701.html;s/HinrichSchutze", "google_scholar": "2idwpjcAAAAJ;vtpOxL8AAAAJ;https://scholar.google.fr/citations?hl=fr;", "or_profile": "~Amir_Hossein_Kargaran1;~Ayyoob_Imani1;~Fran\u00e7ois_Yvon2;~Hinrich_Schuetze3", "aff": "Ludwig-Maximilians-Universit\u00e4t M\u00fcnchen;Ludwig Maximilian University of Munich;LISN-CNRS / Universit\u00e9 Paris Saclay;Center for Information and Language Processing", "aff_domain": "lmu.de;lmu.de;lisn.fr;lmu.de", "position": "PhD student;PhD student;Senior Researcher;Full Professor", "bibtex": "@inproceedings{\nkargaran2023glotlid,\ntitle={Glot{LID}: Language Identification for Low-Resource Languages},\nauthor={Amir Hossein Kargaran and Ayyoob Imani and Fran{\\c{c}}ois Yvon and Hinrich Schuetze},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=dl4e3EBz5j}\n}", "github": "", "project": "", "reviewers": "9oC8;6ooP;TXgM", "site": "https://openreview.net/forum?id=dl4e3EBz5j", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;4;4", "excitement": "4;3;3", "reproducibility": "4;3;4", "correctness": "4;3;4", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0001-6253-1315;;0000-0002-7972-7442;", "linkedin": "amirkargaran/;ayyoob-imani-49abb68a/;;", "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Ludwig-Maximilians-Universit\u00e4t M\u00fcnchen;Ludwig Maximilian University of Munich;Universit\u00e9 Paris Saclay;Center for Information and Language Processing", "aff_unique_dep": ";;LISN-CNRS;", "aff_unique_url": "https://www.lmu.de;https://www.lmu.de;https://www.universite-paris-saclay.fr;", "aff_unique_abbr": "LMU;LMU;UPS;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1", "aff_country_unique": "Germany;France;" }, { "id": "dnIfD7RJLU", "title": "GEM: Gestalt Enhanced Markup Language Model for Web Understanding via Render Tree", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Inexhaustible web content carries abundant perceptible information beyond text. Unfortunately, most prior efforts in pre-trained Language Models (LMs) ignore such cyber-richness, while few of them only employ plain HTMLs, and crucial information in the rendered web, such as visual, layout, and style, are excluded. Intuitively, those perceptible web information can provide essential intelligence to facilitate content understanding tasks. 
This study presents an innovative Gestalt Enhanced Markup (GEM) Language Model inspired by Gestalt psychological theory for hosting heterogeneous visual information from the render tree into the language model without requiring additional visual input. Comprehensive experiments on multiple downstream tasks, i.e., web question answering and web information extraction, validate GEM superiority.", "keywords": "Gestalt;Markup Language;Web Understanding;Language Model", "primary_area": "", "supplementary_material": "", "author": "Zirui Shao;Feiyu Gao;Zhongda QI;Hangdi Xing;Jiajun Bu;Zhi Yu;Qi Zheng;Xiaozhong Liu", "authorids": "~Zirui_Shao1;~Feiyu_Gao1;~Zhongda_QI1;~Hangdi_Xing1;~Jiajun_Bu1;~Zhi_Yu1;~Qi_Zheng2;~Xiaozhong_Liu2", "gender": "M;M;M;;M;M;M;M", "homepage": ";;;;https://person.zju.edu.cn/bjj;https://person.zju.edu.cn/en/yuzhirenzhe;;https://www.wpi.edu/people/faculty/xliu14", "dblp": "280/7728;34/9056;;342/2712.html;50/3147;;;11/6389.html", "google_scholar": "https://scholar.google.com.hk/citations?user=V4Vl5mgAAAAJ;;;;OgZP2okAAAAJ;;;1BUByMcAAAAJ", "or_profile": "~Zirui_Shao1;~Feiyu_Gao1;~Zhongda_QI1;~Hangdi_Xing1;~Jiajun_Bu1;~Zhi_Yu1;~Qi_Zheng2;~Xiaozhong_Liu2", "aff": "Zhejiang University;Alibaba Group;Zhejiang University;Zhejiang University;Zhejiang University;Zhejiang University;Alibaba Group;Worcester Polytechnic Institute", "aff_domain": "zju.edu.cn;alibaba-inc.com;zju.edu.cn;zju.edu.cn;zju.edu.cn;zju.edu.cn;alibaba-inc.com;wpi.edu", "position": "PhD student;Engineer;MS student;PhD student;Full Professor;Assistant Professor;Alogirithm specialist;Associate Professor", "bibtex": "@inproceedings{\nshao2023gem,\ntitle={{GEM}: Gestalt Enhanced Markup Language Model for Web Understanding via Render Tree},\nauthor={Zirui Shao and Feiyu Gao and Zhongda QI and Hangdi Xing and Jiajun Bu and Zhi Yu and Qi Zheng and Xiaozhong Liu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=dnIfD7RJLU}\n}", "github": "", "project": "", "reviewers": "bTQ3;W8C9;6rWs", "site": "https://openreview.net/forum?id=dnIfD7RJLU", "pdf_size": 0, "rating": "5;5;5", "confidence": "3;5;4", "excitement": "4;4;3", "reproducibility": "3;4;4", "correctness": "4;5;3", "rating_avg": 5.0, "confidence_avg": 4.0, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 8, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0001-6902-0075;;0000-0002-1097-2044;;;", "linkedin": ";;;;;;;", "aff_unique_index": "0;1;0;0;0;0;1;2", "aff_unique_norm": "Zhejiang University;Alibaba Group;Worcester Polytechnic Institute", "aff_unique_dep": ";;", "aff_unique_url": "https://www.zju.edu.cn;https://www.alibaba.com;https://www.wpi.edu", "aff_unique_abbr": "ZJU;Alibaba;WPI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;1", "aff_country_unique": "China;United States" }, { "id": "dnQI76LKQy", "title": "MultiCMET: A Novel Chinese Benchmark for Understanding Multimodal Metaphor", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Metaphor is a pervasive aspect of human communication, and its presence in multimodal forms has become more prominent with the progress of mass media. However, there is limited research on multimodal metaphor resources beyond the English language. 
Furthermore, the existing work in natural language processing does not address the exploration of categorizing the source and target domains in metaphors. This omission is significant considering the extensive research conducted in the fields of cognitive linguistics, which emphasizes that a profound understanding of metaphor relies on recognizing the differences and similarities between domain categories. We, therefore, introduce MultiCMET, a multimodal Chinese metaphor dataset, consisting of 13,820 text-image pairs of advertisements with manual annotations of the occurrence of metaphors, domain categories, and sentiments metaphors convey. We also constructed a domain lexicon that encompasses categorizations of metaphorical source domains and target domains and propose a Cascading Domain Knowledge Integration (CDKI) benchmark to detect metaphors by introducing domain-specific lexical features. Experimental results demonstrate the effectiveness of CDKI. The dataset and code are publicly available.", "keywords": "Multimodal metaphor;Benchmark;Chinese language;Domain lexicon;Metaphor understanding", "primary_area": "", "supplementary_material": "", "author": "Dongyu Zhang;Jingwei Yu;Senyuan Jin;Liang Yang;Hongfei Lin", "authorids": "~Dongyu_Zhang4;~Jingwei_Yu1;~Senyuan_Jin1;~Liang_Yang3;~Hongfei_Lin3", "gender": "F;F;M;M;M", "homepage": "https://www.dongyuzhang.com/;;;;", "dblp": ";;;05/3933-3;https://dblp.uni-trier.de/pid/07/1644", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;;;;kV68br0AAAAJ", "or_profile": "~Dongyu_Zhang4;~Jingwei_Yu1;~Senyuan_Jin1;~Liang_Yang3;~Hongfei_Lin3", "aff": "Dalian University of Technology;Dalian University of Technology;Dalian University of Technology;Dalian University of Technology;Dalian University of Technology", "aff_domain": "dlut.edu.cn;dlut.edu.cn;dlut.edu.cn;dlut.edu.cn;dlut.edu.cn", "position": "Full Professor;MS student;MS student;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nzhang2023multicmet,\ntitle={Multi{CMET}: A Novel Chinese Benchmark for Understanding Multimodal Metaphor},\nauthor={Dongyu Zhang and Jingwei Yu and Senyuan Jin and Liang Yang and Hongfei Lin},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=dnQI76LKQy}\n}", "github": "", "project": "", "reviewers": "MYGj;Szae;RctQ", "site": "https://openreview.net/forum?id=dnQI76LKQy", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;3;4", "excitement": "4;3;2", "reproducibility": "5;4;3", "correctness": "3;4;3", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.0, "reproducibility_avg": 4.0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-7683-5560;;;;0000-0003-0872-7688", "linkedin": ";%E9%9D%96%E8%96%87-%E4%BA%8E-40ba7b27a/;%E6%A3%AE%E6%BA%90-%E9%9D%B3-430bb527a/;;", "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Dalian University of Technology", "aff_unique_dep": "", "aff_unique_url": "http://www.dlut.edu.cn/", "aff_unique_abbr": "DUT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "dp9jTeKXec", "title": "From Simple to Complex: A Progressive Framework for Document-level Informative Argument Extraction", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Document-level Event Argument Extraction (EAE) requires the 
model to extract arguments of multiple events from a single document. Considering the underlying dependencies between these events, recent efforts leverage the idea of \"memory\", where the results of already predicted events are cached and can be retrieved to help the prediction of upcoming events. These methods extract events according to their appearance order in the document, however, the event that appears in the first sentence does not mean that it is the easiest to extract. Existing methods might introduce noise to the extraction of upcoming events if they rely on an incorrect prediction of previous events. In order to provide more reliable memory, we propose a simple-to-complex progressive framework for document-level EAE. Specifically, we first calculate the difficulty of each event and then, we conduct the extraction following a simple-to-complex order. In this way, the memory will store the most certain results, and the model could use these reliable sources to help the prediction of more difficult events. Experiments on WikiEvents show that our model outperforms SOTA by 1.4\\% in F1, indicating the proposed simple-to-complex framework is useful in the EAE task.", "keywords": "Document-level event extraction;informative argument extraction;simple-to-complex prediction", "primary_area": "", "supplementary_material": "", "author": "Quzhe Huang;Yanxi Zhang;Dongyan Zhao", "authorids": "~Quzhe_Huang1;~Yanxi_Zhang1;~Dongyan_Zhao1", "gender": ";M;M", "homepage": "https://andrewzhe.github.io/;https://sites.google.com/view/zhangyanxi/;https://www.wict.pku.edu.cn/zhaodongyan/en/", "dblp": "278/1884;;63/1870", "google_scholar": "https://scholar.google.com/citations?hl=en;4l5qRQwAAAAJ;lhR8-68AAAAJ", "or_profile": "~Quzhe_Huang1;~Yanxi_Zhang1;~Dongyan_Zhao2", "aff": "Peking University;Chongqing University;Peking University", "aff_domain": "pku.edu.cn;cqu.edu.cn;pku.edu.cn", "position": "PhD student;Undergrad student;Full Professor", "bibtex": "@inproceedings{\nhuang2023from,\ntitle={From Simple to Complex: A Progressive Framework for Document-level Informative Argument Extraction},\nauthor={Quzhe Huang and Yanxi Zhang and Dongyan Zhao},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=dp9jTeKXec}\n}", "github": "", "project": "", "reviewers": "CXkm;FLW2;Unoe", "site": "https://openreview.net/forum?id=dp9jTeKXec", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;3", "excitement": "3;3;4", "reproducibility": "5;4;4", "correctness": "3;3;4", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.333333333333333, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": ";;", "aff_unique_index": "0;1;0", "aff_unique_norm": "Peking University;Chongqing University", "aff_unique_dep": ";", "aff_unique_url": "http://www.pku.edu.cn;https://www.cqu.edu.cn", "aff_unique_abbr": "Peking U;CQU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "dpS5VxAwuF", "title": "Multimodal Embodied Plan Prediction Augmented with Synthetic Embodied Dialogue", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Embodied task completion is a challenge where an agent in a simulated environment must predict environment actions to complete tasks\nbased on natural 
language instructions and ego-centric visual observations. We propose a variant of this problem where the agent predicts\nactions at a higher level of abstraction called a plan, which helps make agent actions more interpretable and can be obtained from the appropriate prompting of large language models. We show that multimodal transformer models can outperform language-only models for this problem but fall significantly short of oracle plans. Since collecting human-human dialogues for embodied environments is expensive and time-consuming, we propose a method to synthetically generate such dialogues, which we then use as training data for plan prediction. We demonstrate that multimodal transformer models can attain strong zero-shot performance from our synthetic data, outperforming language-only models trained on human-human data.", "keywords": "Embodied AI;Embodied Task Completion;Language and Robotics;Plan Prediction;Dialog Simulation", "primary_area": "", "supplementary_material": "", "author": "Aishwarya Padmakumar;Mert Inan;Spandana Gella;Patrick L. Lange;Dilek Hakkani-Tur", "authorids": "~Aishwarya_Padmakumar1;~Mert_Inan1;~Spandana_Gella2;~Patrick_L._Lange1;~Dilek_Hakkani-Tur1", "gender": "F;M;F;M;F", "homepage": "https://aishwaryap.github.io/;http://merterm.github.io/;https://scholar.google.com/citations?user=fChTW6MAAAAJ&hl=en&oi=ao;https://patricklange.dev/;https://siebelschool.illinois.edu/about/people/faculty/dilek", "dblp": "160/9492;244/7871.html;146/3968.html;188/8957;h/DilekZHakkaniTur", "google_scholar": "YH7PDvEAAAAJ;ztpK4iwAAAAJ;fChTW6MAAAAJ;https://scholar.google.com/citations?hl=en;GMcL_9kAAAAJ", "or_profile": "~Aishwarya_Padmakumar1;~Mert_Inan1;~Spandana_Gella2;~Patrick_L._Lange1;~Dilek_Hakkani_Tur1", "aff": "Amazon;University of Pittsburgh;Amazon;Amazon - Alexa AI;Amazon", "aff_domain": "amazon.com;cs.pitt.edu;amazon.com;amazon.com;amazon.com", "position": "Researcher;PhD student;Research Scientist;Researcher;Snr Principal Scientist", "bibtex": "@inproceedings{\npadmakumar2023multimodal,\ntitle={Multimodal Embodied Plan Prediction Augmented with Synthetic Embodied Dialogue},\nauthor={Aishwarya Padmakumar and Mert Inan and Spandana Gella and Patrick L. 
Lange and Dilek Hakkani-Tur},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=dpS5VxAwuF}\n}", "github": "", "project": "", "reviewers": "MiCJ;zTkK;YKo6", "site": "https://openreview.net/forum?id=dpS5VxAwuF", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;1;5", "excitement": "3;3;4", "reproducibility": "4;3;4", "correctness": "3;3;4", "rating_avg": 5.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0001-6972-411X;;0000-0003-3935-663X;0000-0001-5246-2117", "linkedin": "aishwarya-padmakumar-475b4171/;merterm/;spandana-gella-313b7019/;langep/;dilek-hakkani-tur-9517543/", "aff_unique_index": "0;1;0;0;0", "aff_unique_norm": "Amazon;University of Pittsburgh", "aff_unique_dep": "Amazon.com, Inc.;", "aff_unique_url": "https://www.amazon.com;https://www.pitt.edu", "aff_unique_abbr": "Amazon;Pitt", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "dq25TkeI1W", "title": "Probing LLMs for hate speech detection: strengths and vulnerabilities", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Recently efforts have been made by social media platforms as well as researchers to detect hateful or toxic language using large language models. However, none of these works aim to use explanation, additional context and victim community information in the detection process. We utilise different prompt variation, input information and evaluate large language models in zero shot setting (without adding any in-context examples). We select two large language models (GPT-3.5 and text-davinci) and three datasets - HateXplain, implicit hate and ToxicSpans. We find that on average including the target information in the pipeline improves the model performance substantially ($\\sim20-30\\%$) over the baseline across the datasets. There is also a considerable effect of adding the rationales/explanations into the pipeline ($\\sim10-20\\%$) over the baseline across the datasets. In addition, we further provide a typology of the error cases where these large language models fail to (i) classify and (ii) explain the reason for the decisions they take. 
Such vulnerable points automatically constitute `jailbreak\u2019 prompts for these models and industry scale safeguard techniques need to be developed to make the models robust against such prompts.", "keywords": "hate speech;explanation;large language models;detection", "primary_area": "", "supplementary_material": "", "author": "Sarthak Roy;Ashish Harshvardhan;Animesh Mukherjee;Punyajoy Saha", "authorids": "~Sarthak_Roy1;~Ashish_Harshvardhan1;~Animesh_Mukherjee2;~Punyajoy_Saha1", "gender": "M;M;M;M", "homepage": "https://cse.iitkgp.ac.in/~sarthakroy/;;https://punyajoy.github.io/;http://cse.iitkgp.ac.in/~animeshm", "dblp": "359/3436;;229/7254;m/AnimeshMukherjee.html", "google_scholar": "xDGfZWkAAAAJ;;VGBwCtsAAAAJ;lf7-deEAAAAJ", "or_profile": "~Sarthak_Roy1;~Ashish_Harshvardhan1;~Punyajoy_Saha1;~Animesh_Mukherjee1", "aff": "Indian Institute of Technology, Kharagpur;Indian Institute of Technology Kharagpur;Indian Institute of Technology Kharagpur;Indian Institute of Technology Kharagpur", "aff_domain": "iitkgp.ac.in;iitkgp.ernet.in;iitkgp.ac.in;iitkgp.ac.in", "position": "PhD student;Undergrad student;PhD student;Full Professor", "bibtex": "@inproceedings{\nroy2023probing,\ntitle={Probing {LLM}s for hate speech detection: strengths and vulnerabilities},\nauthor={Sarthak Roy and Ashish Harshvardhan and Animesh Mukherjee and Punyajoy Saha},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=dq25TkeI1W}\n}", "github": "", "project": "", "reviewers": "Dc6M;UTwD;xYYx", "site": "https://openreview.net/forum?id=dq25TkeI1W", "pdf_size": 0, "rating": "3;3;3", "confidence": "5;4;4", "excitement": "3;3;3", "reproducibility": "3;2;4", "correctness": "3;3;4", "rating_avg": 3.0, "confidence_avg": 4.333333333333333, "excitement_avg": 3.0, "reproducibility_avg": 3.0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0009-0007-1060-2266;;0000-0002-3952-2514;", "linkedin": "sarthak-roy-667a15146;ashish-harshvardhan-72437019b;punyajoy-saha-bb5008114/;", "aff_unique_index": "0;1;1;1", "aff_unique_norm": "Indian Institute of Technology;Indian Institute of Technology Kharagpur", "aff_unique_dep": ";", "aff_unique_url": "https://www.iitkgp.ac.in;https://www.iitkgp.ac.in", "aff_unique_abbr": "IIT Kharagpur;IIT Kharagpur", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Kharagpur", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "India" }, { "id": "drG2ScCe4C", "title": "Spoiler Detection as Semantic Text Matching", "track": "main", "status": "Short Main", "tldr": "", "abstract": "Engaging with discussion of TV shows online often requires individuals to refrain from consuming show-related content for extended periods to avoid spoilers. While existing research on spoiler detection shows promising results in safeguarding viewers from general spoilers, it fails to address the issue of users abstaining from show-related content during their watch. This is primarily because the definition of a spoiler varies depending on the viewer's progress in the show, and conventional spoiler detection methods lack the granularity to capture this complexity. To tackle this challenge, we propose the task of spoiler matching, which involves assigning an episode number to a spoiler given a specific TV show. 
We frame this task as semantic text matching and introduce a dataset comprised of comments and episode summaries to evaluate model performance. Given the length of each example, our dataset can also serve as a benchmark for long-range language models.", "keywords": "dataset;spoiler detection;semantic text matching", "primary_area": "", "supplementary_material": "", "author": "Ryan Tran;Canwen Xu;Julian McAuley", "authorids": "~Ryan_Tran1;~Canwen_Xu1;~Julian_McAuley1", "gender": "M;;M", "homepage": "https://bobotran.github.io/;;http://cseweb.ucsd.edu/~jmcauley/", "dblp": "300/4545;;29/3483", "google_scholar": ";;icbo4M0AAAAJ", "or_profile": "~Ryan_Tran1;~Canwen_Xu1;~Julian_McAuley1", "aff": "University of California, San Diego;;University of California, San Diego, University of California, San Diego", "aff_domain": "ucsd.edu;;eng.ucsd.edu", "position": "MS student;;Full Professor", "bibtex": "@inproceedings{\ntran2023spoiler,\ntitle={Spoiler Detection as Semantic Text Matching},\nauthor={Ryan Tran and Canwen Xu and Julian McAuley},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=drG2ScCe4C}\n}", "github": "", "project": "", "reviewers": "qhzf;W1pv;SNpa", "site": "https://openreview.net/forum?id=drG2ScCe4C", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;5;3", "excitement": "4;4;3", "reproducibility": "4;5;3", "correctness": "4;3;3", "rating_avg": 5.0, "confidence_avg": 4.0, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.0, "correctness_avg": 3.3333333333333335, "replies_avg": 10, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0003-0955-7588", "linkedin": "ryanbobotran/;;", "aff_unique_index": "0;0", "aff_unique_norm": "University of California, San Diego", "aff_unique_dep": "", "aff_unique_url": "https://www.ucsd.edu", "aff_unique_abbr": "UCSD", "aff_campus_unique_index": "0;0", "aff_campus_unique": "San Diego", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "du1t38uXPA", "title": "DeCrisisMB: Debiased Semi-Supervised Learning for Crisis Tweet Classification via Memory Bank", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "During crisis events, people often use social media platforms such as Twitter to disseminate information about the situation, warnings, advice, and support. Emergency relief organizations leverage such information to acquire timely crisis circumstances and expedite rescue operations. While existing works utilize such information to build models for crisis event analysis, fully-supervised approaches require annotating vast amounts of data and are impractical due to limited response time. On the other hand, semi-supervised models can be biased, performing moderately well for certain classes while performing extremely poorly for others, resulting in substantially negative effects on disaster monitoring and rescue. In this paper, we first study two recent debiasing methods on semi-supervised crisis tweet classification. Then we propose a simple but effective debiasing method, DeCrisisMB, that utilizes a Memory Bank to store and perform equal sampling for generated pseudo-labels from each class at each training iteration. Extensive experiments are conducted to compare different debiasing methods' performance and generalization ability in both in-distribution and out-of-distribution settings. 
The results demonstrate the superior performance of our proposed method. Our code is available at https://github.com/HenryPengZou/DeCrisisMB.", "keywords": "Semi-Supervised Learning;Debiasing;Crisis Tweet Classification", "primary_area": "", "supplementary_material": "", "author": "Henry Peng Zou;Yue Zhou;Weizhi Zhang;Cornelia Caragea", "authorids": "~Henry_Peng_Zou1;~Yue_Zhou6;~Weizhi_Zhang1;~Cornelia_Caragea2", "gender": "Not Specified;;M;", "homepage": "https://github.com/HenryPengZou;;https://davidzwz.github.io/;https://www.cs.uic.edu/~cornelia/", "dblp": "359/3792;;205/0473-1.html;69/6680.html", "google_scholar": "1qN70bIAAAAJ;;TazcjBIAAAAJ;vkX6VV4AAAAJ", "or_profile": "~Henry_Peng_Zou1;~Yue_Zhou6;~Weizhi_Zhang1;~Cornelia_Caragea2", "aff": "University of Illinois at Chicago;University of Illinois at Chicago;University of Illinois Chicago;University of Illinois at Chicago", "aff_domain": "uic.edu;uic.edu;uic.edu;uic.edu", "position": "PhD student;PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nzou2023decrisismb,\ntitle={DeCrisis{MB}: Debiased Semi-Supervised Learning for Crisis Tweet Classification via Memory Bank},\nauthor={Henry Peng Zou and Yue Zhou and Weizhi Zhang and Cornelia Caragea},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=du1t38uXPA}\n}", "github": "", "project": "", "reviewers": "msjn;LWNG;Kir9;MKfM", "site": "https://openreview.net/forum?id=du1t38uXPA", "pdf_size": 0, "rating": "3;3;3;3", "confidence": "4;4;3;4", "excitement": "3;3;3;3", "reproducibility": "3;5;4;4", "correctness": "4;3;3;3", "rating_avg": 3.0, "confidence_avg": 3.75, "excitement_avg": 3.0, "reproducibility_avg": 4.0, "correctness_avg": 3.25, "replies_avg": 13, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0003-4067-7588;", "linkedin": "henry-peng-zou/;;;", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Illinois at Chicago", "aff_unique_dep": "", "aff_unique_url": "https://www.uic.edu", "aff_unique_abbr": "UIC", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Chicago", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "dvDi1Oc2y7", "title": "GBT: Generative Boosting Training Approach for Paraphrase Identification", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Paraphrase Identification (PI), a task of determining whether a pair of sentences express the same meaning, is widely applied in Information Retrieval and Question Answering. Data Augmentation (DA) is proven effective in tackling the PI task. However, the majority of DA methods still suffer from two limitations: inefficiency and poor quality. In this study, we propose the Generative Boosting Training (GBT) approach for PI. GBT designs a boosting learning method for a single model based on the human learning process, utilizing seq2seq model to perform DA on misclassified instances periodically. We conduct experiments on the benchmark corpora QQP and LCQMC, towards both English and Chinese PI tasks. Experimental results show that our method yields significant improvements on a variety of Pre-trained Language Model (PLM) based baselines with good efficiency and effectiveness. 
It is noteworthy that a single BERT model (with a linear classifier) can outperform the state-of-the-art PI models with the boosting of GBT.", "keywords": "Paraphrase Identification;Data Augmentation;Text Semantic Matching", "primary_area": "", "supplementary_material": "", "author": "Rui Peng;Zhiling Jin;Yu Hong", "authorids": "~Rui_Peng2;~Zhiling_Jin1;~Yu_Hong1", "gender": "M;M;M", "homepage": "http://www.rpengrpeng.com;;", "dblp": ";;66/5306", "google_scholar": ";WbtdzVAAAAAJ;", "or_profile": "~Rui_Peng2;~Zhiling_Jin1;~Yu_Hong1", "aff": "Soochow University;Soochow University;Suzhou University", "aff_domain": "suda.edu.cn;suda.edu.cn;suda.edu.cn", "position": "MS student;MS student;Full Professor", "bibtex": "@inproceedings{\npeng2023gbt,\ntitle={{GBT}: Generative Boosting Training Approach for Paraphrase Identification},\nauthor={Rui Peng and Zhiling Jin and Yu Hong},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=dvDi1Oc2y7}\n}", "github": "", "project": "", "reviewers": "xb7T;Zs1Z;rB1x", "site": "https://openreview.net/forum?id=dvDi1Oc2y7", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;3;3", "excitement": "2;4;4", "reproducibility": "3;4;3", "correctness": "2;4;3", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": ";;", "aff_unique_index": "0;0;1", "aff_unique_norm": "Soochow University;Suzhou University", "aff_unique_dep": ";", "aff_unique_url": "https://www.soochow.edu.cn;https://www.suda.edu.cn", "aff_unique_abbr": "Soochow U;Suda", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "dwGKBFXiy2", "title": "NASH: A Simple Unified Framework of Structured Pruning for Accelerating Encoder-Decoder Language Models", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Structured pruning methods have proven effective in reducing the model size and accelerating inference speed in various network architectures such as Transformers. Despite the versatility of encoder-decoder models in numerous NLP tasks, the structured pruning methods on such models are relatively less explored compared to encoder-only models. In this study, we investigate the behavior of the structured pruning of the encoder-decoder models in the decoupled pruning perspective of the encoder and decoder component, respectively. Our findings highlight two insights: (1) the number of decoder layers is the dominant factor of inference speed, and (2) low sparsity in the pruned encoder network enhances generation quality. Motivated by these findings, we propose a simple and effective framework, NASH, that narrows the encoder and shortens the decoder networks of encoder-decoder models. 
Extensive experiments on diverse generation and inference tasks validate the effectiveness of our method in both speedup and output quality.", "keywords": "Model Compression;Structured Pruning;Encoder-Decoder Language Model", "primary_area": "", "supplementary_material": "", "author": "Jongwoo Ko;Seungjoon Park;Yujin Kim;Sumyeong Ahn;Du-Seong Chang;Euijai Ahn;Se-Young Yun", "authorids": "~Jongwoo_Ko1;~Seungjoon_Park1;~Yujin_Kim2;~Sumyeong_Ahn1;~Du-Seong_Chang1;~Euijai_Ahn1;~Se-Young_Yun1", "gender": "M;M;F;M;M;M;M", "homepage": "https://sites.google.com/view/jongwooko;;https://github.com/kimyuji;https://sumyeongahn.github.io;https://duseongchang.github.io/;https://dxp.korea.ac.kr;https://fbsqkd.github.io", "dblp": "286/1503;;128/3542;217/5462;92/3996;;23/8862", "google_scholar": "l2jkwHwAAAAJ;vqBppVcAAAAJ;17yTpxsAAAAJ;krxhvIYAAAAJ;https://scholar.google.co.kr/citations?user=y1HTwWAAAAAJ;;X_IAjb8AAAAJ", "or_profile": "~Jongwoo_Ko1;~Seungjoon_Park1;~Yujin_Kim2;~Sumyeong_Ahn1;~Du-Seong_Chang1;~Euijai_Ahn1;~Se-Young_Yun1", "aff": "Korea Advanced Institute of Science & Technology;KAIST;KAIST, Graduate School of AI;Korea Advanced Institute of Science & Technology;KT;;KAIST", "aff_domain": "kaist.ac.kr;ee.kaist.ac.kr;kaist.ac.kr;kaist.ac.kr;kt.com;;kaist.ac.kr", "position": "PhD student;MS student;MS student;PhD student;Senior Vice President;;Assistant Professor", "bibtex": "@inproceedings{\nko2023nash,\ntitle={{NASH}: A Simple Unified Framework of Structured Pruning for Accelerating Encoder-Decoder Language Models},\nauthor={Jongwoo Ko and Seungjoon Park and Yujin Kim and Sumyeong Ahn and Du-Seong Chang and Euijai Ahn and Se-Young Yun},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=dwGKBFXiy2}\n}", "github": "", "project": "", "reviewers": "XszA;1C5A;h8AR;7MBx", "site": "https://openreview.net/forum?id=dwGKBFXiy2", "pdf_size": 0, "rating": "4;4;4;4", "confidence": "4;1;3;4", "excitement": "3;4;2;3", "reproducibility": "4;4;5;4", "correctness": "3;4;4;3", "rating_avg": 4.0, "confidence_avg": 3.0, "excitement_avg": 3.0, "reproducibility_avg": 4.25, "correctness_avg": 3.5, "replies_avg": 12, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;;", "linkedin": "jongwoo-ko-8b93051b4/;;;;;;seyoung-yun-395130ab/", "aff_unique_index": "0;0;1;0;2;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology;KAIST;Korea Telecom", "aff_unique_dep": ";Graduate School of AI;", "aff_unique_url": "https://www.kaist.ac.kr;https://www.kaist.edu;http://www.kt.com", "aff_unique_abbr": "KAIST;KAIST;KT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "South Korea" }, { "id": "dwj886NUqy", "title": "GreedyCAS: Unsupervised Scientific Abstract Segmentation with Normalized Mutual Information", "track": "main", "status": "Long Main", "tldr": "", "abstract": "The abstracts of scientific papers typically contain both premises (e.g., background and observations) and conclusions. Although conclusion sentences are highlighted in structured abstracts, in non-structured abstracts the concluding information is not explicitly marked, which makes the automatic segmentation of conclusions from scientific abstracts a challenging task. In this work, we explore Normalized Mutual Information (NMI) as a means for abstract segmentation. 
We consider each abstract as a recurrent cycle of sentences and place two segmentation boundaries by greedily optimizing the NMI score between the two segments, assuming that conclusions are strongly semantically linked with preceding premises. On non-structured abstracts, our proposed unsupervised approach GreedyCAS achieves the best performance across all evaluation metrics; on structured abstracts, GreedyCAS outperforms all baseline methods measured by $P_k$. The strong correlation of NMI to our evaluation metrics reveals the effectiveness of NMI for abstract segmentation.", "keywords": "Abstract Segmentation;Mutual Information;Argument Mining", "primary_area": "", "supplementary_material": "", "author": "Yingqiang Gao;Jessica Lam;Nianlong Gu;Richard Hahnloser", "authorids": "~Yingqiang_Gao1;~Jessica_Lam3;~Nianlong_Gu1;~Richard_Hahnloser1", "gender": ";F;;M", "homepage": ";https://www.ini.uzh.ch/en/institute/people.html;;", "dblp": ";272/3725;264/9438;", "google_scholar": ";https://scholar.google.com/citations?hl=en;q6blwLYAAAAJ;", "or_profile": "~Yingqiang_Gao1;~Jessica_Lam3;~Nianlong_Gu1;~Richard_Hahnloser1", "aff": ";Insititute of Neuroinformatics, University of Zurich and ETH Zurich, ETHZ - ETH Zurich;ETHZ - ETH Zurich;ETHZ - ETH Zurich", "aff_domain": ";ini.ethz.ch;ethz.ch;ethz.ch", "position": ";PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\ngao2023greedycas,\ntitle={Greedy{CAS}: Unsupervised Scientific Abstract Segmentation with Normalized Mutual Information},\nauthor={Yingqiang Gao and Jessica Lam and Nianlong Gu and Richard Hahnloser},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=dwj886NUqy}\n}", "github": "", "project": "", "reviewers": "Wkik;RqZY;m93t", "site": "https://openreview.net/forum?id=dwj886NUqy", "pdf_size": 0, "rating": "5;5;5", "confidence": "3;3;3", "excitement": "4;3;3", "reproducibility": "4;3;4", "correctness": "4;4;4", "rating_avg": 5.0, "confidence_avg": 3.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0000-0002-4039-7773", "linkedin": ";;;", "aff_unique_index": "0;0;0", "aff_unique_norm": "ETH Zurich", "aff_unique_dep": "Institute of Neuroinformatics", "aff_unique_url": "https://www.ethz.ch", "aff_unique_abbr": "ETHZ", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Switzerland" }, { "id": "dxCviFd7rj", "title": "Visual Elements Mining as Prompts for Instruction Learning for Target-Oriented Multimodal Sentiment Classification", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Target-oriented Multimodal Sentiment Classification (TMSC) aims to incorporate visual modality with text modality to identify the sentiment polarity towards a specific target within a sentence. To address this task, we propose a Visual Elements Mining as Prompts (VEMP) method, which describes the semantic information of visual elements with Text Symbols Embedded in the Image (TSEI), Target-aware Adjective-Noun Pairs (TANPs) and image scene caption, and then transform them into prompts for instruction learning of the model Tk-Instruct. 
In our VEMP, the text symbols embedded in the image may contain the textual descriptions of fine-grained visual elements, and are extracted as input TSEI; we extract adjective-noun pairs from the image and align them with the target to obtain TANPs, in which the adjectives provide emotional embellishments for the relevant target; finally, to effectively fuse these visual elements with text modality for sentiment prediction, we integrate them to construct instruction prompts for instruction-tuning Tk-Instruct which possesses powerful learning capabilities under instructions. Extensive experimental results show that our method achieves state-of-the-art performance on two benchmark datasets. And further analysis demonstrates the effectiveness of each component of our method.", "keywords": "Target-oriented multimodal sentiment classification;Instruction learning", "primary_area": "", "supplementary_material": "", "author": "Bin Yang;Jinlong Li", "authorids": "~Bin_Yang12;~Jinlong_Li1", "gender": ";M", "homepage": "https://github.com/long8181;", "dblp": ";", "google_scholar": ";", "or_profile": "~Bin_Yang12;~Jinlong_Li1", "aff": "University of Science and Technology of China;University of Science and Technology of China", "aff_domain": "ustc.edu.cn;ustc.edu.cn", "position": "MS student;Associate Professor", "bibtex": "@inproceedings{\nyang2023visual,\ntitle={Visual Elements Mining as Prompts for Instruction Learning for Target-Oriented Multimodal Sentiment Classification},\nauthor={Bin Yang and Jinlong Li},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=dxCviFd7rj}\n}", "github": "", "project": "", "reviewers": "HyRf;BJVD;fbv8", "site": "https://openreview.net/forum?id=dxCviFd7rj", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;4;3", "excitement": "3;3;3", "reproducibility": "3;3;4", "correctness": "3;3;3", "rating_avg": 3.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.0, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.0, "replies_avg": 10, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";", "linkedin": ";", "aff_unique_index": "0;0", "aff_unique_norm": "University of Science and Technology of China", "aff_unique_dep": "", "aff_unique_url": "http://www.ustc.edu.cn", "aff_unique_abbr": "USTC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "id": "e2B31gDhnj", "title": "Domain Private Transformers for Multi-Domain Dialog Systems", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "Large, general purpose language models have demonstrated impressive performance across many different conversational domains. While multi-domain language models achieve low overall perplexity, their outputs are not guaranteed to stay within the domain of a given input prompt. This paper proposes \\emph{domain privacy} as a novel way to quantify how likely a conditional language model will leak across domains. We also develop policy functions based on token-level domain classification, and propose an efficient fine-tuning method to improve the trained model's domain privacy. 
Experiments on membership inference attacks show that our proposed method has comparable resiliency to methods adapted from recent literature on differentially private language models.", "keywords": "differential privacy;language models;dialogue;multi-domain language models", "primary_area": "", "supplementary_material": "", "author": "Anmol Kabra;Ethan R. Elenberg", "authorids": "~Anmol_Kabra1;~Ethan_R._Elenberg2", "gender": ";", "homepage": "https://anmolkabra.com/;http://eelenberg.github.io/", "dblp": ";150/5501", "google_scholar": "FH1DDk0AAAAJ;Kh-DC4IAAAAJ", "or_profile": "~Anmol_Kabra1;~Ethan_R_Elenberg1", "aff": "Toyota Technological Institute at Chicago;ASAPP", "aff_domain": "ttic.edu;asapp.com", "position": "MS student;Researcher", "bibtex": "@inproceedings{\nkabra2023domain,\ntitle={Domain Private Transformers for Multi-Domain Dialog Systems},\nauthor={Anmol Kabra and Ethan R. Elenberg},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=e2B31gDhnj}\n}", "github": "", "project": "", "reviewers": "tU8m;sL5m;nhsK", "site": "https://openreview.net/forum?id=e2B31gDhnj", "pdf_size": 0, "rating": "3;3;3", "confidence": "2;3;4", "excitement": "3;3;4", "reproducibility": "3;3;3", "correctness": "3;3;3", "rating_avg": 3.0, "confidence_avg": 3.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.0, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";", "linkedin": ";", "aff_unique_index": "0;1", "aff_unique_norm": "Toyota Technological Institute at Chicago;ASAPP", "aff_unique_dep": ";", "aff_unique_url": "https://www.tti-chicago.org;https://www.asapp.com", "aff_unique_abbr": "TTI Chicago;ASAPP", "aff_campus_unique_index": "0", "aff_campus_unique": "Chicago;", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "e3eIqCPCT9", "title": "Learning to Rank Generation with Pairwise Partial Rewards", "track": "main", "status": "Long Main", "tldr": "", "abstract": "This paper studies the use of reinforcement learning for conditional text generation, which overcomes the limitation of the prevalent supervised maximum likelihood estimation approach. However, it still suffers from challenges including the large action space and the delayed reward, as the reward can be computed only after an entire sequence is generated. To address these challenges, we propose a method that provides partial rewards for intermediate actions taken on partial sequences. This enables the model to promptly prioritize actions that lead to the generation of more desirable sequences. Our method's key contribution lies in its focus on distinguishing relatively more desirable actions rather than striving to precisely estimate pointwise values for arbitrary partial sequences. Instead, our model learns to discern the relative desirability between pairs of actions, or rank actions in a pairwise manner, only when necessary and feasible. This is materialized in an efficient way by leveraging the prefix tree constructed from the sampled sequences. 
Experimental results on paraphrase generation and constrained machine translation tasks showcase the effectiveness of our method.", "keywords": "Reinforcement learning;learning to rank;reward shaping;conditional text generation", "primary_area": "", "supplementary_material": "", "author": "Youngwon Lee;Jinu Lee;seung-won hwang", "authorids": "~Youngwon_Lee1;~Jinu_Lee2;~seung-won_hwang2", "gender": ";M;", "homepage": ";https://jinulee-v.github.io;http://seungwonh.github.io", "dblp": ";229/2178;h/SeungwonHwang", "google_scholar": ";LtaXhxEAAAAJ;63bBmc3mYrAC", "or_profile": "~Youngwon_Lee1;~Jinu_Lee2;~seung-won_hwang2", "aff": ";Seoul National University, Seoul National University;Seoul National University", "aff_domain": ";cse.snu.ac.kr;snu.ac.kr", "position": ";Undergrad student;Full Professor", "bibtex": "@inproceedings{\nlee2023learning,\ntitle={Learning to Rank Generation with Pairwise Partial Rewards},\nauthor={Youngwon Lee and Jinu Lee and seung-won hwang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=e3eIqCPCT9}\n}", "github": "", "project": "", "reviewers": "WfAY;yfPs;4L43", "site": "https://openreview.net/forum?id=e3eIqCPCT9", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;4;4", "excitement": "4;4;4", "reproducibility": "4;5;3", "correctness": "4;4;4", "rating_avg": 5.0, "confidence_avg": 4.0, "excitement_avg": 4.0, "reproducibility_avg": 4.0, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": ";;", "aff_unique_index": "0;0", "aff_unique_norm": "Seoul National University", "aff_unique_dep": "", "aff_unique_url": "https://www.snu.ac.kr", "aff_unique_abbr": "SNU", "aff_campus_unique_index": "0", "aff_campus_unique": "Seoul;", "aff_country_unique_index": "0;0", "aff_country_unique": "South Korea" }, { "id": "e3gXrvjGys", "title": "A Unified Framework for Synaesthesia Analysis", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Synaesthesia refers to the description of perceptions in one sensory modality through concepts from other modalities. It involves not only a linguistic phenomenon, but also a cognitive phenomenon structuring human thought and action, which makes understanding it challenging. As a means of cognition, synaesthesia is rendered by more than sensory modalities, cue and stimulus can also play an important role in expressing and understanding it. In addition, understanding synaesthesia involves many cognitive efforts, such as identifying the semantic relationship between sensory words and modalities. Therefore, we propose a unified framework focusing on annotating all kinds of synaesthetic elements and fully exploring the relationship among them. In particular, we introduce a new annotation scheme, including sensory modalities as well as their cues and stimuli, which facilitate understanding synaesthetic information collectively. We further design a structure generation model to capture the relations among synaesthetic elements and generate them jointly. Through extensive experiments, the importance of proposed dataset can be verified by the statistics and progressive performances. 
In addition, our proposed model yields state-of-the-art results, demonstrating its effectiveness.", "keywords": "Synaesthesia Analysis;Generation Model;Linguistic", "primary_area": "", "supplementary_material": "", "author": "Kun Sheng;Zhongqing Wang;Qingqing Zhao;Xiaotong Jiang;Guodong Zhou", "authorids": "~Kun_Sheng1;~Zhongqing_Wang1;~Qingqing_Zhao3;~Xiaotong_Jiang1;~Guodong_Zhou1", "gender": "M;M;F;F;M", "homepage": "https://github.com/ADcountdown;http://nlp.suda.edu.cn/~wangzq;https://www.researchgate.net/profile/Qingqing-Zhao-2;https://github.com/kk1tsch;http://nlp.suda.edu.cn/~gdzhou/", "dblp": ";20/9924;;;", "google_scholar": ";;;;", "or_profile": "~Kun_Sheng1;~Zhongqing_Wang1;~Qingqing_Zhao3;~Xiaotong_Jiang1;~Guodong_Zhou1", "aff": "Suzhou University;Soochow University, China;Chinese Academy of Social Sciences;Soochow Univ;Soochow University, China", "aff_domain": "suda.edu.cn;suda.edu.cn;cass.org.cn;suda.edu.cn;suda.edu.cn", "position": "MS student;Associate Professor;Associate Professor;PhD student;Full Professor", "bibtex": "@inproceedings{\nsheng2023a,\ntitle={A Unified Framework for Synaesthesia Analysis},\nauthor={Kun Sheng and Zhongqing Wang and Qingqing Zhao and Xiaotong Jiang and Guodong Zhou},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=e3gXrvjGys}\n}", "github": "", "project": "", "reviewers": "oMzF;q3wD;UVPL", "site": "https://openreview.net/forum?id=e3gXrvjGys", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;3;4", "excitement": "3;4;4", "reproducibility": "2;2;3", "correctness": "2;3;4", "rating_avg": 3.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 2.3333333333333335, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;", "linkedin": ";;;;", "aff_unique_index": "0;1;2;1;1", "aff_unique_norm": "Suzhou University;Soochow University;Chinese Academy of Social Sciences", "aff_unique_dep": ";;", "aff_unique_url": "https://www.suda.edu.cn;https://www.soochow.edu.cn;http://www.cass.cn", "aff_unique_abbr": "Suda;Soochow U;CASS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "e4dXIBRQ9u", "title": "Grammatical Error Correction via Mixed-Grained Weighted Training", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "The task of Grammatical Error Correction (GEC) aims to automatically correct grammatical errors in natural texts.\nAlmost all previous works treat annotated training data equally, but inherent discrepancies in data are neglected. \nIn this paper, the inherent discrepancies are manifested in two aspects, namely, accuracy of data annotation and diversity of potential annotations. \nTo this end, we propose MainGEC, which designs token-level and sentence-level training weights based on inherent discrepancies therein, and then conducts mixed-grained weighted training to improve the training effect for GEC.\nEmpirical evaluation shows that whether in the Seq2Seq or Seq2Edit manner, MainGEC achieves consistent and significant performance improvements on two benchmark datasets, demonstrating the effectiveness and superiority of the mixed-grained weighted training. 
\nFurther ablation experiments verify the effectiveness of designed weights for both granularities in MainGEC.", "keywords": "Grammatical Error Correction;Weighted Training", "primary_area": "", "supplementary_material": "", "author": "Jiahao Li;Quan Wang;Chiwei Zhu;Zhendong Mao;Yongdong Zhang", "authorids": "~Jiahao_Li4;~Quan_Wang7;~Chiwei_Zhu1;~Zhendong_Mao1;~Yongdong_Zhang2", "gender": "M;F;M;;M", "homepage": ";;;;https://imcc.ustc.edu.cn/_upload/tpl/0d/13/3347/template3347/zhangyongdong.html", "dblp": ";;361/7071;;z/YongdongZhang", "google_scholar": "https://scholar.google.com.hk/citations?user=TvedIHgAAAAJ;l2yEbhAAAAAJ;xMAxveAAAAAJ;;https://scholar.google.com.hk/citations?user=hxGs4ukAAAAJ", "or_profile": "~Jiahao_Li4;~Quan_Wang7;~Chiwei_Zhu1;~Zhendong_Mao1;~Yongdong_Zhang2", "aff": "University of Science and Technology of China;Beijing University of Posts and Telecommunications;University of Science and Technology of China;;University of Science and Technology of China", "aff_domain": "ustc.edu.cn;bupt.edu.cn;ustc.edu.cn;;ustc.edu.cn", "position": "PhD student;Associate Professor;PhD student;;Full Professor", "bibtex": "@inproceedings{\nli2023grammatical,\ntitle={Grammatical Error Correction via Mixed-Grained Weighted Training},\nauthor={Jiahao Li and Quan Wang and Chiwei Zhu and Zhendong Mao and Yongdong Zhang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=e4dXIBRQ9u}\n}", "github": "", "project": "", "reviewers": "uf6d;45hm;6raQ", "site": "https://openreview.net/forum?id=e4dXIBRQ9u", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;5", "excitement": "3;3;3", "reproducibility": "4;4;5", "correctness": "3;3;3", "rating_avg": 3.0, "confidence_avg": 4.333333333333333, "excitement_avg": 3.0, "reproducibility_avg": 4.333333333333333, "correctness_avg": 3.0, "replies_avg": 10, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;0000-0003-0066-3448", "linkedin": ";;;;", "aff_unique_index": "0;1;0;0", "aff_unique_norm": "University of Science and Technology of China;Beijing University of Posts and Telecommunications", "aff_unique_dep": ";", "aff_unique_url": "http://www.ustc.edu.cn;http://www.bupt.edu.cn/", "aff_unique_abbr": "USTC;BUPT", "aff_campus_unique_index": "1", "aff_campus_unique": ";Beijing", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "e4m1Gu6rVP", "title": "Distilling ChatGPT for Explainable Automated Student Answer Assessment", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Providing explainable and faithful feedback is crucial for automated student answer assessment. In this paper, we introduce a novel framework that explores using ChatGPT, a cutting-edge large language model, for the concurrent tasks of student answer scoring and rationale generation. We identify the appropriate instructions by prompting ChatGPT with different templates to collect the rationales, where inconsistent rationales are refined to align with marking standards. The refined ChatGPT outputs enable us to fine-tune a smaller language model that simultaneously assesses student answers and provides rationales. Extensive experiments on the benchmark dataset show that the proposed method improves the overall QWK score by 11% compared to ChatGPT. Furthermore, our thorough analysis and human evaluation demonstrate that the rationales generated by our proposed method are comparable to those of ChatGPT. 
Our approach provides a viable solution to achieve explainable automated assessment in education", "keywords": "Student answer assessment;Rationale generation;Large language model", "primary_area": "", "supplementary_material": "", "author": "Jiazheng Li;Lin Gui;Yuxiang Zhou;David West;Cesare Aloisi;Yulan He", "authorids": "~Jiazheng_Li4;~Lin_Gui3;~Yuxiang_Zhou3;~David_West2;~Cesare_Aloisi1;~Yulan_He1", "gender": "M;M;;;F;M", "homepage": ";https://zyxnlp.github.io/;http://thehub.aqa.org.uk/About/our-business/research-and-innovation/about-research-and-regulation/research-and-development;https://www.aqa.org.uk;https://www.kcl.ac.uk/people/yulan-he;https://jiazhengli.com/", "dblp": "34/8605-3;203/4838.html;;348/0007;75/5430;155/6074-2", "google_scholar": "https://scholar.google.com.ph/citations?user=1b3Eyx4AAAAJ;https://scholar.google.com/citations?hl=en;;;https://scholar.google.co.uk/citations?user=SP9r32UAAAAJ;https://scholar.google.com/citations?hl=en", "or_profile": "~Lin_Gui3;~Yuxiang_Zhou3;~David_West2;~Cesare_Aloisi1;~Yulan_He1;~Jiazheng_Li2", "aff": "King's College London, University of London;King's College London;AQA;AQA ;King's College London, University of London;King's College London, University of London", "aff_domain": "kcl.ac.uk;kcl.ac.uk;aqa.org.uk;aqa.org.uk;kcl.ac.uk;kcl.ac.uk", "position": "Lecturer;Postdoc;Researcher;Associate Professor;Full Professor;PhD student", "bibtex": "@inproceedings{\nli2023distilling,\ntitle={Distilling Chat{GPT} for Explainable Automated Student Answer Assessment},\nauthor={Jiazheng Li and Lin Gui and Yuxiang Zhou and David West and Cesare Aloisi and Yulan He},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=e4m1Gu6rVP}\n}", "github": "", "project": "", "reviewers": "FgJs;PxdN;JJVy", "site": "https://openreview.net/forum?id=e4m1Gu6rVP", "pdf_size": 0, "rating": "3;3;3", "confidence": "2;3;4", "excitement": "3;3;3", "reproducibility": "3;4;4", "correctness": "4;4;2", "rating_avg": 3.0, "confidence_avg": 3.0, "excitement_avg": 3.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0009-0004-3720-9083;;;0000-0003-3948-5845;", "linkedin": ";;;;yulan-he-277234a/?originalSubdomain=uk;", "aff_unique_index": "0;0;1;1;0;0", "aff_unique_norm": "King's College London;AQA", "aff_unique_dep": ";", "aff_unique_url": "https://www.kcl.ac.uk;https://www.aqa.org.uk", "aff_unique_abbr": "KCL;AQA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United Kingdom" }, { "id": "e5UzmaR8EE", "title": "Towards Interpretable Mental Health Analysis with Large Language Models", "track": "main", "status": "Long Main", "tldr": "", "abstract": "The latest large language models (LLMs) such as ChatGPT, exhibit strong capabilities in automated mental health analysis.\nHowever, existing relevant studies bear several limitations, including inadequate evaluations, lack of prompting strategies, and ignorance of exploring LLMs for explainability.\nTo bridge these gaps, we comprehensively evaluate the mental health analysis and emotional reasoning ability of LLMs on 11 datasets across 5 tasks. We explore the effects of different prompting strategies with unsupervised and distantly supervised emotional information. 
\nBased on these prompts, we explore LLMs for interpretable mental health analysis by instructing them to generate explanations for each of their decisions. \nWe convey strict human evaluations to assess the quality of the generated explanations, leading to a novel dataset with 163 human-assessed explanations.\nWe benchmark existing automatic evaluation metrics on this dataset to guide future related works. \nAccording to the results, ChatGPT shows strong in-context learning ability but still has a significant gap with advanced task-specific methods. \nCareful prompt engineering with emotional cues and expert-written few-shot examples can also effectively improve performance on mental health analysis. \nIn addition, ChatGPT generates explanations that approach human performance, showing its great potential in explainable mental health analysis.", "keywords": "mental health analysis;large language models;prompt engineering;explainability", "primary_area": "", "supplementary_material": "", "author": "Kailai Yang;Shaoxiong Ji;Tianlin Zhang;Qianqian Xie;Ziyan Kuang;Sophia Ananiadou", "authorids": "~Kailai_Yang1;~Shaoxiong_Ji1;~Tianlin_Zhang1;~Qianqian_Xie1;~Ziyan_Kuang1;~Sophia_Ananiadou1", "gender": "M;;M;F;F;F", "homepage": "https://stevekgyang.github.io/;;http://www.zhangtianlin.top/;;https://www.linkedin.com/in/%E7%B4%AB%E5%AB%A3-%E5%86%B5-23800027b?lipi=urn%3Ali%3Apage%3Ad_flagship3_profile_view_base_contact_details%3BQIHhPDWSTrWFRmFJa1cOJg%3D%3D;http://www.manchester.ac.uk/research/Sophia.ananiadou/", "dblp": "277/3317;227/0291;;;;47/4142", "google_scholar": "df4H1aQAAAAJ;;Yy88kOoAAAAJ;UYW7X_0AAAAJ;;https://scholar.google.com.tw/citations?user=quhi-K0AAAAJ", "or_profile": "~Kailai_Yang1;~Shaoxiong_Ji1;~Tianlin_Zhang1;~Qianqian_Xie1;~Ziyan_Kuang1;~Sophia_Ananiadou1", "aff": "University of Manchester;Aalto University;University of Manchester;Yale University;Jiangxi Normal University;University of Manchester", "aff_domain": "cs.manchester.ac.uk;aalto.fi;manchester.ac.uk;yale.edu;jxnu.edu.cn;manchester.ac.uk", "position": "PhD student;PhD student;PhD student;Postdoc;MS student;Full Professor", "bibtex": "@inproceedings{\nyang2023towards,\ntitle={Towards Interpretable Mental Health Analysis with Large Language Models},\nauthor={Kailai Yang and Shaoxiong Ji and Tianlin Zhang and Qianqian Xie and Ziyan Kuang and Sophia Ananiadou},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=e5UzmaR8EE}\n}", "github": "", "project": "", "reviewers": "mk6h;PbQ7;XJA8;NbTf", "site": "https://openreview.net/forum?id=e5UzmaR8EE", "pdf_size": 0, "rating": "4;4;4;4", "confidence": "5;3;5;4", "excitement": "3;3;5;4", "reproducibility": "4;3;4;3", "correctness": "4;3;5;3", "rating_avg": 4.0, "confidence_avg": 4.25, "excitement_avg": 3.75, "reproducibility_avg": 3.5, "correctness_avg": 3.75, "replies_avg": 13, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0003-3142-2516;;0000-0003-0843-1916;0000-0002-9588-7454;;0000-0002-4097-9191", "linkedin": ";;;;;sophia-ananiadou-ba98b63/", "aff_unique_index": "0;1;0;2;3;0", "aff_unique_norm": "University of Manchester;Aalto University;Yale University;Jiangxi Normal University", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.manchester.ac.uk;https://www.aalto.fi;https://www.yale.edu;http://www.jxnu.edu.cn", "aff_unique_abbr": "UoM;Aalto;Yale;JXNU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": 
"0;1;0;2;3;0", "aff_country_unique": "United Kingdom;Finland;United States;China" }, { "id": "e8jvAr4Aaj", "title": "Enhancing Conversational Search: Large Language Model-Aided Informative Query Rewriting", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Query rewriting plays a vital role in enhancing conversational search by transforming context-dependent user queries into standalone forms. Existing approaches primarily leverage human-rewritten queries as labels to train query rewriting models. However, human rewrites may lack sufficient information for optimal retrieval performance. To overcome this limitation, we propose utilizing large language models (LLMs) as query rewriters, enabling the generation of informative query rewrites through well-designed instructions. We define four essential properties for well-formed rewrites and incorporate all of them into the instruction. In addition, we introduce the role of rewrite editors for LLMs when initial query rewrites are available, forming a ``rewrite-then-edit'' process. Furthermore, we propose distilling the rewriting capabilities of LLMs into smaller models to reduce rewriting latency. Our experimental evaluation on the QReCC dataset demonstrates that informative query rewrites can yield substantially improved retrieval performance compared to human rewrites, especially with sparse retrievers.", "keywords": "conversational search;conversational passage retrieval;query rewriting;query reformulation", "primary_area": "", "supplementary_material": "", "author": "Fanghua Ye;Meng Fang;Shenghui Li;Emine Yilmaz", "authorids": "~Fanghua_Ye1;~Meng_Fang1;~Shenghui_Li1;~Emine_Yilmaz1", "gender": "M;M;M;F", "homepage": "https://www.fanghuaye.xyz/;;https://lishenghui.github.io/;https://sites.google.com/site/emineyilmaz/", "dblp": "203/0957;67/463;;36/3270", "google_scholar": "UXN7iUsAAAAJ;IcNYP1oAAAAJ;I1AiTyUAAAAJ;https://scholar.google.com.tw/citations?user=ocmAN4YAAAAJ", "or_profile": "~Fanghua_Ye1;~Meng_Fang1;~Shenghui_Li1;~Emine_Yilmaz1", "aff": "University College London;Eindhoven University of Technology;Uppsala University;Department of Computer Science, University College London", "aff_domain": "ucl.ac.uk;tue.nl;uu.se;cs.ucl.ac.uk", "position": "PhD student;Assistant Professor;PhD student;Full Professor", "bibtex": "@inproceedings{\nye2023enhancing,\ntitle={Enhancing Conversational Search: Large Language Model-Aided Informative Query Rewriting},\nauthor={Fanghua Ye and Meng Fang and Shenghui Li and Emine Yilmaz},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=e8jvAr4Aaj}\n}", "github": "", "project": "", "reviewers": "ve5t;uAia;H9aq", "site": "https://openreview.net/forum?id=e8jvAr4Aaj", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;3;5", "excitement": "3;3;3", "reproducibility": "4;3;5", "correctness": "3;3;4", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 3.0, "reproducibility_avg": 4.0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;", "linkedin": "fanghua-ye-81084587/;;;", "aff_unique_index": "0;1;2;0", "aff_unique_norm": "University College London;Eindhoven University of Technology;Uppsala University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ucl.ac.uk;https://www.tue.nl;https://www.uu.se", "aff_unique_abbr": "UCL;TU/e;UU", "aff_campus_unique_index": "1", "aff_campus_unique": ";London", 
"aff_country_unique_index": "0;1;2;0", "aff_country_unique": "United Kingdom;Netherlands;Sweden" }, { "id": "e8wYLib8HC", "title": "Transformer Working Memory Enables Regular Language Reasoning And Natural Language Length Extrapolation", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Unlike recurrent models, conventional wisdom has it that Transformers cannot perfectly model regular languages.\nInspired by the notion of working memory, we propose a new Transformer variant named RegularGPT. With its novel combination of Weight-Sharing, Adaptive-Depth, and Sliding-Dilated-Attention, RegularGPT constructs working memory along the depth dimension, thereby enabling efficient and successful modeling of regular languages such as PARITY.\nWe further test RegularGPT on the task of natural language length extrapolation and surprisingly find that it rediscovers the local windowed attention effect deemed necessary in prior work for length extrapolation.", "keywords": "Transformer;Algorithmic reasoning;Length extrapolation;Working memory", "primary_area": "", "supplementary_material": "", "author": "Ta-Chung Chi;Ting-Han Fan;Alexander Rudnicky;Peter Ramadge", "authorids": "~Ta-Chung_Chi1;~Ting-Han_Fan1;~Alexander_Rudnicky1;~Peter_Ramadge1", "gender": ";M;M;M", "homepage": ";;http://www.cs.cmu.edu/~air/;http://ee.princeton.edu/people/faculty/peter-j-ramadge", "dblp": "207/7824;213/0948;29/5401;77/3256", "google_scholar": "https://scholar.google.com.tw/citations?user=ZqpdQOoAAAAJ;1mQ3kTEAAAAJ;axOnEnQAAAAJ;BOMboVoAAAAJ", "or_profile": "~Ta-Chung_Chi1;~Ting-Han_Fan1;~Alexander_Rudnicky1;~Peter_Ramadge1", "aff": "Carnegie Mellon University;Princeton University;Carnegie Mellon University;Princeton University", "aff_domain": "cmu.edu;princeton.edu;cmu.edu;princeton.edu", "position": "PhD student;PhD student;Emeritus;Full Professor", "bibtex": "@inproceedings{\nchi2023transformer,\ntitle={Transformer Working Memory Enables Regular Language Reasoning And Natural Language Length Extrapolation},\nauthor={Ta-Chung Chi and Ting-Han Fan and Alexander Rudnicky and Peter Ramadge},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=e8wYLib8HC}\n}", "github": "", "project": "", "reviewers": "AFjC;cBxx;fhAH;wKf5", "site": "https://openreview.net/forum?id=e8wYLib8HC", "pdf_size": 0, "rating": "4;4;4;4", "confidence": "3;4;4;2", "excitement": "3;3;4;2", "reproducibility": "4;4;4;3", "correctness": "4;4;3;2", "rating_avg": 4.0, "confidence_avg": 3.25, "excitement_avg": 3.0, "reproducibility_avg": 3.75, "correctness_avg": 3.25, "replies_avg": 13, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0003-3896-9397;", "linkedin": ";;arudnicky;", "aff_unique_index": "0;1;0;1", "aff_unique_norm": "Carnegie Mellon University;Princeton University", "aff_unique_dep": ";", "aff_unique_url": "https://www.cmu.edu;https://www.princeton.edu", "aff_unique_abbr": "CMU;Princeton", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "eBUgomB8uo", "title": "Towards Multilingual Interlinear Morphological Glossing", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Interlinear Morphological Glosses are annotations produced in the context of language documentation. 
Their goal is to identify morphs occurring in an L1 sentence and to explicit their function and meaning, with the further support of an associated translation in L2. We study here the task of automatic glossing, aiming to provide linguists with adequate tools to facilitate this process. Our formalisation of glossing uses a latent variable Conditional Random Field (CRF), which labels the L1 morphs while simultaneously aligning them to L2 words. In experiments with several under-resourced languages, we show that this approach is both effective and data-efficient and mitigates the problem of annotating unknown morphs. We also discuss various design choices regarding the alignment process and the selection of features. We finally demonstrate that it can benefit from multilingual (pre-)training, achieving results which outperform very strong baselines.", "keywords": "Computational Language Documentation; Interlinear Morphological Glosses; Structured Prediction;", "primary_area": "", "supplementary_material": "", "author": "Shu Okabe;Fran\u00e7ois Yvon", "authorids": "~Shu_Okabe1;~Fran\u00e7ois_Yvon2", "gender": ";M", "homepage": ";http://cv.archives-ouvertes.fr/francois-yvon", "dblp": "268/0912;05/2701.html", "google_scholar": ";https://scholar.google.fr/citations?hl=fr", "or_profile": "~Shu_Okabe1;~Fran\u00e7ois_Yvon2", "aff": "Universit\u00e9 Paris Saclay;LISN-CNRS / Universit\u00e9 Paris Saclay", "aff_domain": "universite-paris-saclay.fr;lisn.fr", "position": "PhD student;Senior Researcher", "bibtex": "@inproceedings{\nokabe2023towards,\ntitle={Towards Multilingual Interlinear Morphological Glossing},\nauthor={Shu Okabe and Fran{\\c{c}}ois Yvon},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=eBUgomB8uo}\n}", "github": "", "project": "", "reviewers": "v4Qx;fF76;H7gr", "site": "https://openreview.net/forum?id=eBUgomB8uo", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;3;4", "excitement": "3;4;3", "reproducibility": "3;2;5", "correctness": "2;4;4", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.3333333333333335, "replies_avg": 10, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0009-0003-0169-3689;0000-0002-7972-7442", "linkedin": "shu-okabe;", "aff_unique_index": "0;0", "aff_unique_norm": "Universit\u00e9 Paris Saclay", "aff_unique_dep": "", "aff_unique_url": "https://www.universite-paris-saclay.fr", "aff_unique_abbr": "UPSaclay", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "France" }, { "id": "eCXfUq3RDf", "title": "Miracle: Towards Personalized Dialogue Generation with Latent-Space Multiple Personal Attribute Control", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Personalized dialogue systems aim to endow the chatbot agent with more anthropomorphic traits for human-like interactions.\nPrevious approaches have explored explicitly user profile modeling using text descriptions, \nimplicit derivation of user embeddings, or utilizing handicraft prompts for ChatGPT-like models. \nHowever, textual personas are limited in describing multi-faceted attributes (\\emph{e.g.}, \\emph{language style, inner character nuances}), implicit embedding suffers from personality sparsity, \nand handicraft prompts lack fine-grained and stable controllability. 
\nHence, these approaches may struggle with complex personalized dialogue generation tasks that require generating controllable responses with multiple personal attributes.\nTo this end, we propose \\textbf{\\textsc{Miracle}}, a novel personalized dialogue generation method through \\textbf{M}ult\\textbf{I}ple Pe\\textbf{R}sonal \\textbf{A}ttributes \\textbf{C}ontrol within \\textbf{L}atent-Space \\textbf{E}nergy-based Models.\nSpecifically, our approach first disentangles complex personality into multi-faceted attributes. \nSubsequently, we employ a conditional variational auto-encoder to align with the dense personalized responses within a latent joint attribute space.\nWe have also tailored a dedicated energy function and customized the ordinary differential equations sampling method to offer flexible attribute composition and precise attribute control.\nExtensive experiments demonstrate that \\textsc{Miracle} outperforms several strong baselines in terms of personality controllability and response generation quality.\nOur dataset and code are available at \\url{https://github.com/LZY-the-boys/MIRACLE}", "keywords": "personalized response generation;dialogue;Natural language generation", "primary_area": "", "supplementary_material": "", "author": "Zhenyi Lu;Wei Wei;Xiaoye Qu;Xian-Ling Mao;Dangyang Chen;Jixiong Chen", "authorids": "~Zhenyi_Lu2;~Wei_Wei14;~Xiaoye_Qu1;~Xian-Ling_Mao1;~Dangyang_Chen1;~Jixiong_Chen1", "gender": "M;M;M;M;M;M", "homepage": "https://orcid.org/0009-0002-8381-3236;https://www.eric-weiwei.com;;https://cs.bit.edu.cn/szdw/jsml/js/mxl/index.htm;;", "dblp": "307/6180;24/4105-2;229/8206;46/9687.html;327/3353;", "google_scholar": ";https://scholar.google.com.sg/citations?hl=en;rT3hqdcAAAAJ;b2DzFF8AAAAJ;;yLINOlUAAAAJ", "or_profile": "~Zhenyi_Lu2;~Wei_Wei14;~Xiaoye_Qu1;~Xian-Ling_Mao1;~Dangyang_Chen1;~Jixiong_Chen1", "aff": "Huazhong University of Science and Technology;Huazhong University of Science and Technology;Shanghai Artificial Intelligence Laboratory;Beijing Institute of Technology;Pingan Technology;", "aff_domain": "hust.edu.cn;hust.edu.cn;pjlab.org.cn;bit.edu.cn;pingan.com.cn;", "position": "MS student;Full Professor;Researcher;Associate Professor;CTO;", "bibtex": "@inproceedings{\nlu2023miracle,\ntitle={Miracle: Towards Personalized Dialogue Generation with Latent-Space Multiple Personal Attribute Control},\nauthor={Zhenyi Lu and Wei Wei and Xiaoye Qu and Xian-Ling Mao and Dangyang Chen and Jixiong Chen},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=eCXfUq3RDf}\n}", "github": "", "project": "", "reviewers": "trw2;j4rF;9UGD;jJFK", "site": "https://openreview.net/forum?id=eCXfUq3RDf", "pdf_size": 0, "rating": "3;3;3;3", "confidence": "4;4;4;4", "excitement": "3;3;4;3", "reproducibility": "3;2;4;4", "correctness": "2;4;3;2", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 3.25, "reproducibility_avg": 3.25, "correctness_avg": 2.75, "replies_avg": 15, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0003-4488-0102;;;;0000-0002-5516-9853", "linkedin": ";;%E6%99%93%E6%99%94-xiaoye-qu-%E7%9E%BF-8b9a0a133/;;;", "aff_unique_index": "0;0;1;2;3", "aff_unique_norm": "Huazhong University of Science and Technology;Shanghai Artificial Intelligence Laboratory;Beijing Institute of Technology;PingAn Technology", "aff_unique_dep": ";;;", 
"aff_unique_url": "http://www.hust.edu.cn;http://www.shailab.org/;http://www.bit.edu.cn/;https://www.pingan.com", "aff_unique_abbr": "HUST;Shanghai AI Lab;BIT;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "eEV5S2EIp9", "title": "Transfer-Free Data-Efficient Multilingual Slot Labeling", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Slot labeling (SL) is a core component of task-oriented dialogue (TOD) systems, where slots and corresponding values are usually language-, task- and domain-specific. Therefore, extending the system to any new language-domain-task configuration requires (re)running an expensive and resource-intensive data annotation process. To mitigate the inherent data scarcity issue, current research on multilingual ToD assumes that sufficient English-language annotated data are always available for particular tasks and domains, and thus operates in a standard cross-lingual transfer setup. In this work, we depart from this often unrealistic assumption. We examine challenging scenarios where such transfer-enabling English annotated data cannot be guaranteed, and focus on bootstrapping multilingual data-efficient slot labelers in transfer-free scenarios directly in the target languages without any English-ready data. We propose a two-stage slot labeling approach (termed TWOSL) which transforms standard multilingual sentence encoders into effective slot labelers. In Stage 1, relying on SL-adapted contrastive learning with only a handful of SL-annotated examples, we turn sentence encoders into task-specific span encoders. In Stage 2, we recast SL from a token classification into a simpler, less data-intensive span classification task.\nOur results on two standard multilingual TOD datasets and across diverse languages confirm the effectiveness and robustness of TWOSL. 
It is especially effective for the most challenging transfer-free few-shot setups, paving the way for quick and data-efficient bootstrapping of multilingual slot labelers for TOD.", "keywords": "Dialogue NLU;Multilingual;Slot Labeling;Data Efficient", "primary_area": "", "supplementary_material": "", "author": "Evgeniia Razumovskaia;Ivan Vuli\u0107;Anna Korhonen", "authorids": "~Evgeniia_Razumovskaia1;~Ivan_Vuli\u01071;~Anna_Korhonen1", "gender": "F;M;", "homepage": "https://evgeniiaraz.github.io/;https://sites.google.com/site/ivanvulic/;https://sites.google.com/site/annakorhonen/", "dblp": "234/7680;77/9768;14/6532", "google_scholar": "grFuVx0AAAAJ;ZX8js60AAAAJ;https://scholar.google.co.uk/citations?user=SCoVoOYAAAAJ", "or_profile": "~Evgeniia_Razumovskaia1;~Ivan_Vuli\u01071;~Anna_Korhonen1", "aff": "University of Cambridge;PolyAI Limited;University of Cambridge", "aff_domain": "cam.ac.uk;poly-ai.com;cam.ac.uk", "position": "PhD student;Senior Scientist;Professor", "bibtex": "@inproceedings{\nrazumovskaia2023transferfree,\ntitle={Transfer-Free Data-Efficient Multilingual Slot Labeling},\nauthor={Evgeniia Razumovskaia and Ivan Vuli{\\'c} and Anna Korhonen},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=eEV5S2EIp9}\n}", "github": "", "project": "", "reviewers": "N33Q;7N7r;qbcu", "site": "https://openreview.net/forum?id=eEV5S2EIp9", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;5;4", "excitement": "4;4;4", "reproducibility": "4;5;4", "correctness": "4;4;4", "rating_avg": 4.0, "confidence_avg": 4.333333333333333, "excitement_avg": 4.0, "reproducibility_avg": 4.333333333333333, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": ";ivan-vuli%C4%87-286b4a81/;anna-korhonen-534a9b5/", "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Cambridge;PolyAI Limited", "aff_unique_dep": ";", "aff_unique_url": "https://www.cam.ac.uk;https://www.poly.ai", "aff_unique_abbr": "Cambridge;PolyAI", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Cambridge;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United Kingdom" }, { "id": "eFnBXtZXIH", "title": "When Reviewers Lock Horns: Finding Disagreements in Scientific Peer Reviews", "track": "main", "status": "Short Main", "tldr": "", "abstract": "To this date, the efficacy of the scientific publishing enterprise fundamentally rests on the strength of the peer review process. The journal editor or the conference chair primarily relies on the expert reviewers' assessment, $\\textit{identify points of agreement and disagreement}$ and try to reach a consensus to make a fair and informed decision on whether to accept or reject a paper. However, with the escalating number of submissions requiring review, especially in top-tier Artificial Intelligence (AI) conferences, the editor/chair, among many other works, invests a significant, sometimes stressful effort to mitigate reviewer disagreements. Here in this work, we introduce a novel task of automatically identifying contradictions among reviewers on a given article. To this end, we introduce $\\textit{ContraSciView}$, a comprehensive review-pair contradiction dataset on around 8.5k papers (with around 28k review pairs containing nearly 50k review pair comments) from the open review-based ICLR and NeurIPS conferences. 
We further propose a baseline model that detects contradictory statements from the review pairs. To the best of our knowledge, we make the first attempt to identify disagreements among peer reviewers automatically. We make our dataset and code public for further investigations.", "keywords": "Contradiction Detection;Peer Reviews;NLP", "primary_area": "", "supplementary_material": "", "author": "Sandeep Kumar;Tirthankar Ghosal;Asif Ekbal", "authorids": "~Sandeep_Kumar9;~Tirthankar_Ghosal1;~Asif_Ekbal1", "gender": "M;M;M", "homepage": "https://www.linkedin.com/in/sandeep-kumar-a51329197;https://elitr.eu/tirthankar-ghosal/;https://ekbalasif.github.io", "dblp": ";;11/3590", "google_scholar": ";https://scholar.google.com/citations?hl=en;https://scholar.google.co.in/citations?user=IAL_F04AAAAJ", "or_profile": "~Sandeep_Kumar9;~Tirthankar_Ghosal1;~Asif_Ekbal1", "aff": "Indian Institute of Technology Patna;Oak Ridge National Laboratory;Indian Institute of Technology, Patna", "aff_domain": "iitp.ac.in;ornl.gov;iitp.ac.in", "position": "PhD student;Scientist;Associate Professor", "bibtex": "@inproceedings{\nkumar2023when,\ntitle={When Reviewers Lock Horns: Finding Disagreements in Scientific Peer Reviews},\nauthor={Sandeep Kumar and Tirthankar Ghosal and Asif Ekbal},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=eFnBXtZXIH}\n}", "github": "", "project": "", "reviewers": "B2WG;WLX5;H8L1;z23x", "site": "https://openreview.net/forum?id=eFnBXtZXIH", "pdf_size": 0, "rating": "4;4;4;4", "confidence": "4;4;4;4", "excitement": "4;3;4;2", "reproducibility": "4;3;3;4", "correctness": "4;2;4;2", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.25, "reproducibility_avg": 3.5, "correctness_avg": 3.0, "replies_avg": 13, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0003-3612-8834", "linkedin": "sandeep-kumar-a51329197/;;asif-ekbal-3b8a4517/?originalSubdomain=in", "aff_unique_index": "0;1;0", "aff_unique_norm": "Indian Institute of Technology Patna;Oak Ridge National Laboratory", "aff_unique_dep": ";", "aff_unique_url": "https://www.iitp.ac.in;https://www.ornl.gov", "aff_unique_abbr": "IIT Patna;ORNL", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Patna;", "aff_country_unique_index": "0;1;0", "aff_country_unique": "India;United States" }, { "id": "eGNwWBfqqs", "title": "CCSRD: Content-Centric Speech Representation Disentanglement Learning for End-to-End Speech Translation", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Deep neural networks have demonstrated their capacity in extracting features from speech inputs. However, these features may include non-linguistic speech factors such as timbre and speaker identity, which are not directly related to translation. In this paper, we propose a content-centric speech representation disentanglement learning framework for speech translation, CCSRD, which decomposes speech representations into content representations and non-linguistic representations via representation disentanglement learning. CCSRD consists of a content encoder that encodes linguistic content information from the speech input, a non-content encoder that models non-linguistic speech features, and a disentanglement module that learns disentangled representations with a cyclic reconstructor, feature reconstructor and speaker classifier trained in a multi-task learning way. 
Experiments on the MuST-C benchmark dataset demonstrate that CCSRD achieves an average improvement of +0.9 BLEU in two settings across five translation directions over the baseline, outperforming state-of-the-art end-to-end speech translation models and cascaded models.", "keywords": "speech translation;representation disentanglement", "primary_area": "", "supplementary_material": "", "author": "Xiaohu Zhao;Haoran Sun;Yikun Lei;shaolin Zhu;Deyi Xiong", "authorids": "~Xiaohu_Zhao1;~Haoran_Sun7;~Yikun_Lei1;~shaolin_Zhu1;~Deyi_Xiong2", "gender": "M;;M;M;M", "homepage": ";;https://tjunlp-lab.github.io/members/students/;https://zsl-nlp.github.io/;https://dyxiong.github.io", "dblp": ";;293/8759;206/8937;55/6548", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;;mxpXRBYAAAAJ;https://scholar.google.com/citations?hl=zh-CN;QPLO3myO5PkC", "or_profile": "~Xiaohu_Zhao1;~Haoran_Sun7;~Yikun_Lei1;~shaolin_Zhu1;~Deyi_Xiong2", "aff": "Tianjin University;Tianjin University;Tianjin University;Tianjin University;Tianjin University", "aff_domain": "tju.edu.cn;tju.edu.cn;tju.edu;tju.edu.cn;tju.edu.cn", "position": "MS student;MS student;MS student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nzhao2023ccsrd,\ntitle={{CCSRD}: Content-Centric Speech Representation Disentanglement Learning for End-to-End Speech Translation},\nauthor={Xiaohu Zhao and Haoran Sun and Yikun Lei and shaolin Zhu and Deyi Xiong},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=eGNwWBfqqs}\n}", "github": "", "project": "", "reviewers": "fmsg;Gjy4;GTrW", "site": "https://openreview.net/forum?id=eGNwWBfqqs", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;4;3", "excitement": "4;3;3", "reproducibility": "3;4;2", "correctness": "3;3;2", "rating_avg": 3.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.0, "correctness_avg": 2.6666666666666665, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;0000-0002-2353-5038", "linkedin": ";;;;", "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Tianjin University", "aff_unique_dep": "", "aff_unique_url": "http://www.tju.edu.cn", "aff_unique_abbr": "TJU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "eHqrdft1wn", "title": "Aligning Language Models to User Opinions", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "An important aspect of developing LLMs that interact with humans is to align models' behavior to their users. It is possible to prompt an LLM into behaving as a certain persona, especially a user group or ideological persona the model captured during its pertaining stage. But, how to best align an LLM with a specific user and not a demographic or ideological group remains an open question. Mining public opinion surveys (by PEW research), we find that the opinions of a user and their demographics and ideologies are not mutual predictors. We use this insight to align LLMs by modeling relevant past user opinions in addition to user demographics and ideology, achieving up to 7 points accuracy gains in predicting public opinions from survey questions across a broad set of topics. 
Our work opens up the research avenues to bring user opinions as an important ingredient in aligning language models.", "keywords": "personalization;large language model", "primary_area": "", "supplementary_material": "", "author": "EunJeong Hwang;Bodhisattwa Prasad Majumder;Niket Tandon", "authorids": "~EunJeong_Hwang1;~Bodhisattwa_Prasad_Majumder1;~Niket_Tandon2", "gender": "F;;M", "homepage": "https://eujhwang.github.io/;https://www.majumderb.com/;https://niket.tandon.info", "dblp": ";138/6177;29/9923", "google_scholar": "Z0TA4NEAAAAJ;cEM1a5gAAAAJ;9uWuZkUAAAAJ", "or_profile": "~EunJeong_Hwang1;~Bodhisattwa_Prasad_Majumder1;~Niket_Tandon2", "aff": "University of British Columbia;University of California, San Diego;Allen Institute for Artificial Intelligence", "aff_domain": "cs.ubc.ca;ucsd.edu;allenai.org", "position": "PhD student;PhD student;Researcher", "bibtex": "@inproceedings{\nhwang2023aligning,\ntitle={Aligning Language Models to User Opinions},\nauthor={EunJeong Hwang and Bodhisattwa Prasad Majumder and Niket Tandon},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=eHqrdft1wn}\n}", "github": "", "project": "", "reviewers": "cFeH;Qyo6;iqqj", "site": "https://openreview.net/forum?id=eHqrdft1wn", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;3", "excitement": "2;2;4", "reproducibility": "5;5;3", "correctness": "4;3;4", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 2.6666666666666665, "reproducibility_avg": 4.333333333333333, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": ";;", "aff_unique_index": "0;1;2", "aff_unique_norm": "University of British Columbia;University of California, San Diego;Allen Institute for Artificial Intelligence", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ubc.ca;https://www.ucsd.edu;https://allenai.org", "aff_unique_abbr": "UBC;UCSD;AI2", "aff_campus_unique_index": "1", "aff_campus_unique": ";San Diego", "aff_country_unique_index": "0;1;1", "aff_country_unique": "Canada;United States" }, { "id": "eNu9odz1sz", "title": "Universal Domain Adaptation for Robust Handling of Distributional Shifts in NLP", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "When deploying machine learning systems to the wild, it is highly desirable for them to effectively leverage prior knowledge to the unfamiliar domain while also firing alarms to anomalous inputs. In order to address these requirements, Universal Domain Adaptation (UniDA) has emerged as a novel research area in computer vision, focusing on achieving both adaptation ability and robustness (i.e., the ability to detect out-of-distribution samples). While UniDA has led significant progress in computer vision, its application on language input still needs to be explored despite its feasibility. In this paper, we propose a comprehensive benchmark for natural language that offers thorough viewpoints of the model\u2019s generalizability and robustness. Our benchmark encompasses multiple datasets with varying difficulty levels and characteristics, including temporal shifts and diverse domains. 
On top of our testbed, we validate existing UniDA methods from computer vision and state-of-the-art domain adaptation techniques from NLP literature, yielding valuable findings: We observe that UniDA methods originally designed for image input can be effectively transferred to the natural language domain while also underscoring the effect of adaptation difficulty in determining the model\u2019s performance.", "keywords": "Domain Adaptation;Universal Domain Adaptation;Out-of-Distribution Detection", "primary_area": "", "supplementary_material": "", "author": "Hyuhng Joon Kim;Hyunsoo Cho;Sang-Woo Lee;Junyeob Kim;Choonghyun Park;Sang-goo Lee;Kang Min Yoo;Taeuk Kim", "authorids": "~Hyuhng_Joon_Kim1;~Hyunsoo_Cho1;~Sang-Woo_Lee1;~Junyeob_Kim1;~Choonghyun_Park1;~Sang-goo_Lee1;~Kang_Min_Yoo2;~Taeuk_Kim1", "gender": "M;M;M;M;M;M;M;M", "homepage": "http://heyjoonkim.github.io;https://hyunsoocho77.github.io;https://www.sang-woo-lee.com/;https://github.com/juny116;http://ids.snu.ac.kr/site/members/M_Choonghyun_Park.html;;;https://galsang.github.io", "dblp": "321/0995;86/125;31/5983-1;321/1099;331/6012;67/5511;163/5657;205/3110", "google_scholar": "NHFFB4gAAAAJ;https://scholar.google.com/citations?hl=ko;https://scholar.google.co.kr/citations?user=TMTTMuQAAAAJ;ffUhHtUAAAAJ;rzP87zgAAAAJ;;BqaWtH8AAAAJ;eH5uq7wAAAAJ", "or_profile": "~Hyuhng_Joon_Kim1;~Hyunsoo_Cho1;~Sang-Woo_Lee1;~Junyeob_Kim1;~Choonghyun_Park1;~Sang-goo_Lee1;~Kang_Min_Yoo2;~Taeuk_Kim1", "aff": "Seoul National University;Seoul National University;Korea Advanced Institute of Science & Technology;Seoul National University;Seoul National University;Seoul National University;NAVER;Hanyang University", "aff_domain": "snu.ac.kr;snu.ac.kr;kaist.ac.kr;snu.ac.kr;snu.ac.kr;snu.ac.kr;navercorp.com;hanyang.ac.kr", "position": "PhD student;PhD student;Adjunct Professor;PhD student;MS student;Full Professor;Researcher;Assistant Professor", "bibtex": "@inproceedings{\nkim2023universal,\ntitle={Universal Domain Adaptation for Robust Handling of Distributional Shifts in {NLP}},\nauthor={Hyuhng Joon Kim and Hyunsoo Cho and Sang-Woo Lee and Junyeob Kim and Choonghyun Park and Sang-goo Lee and Kang Min Yoo and Taeuk Kim},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=eNu9odz1sz}\n}", "github": "", "project": "", "reviewers": "pfYB;17io;wuhF", "site": "https://openreview.net/forum?id=eNu9odz1sz", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;3", "excitement": "3;4;2", "reproducibility": "4;4;4", "correctness": "3;4;4", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.0, "reproducibility_avg": 4.0, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 8, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0000-0001-9680-4828;0000-0001-5701-0814;;;0000-0001-6919-7727", "linkedin": "hyuhng-joon-kim-802624168/;;;;;;;\ud0dc\uc6b1-\uae40-07125a13a/", "aff_unique_index": "0;0;1;0;0;0;2;3", "aff_unique_norm": "Seoul National University;Korea Advanced Institute of Science and Technology;NAVER Corporation;Hanyang University", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.snu.ac.kr;https://www.kaist.ac.kr;https://www.naver.com;https://www.hanyang.ac.kr", "aff_unique_abbr": "SNU;KAIST;NAVER;HYU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "South Korea" }, { "id": "eSM4RWpuJF", "title": "A Hierarchical 
Encoding-Decoding Scheme for Abstractive Multi-document Summarization", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Pre-trained language models (PLMs) have achieved outstanding achievements in abstractive single-document summarization (SDS). However, such benefits may not fully extend to multi-document summarization (MDS), where the handling of cross-document information is more complex. Previous works either design new MDS architectures or apply PLMs bluntly with concatenated source documents as a reformulated SDS task. While the former does not utilize previous pre-training efforts and may not generalize well across different domains, the latter may not sufficiently attend to the intricate cross-document relationships unique to MDS tasks. Instead, we enforce hierarchy on both the encoder and decoder to better utilize a PLM to facilitate multi-document interactions for the MDS task. Across 10 MDS benchmarks from various domains, our method outperforms or is competitive with the previous best models, including those with additional MDS pre-training or with more parameters. It outperforms its corresponding PLM backbone by up to 3 Rouge-L and is favored by humans.", "keywords": "multi-document summarization;abstractive summarization;pre-trained language models;hierarchical", "primary_area": "", "supplementary_material": "", "author": "Chenhui Shen;Liying Cheng;Xuan-Phi Nguyen;Yang You;Lidong Bing", "authorids": "~Chenhui_Shen2;~Liying_Cheng1;~Xuan-Phi_Nguyen1;~Yang_You1;~Lidong_Bing2", "gender": ";F;;M;", "homepage": ";https://liyingcheng95.github.io/;;https://www.comp.nus.edu.sg/~youy/;", "dblp": ";221/0115;;33/8167-1.html;", "google_scholar": ";https://scholar.google.com.sg/citations?user=xkZCRy0kBHEC;;jF4dPZwAAAAJ;", "or_profile": "~Chenhui_Shen2;~Liying_Cheng1;~Xuan-Phi_Nguyen1;~Yang_You1;~Lidong_Bing2", "aff": ";Alibaba Group;;National University of Singapore;", "aff_domain": ";alibaba-inc.com;;nus.edu.sg;", "position": ";Researcher;;Professor;", "bibtex": "@inproceedings{\nshen2023a,\ntitle={A Hierarchical Encoding-Decoding Scheme for Abstractive Multi-document Summarization},\nauthor={Chenhui Shen and Liying Cheng and Xuan-Phi Nguyen and Yang You and Lidong Bing},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=eSM4RWpuJF}\n}", "github": "", "project": "", "reviewers": "62Tb;VCWy;MRbH", "site": "https://openreview.net/forum?id=eSM4RWpuJF", "pdf_size": 0, "rating": "4;4;4", "confidence": "5;4;4", "excitement": "3;3;4", "reproducibility": "3;5;4", "correctness": "2;4;3", "rating_avg": 4.0, "confidence_avg": 4.333333333333333, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.0, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;", "linkedin": ";;;yang-you-0b92914b/;", "aff_unique_index": "0;1", "aff_unique_norm": "Alibaba Group;National University of Singapore", "aff_unique_dep": ";", "aff_unique_url": "https://www.alibaba.com;https://www.nus.edu.sg", "aff_unique_abbr": "Alibaba;NUS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "China;Singapore" }, { "id": "eTDs4UY52h", "title": "Automatic Debate Evaluation with Argumentation Semantics and Natural Language Argument Graph Networks", "track": "main", "status": "Long Main", "tldr": "", "abstract": "The lack of annotated data on professional argumentation and 
complete argumentative debates has led to the oversimplification and the inability of approaching more complex natural language processing tasks. Such is the case of the automatic evaluation of complete professional argumentative debates. In this paper, we propose an original hybrid method to automatically predict the winning stance in this kind of debates. For that purpose, we combine concepts from argumentation theory such as argumentation frameworks and semantics, with Transformer-based architectures and neural graph networks. Furthermore, we obtain promising results that lay the basis on an unexplored new instance of the automatic analysis of natural language arguments.", "keywords": "Argument Evaluation;Debate Analysis;Argumentation Theory;Computational Argumentation", "primary_area": "", "supplementary_material": "", "author": "Ramon Ruiz-Dolz;Stella Heras;Ana Garcia", "authorids": "~Ramon_Ruiz-Dolz1;~Stella_Heras1;~Ana_Garcia1", "gender": "Not Specified;;", "homepage": "https://raruidol.github.io;;http://www.upv.es/ficha-personal/amgarcia", "dblp": "242/1924;16/17;", "google_scholar": "https://scholar.google.es/citations?user=nHVCVWgAAAAJ;https://scholar.google.es/citations?user=PXnFF2YAAAAJ;", "or_profile": "~Ramon_Ruiz-Dolz1;~Stella_Heras1;~Ana_Garcia1", "aff": "UPV, Universidad Polit\u00e9cnica de Valencia;Valencian Research Institute for Artificial Intelligence;Universitat Polit\u00e8cnica de Val\u00e8ncia", "aff_domain": "dsic.upv.es;vrain.upv.es;upv.es", "position": "PhD student;Researcher;Full Professor", "bibtex": "@inproceedings{\nruiz-dolz2023automatic,\ntitle={Automatic Debate Evaluation with Argumentation Semantics and Natural Language Argument Graph Networks},\nauthor={Ramon Ruiz-Dolz and Stella Heras and Ana Garcia},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=eTDs4UY52h}\n}", "github": "", "project": "", "reviewers": "6Vnd;5zCQ;cvvq", "site": "https://openreview.net/forum?id=eTDs4UY52h", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;4;5", "excitement": "3;4;3", "reproducibility": "2;3;4", "correctness": "3;4;3", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0001-6212-9377;", "linkedin": ";stellaheras;", "aff_unique_index": "0;1;2", "aff_unique_norm": "Universidad Polit\u00e9cnica de Valencia;Valencian Research Institute for Artificial Intelligence;Universitat Polit\u00e8cnica de Val\u00e8ncia", "aff_unique_dep": ";;", "aff_unique_url": "https://www.upv.es;https://www.vrai.es;https://www.upv.es", "aff_unique_abbr": "UPV;VRAI;UPV", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Spain" }, { "id": "eWW0KQhsHe", "title": "CHEF in the Language Kitchen: A Generative Data Augmentation Leveraging Korean Morpheme Ingredients", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Korean morphological variations present unique opportunities and challenges in natural language processing (NLP), necessitating an advanced understanding of morpheme-based sentence construction. The complexity of morphological variations allows for diverse sentence forms based on the syntactic-semantic integration of functional morphemes (i.e., affixes) to lexical morphemes (i.e., roots). 
With this in mind, we propose a method - CHEF, replicating the morphological transformations inherent in sentences based on lexical and functional morpheme combinations through generative data augmentation. CHEF operates using a morpheme blender and a label discriminator, thereby enhancing the diversity of Korean sentence forms by capturing the properties of agglutination while maintaining label consistency. We conduct experiments on Korean multiple classification datasets, improving model performance in full- and few-shot settings. Our proposed method boosts performance beyond the preceding data augmentation methods without incurring external data usage. We demonstrate that our approach achieves comparable results yielded by augmentation techniques that use large language models (LLMs).", "keywords": "Data Augmentation;Morpheme Blender;Label Discriminator;Contrastive Learning;Korean Language", "primary_area": "", "supplementary_material": "", "author": "Jaehyung Seo;Hyeonseok Moon;Jaewook Lee;Sugyeong Eo;Chanjun Park;Heuiseok Lim", "authorids": "~Jaehyung_Seo1;~Hyeonseok_Moon1;~Jaewook_Lee7;~Sugyeong_Eo1;~Chanjun_Park1;~Heuiseok_Lim1", "gender": "M;M;M;F;M;M", "homepage": "https://j-seo.github.io/;;;;http://parkchanjun.github.io/;http://nlp.korea.ac.kr", "dblp": "298/7721;295/3184.html;;295/3502;268/1379;127/4881", "google_scholar": "V8bFAUIAAAAJ;queGQ5UAAAAJ;1-ioxVcAAAAJ;https://scholar.google.co.kr/citations?user=s4GjpoEAAAAJ;085jNAMAAAAJ;HMTkz7oAAAAJ", "or_profile": "~Jaehyung_Seo1;~Hyeonseok_Moon1;~Jaewook_Lee7;~Sugyeong_Eo1;~Chanjun_Park1;~Heuiseok_Lim1", "aff": "Korea University;Korea University;Korea University;Korea University;Upstage;Korea University", "aff_domain": "korea.ac.kr;korea.ac.kr;korea.ac.kr;korea.ac.kr;upstage.ai;korea.ac.kr", "position": "PhD student;PhD student;PhD student;PhD student;Principal Researcher;Full Professor", "bibtex": "@inproceedings{\nseo2023chef,\ntitle={{CHEF} in the Language Kitchen: A Generative Data Augmentation Leveraging Korean Morpheme Ingredients},\nauthor={Jaehyung Seo and Hyeonseok Moon and Jaewook Lee and Sugyeong Eo and Chanjun Park and Heuiseok Lim},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=eWW0KQhsHe}\n}", "github": "", "project": "", "reviewers": "nopA;p68b;ACJB", "site": "https://openreview.net/forum?id=eWW0KQhsHe", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;2;3", "excitement": "3;3;4", "reproducibility": "4;4;4", "correctness": "4;4;4", "rating_avg": 4.0, "confidence_avg": 2.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.0, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-4761-9818;0000-0002-0841-4262;0000-0001-8621-2060;0000-0002-8008-6160;0000-0002-7200-9632;", "linkedin": "jaehyungseo-datascientist/?originalSubdomain=kr;;jaewook-lee-4a19ba343/;%EC%88%98%EA%B2%BD-%EC%96%B4-21a23015b/;bcj1210/;", "aff_unique_index": "0;0;0;0;1;0", "aff_unique_norm": "Korea University;Upstage", "aff_unique_dep": ";", "aff_unique_url": "https://www.korea.ac.kr;", "aff_unique_abbr": "KU;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "South Korea;" }, { "id": "eXV8sdO5HL", "title": "Multi-level Contrastive Learning for Script-based Character Understanding", "track": "main", "status": "Long Main", "tldr": "", "abstract": "In this work, we tackle the 
scenario of understanding characters in scripts, which aims to learn the characters' personalities and identities from their utterances. We begin by analyzing several challenges in this scenario, and then propose a multi-level contrastive learning framework to capture characters' global information in a fine-grained manner. To validate the proposed framework, we conduct extensive experiments on three character understanding sub-tasks by comparing with strong pre-trained language models, including SpanBERT, Longformer, BigBird and ChatGPT-3.5. Experimental results demonstrate that our method improves the performances by a considerable margin. Through further in-depth analysis, we show the effectiveness of our method in addressing the challenges and provide more hints on the scenario of character understanding. We will open-source our work in this \\href{https://github.com/David-Li0406/Script-based-Character-Understanding}{URL}.", "keywords": "Natural Language Processing;Character Understanding;Contrastive Learning", "primary_area": "", "supplementary_material": "", "author": "Dawei Li;Hengyuan Zhang;Yanran Li;Shiping Yang", "authorids": "~Dawei_Li5;~Hengyuan_Zhang1;~Yanran_Li1;~Shiping_Yang1", "gender": "M;M;F;M", "homepage": "https://david-li0406.github.io/;https://rattlesnakey.github.io/;https://yanran.li/;https://maybenotime.github.io/", "dblp": "13/5856-8;;;", "google_scholar": "JaX6HGAAAAAJ;;;https://scholar.google.cz/citations?hl=zh-CN", "or_profile": "~Dawei_Li5;~Hengyuan_Zhang1;~Yanran_Li1;~Shiping_Yang1", "aff": "University of California, San Diego;Tencent AI Lab;;Peking University", "aff_domain": "ucsd.edu;tencent.com;;pku.edu.cn", "position": "MS student;Intern;;Intern", "bibtex": "@inproceedings{\nli2023multilevel,\ntitle={Multi-level Contrastive Learning for Script-based Character Understanding},\nauthor={Dawei Li and Hengyuan Zhang and Yanran Li and Shiping Yang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=eXV8sdO5HL}\n}", "github": "", "project": "", "reviewers": "metd;Cuxc;ydTT;zjYA", "site": "https://openreview.net/forum?id=eXV8sdO5HL", "pdf_size": 0, "rating": "4;4;4;4", "confidence": "4;4;3;3", "excitement": "3;3;4;3", "reproducibility": "4;4;3;3", "correctness": "3;4;4;3", "rating_avg": 4.0, "confidence_avg": 3.5, "excitement_avg": 3.25, "reproducibility_avg": 3.5, "correctness_avg": 3.5, "replies_avg": 14, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0009-0000-1991-2163;;0009-0005-9589-7408", "linkedin": ";;;shiping-yang-8b6b27263/", "aff_unique_index": "0;1;2", "aff_unique_norm": "University of California, San Diego;Tencent;Peking University", "aff_unique_dep": ";Tencent AI Lab;", "aff_unique_url": "https://www.ucsd.edu;https://ai.tencent.com;http://www.pku.edu.cn", "aff_unique_abbr": "UCSD;Tencent AI Lab;Peking U", "aff_campus_unique_index": "0", "aff_campus_unique": "San Diego;", "aff_country_unique_index": "0;1;1", "aff_country_unique": "United States;China" }, { "id": "eaUi1mcvrM", "title": "INSTRUCTSCORE: Towards Explainable Text Generation Evaluation with Automatic Feedback", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Automatically evaluating the quality of language generation is critical. Although recent learned metrics show high correlation with human judgement, these metrics do not provide explicit explanation of their verdict, nor associate the scores with defects in the generated text. 
To address this limitation, we present INSTRUCTSCORE, a fine-grained explainable evaluation metric for text generation. By harnessing both explicit human instruction and the implicit knowledge of GPT-4, we fine-tune a text evaluation metric based on LLaMA, producing both a score for generated text and a human readable diagnostic report. We evaluate INSTRUCTSCORE on a variety of generation tasks, including translation, captioning, data-to-text, and commonsense generation. Experiments show that our 7B model surpasses all other unsupervised metrics, including those based on 175B GPT-3 and GPT-4. Surprisingly, our INSTRUCTSCORE, even without direct supervision from human-rated data, achieves performance levels on par with state-of-the-art metrics like COMET22, which were fine-tuned on human ratings.", "keywords": "Text generation evaluation;Explainable metric", "primary_area": "", "supplementary_material": "", "author": "Wenda Xu;Danqing Wang;Liangming Pan;Zhenqiao Song;Markus Freitag;William Yang Wang;Lei Li", "authorids": "~Wenda_Xu1;~Danqing_Wang1;~Liangming_Pan1;~Zhenqiao_Song1;~Markus_Freitag2;~William_Yang_Wang2;~Lei_Li11", "gender": "M;F;M;F;M;M;M", "homepage": "https://xu1998hz.github.io/;;https://liangmingpan.bio;https://jocelynsong.github.io/;;https://www.cs.cmu.edu/~leili;https://www.cs.ucsb.edu/~william/", "dblp": ";226/6524.html;186/9707;227/7889;57/8503;13/7007-5.html;08/9282", "google_scholar": "https://scholar.google.co.il/citations?user=hUh7qCcAAAAJ;https://scholar.google.com/citations?hl=en-US;JcjjOTUAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=en;BYXqAlwAAAAJ;gf8Ms_8AAAAJ", "or_profile": "~Wenda_Xu1;~Danqing_Wang1;~Liangming_Pan1;~Zhenqiao_Song1;~Markus_Freitag2;~Lei_Li11;~William_Wang1", "aff": "University of California, Santa Barbara;University of California, Santa Barbara;University of California, Santa Barbara;University of California, Santa Barbara;Google;Computer Science Department, UC Santa Barbara;UC Santa Barbara", "aff_domain": "ucsb.edu;ucsb.edu;ucsb.edu;ucsb.edu;google.com;cs.ucsb.edu;ucsb.edu", "position": "PhD student;PhD student;Postdoc;PhD student;Researcher;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nxu2023instructscore,\ntitle={{INSTRUCTSCORE}: Towards Explainable Text Generation Evaluation with Automatic Feedback},\nauthor={Wenda Xu and Danqing Wang and Liangming Pan and Zhenqiao Song and Markus Freitag and William Yang Wang and Lei Li},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=eaUi1mcvrM}\n}", "github": "", "project": "", "reviewers": "k7RG;p5F9;uTpq", "site": "https://openreview.net/forum?id=eaUi1mcvrM", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;3", "excitement": "3;3;4", "reproducibility": "3;4;2", "correctness": "4;4;3", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.0, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;0000-0003-3095-9776;", "linkedin": "wenda-xu-866040163/;;;;markus-freitag-7b17b4101/;;", "aff_unique_index": "0;0;0;0;1;0;0", "aff_unique_norm": "University of California, Santa Barbara;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.ucsb.edu;https://www.google.com", "aff_unique_abbr": "UCSB;Google", "aff_campus_unique_index": "0;0;0;0;1;0;0", "aff_campus_unique": "Santa 
Barbara;Mountain View", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "ebSOK1nV2r", "title": "Answering Questions by Meta-Reasoning over Multiple Chains of Thought", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Modern systems for multi-hop question answering (QA) typically break questions into a sequence of reasoning steps, termed chain-of-thought (CoT), before arriving at a final answer. Often, multiple chains are sampled and aggregated through a voting mechanism over the final answers, but the intermediate steps themselves are discarded. While such approaches improve performance, they do not consider the relations between intermediate steps across chains and do not provide a unified explanation for the predicted answer. We introduce Multi-Chain Reasoning (MCR), an approach which prompts large language models to meta-reason over multiple chains of thought, rather than aggregate their answers. MCR examines different reasoning chains, mixes information between them and selects the most relevant facts in generating an explanation and predicting the answer. MCR outperforms strong baselines on 7 multi-hop QA datasets. Moreover, our analysis reveals that MCR explanations exhibit high quality, enabling humans to verify its answers.", "keywords": "Question Answering;Multi-hop Question Answering;Reasoning;Few-shot;Chain of Thought", "primary_area": "", "supplementary_material": "", "author": "Ori Yoran;Tomer Wolfson;Ben Bogin;Uri Katz;Daniel Deutch;Jonathan Berant", "authorids": "~Ori_Yoran1;~Tomer_Wolfson1;~Ben_Bogin1;~Uri_Katz1;~Daniel_Deutch1;~Jonathan_Berant1", "gender": "M;M;M;M;M;M", "homepage": "https://www.oriyoran.com/;;https://benbogin.github.io/;https://katzurik.github.io/;https://www.cs.tau.ac.il/~danielde/;http://www.cs.tau.ac.il/~joberant/", "dblp": "290/1285.html;225/5206.html;202/2034;94/11232;;31/8178", "google_scholar": "xPEKwGwAAAAJ;6jFwxg4AAAAJ;IdoSF2YAAAAJ;DkQ5W4wAAAAJ;https://scholar.google.com.tw/citations?user=KvIYIokAAAAJ;https://scholar.google.co.il/citations?user=xCYHonIAAAAJ", "or_profile": "~Ori_Yoran1;~Tomer_Wolfson1;~Ben_Bogin1;~Uri_Katz1;~Daniel_Deutch1;~Jonathan_Berant1", "aff": "Tel Aviv University;Tel Aviv University;Allen Institute for Artificial Intelligence;Bar-Ilan University;School of Computer Science, Tel Aviv University;Tel Aviv University", "aff_domain": "tau.ac.il;tau.post.ac.il;allenai.org;biu.ac.il;cs.tau.ac.il;tau.ac.il", "position": "PhD student;PhD student;Postdoc;PhD student;Associate Professor;Associate Professor", "bibtex": "@inproceedings{\nyoran2023answering,\ntitle={Answering Questions by Meta-Reasoning over Multiple Chains of Thought},\nauthor={Ori Yoran and Tomer Wolfson and Ben Bogin and Uri Katz and Daniel Deutch and Jonathan Berant},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=ebSOK1nV2r}\n}", "github": "", "project": "", "reviewers": "hqxw;reqd;aP2G", "site": "https://openreview.net/forum?id=ebSOK1nV2r", "pdf_size": 0, "rating": "5;5;5", "confidence": "3;4;4", "excitement": "3;4;4", "reproducibility": "4;4;4", "correctness": "4;4;4", "rating_avg": 5.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.0, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0009-0002-7404-7736;;;;", "linkedin": "ori-yoran-8022ba140/;;;;;", "aff_unique_index": 
"0;0;1;2;0;0", "aff_unique_norm": "Tel Aviv University;Allen Institute for Artificial Intelligence;Bar-Ilan University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.tau.ac.il;https://allenai.org;https://www.biu.ac.il", "aff_unique_abbr": "TAU;AI2;BIU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Tel Aviv", "aff_country_unique_index": "0;0;1;0;0;0", "aff_country_unique": "Israel;United States" }, { "id": "edwSiVzFpU", "title": "End-to-end Task-oriented Dialogue: A Survey of Tasks, Methods, and Future Directions", "track": "main", "status": "Long Main", "tldr": "", "abstract": "End-to-end task-oriented dialogue (EToD) can directly generate responses in an end-to-end fashion without modular training, which attracts escalating popularity. The advancement of deep neural networks, especially the successful use of large pre-trained models, has further led to significant progress in EToD research in recent years. In this paper, we present a thorough review and provide a unified perspective to summarize existing approaches as well as recent trends to advance the development of EToD research. The contributions of this paper can be summarized: (1) First survey: to our knowledge, we take the first step to present a thorough survey of this research field; (2) New taxonomy: we first introduce a unified perspective for EToD, including (i) Modularly EToD and (ii) Fully EToD; (3) New Frontiers: we discuss some potential frontier areas as well as the corresponding challenges, hoping to spur breakthrough research in EToD field; (4) Abundant resources: we build a public website, where EToD researchers could directly access the recent progress. We hope this work can serve as a thorough reference for the EToD research community.", "keywords": "End-to-End Task-Oriented Dialogue (EToD);Task-oriented Dialogue Systems (ToD);Pre-trained Models in Dialogue Systems", "primary_area": "", "supplementary_material": "", "author": "Libo Qin;Wenbo Pan;Qiguang Chen;Lizi Liao;Zhou Yu;Yue Zhang;Wanxiang Che;Min Li", "authorids": "~Libo_Qin1;~Wenbo_Pan1;~Qiguang_Chen1;~Lizi_Liao1;~Zhou_Yu1;~Yue_Zhang7;~Wanxiang_Che1;~Min_Li10", "gender": ";;M;F;F;M;M;F", "homepage": ";;https://scholar.google.com/citations?user=8j8AfF0AAAAJ;https://liziliao.github.io/;http://www.cs.columbia.edu/~zhouyu/;http://frcchang.github.io;http://ir.hit.edu.cn/~car/;http://bioinformatics.csu.edu.cn/limin/index_en.html", "dblp": ";;292/9953;149/1249;83/3205;47/722-4;https://dblp.uni-trier.de/pers/hd/c/Che:Wanxiang;", "google_scholar": ";;8j8AfF0AAAAJ;https://scholar.google.com.sg/citations?user=W2b08EUAAAAJ;https://scholar.google.com.tw/citations?user=jee2Dy0AAAAJ;;SVlQ6IEAAAAJ;w47WJE4AAAAJ", "or_profile": "~Libo_Qin1;~Wenbo_Pan1;~Qiguang_Chen1;~Lizi_Liao1;~Zhou_Yu1;~Yue_Zhang7;~Wanxiang_Che1;~Min_Li10", "aff": ";;Harbin Institute of Technology;Singapore Management University;Columbia University;Westlake University;Harbin Institute of Technology;Central South University", "aff_domain": ";;hit.edu.cn;smu.edu.sg;columbia.edu;westlake.edu.cn;hit.edu.cn;csu.edu.cn", "position": ";;PhD student;Assistant Professor;Assistant Professor;Full Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nqin2023endtoend,\ntitle={End-to-end Task-oriented Dialogue: A Survey of Tasks, Methods, and Future Directions},\nauthor={Libo Qin and Wenbo Pan and Qiguang Chen and Lizi Liao and Zhou Yu and Yue Zhang and Wanxiang Che and Min Li},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language 
Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=edwSiVzFpU}\n}", "github": "", "project": "", "reviewers": "334w;TKmZ;ynBS", "site": "https://openreview.net/forum?id=edwSiVzFpU", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;4", "excitement": "3;3;4", "reproducibility": "4;4;4", "correctness": "3;4;4", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.0, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 8, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;0000-0002-5214-2268;;", "linkedin": ";;;;;;;", "aff_unique_index": "0;1;2;3;0;4", "aff_unique_norm": "Harbin Institute of Technology;Singapore Management University;Columbia University;Westlake University;Central South University", "aff_unique_dep": ";;;;", "aff_unique_url": "http://www.hit.edu.cn/;https://www.smu.edu.sg;https://www.columbia.edu;https://www.westlake.edu.cn;https://www.csu.edu.cn", "aff_unique_abbr": "HIT;SMU;Columbia;WU;CSU", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Harbin;", "aff_country_unique_index": "0;1;2;0;0;0", "aff_country_unique": "China;Singapore;United States" }, { "id": "eeP1y7zPQ7", "title": "Fast and Robust Early-Exiting Framework for Autoregressive Language Models with Synchronized Parallel Decoding", "track": "main", "status": "Long Main", "tldr": "", "abstract": "To tackle the high inference latency exhibited by autoregressive language models, previous studies have proposed an early-exiting framework that allocates adaptive computation paths for each token based on the complexity of generating the subsequent token. However, we observed several shortcomings, including performance degradation caused by a state copying mechanism or numerous exit paths, and sensitivity to exit confidence thresholds. Consequently, we propose a Fast and Robust Early-Exiting (FREE) framework, which incorporates a shallow-deep module and a synchronized parallel decoding. Our framework enables faster inference by synchronizing the decoding process of the current token with previously stacked early-exited tokens. Furthermore, as parallel decoding allows us to observe predictions from both shallow and deep models, we present a novel adaptive threshold estimator that exploits a Beta mixture model to determine suitable confidence thresholds. 
We empirically demonstrated the superiority of our proposed framework on extensive generation tasks.", "keywords": "Efficient Decoding of Language Model;Early-Exiting Framework;Parallel Decoding;Adaptive Confidence Estimation", "primary_area": "", "supplementary_material": "", "author": "Sangmin Bae;Jongwoo Ko;Hwanjun Song;Se-Young Yun", "authorids": "~Sangmin_Bae1;~Jongwoo_Ko1;~Hwanjun_Song2;~Se-Young_Yun1", "gender": "M;M;M;M", "homepage": "https://www.raymin0223.com;https://sites.google.com/view/jongwooko;https://songhwanjun.github.io/;https://fbsqkd.github.io", "dblp": "91/1588;286/1503;204/3381;23/8862", "google_scholar": "T5rHY14AAAAJ;l2jkwHwAAAAJ;Ijzuc-8AAAAJ;X_IAjb8AAAAJ", "or_profile": "~Sangmin_Bae1;~Jongwoo_Ko1;~Hwanjun_Song2;~Se-Young_Yun1", "aff": "Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Amazon Web Services;KAIST", "aff_domain": "kaist.ac.kr;kaist.ac.kr;amazon.com;kaist.ac.kr", "position": "PhD student;PhD student;Research Scientist;Assistant Professor", "bibtex": "@inproceedings{\nbae2023fast,\ntitle={Fast and Robust Early-Exiting Framework for Autoregressive Language Models with Synchronized Parallel Decoding},\nauthor={Sangmin Bae and Jongwoo Ko and Hwanjun Song and Se-Young Yun},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=eeP1y7zPQ7}\n}", "github": "", "project": "", "reviewers": "Bvg2;ZsSB;Tuxi;e6CC", "site": "https://openreview.net/forum?id=eeP1y7zPQ7", "pdf_size": 0, "rating": "5;5;5;5", "confidence": "4;3;4;4", "excitement": "4;4;4;3", "reproducibility": "5;4;4;4", "correctness": "4;5;3;3", "rating_avg": 5.0, "confidence_avg": 3.75, "excitement_avg": 3.75, "reproducibility_avg": 4.25, "correctness_avg": 3.75, "replies_avg": 14, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0002-1105-0818;", "linkedin": "raymin0223/;jongwoo-ko-8b93051b4/;;seyoung-yun-395130ab/", "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology;Amazon", "aff_unique_dep": ";Amazon Web Services", "aff_unique_url": "https://www.kaist.ac.kr;https://aws.amazon.com", "aff_unique_abbr": "KAIST;AWS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "South Korea;United States" }, { "id": "eiFRPhpsW6", "title": "Why Should This Article Be Deleted? Transparent Stance Detection in Multilingual Wikipedia Editor Discussions", "track": "main", "status": "Long Main", "tldr": "", "abstract": "The moderation of content on online platforms is usually non-transparent. On Wikipedia, however, this discussion is carried out publicly and editors are encouraged to use the content moderation policies as explanations for making moderation decisions. Currently, only a few comments explicitly mention those policies -- 20% of the English ones, but as few as 2% of the German and Turkish comments.\nTo aid in this process of understanding how content is moderated, we construct a novel multilingual dataset of Wikipedia editor discussions along with their reasoning in three languages. The dataset contains the stances of the editors (keep, delete, merge, comment), along with the stated reason, and a content moderation policy, for each edit decision. We demonstrate that stance and corresponding reason (policy) can be predicted jointly with a high degree of accuracy, adding transparency to the decision-making process. 
We release both our joint prediction models and the multilingual content moderation dataset for further research on automated transparent content moderation.", "keywords": "Wikipedia;deletion discussion;stance detection;content moderation;policy;multilingual", "primary_area": "", "supplementary_material": "", "author": "Lucie-Aim\u00e9e Kaffee;Arnav Arora;Isabelle Augenstein", "authorids": "~Lucie-Aim\u00e9e_Kaffee1;~Arnav_Arora1;~Isabelle_Augenstein1", "gender": "F;;F", "homepage": "https://luciekaffee.github.io/;;http://isabelleaugenstein.github.io/", "dblp": "204/9536;;93/11424.html", "google_scholar": "xiuGTq0AAAAJ;EQUUUUoAAAAJ;https://scholar.google.co.uk/citations?user=DjJp0dcAAAAJ", "or_profile": "~Lucie-Aim\u00e9e_Kaffee1;~Arnav_Arora1;~Isabelle_Augenstein1", "aff": "Copenhagen University;University of Copenhagen;University of Copenhagen", "aff_domain": "ku.dk;diku.dk;ku.dk", "position": "Postdoc;PhD student;Full Professor", "bibtex": "@inproceedings{\nkaffee2023why,\ntitle={Why Should This Article Be Deleted? Transparent Stance Detection in Multilingual Wikipedia Editor Discussions},\nauthor={Lucie-Aim{\\'e}e Kaffee and Arnav Arora and Isabelle Augenstein},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=eiFRPhpsW6}\n}", "github": "", "project": "", "reviewers": "Xa7D;N2ar;6EKo", "site": "https://openreview.net/forum?id=eiFRPhpsW6", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;3;4", "excitement": "4;4;4", "reproducibility": "4;4;3", "correctness": "4;4;4", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 4.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0003-1562-7909", "linkedin": ";;isabelle-augenstein-82436b7a/", "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Copenhagen", "aff_unique_dep": "", "aff_unique_url": "https://www.ku.dk", "aff_unique_abbr": "UCPH", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Denmark" }, { "id": "eiHT1VAs4K", "title": "Preserving Knowledge Invariance: Rethinking Robustness Evaluation of Open Information Extraction", "track": "main", "status": "Long Main", "tldr": "", "abstract": "The robustness to distribution changes ensures that NLP models can be successfully applied in the realistic world, especially for information extraction tasks. However, most prior evaluation benchmarks have been devoted to validating pairwise matching correctness, ignoring the crucial validation of robustness. In this paper, we present the first benchmark that simulates the evaluation of open information extraction models in the real world, where the syntactic and expressive distributions under the same knowledge meaning may drift variously. We design and annotate a large-scale testbed in which each example is a knowledge-invariant clique that consists of sentences with structured knowledge of the same meaning but with different syntactic and expressive forms. By further elaborating the robustness metric, a model is judged to be robust if its performance is consistently accurate on the overall cliques. 
We perform experiments on typical models published in the last decade as well as a representative large language model, and the results show that the existing successful models exhibit a frustrating degradation, with a maximum drop of $23.43$ $F_1$ score. Our resources and code will be publicly available.", "keywords": "open information extraction;robustness evaluation;high-quality paraphrase", "primary_area": "", "supplementary_material": "", "author": "Ji Qi;Chuchun Zhang;Xiaozhi Wang;Kaisheng Zeng;Jifan Yu;Jinxin Liu;Lei Hou;Juanzi Li;Xu Bin", "authorids": "~Ji_Qi2;~Chuchun_Zhang1;~Xiaozhi_Wang1;~Kaisheng_Zeng1;~Jifan_Yu2;~Jinxin_Liu2;~Lei_Hou2;~Juanzi_Li1;~Xu_Bin1", "gender": ";M;M;M;M;M;M;;", "homepage": ";https://github.com/ccJIUCI;https://bakser.github.io/;https://github.com/alpc43;https://yujifan0326.github.io/;https://scholar.google.com/citations?user=A7KHQ6YAAAAJ&hl=en&oi=sra;https://www.cs.tsinghua.edu.cn/csen/info/1305/4466.htm;;", "dblp": ";;03/2015;199/8788.html;239/6130.html;20/6480-2;32/5685-1;;", "google_scholar": ";;DjpXXZkAAAAJ;https://scholar.google.com/citations?view_op=list_works;https://scholar.google.com.tw/citations?hl=zh-CN;A7KHQ6YAAAAJ;YnIq4hsAAAAJ;;", "or_profile": "~Ji_Qi2;~Chuchun_Zhang1;~Xiaozhi_Wang1;~Kaisheng_Zeng1;~Jifan_Yu2;~Jinxin_Liu2;~Lei_Hou2;~Juanzi_Li1;~Xu_Bin1", "aff": ";University of International Business and Economics;Department of Computer Science and Technology, Tsinghua University;Tsinghua University;;Tsinghua University;Tsinghua University;;", "aff_domain": ";uibe.edu.cn;mails.tsinghua.edu.cn;tsinghua.edu.cn;;tsinghua.edu.cn;tsinghua.edu.cn;;", "position": ";Undergrad student;PhD student;PhD student;;PhD student;Assistant Professor;;", "bibtex": "@inproceedings{\nqi2023preserving,\ntitle={Preserving Knowledge Invariance: Rethinking Robustness Evaluation of Open Information Extraction},\nauthor={Ji Qi and Chuchun Zhang and Xiaozhi Wang and Kaisheng Zeng and Jifan Yu and Jinxin Liu and Lei Hou and Juanzi Li and Xu Bin},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=eiHT1VAs4K}\n}", "github": "", "project": "", "reviewers": "2rx1;snS1;59er", "site": "https://openreview.net/forum?id=eiHT1VAs4K", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;4;4", "excitement": "4;4;4", "reproducibility": "5;4;3", "correctness": "4;4;4", "rating_avg": 5.0, "confidence_avg": 4.0, "excitement_avg": 4.0, "reproducibility_avg": 4.0, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 9, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0002-5727-143X;0000-0002-8104-9652;0000-0003-3430-4048;0009-0009-4673-9824;0000-0002-8907-3526;;", "linkedin": ";;xiaozhiwang098/?locale=en_US;https://cn.linkedin.com/in/%E5%BC%80%E8%83%9C-%E6%9B%BE-496566107;;%E9%87%91%E9%91%AB-%E5%88%98-86aaa7211/;;;", "aff_unique_index": "0;1;1;1;1", "aff_unique_norm": "University of International Business and Economics;Tsinghua University", "aff_unique_dep": ";Department of Computer Science and Technology", "aff_unique_url": "http://www.uibe.edu.cn;https://www.tsinghua.edu.cn", "aff_unique_abbr": "UIBE;THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "erorKQYQ7P", "title": "Co$^2$PT: Mitigating Bias in Pre-trained Language Models through Counterfactual Contrastive Prompt Tuning", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Pre-trained Language 
Models are widely used in many important real-world applications. However, recent studies show that these models can encode social biases from large pre-training corpora and even amplify biases in downstream applications. To address this challenge, we propose Co$^2$PT, an efficient and effective *debias-while-prompt tuning* method for mitigating biases via counterfactual contrastive prompt tuning on downstream tasks. Our experiments conducted on three extrinsic bias benchmarks demonstrate the effectiveness of Co$^2$PT on bias mitigation during the prompt tuning process and its adaptability to existing upstream debiased language models. These findings indicate the strength of Co$^2$PT and provide promising avenues for further enhancement in bias mitigation on downstream tasks.", "keywords": "bias;contrastive learning;prompt tuning", "primary_area": "", "supplementary_material": "", "author": "Xiangjue Dong;Ziwei Zhu;Zhuoer Wang;Maria Teleki;James Caverlee", "authorids": "~Xiangjue_Dong1;~Ziwei_Zhu1;~Zhuoer_Wang1;~Maria_Teleki1;~James_Caverlee2", "gender": ";M;M;F;M", "homepage": ";https://zziwei.github.io/;https://edillower.github.io/;https://people.tamu.edu/~mariateleki/;https://people.engr.tamu.edu/caverlee/", "dblp": ";159/9916;276/1293;359/3074;55/3697.html", "google_scholar": ";3S6pM7wAAAAJ;bWd8-mEAAAAJ;https://scholar.google.com/citations?hl=en;LB1dq_sAAAAJ", "or_profile": "~Xiangjue_Dong1;~Ziwei_Zhu1;~Zhuoer_Wang1;~Maria_Teleki1;~James_Caverlee2", "aff": ";George Mason University;Texas A&M University;Texas A&M University - College Station;Google", "aff_domain": ";gmu.edu;tamu.edu;tamu.edu;google.com", "position": ";Assistant Professor;PhD student;PhD student;Researcher", "bibtex": "@inproceedings{\ndong2023copt,\ntitle={Co\\${\\textasciicircum}2\\${PT}: Mitigating Bias in Pre-trained Language Models through Counterfactual Contrastive Prompt Tuning},\nauthor={Xiangjue Dong and Ziwei Zhu and Zhuoer Wang and Maria Teleki and James Caverlee},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=erorKQYQ7P}\n}", "github": "", "project": "", "reviewers": "ppym;vAfa;eamV", "site": "https://openreview.net/forum?id=erorKQYQ7P", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;4", "excitement": "3;2;3", "reproducibility": "4;4;4", "correctness": "4;3;4", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 2.6666666666666665, "reproducibility_avg": 4.0, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-3990-4774;;;0000-0001-8350-8528", "linkedin": ";;;mariateleki;", "aff_unique_index": "0;1;1;2", "aff_unique_norm": "George Mason University;Texas A&M University;Google", "aff_unique_dep": ";;Google", "aff_unique_url": "https://www.gmu.edu;https://www.tamu.edu;https://www.google.com", "aff_unique_abbr": "GMU;TAMU;Google", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";College Station;Mountain View", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "euYA3EmI0e", "title": "Learning to Correct Noisy Labels for Fine-Grained Entity Typing via Co-Prediction Prompt Tuning", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Fine-grained entity typing (FET) is an essential task in natural language processing that aims to assign semantic types to entities in text. 
However, FET poses a major challenge known as the noise labeling problem, whereby current methods rely on estimating noise distribution to identify noisy labels but are confused by diverse noise distribution deviation. To address this limitation, we introduce Co-Prediction Prompt Tuning for noise correction in FET, which leverages multiple prediction results to identify and correct noisy labels. Specifically, we integrate prediction results to recall labeled labels and utilize a differentiated margin to identify inaccurate labels. Moreover, we design an optimization objective concerning divergent co-predictions during fine-tuning, ensuring that the model captures sufficient information and maintains robustness in noise identification. Experimental results on three widely-used FET datasets demonstrate that our noise correction approach significantly enhances the quality of various types of training samples, including those annotated using distant supervision, ChatGPT, and crowdsourcing.", "keywords": "fine-grained entity typing;natural language processing;noisy labels;co-prediction prompt tuning;large language model", "primary_area": "", "supplementary_material": "", "author": "Minghao Tang;Yongquan He;Yongxiu Xu;Hongbo Xu;Wenyuan Zhang;Yang Lin", "authorids": "~Minghao_Tang1;~Yongquan_He1;~Yongxiu_Xu1;~Hongbo_Xu3;~Wenyuan_Zhang2;~Yang_Lin4", "gender": "M;M;F;M;;M", "homepage": ";;;https://people.ucas.ac.cn/~xuhongbo;;", "dblp": "235/0717;276/5095.html;294/1202;https://dblp.org/search?q=Hongbo+Xu+Tingwen+Liu;;", "google_scholar": "Cx8GTawAAAAJ;https://scholar.google.com.hk/citations?user=NMvcXrYAAAAJ;https://scholar.google.ca/citations?hl=zh-CN;;;zZDoBQoAAAAJ", "or_profile": "~Minghao_Tang1;~Yongquan_He1;~Yongxiu_Xu1;~Hongbo_Xu3;~Wenyuan_Zhang2;~Yang_Lin4", "aff": "Institute of Information Engineering,Chinese Academy of Sciences;Meituan;Institute of Information Engineering, Chinese Academy of Sciences;Institute of Information Engineering;;", "aff_domain": "iie.ac.cn;meituan.com;iie.edu.cn;iie.ac.cn;;", "position": "PhD student;Researcher;Assistant Professor;Full Professor;;", "bibtex": "@inproceedings{\ntang2023learning,\ntitle={Learning to Correct Noisy Labels for Fine-Grained Entity Typing via Co-Prediction Prompt Tuning},\nauthor={Minghao Tang and Yongquan He and Yongxiu Xu and Hongbo Xu and Wenyuan Zhang and Yang Lin},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=euYA3EmI0e}\n}", "github": "", "project": "", "reviewers": "MVhU;yPJ4;FZzX", "site": "https://openreview.net/forum?id=euYA3EmI0e", "pdf_size": 0, "rating": "2;2;2", "confidence": "4;3;4", "excitement": "3;3;3", "reproducibility": "4;3;4", "correctness": "4;3;3", "rating_avg": 2.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0003-2926-3907;0000-0002-3079-8530;;0000-0002-0258-7840;;", "linkedin": ";;;;;", "aff_unique_index": "0;1;0;2", "aff_unique_norm": "Chinese Academy of Sciences;Meituan;Institute of Information Engineering", "aff_unique_dep": "Institute of Information Engineering;;", "aff_unique_url": "http://www.cas.cn;https://www.meituan.com;", "aff_unique_abbr": "CAS;Meituan;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China;" }, { "id": 
"ev8dLLwScW", "title": "Ditto: A Simple and Efficient Approach to Improve Sentence Embeddings", "track": "main", "status": "Short Main", "tldr": "", "abstract": "Prior studies diagnose the anisotropy problem in sentence representations from pre-trained language models, e.g., BERT, without fine-tuning. Our analysis reveals that the sentence embeddings from BERT suffer from a bias towards uninformative words, limiting the performance in semantic textual similarity (STS) tasks. To address this bias, we propose a simple and efficient unsupervised approach, Diagonal Attention Pooling (Ditto), which weights words with model-based importance estimations and computes the weighted average of word representations from pre-trained models as sentence embeddings. Ditto can be easily applied to any pre-trained language model as a postprocessing operation. Compared to prior sentence embedding approaches, Ditto does not add parameters nor requires any learning. Empirical evaluations demonstrate that our proposed Ditto can alleviate the anisotropy problem and improve various pre-trained models on the STS benchmarks.", "keywords": "Sentence embeddings;BERT;Transformer;Self-attention", "primary_area": "", "supplementary_material": "", "author": "Qian Chen;Wen Wang;Qinglin Zhang;Siqi Zheng;Chong Deng;Hai Yu;Jiaqing Liu;Yukun Ma;Chong Zhang", "authorids": "~Qian_Chen1;~Wen_Wang6;~Qinglin_Zhang1;~Siqi_Zheng1;~Chong_Deng1;~Hai_Yu2;~Jiaqing_Liu2;~Yukun_Ma1;~Chong_Zhang8", "gender": "M;;M;M;M;M;M;M;", "homepage": "https://scholar.google.com/citations?user=8eosmSQAAAAJ&hl=en;https://scholar.google.com/citations?user=85Tj1OwAAAAJ&hl=en;;;;https://github.com/haiahaiah;;;", "dblp": "11/1394-3;29/4680-1;67/4963;;220/8430;;;;", "google_scholar": "8eosmSQAAAAJ;85Tj1OwAAAAJ;6Q7NBaEAAAAJ;https://scholar.google.com.hk/citations?user=BsrS95gAAAAJ;https://scholar.google.com/citations?view_op=list_works;;sQ7v9uUAAAAJ;TRTf1uoAAAAJ;", "or_profile": "~Qian_Chen1;~Wen_Wang6;~Qinglin_Zhang1;~Siqi_Zheng1;~Chong_Deng1;~Hai_Yu2;~Jiaqing_Liu2;~Yukun_Ma1;~Chong_Zhang8", "aff": "Alibaba Group;Alibaba Group;Alibaba Group;Alibaba Group;Alibaba Group;Alibaba Group;Alibaba Group;Alibaba Group;", "aff_domain": "alibaba-inc.com;alibaba-inc.com;alibaba-inc.com;alibaba-inc.com;alibaba-inc.com;alibaba-inc.com;alibaba-inc.com;alibaba-inc.com;", "position": "Researcher;Senior Staff Algorithm Engineer;Researcher;Researcher;Researcher;Researcher;Researcher;Researcher;", "bibtex": "@inproceedings{\nchen2023ditto,\ntitle={Ditto: A Simple and Efficient Approach to Improve Sentence Embeddings},\nauthor={Qian Chen and Wen Wang and Qinglin Zhang and Siqi Zheng and Chong Deng and Hai Yu and Jiaqing Liu and Yukun Ma and Chong Zhang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=ev8dLLwScW}\n}", "github": "", "project": "", "reviewers": "Z8t9;rnBv;rde8", "site": "https://openreview.net/forum?id=ev8dLLwScW", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;2;4", "excitement": "3;3;4", "reproducibility": "4;4;4", "correctness": "4;4;4", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.0, "correctness_avg": 4.0, "replies_avg": 10, "authors#_avg": 9, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0001-6939-7438;0000-0002-0356-1968;;;;;;;", "linkedin": ";wen-wang-414b548/;;;;;;;", "aff_unique_index": "0;0;0;0;0;0;0;0", "aff_unique_norm": "Alibaba Group", 
"aff_unique_dep": "", "aff_unique_url": "https://www.alibaba.com", "aff_unique_abbr": "Alibaba", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "ewedHtUI5X", "title": "ASSERT: Automated Safety Scenario Red Teaming for Evaluating the Robustness of Large Language Models", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "As large language models are integrated into society, robustness toward a suite of prompts is increasingly important to maintain reliability in a high-variance environment.Robustness evaluations must comprehensively encapsulate the various settings in which a user may invoke an intelligent system. This paper proposes ASSERT, Automated Safety Scenario Red Teaming, consisting of three methods -- semantically aligned augmentation, target bootstrapping, and adversarial knowledge injection. For robust safety evaluation, we apply these methods in the critical domain of AI safety to algorithmically generate a test suite of prompts covering diverse robustness settings -- semantic equivalence, related scenarios, and adversarial. We partition our prompts into four safety domains for a fine-grained analysis of how the domain affects model performance. Despite dedicated safeguards in existing state-of-the-art models, we find statistically significant performance differences of up to 11% in absolute classification accuracy among semantically related scenarios and error rates of up to 19% absolute error in zero-shot adversarial settings, raising concerns for users' physical safety.", "keywords": "robustness;safety;red teaming;large language models", "primary_area": "", "supplementary_material": "", "author": "Alex Mei;Sharon Levy;William Yang Wang", "authorids": "~Alex_Mei1;~Sharon_Levy1;~William_Yang_Wang2", "gender": ";;M", "homepage": "http://sites.cs.ucsb.edu/~alexmei/;https://sharonlevy.github.io/;https://www.cs.ucsb.edu/~william/", "dblp": ";92/7341;08/9282", "google_scholar": "GOrfNGAAAAAJ;KdTUNZIAAAAJ;gf8Ms_8AAAAJ", "or_profile": "~Alex_Mei1;~Sharon_Levy1;~William_Wang1", "aff": "UC Santa Barbara;UC Santa Barbara;UC Santa Barbara", "aff_domain": "ucsb.edu;ucsb.edu;ucsb.edu", "position": "MS student;PhD student;Full Professor", "bibtex": "@inproceedings{\nmei2023assert,\ntitle={{ASSERT}: Automated Safety Scenario Red Teaming for Evaluating the Robustness of Large Language Models},\nauthor={Alex Mei and Sharon Levy and William Yang Wang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=ewedHtUI5X}\n}", "github": "", "project": "", "reviewers": "GVJw;M7hi;Gb6G", "site": "https://openreview.net/forum?id=ewedHtUI5X", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;3", "excitement": "3;4;4", "reproducibility": "4;4;3", "correctness": "4;3;3", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": "alexmeigz/;;", "aff_unique_index": "0;0;0", "aff_unique_norm": "University of California, Santa Barbara", "aff_unique_dep": "", "aff_unique_url": "https://www.ucsb.edu", "aff_unique_abbr": "UCSB", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Santa Barbara", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United 
States" }, { "id": "eyuTFB2CBM", "title": "KEPL: Knowledge Enhanced Prompt Learning for Chinese Hypernym-Hyponym Extraction", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Modeling hypernym-hyponym (\"is-a\") relations is very important for many natural language processing (NLP) tasks, such as classification, natural language inference and relation extraction. Existing work on is-a relation extraction is mostly in the English language environment. Due to the flexibility of language expression and the lack of high-quality Chinese annotation datasets, it is still a challenge to accurately identify such relations from Chinese unstructured texts. To tackle this problem, we propose a Knowledge Enhanced Prompt Learning (KEPL) method for Chinese hypernym-hyponym relation extraction. Our model uses the Hearst-like patterns as the prior knowledge. By exploiting a Dynamic Adaptor Architecture to select the matching pattern for the text into prompt, our model embeds patterns and text simultaneously. Additionally, we construct a Chinese hypernym-hyponym relation extraction dataset, which contains three typical scenarios, as baike, news and We-media. The experimental results on the dataset demonstrate the efficiency and effectiveness of our proposed model.", "keywords": "Knowledge Base;prompt;Hypernym discovery", "primary_area": "", "supplementary_material": "", "author": "Ningchen Ma;Dong Wang;Hongyun Bao;Lei He;Suncong Zheng", "authorids": "~Ningchen_Ma1;~Dong_Wang35;~Hongyun_Bao2;~Lei_He8;~Suncong_Zheng2", "gender": "M;M;;F;M", "homepage": "https://github.com/fengyanflame;;;;", "dblp": ";;34/10083.html;;133/2598", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;qSlaQ6oAAAAJ;;;https://scholar.google.com.hk/citations?user=GORkFYUAAAAJ", "or_profile": "~Ningchen_Ma1;~Dong_Wang35;~Hongyun_Bao2;~Lei_He8;~Suncong_Zheng2", "aff": ";Tencent;institute of qutomation, chinese acadamy of sciences;Tencent AI Lab;", "aff_domain": ";tencent.com;iacas.edu;tencent.com;", "position": ";Researcher;Associate Professor;Researcher;", "bibtex": "@inproceedings{\nma2023kepl,\ntitle={{KEPL}: Knowledge Enhanced Prompt Learning for Chinese Hypernym-Hyponym Extraction},\nauthor={Ningchen Ma and Dong Wang and Hongyun Bao and Lei He and Suncong Zheng},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=eyuTFB2CBM}\n}", "github": "", "project": "", "reviewers": "3dDj;rkKb;mrHG", "site": "https://openreview.net/forum?id=eyuTFB2CBM", "pdf_size": 0, "rating": "4;4;4", "confidence": "5;3;3", "excitement": "4;3;3", "reproducibility": "3;3;2", "correctness": "4;3;3", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 2.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;", "linkedin": ";;;lei-he-a75b24119/;", "aff_unique_index": "0;1;0", "aff_unique_norm": "Tencent;Chinese Academy of Sciences", "aff_unique_dep": "Tencent Holdings Limited;Institute of Automation", "aff_unique_url": "https://www.tencent.com;http://www.ia.cas.cn", "aff_unique_abbr": "Tencent;CAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "f05z3XqUeu", "title": "RainProof: An Umbrella to Shield Text Generator from Out-Of-Distribution Data", "track": "main", "status": "Long 
Main", "tldr": "", "abstract": "Implementing effective control mechanisms to ensure the proper functioning and security of deployed NLP models, from translation to chatbots, is essential. A key ingredient to ensure safe system behaviour is Out-Of-Distribution (OOD) detection, which aims to detect whether an input sample is statistically far from the training distribution. Although OOD detection is a widely covered topic in classification tasks, most methods rely on hidden features output by the encoder. In this work, we focus on leveraging soft-probabilities in a black-box framework, i.e. we can access the soft-predictions but not the internal states of the model. Our contributions include: (i) RAINPROOF a Relative informAItioN Projection OOD detection framework; and (ii) a more operational evaluation setting for OOD detection. Surprisingly, we find that OOD detection is not necessarily aligned with task-specific measures. The OOD detector may filter out samples well processed by the model and keep samples that are not, leading to weaker performance. Our results show that RAINPROOF provides OOD detection methods more aligned with task-specific performance metrics than traditional OOD detectors.", "keywords": "NLP;OOD detection;natural language generation", "primary_area": "", "supplementary_material": "", "author": "Maxime DARRIN;Pablo Piantanida;Pierre Colombo", "authorids": "~Maxime_DARRIN1;~Pablo_Piantanida2;~Pierre_Colombo2", "gender": "M;M;M", "homepage": "https://icannos.github.io/;https://www.pablo-piantanida.org;https://pierrecolombo.github.io/", "dblp": ";44/1416;", "google_scholar": "https://scholar.google.ca/citations?hl=fr;https://scholar.google.fr/citations?user=QyBEFv0AAAAJ;yPoMt8gAAAAJ", "or_profile": "~Maxime_DARRIN1;~Pablo_Piantanida2;~Pierre_Colombo2", "aff": "CentraleSupelec;Mila - Quebec AI Institute ;CentraleSupelec", "aff_domain": "centralesupelec.fr;mila.quebec;centralesupelec.fr", "position": "PhD student;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\ndarrin2023rainproof,\ntitle={RainProof: An Umbrella to Shield Text Generator from Out-Of-Distribution Data},\nauthor={Maxime DARRIN and Pablo Piantanida and Pierre Colombo},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=f05z3XqUeu}\n}", "github": "", "project": "", "reviewers": "j15V;fLxa;DVtH", "site": "https://openreview.net/forum?id=f05z3XqUeu", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;3;4", "excitement": "3;3;4", "reproducibility": "4;5;4", "correctness": "3;3;3", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.333333333333333, "correctness_avg": 3.0, "replies_avg": 9, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": "maxime-darrin/;pablo-piantanida-60a51bb5/?locale=en_US;", "aff_unique_index": "0;1;0", "aff_unique_norm": "CentraleSup\u00e9lec;Quebec AI Institute", "aff_unique_dep": ";AI Institute", "aff_unique_url": "https://www.centralesupelec.fr;https://mila.quebec", "aff_unique_abbr": "CS;Mila", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "France;Canada" }, { "id": "f10SqktqkF", "title": "Investigating Online Community Engagement through Stancetaking", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Much work has explored lexical and semantic variation in online 
communities, and drawn connections to community identity and user engagement patterns. Communities also express identity through the sociolinguistic concept of stancetaking. Large-scale computational work on stancetaking has explored community similarities in their preferences for stance markers -- words that serve to indicate aspects of a speaker's stance -- without considering the stance-relevant properties of the contexts in which stance markers are used. We propose representations of stance contexts for 1798 Reddit communities and show how they capture community identity patterns distinct from textual or marker similarity measures. We also relate our stance context representations to broader inter- and intra-community engagement patterns, including cross-community posting patterns and social network properties of communities. Our findings highlight the strengths of using rich properties of stance as a way of revealing community identity and engagement patterns in online multi-community spaces.", "keywords": "stancetaking; community variation; sociolinguistics; community identity", "primary_area": "", "supplementary_material": "", "author": "Jai Aggarwal;Brian Diep;Julia Watson;Suzanne Stevenson", "authorids": "~Jai_Aggarwal1;~Brian_Diep1;~Julia_Watson1;~Suzanne_Stevenson1", "gender": ";M;F;", "homepage": "http://www.cs.toronto.edu/~jai/index.html;;http://www.juliawatson.ca;", "dblp": "272/5349;;212/4004;", "google_scholar": "https://scholar.google.ca/citations?user=DyustcgAAAAJ;;QNXA_tcAAAAJ;", "or_profile": "~Jai_Aggarwal1;~Brian_Diep1;~Julia_Watson1;~Suzanne_Stevenson1", "aff": "Department of Computer Science;University of Toronto;Department of Computer Science;", "aff_domain": "cs.toronto.edu;utoronto.ca;cs.toronto.edu;", "position": "PhD student;Undergrad student;PhD student;", "bibtex": "@inproceedings{\naggarwal2023investigating,\ntitle={Investigating Online Community Engagement through Stancetaking},\nauthor={Jai Aggarwal and Brian Diep and Julia Watson and Suzanne Stevenson},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=f10SqktqkF}\n}", "github": "", "project": "", "reviewers": "5R1c;NA9c;Qr3C", "site": "https://openreview.net/forum?id=f10SqktqkF", "pdf_size": 0, "rating": "4;4;4", "confidence": "5;4;2", "excitement": "4;4;3", "reproducibility": "4;4;3", "correctness": "3;4;3", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0003-0737-5262;", "linkedin": ";brian-diep/;;", "aff_unique_index": "0;1;0", "aff_unique_norm": "Unknown Institution;University of Toronto", "aff_unique_dep": "Department of Computer Science;", "aff_unique_url": ";https://www.utoronto.ca", "aff_unique_abbr": ";U of T", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "1", "aff_country_unique": ";Canada" }, { "id": "f1y1tG5pAE", "title": "The BLA Benchmark: Investigating Basic Language Abilities of Pre-Trained Multimodal Models", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Despite the impressive performance achieved by pre-trained language-and-vision models in downstream tasks, it remains an open question whether this reflects a proper understanding of image-text interaction. 
In this work, we explore to what extent they handle basic linguistic constructions---active-passive voice, coordination, and relative clauses---that even preschool children can typically master.\nWe present BLA, a novel, automatically constructed benchmark to evaluate multimodal models on these Basic Language Abilities. We show that different types of Transformer-based systems, such as CLIP, ViLBERT, and BLIP2, generally struggle with BLA in a zero-shot setting, in line with previous findings. Our experiments, in particular, show that most of the tested models only marginally benefit when fine-tuned or prompted with construction-specific samples. Yet, the generative BLIP2 shows promising trends, especially in an in-context learning setting. This opens the door to using BLA not only as an evaluation benchmark but also to improve models' basic language abilities.", "keywords": "Dataset;Evaluation;Zero-shot;Prompting;Visual Grounding;Language Constructions", "primary_area": "", "supplementary_material": "", "author": "Xinyi Chen;Raquel Fern\u00e1ndez;Sandro Pezzelle", "authorids": "~Xinyi_Chen3;~Raquel_Fern\u00e1ndez1;~Sandro_Pezzelle1", "gender": "F;F;M", "homepage": ";http://www.illc.uva.nl/~raquel;https://sandropezzelle.github.io/", "dblp": ";02/5384;182/2260", "google_scholar": "7W3aYZYAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.it/citations?user=PW6eQ6YAAAAJ", "or_profile": "~Xinyi_Chen3;~Raquel_Fern\u00e1ndez1;~Sandro_Pezzelle1", "aff": "University of Amsterdam;University of Amsterdam;University of Amsterdam", "aff_domain": "uva.nl;uva.nl;uva.nl", "position": "PhD student;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\nchen2023the,\ntitle={The {BLA} Benchmark: Investigating Basic Language Abilities of Pre-Trained Multimodal Models},\nauthor={Xinyi Chen and Raquel Fern{\\'a}ndez and Sandro Pezzelle},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=f1y1tG5pAE}\n}", "github": "", "project": "", "reviewers": "5kcq;pLMK;3JNU", "site": "https://openreview.net/forum?id=f1y1tG5pAE", "pdf_size": 0, "rating": "5;5;5", "confidence": "5;5;4", "excitement": "4;4;4", "reproducibility": "4;5;5", "correctness": "4;5;4", "rating_avg": 5.0, "confidence_avg": 4.666666666666667, "excitement_avg": 4.0, "reproducibility_avg": 4.666666666666667, "correctness_avg": 4.333333333333333, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0001-5540-5943;0000-0002-3969-7445", "linkedin": ";raquel-fernandez-13578148/;", "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Amsterdam", "aff_unique_dep": "", "aff_unique_url": "https://www.uva.nl", "aff_unique_abbr": "UvA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Netherlands" }, { "id": "f34v92a86l", "title": "Efficient Grammatical Error Correction Via Multi-Task Training and Optimized Training Schedule", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Progress in neural grammatical error correction (GEC) is hindered by the lack of annotated training data. 
Sufficient amounts of high-quality manually annotated data are not available, so recent research has relied on generating synthetic data, pretraining on it, and then fine-tuning on real datasets; performance gains have been achieved either by ensembling or by using huge pretrained models such as XXL-T5 as the backbone. In this work, we explore an orthogonal direction: how to use available data more efficiently. First, we propose auxiliary tasks that exploit the alignment between the original and corrected sentences, such as predicting a sequence of corrections. We formulate each task as a sequence-to-sequence problem and perform multi-task training. Second, we discover that the order of datasets used for training and even individual instances within a dataset may have important effects on the final performance, so we set out to find the best training schedule. Together, these two ideas lead to significant improvements, producing results that improve state of the art with much smaller models; in particular, we outperform the best models based on T5-XXL (11B parameters) with a BART-based model (400M parameters).", "keywords": "Grammatical error correction;Multi-task training;Sequence-to-sequence;Fine-tuning", "primary_area": "", "supplementary_material": "", "author": "Andrey Bout;Alexander Podolskiy;Sergey Nikolenko;Irina Piontkovskaya", "authorids": "~Andrey_Bout1;~Alexander_Podolskiy1;~Sergey_Nikolenko1;~Irina_Piontkovskaya2", "gender": ";M;M;F", "homepage": ";https://github.com/APodolskiy;http://logic.pdmi.ras.ru/~sergey/;", "dblp": "225/1448;;50/1870.html;211/7823", "google_scholar": ";sKL4bLEAAAAJ;https://scholar.google.ru/citations?hl=ru;", "or_profile": "~Andrey_Bout1;~Alexander_Podolskiy1;~Sergey_Nikolenko1;~Irina_Piontkovskaya2", "aff": "Huawei Noah's Ark Lab;Huawei Technologies Ltd.;Steklov Institute of Mathematics at St. Petersburg;Huawei Technologies Ltd.", "aff_domain": "huawei.com;huawei.com;pdmi.ras.ru;huawei.com", "position": "Principal Researcher;Researcher;Assistant Professor;Researcher", "bibtex": "@inproceedings{\nbout2023efficient,\ntitle={Efficient Grammatical Error Correction Via Multi-Task Training and Optimized Training Schedule},\nauthor={Andrey Bout and Alexander Podolskiy and Sergey Nikolenko and Irina Piontkovskaya},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=f34v92a86l}\n}", "github": "", "project": "", "reviewers": "7gp4;ZskQ;bMUs;Yt3z", "site": "https://openreview.net/forum?id=f34v92a86l", "pdf_size": 0, "rating": "4;4;4;4", "confidence": "4;5;5;4", "excitement": "2;4;4;4", "reproducibility": "3;3;5;4", "correctness": "2;4;5;3", "rating_avg": 4.0, "confidence_avg": 4.5, "excitement_avg": 3.5, "reproducibility_avg": 3.75, "correctness_avg": 3.5, "replies_avg": 13, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0001-7787-2251;", "linkedin": "a-bout/;;;irina-piontkovskaya-6b10b0b5/?originalSubdomain=ru", "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Huawei;Steklov Institute of Mathematics", "aff_unique_dep": "Noah's Ark Lab;Mathematics", "aff_unique_url": "https://www.huawei.com;http://www.pdmi.ras.ru", "aff_unique_abbr": "Huawei;PDMI", "aff_campus_unique_index": "1", "aff_campus_unique": ";St. 
Petersburg", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "China;Russian Federation" }, { "id": "f42iMss8J3", "title": "Visual Storytelling with Question-Answer Plans", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Visual storytelling aims to generate compelling narratives from image sequences. Existing models often focus on enhancing the representation of the image sequence, e.g., with external knowledge sources or advanced graph structures. Despite recent progress, the stories are often repetitive, illogical, and lacking in detail. To mitigate these issues, we present a novel framework which integrates visual representations with pretrained language models and planning. Our model translates the image sequence into a visual prefix, a sequence of continuous embeddings which language models can interpret. It also leverages a sequence of question-answer pairs as a blueprint plan for selecting salient visual concepts and determining how they should be assembled into a narrative. Automatic and human evaluation on the VIST benchmark demonstrates that blueprint-based models generate stories that are more coherent, interesting, and natural compared to competitive baselines and state-of-the-art systems.", "keywords": "visual storytelling;multimodaility;story generation", "primary_area": "", "supplementary_material": "", "author": "Danyang Liu;Mirella Lapata;Frank Keller", "authorids": "~Danyang_Liu1;~Mirella_Lapata1;~Frank_Keller1", "gender": ";F;M", "homepage": ";https://homepages.inf.ed.ac.uk/mlap/;https://homepages.inf.ed.ac.uk/keller/", "dblp": ";59/6701;30/4872", "google_scholar": "gl1eeBwAAAAJ;j67B9Q4AAAAJ;https://scholar.google.co.uk/citations?user=-lbtnAgAAAAJ", "or_profile": "~Danyang_Liu1;~Mirella_Lapata1;~Frank_Keller1", "aff": "University of Edinburgh;Edinburgh University, University of Edinburgh;University of Edinburgh", "aff_domain": "ed.ac.uk;inf.ed.ac.uk;ed.ac.uk", "position": "PhD student;Full Professor;Full Professor", "bibtex": "@inproceedings{\nliu2023visual,\ntitle={Visual Storytelling with Question-Answer Plans},\nauthor={Danyang Liu and Mirella Lapata and Frank Keller},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=f42iMss8J3}\n}", "github": "", "project": "", "reviewers": "B4N7;1NBh;9ntA", "site": "https://openreview.net/forum?id=f42iMss8J3", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;3;3", "excitement": "3;3;3", "reproducibility": "4;3;4", "correctness": "4;3;3", "rating_avg": 3.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0002-8242-4362", "linkedin": ";;", "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Edinburgh", "aff_unique_dep": "", "aff_unique_url": "https://www.ed.ac.uk", "aff_unique_abbr": "Edinburgh", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United Kingdom" }, { "id": "f6S1411OlZ", "title": "Towards a Unified Framework for Reference Retrieval and Related Work Generation", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "The task of related work generation aims to generate a comprehensive survey of related research topics automatically, saving time and effort for authors. 
Existing methods simplify this task by using human-annotated references in a large-scale scientific corpus as information sources, which is time- and cost-intensive. To this end, we propose a Unified Reference Retrieval and Related Work Generation Model (UR3WG), which combines reference retrieval and related work generation processes in a unified framework based on the large language model (LLM). Specifically, UR3WG first leverages the world knowledge of LLM to extend the abstract and generate the query for the subsequent retrieval stage. Then a lexicon-enhanced dense retrieval is proposed to search relevant references, where an importance-aware representation of the lexicon is introduced. We also propose multi-granularity contrastive learning to optimize our retriever. Since this task is not simply summarizing the main points in references, it should analyze the complex relationships and present them logically. We propose an instruction-tuning method to leverage LLM to generate related work. Extensive experiments on two wide-applied datasets demonstrate that our model outperforms the state-of-the-art baselines in both generation and retrieval metrics.", "keywords": "related work generation;lexicon-enhance retrieval;instruction tuning", "primary_area": "", "supplementary_material": "", "author": "Zhengliang Shi;Shen Gao;Zhen Zhang;Xiuying Chen;Zhumin Chen;Pengjie Ren;Zhaochun Ren", "authorids": "~Zhengliang_Shi1;~Shen_Gao1;~Zhen_Zhang32;~Xiuying_Chen1;~Zhumin_Chen1;~Pengjie_Ren1;~Zhaochun_Ren1", "gender": "M;M;;F;;;M", "homepage": "https://scholar.google.com/citations?user=4UlXbpQAAAAJ&hl=zh-CN;https://shengaopku.github.io/;;https://iriscxy.github.io/;https://ir.sdu.edu.cn/~zhuminchen/~zhuminchen_en.htm;;https://renzhaochun.github.io/", "dblp": "336/6263.html;85/7967;;33/11343.html;88/1081;;58/10440", "google_scholar": "4UlXbpQAAAAJ;Xb5yz-YAAAAJ;;COUnAF4AAAAJ;;;fPcIPt0AAAAJ", "or_profile": "~Zhengliang_Shi1;~Shen_Gao1;~Zhen_Zhang32;~Xiuying_Chen1;~Zhumin_Chen1;~Pengjie_Ren1;~Zhaochun_Ren1", "aff": "Shandong University;Shandong University;;King Abdullah University of Science and Technology;Shandong University;;Shandong University", "aff_domain": "sdu.edu.cn;sdu.edu.cn;;kaust.edu.sa;sdu.edu.cn;;sdu.edu.cn", "position": "MS student;Assistant Professor;;PhD student;Full Professor;;Full Professor", "bibtex": "@inproceedings{\nshi2023towards,\ntitle={Towards a Unified Framework for Reference Retrieval and Related Work Generation},\nauthor={Zhengliang Shi and Shen Gao and Zhen Zhang and Xiuying Chen and Zhumin Chen and Pengjie Ren and Zhaochun Ren},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=f6S1411OlZ}\n}", "github": "", "project": "", "reviewers": "FLmC;5RZj;Vyrb", "site": "https://openreview.net/forum?id=f6S1411OlZ", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;4", "excitement": "4;2;3", "reproducibility": "4;4;3", "correctness": "3;3;3", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 3.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-9658-4906;0000-0003-1301-3700;;;0000-0003-4592-4074;;0000-0002-9076-6565", "linkedin": ";;;;;;zhaochun-ren-460491296/?locale=nl_NL", "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "Shandong University;King Abdullah University of Science and Technology", "aff_unique_dep": ";", "aff_unique_url": 
"http://www.sdu.edu.cn;https://www.kast.kau.edu.sa", "aff_unique_abbr": "SDU;KAUST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0", "aff_country_unique": "China;Saudi Arabia" }, { "id": "f7eqyX0nJP", "title": "ZEROTOP: Zero-Shot Task-Oriented Semantic Parsing using Large Language Models", "track": "main", "status": "Short Main", "tldr": "", "abstract": "We explore the use of large language models (LLMs) for zero-shot semantic parsing. Semantic parsing involves mapping natural language utterances to task-specific meaning representations. LLMs are generally trained on publicly available text and code and cannot be expected to directly generalize to domain-specific parsing tasks in a zero-shot setting. In this work, we propose ZEROTOP, a zero-shot task-oriented parsing method that decomposes semantic parsing problem into a set of abstractive and extractive question-answering (QA) problems. For each utterance, we prompt the LLM with questions corresponding to its top-level intent and a set of slots and use the LLM generations to construct the target meaning representation. We observe that current LLMs fail to detect unanswerable questions; and as a result, cannot handle questions corresponding to missing slots. We address this by fine-tuning a language model on public QA datasets using synthetic negative samples. Experimental results show that our QA-based decomposition paired with the fine-tuned LLM can zero-shot parse \u2248 16% of utterances in the MTOP dataset.", "keywords": "zero-shot;semantic parsing;mtop;large language models;llm;QA datasets", "primary_area": "", "supplementary_material": "", "author": "Dheeraj Mekala;Jason Andrew Wolfe;Subhro Roy", "authorids": "~Dheeraj_Mekala1;~Jason_Andrew_Wolfe1;~Subhro_Roy1", "gender": "M;;M", "homepage": "https://dheeraj7596.github.io/;;https://sroy9.github.io/", "dblp": "192/1233;;47/9962", "google_scholar": "QdE5rgkAAAAJ;;l2pAq_0AAAAJ", "or_profile": "~Dheeraj_Mekala1;~Jason_Andrew_Wolfe1;~Subhro_Roy1", "aff": "University of California, San Diego;;Microsoft Semantic Machines", "aff_domain": "ucsd.edu;;microsoft.com", "position": "PhD student;;Senior Researcher", "bibtex": "@inproceedings{\nmekala2023zerotop,\ntitle={{ZEROTOP}: Zero-Shot Task-Oriented Semantic Parsing using Large Language Models},\nauthor={Dheeraj Mekala and Jason Andrew Wolfe and Subhro Roy},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=f7eqyX0nJP}\n}", "github": "", "project": "", "reviewers": "h26o;i51D;7287", "site": "https://openreview.net/forum?id=f7eqyX0nJP", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;3;3", "excitement": "4;4;4", "reproducibility": "4;4;4", "correctness": "4;4;4", "rating_avg": 5.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 4.0, "reproducibility_avg": 4.0, "correctness_avg": 4.0, "replies_avg": 10, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": "dheeraj7596/;;", "aff_unique_index": "0;1", "aff_unique_norm": "University of California, San Diego;Microsoft", "aff_unique_dep": ";Semantic Machines", "aff_unique_url": "https://www.ucsd.edu;https://www.microsoft.com", "aff_unique_abbr": "UCSD;Microsoft", "aff_campus_unique_index": "0", "aff_campus_unique": "San Diego;", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "fCvJrponuK", "title": "Sparse Black-Box Multimodal Attack for Vision-Language Adversary 
Generation", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Deep neural networks have been widely applied in real-world scenarios, such as product restrictions on e-commerce and hate speech monitoring on social media, to ensure secure governance of various platforms. However, illegal merchants often deceive the detection models by adding large-scale perturbations to prohibited products, so as to earn illegal profits. Current adversarial attacks using imperceptible perturbations encounter challenges in simulating such adversarial behavior and evaluating the vulnerabilities of detection models to such perturbations. To address this issue, we propose a novel black-box multimodal attack, termed Sparse Multimodal Attack (SparseMA), which leverages sparse perturbations to simulate the adversarial behavior exhibited by illegal merchants in the black-box scenario. Moreover, SparseMA bridges the gap between images and texts by treating the separated image patches and text words uniformly in the discrete space. Extensive experiments demonstrate that SparseMA can identify the vulnerability of the model to different modalities, outperforming existing multimodal attacks and unimodal attacks. SparseMA, which is the first proposed method for black-box multimodal attacks to our knowledge, would be used as an effective tool for evaluating the robustness of multimodal models to different modalities.", "keywords": "Vision-and-Language;adversarial learning;multimodal attack;black-box attack", "primary_area": "", "supplementary_material": "", "author": "Zhen Yu;Zhou Qin;Zhenhua Chen;Meihui Lian;fu Jun Hao;Weigao Wen;Hui Xue';Kun He", "authorids": "~Zhen_Yu2;~Zhou_Qin2;~Zhenhua_Chen2;~Meihui_Lian1;~fu_Jun_Hao1;~Weigao_Wen1;~Hui_Xue'1;~Kun_He1", "gender": "M;M;M;;;M;M;F", "homepage": ";https://github.com/archwalker;https://hust.edu.cn/;https://github.com/lllmh;https://github.com/InitAction;;http://www.alibaba.com;http://faculty.hust.edu.cn/hekun/zh_CN/more/1411001/jsjjgd/index.htm", "dblp": ";;;;;;;59/1028-1", "google_scholar": "Df0dSVUAAAAJ;;;;;;;YTQnGJsAAAAJ", "or_profile": "~Zhen_Yu2;~Zhou_Qin2;~Zhenhua_Chen2;~Meihui_Lian1;~fu_Jun_Hao1;~Weigao_Wen1;~Hui_Xue'1;~Kun_He1", "aff": "Huazhong University of Science and Technology;;Huazhong University of Science and Technology;;Alibaba Group;;Alibaba Group;Huazhong University of Sceince and Technology", "aff_domain": "hust.edu.cn;;hust.edu.cn;;alibaba-inc.com;;alibaba-inc.com;hust.edu.cn", "position": "MS student;;MS student;;Researcher;;Principal Researcher;Full Professor", "bibtex": "@inproceedings{\nyu2023sparse,\ntitle={Sparse Black-Box Multimodal Attack for Vision-Language Adversary Generation},\nauthor={Zhen Yu and Zhou Qin and Zhenhua Chen and Meihui Lian and fu Jun Hao and Weigao Wen and Hui Xue' and Kun He},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=fCvJrponuK}\n}", "github": "", "project": "", "reviewers": "jyg5;RAMp;BaP4;GDxW", "site": "https://openreview.net/forum?id=fCvJrponuK", "pdf_size": 0, "rating": "3;3;3;3", "confidence": "4;3;4;4", "excitement": "2;2;4;2", "reproducibility": "4;4;3;4", "correctness": "2;3;4;3", "rating_avg": 3.0, "confidence_avg": 3.75, "excitement_avg": 2.5, "reproducibility_avg": 3.75, "correctness_avg": 3.0, "replies_avg": 14, "authors#_avg": 8, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;;;0000-0001-7627-4604", "linkedin": 
";;;;;https://www.linkedin.cn/incareer/in/ACoAABlg3QUBY92_T2u0E9MmBcmBoAJzIoMYnjE;;", "aff_unique_index": "0;0;1;1;0", "aff_unique_norm": "Huazhong University of Science and Technology;Alibaba Group", "aff_unique_dep": ";", "aff_unique_url": "http://www.hust.edu.cn;https://www.alibaba.com", "aff_unique_abbr": "HUST;Alibaba", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "fEuslEGN0j", "title": "Fine-tuned LLMs Know More, Hallucinate Less with Few-Shot Sequence-to-Sequence Semantic Parsing over Wikidata", "track": "main", "status": "Long Main", "tldr": "", "abstract": "While large language models (LLMs) can answer many questions correctly, they can also hallucinate and give wrong answers. Wikidata, with its over 12 billion facts, can be used to ground LLMs to improve their factuality. \n\nThis paper presents WikiWebQuestions, a high-quality question answering benchmark for Wikidata. Ported over from WebQuestions for Freebase, it consists of real-world data with SPARQL annotation. \n\nThis paper presents a few-shot sequence-to-sequence semantic parser for Wikidata. We modify SPARQL to use the unique domain and property names instead of their IDs. We train the parser to use either the results from an entity linker or mentions in the query. We fine-tune LLaMA by adding the few-shot training data to that used to fine-tune Alpaca. \n\nOur experimental results demonstrate the effectiveness of this methodology, establishing a strong baseline of 76% and 65% answer accuracy in the dev and test sets of WikiWebQuestions, respectively. By pairing our semantic parser with GPT-3, we combine verifiable results with qualified GPT-3 guesses to provide useful answers to 96% of the questions in dev. 
We also show that our method outperforms the state-of-the-art for the QALD-7 Wikidata dataset by 3.6% in F1 score.", "keywords": "knowledge base question answering;Wikidata;semantic parsing;large language models", "primary_area": "", "supplementary_material": "", "author": "Silei Xu;Shicheng Liu;Theo Culhane;Elizaveta Pertseva;Meng-Hsi Wu;Sina Semnani;Monica Lam", "authorids": "~Silei_Xu1;~Shicheng_Liu2;~Theo_Culhane1;~Elizaveta_Pertseva1;~Meng-Hsi_Wu1;~Sina_Semnani1;~Monica_Lam1", "gender": "M;;;F;M;M;F", "homepage": "https://web.stanford.edu/~silei/;https://george1459.github.io/;;https://limpa105.github.io/index.html;https://www.linkedin.com/in/jake-wu-75935979/;https://s-jse.com;https://cs.stanford.edu/~lam/", "dblp": "143/9693;;;;;274/1427;l/MonicaSLam", "google_scholar": "jO8qUfcAAAAJ;Qu38o2EAAAAJ;;;;ECn_7SYAAAAJ;4hS0jZ8AAAAJ", "or_profile": "~Silei_Xu1;~Shicheng_Liu2;~Theo_Culhane1;~Elizaveta_Pertseva1;~Meng-Hsi_Wu1;~Sina_Semnani1;~Monica_Lam1", "aff": "Stanford University;Stanford University;Computer Science Department, Stanford University;Stanford University;;Stanford University;Stanford University", "aff_domain": "stanford.edu;stanford.edu;cs.stanford.edu;stanford.edu;;stanford.edu;stanford.edu", "position": "PhD student;PhD student;MS student;PhD student;;PhD student;Full Professor", "bibtex": "@inproceedings{\nxu2023finetuned,\ntitle={Fine-tuned {LLM}s Know More, Hallucinate Less with Few-Shot Sequence-to-Sequence Semantic Parsing over Wikidata},\nauthor={Silei Xu and Shicheng Liu and Theo Culhane and Elizaveta Pertseva and Meng-Hsi Wu and Sina Semnani and Monica Lam},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=fEuslEGN0j}\n}", "github": "", "project": "", "reviewers": "2RKA;CHSV;1ArN", "site": "https://openreview.net/forum?id=fEuslEGN0j", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;4;4", "excitement": "3;4;4", "reproducibility": "4;5;3", "correctness": "3;4;4", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.0, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0003-2697-1916;;;;0000-0002-1472-5788;", "linkedin": ";;theo-culhane-64b5658b/;;;sina-semnani;lammonica/", "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "", "aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford", "aff_campus_unique_index": "0;0;0;0;0;0", "aff_campus_unique": "Stanford", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "fG6zH1LBHE", "title": "MediaHG: Rethinking Eye-catchy Features in Social Media Headline Generation", "track": "main", "status": "Long Main", "tldr": "", "abstract": "An attractive blog headline on social media platforms can immediately grab readers and trigger more clicks. However, a good headline shall not only contract the main content but also be eye-catchy with domain platform features, which are decided by the website\u2019s users and objectives. With effective headlines, bloggers can obtain more site traffic and profits, while readers can have easier access to topics of interest. In this paper, we propose a disentanglement-based headline generation model: MediaHG (Social Media Headline Generation), which can balance the content and contextual features. 
Specifically, we first devise a sample module for various document views and generate the corresponding headline candidates. Then, we incorporate contrastive learning and auxiliary multi-task to choose the best domain-suitable headline, according to the disentangled budgets. Besides, our separated processing gains more flexible adaptation for other headline generation tasks with special domain features. Our model is built from the content and headlines of 70k hot posts collected from REDBook, a Chinese social media platform for daily sharing. Experimental results with language metrics ROUGE and human evaluation show the improvement in the headline generation task for the platform.", "keywords": "headline generation\uff0cstyle-content attractiveness\uff0csocial media", "primary_area": "", "supplementary_material": "", "author": "Boning Zhang;Yang Yang", "authorids": "~Boning_Zhang3;~Yang_Yang35", "gender": "F;M", "homepage": "https://baidu.com;http://yangy.org", "dblp": ";", "google_scholar": ";", "or_profile": "~Boning_Zhang3;~Yang_Yang35", "aff": "Zhejiang University;Zhejiang University", "aff_domain": "zju.edu.cn;zju.edu.cn", "position": "PhD student;Associate Professor", "bibtex": "@inproceedings{\nzhang2023mediahg,\ntitle={Media{HG}: Rethinking Eye-catchy Features in Social Media Headline Generation},\nauthor={Boning Zhang and Yang Yang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=fG6zH1LBHE}\n}", "github": "", "project": "", "reviewers": "r8GS;miJc;zt61", "site": "https://openreview.net/forum?id=fG6zH1LBHE", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;3;4", "excitement": "4;3;4", "reproducibility": "3;3;3", "correctness": "4;4;3", "rating_avg": 5.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.0, "correctness_avg": 3.6666666666666665, "replies_avg": 10, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-5058-4417", "linkedin": ";", "aff_unique_index": "0;0", "aff_unique_norm": "Zhejiang University", "aff_unique_dep": "", "aff_unique_url": "https://www.zju.edu.cn", "aff_unique_abbr": "ZJU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "id": "fK6N4R6TpF", "title": "NERvous About My Health: Constructing a Bengali Medical Named Entity Recognition Dataset", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "The ability to identify important entities in a text, known as Named Entity Recognition (NER), is useful in a large variety of downstream tasks in the biomedical domain. This is a considerably difficult task when working with Consumer Health Questions (CHQs), which consist of informal language used in day-to-day life by patients. These difficulties are amplified in the case of Bengali, which allows for a huge amount of flexibility in sentence structures and has significant variances in regional dialects. Unfortunately, the complexity of the language is not accurately reflected in the limited amount of available data, which makes it difficult to build a reliable decision-making system. To address the scarcity of data, this paper presents 'Bangla-HealthNER', a comprehensive dataset designed to identify named entities in health-related texts in the Bengali language. 
It consists of 31,783 samples sourced from a popular online public health platform, which allows it to capture the diverse range of linguistic styles and dialects used by native speakers from various regions in their day-to-day lives. The insight into this diversity in language will prove useful to any medical decision-making systems that are developed for use in real-world applications. To highlight the difficulty of the dataset, it has been benchmarked on state-of-the-art token classification models, where BanglishBERT achieved the highest performance with an F1-score of $56.13 \\pm 0.75$%. The dataset and all relevant code used in this work have been made publicly available.", "keywords": "Named Entity Recognition;Natural Language Processing;Consumer Health", "primary_area": "", "supplementary_material": "", "author": "Alvi Aveen Khan;Fida Kamal;Nuzhat Nower;Tasnim Ahmed;Sabbir Ahmed;Tareque Mohmud Chowdhury", "authorids": "~Alvi_Aveen_Khan1;~Fida_Kamal1;~Nuzhat_Nower1;~Tasnim_Ahmed1;~Sabbir_Ahmed1;~Tareque_Mohmud_Chowdhury1", "gender": ";F;F;M;M;M", "homepage": ";;;https://tasnim7ahmed.github.io/;https://ggck43.github.io/;http://cse.iutoic-dhaka.edu/profile/tareque", "dblp": ";;;;;", "google_scholar": ";https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=en;0fNsDKwAAAAJ;l_Dk4ZoAAAAJ;b0BdXHQAAAAJ", "or_profile": "~Alvi_Aveen_Khan1;~Fida_Kamal1;~Nuzhat_Nower1;~Tasnim_Ahmed1;~Sabbir_Ahmed1;~Tareque_Mohmud_Chowdhury1", "aff": ";Islamic University of Technology;Islamic University of Technology;Queen's University;Islamic University of Technology;Islamic University of Technology", "aff_domain": ";iutoic-dhaka.edu;iutoic-dhaka.edu;queensu.ca;iutoic-dhaka.edu;iutoic-dhaka.edu", "position": ";Undergrad student;Undergrad student;PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nkhan2023nervous,\ntitle={{NER}vous About My Health: Constructing a Bengali Medical Named Entity Recognition Dataset},\nauthor={Alvi Aveen Khan and Fida Kamal and Nuzhat Nower and Tasnim Ahmed and Sabbir Ahmed and Tareque Mohmud Chowdhury},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=fK6N4R6TpF}\n}", "github": "", "project": "", "reviewers": "NwnB;V5Uk;LNMK", "site": "https://openreview.net/forum?id=fK6N4R6TpF", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;5;4", "excitement": "4;4;3", "reproducibility": "4;3;3", "correctness": "3;3;3", "rating_avg": 4.0, "confidence_avg": 4.333333333333333, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0003-4820-5809;;;0000-0001-5928-4886;", "linkedin": ";;nuzhat-nower-236870233/;;sabbirahmediut/;", "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "Islamic University of Technology;Queen's University", "aff_unique_dep": ";", "aff_unique_url": "https://www.iut-dhaka.edu.bd;https://www.queensu.ca", "aff_unique_abbr": "IUT;Queen's", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0", "aff_country_unique": "Bangladesh;Canada" }, { "id": "fL8AKDvELp", "title": "HyperRouter: Towards Efficient Training and Inference of Sparse Mixture of Experts", "track": "main", "status": "Short Main", "tldr": "", "abstract": "By routing input tokens to only a few split experts, Sparse Mixture-of-Experts has enabled efficient training of 
large language models. Recent findings suggest that fixing the routers can achieve competitive performance by alleviating the collapsing problem, where all experts eventually learn similar representations. However, this strategy has two key limitations: (i) the policy derived from random routers might be sub-optimal, and (ii) it requires extensive resources during training and evaluation, leading to limited efficiency gains. This work introduces \\texttt{HyperRouter}, which dynamically generates the router's parameters through a fixed hypernetwork and trainable embeddings to achieve a balance between training the routers and freezing them to learn an improved routing policy. Extensive experiments across a wide range of tasks demonstrate the superior performance and efficiency gains of \\texttt{HyperRouter} compared to existing routing methods. Our implementation is publicly available at {\\url{{https://github.com/giangdip2410/HyperRouter}}}.", "keywords": "Sparse mixture of experts;hypernetwork;efficient training of LLMs;large language models", "primary_area": "", "supplementary_material": "", "author": "Truong Giang Do;Le Huy Khiem;Quang Pham;TrungTin Nguyen;Thanh-Nam Doan;Binh T. Nguyen;Chenghao Liu;Savitha Ramasamy;Xiaoli Li;Steven HOI", "authorids": "~Truong_Giang_Do1;~Le_Huy_Khiem1;~Quang_Pham1;~TrungTin_Nguyen1;~Thanh-Nam_Doan1;~Binh_T._Nguyen1;~Chenghao_Liu1;~Savitha_Ramasamy1;~Xiaoli_Li1;~Steven_HOI1", "gender": "M;M;M;M;;M;M;F;M;M", "homepage": "https://github.com/giangdip2410;http://lhkhiem28.github.io;https://sites.google.com/view/quangpham93;https://trung-tinnguyen.github.io/;https://tndoan.com/;https://sites.google.com/site/ntbinhpolytechnique/;;;https://personal.ntu.edu.sg/xlli/;https://www.smu.edu.sg/faculty/profile/110831/Steven-HOI", "dblp": "338/9002;;81/8316;275/3643;160/1537;06/2545;;07/11214;l/XiaoliLi.html;h/StevenCHHoi", "google_scholar": ";https://scholar.google.com/citations?hl=vi;https://scholar.google.com.sg/citations?user=WC7Bu_kAAAAJ;NhiJDJsAAAAJ;CtZurGMAAAAJ;dXEb3PMAAAAJ;https://scholar.google.com/citations?hl=en;SLQ1lxgAAAAJ;E3yQKloAAAAJ;https://scholar.google.com.tw/citations?user=JoLjflYAAAAJ", "or_profile": "~Truong_Giang_Do1;~Le_Huy_Khiem1;~Quang_Pham1;~TrungTin_Nguyen1;~Thanh-Nam_Doan1;~Binh_T._Nguyen1;~Chenghao_Liu1;~Savitha_Ramasamy1;~Xiaoli_Li1;~Steven_HOI1", "aff": "University of Tennessee at Chattanooga;VinUniversity;A*STAR;INRIA;University of Tennessee at Chattanooga;Ho Chi Minh city University of Science, Vietnam National University;Salesforce AI Research;Institute for Infocomm Research, Agency for Science, Technology and Research, Singapore;A*STAR;Singapore Management University", "aff_domain": "utc.edu;vinuni.edu.vn;i2r.a-star.edu.sg;inria.fr;utc.edu;hcmus.edu.vn;salesforce.com;i2r.a-star.edu.sg;a-star.edu.sg;", "position": "MS student;Researcher;Researcher;Postdoc;Researcher;Associate Professor;Researcher;Principal Researcher;Principal Researcher;Associate Professor", "bibtex": "@inproceedings{\ndo2023hyperrouter,\ntitle={HyperRouter: Towards Efficient Training and Inference of Sparse Mixture of Experts},\nauthor={Truong Giang Do and Le Huy Khiem and Quang Pham and TrungTin Nguyen and Thanh-Nam Doan and Binh T. 
Nguyen and Chenghao Liu and Savitha Ramasamy and Xiaoli Li and Steven HOI},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=fL8AKDvELp}\n}", "github": "", "project": "", "reviewers": "wUFz;1481;EK3o", "site": "https://openreview.net/forum?id=fL8AKDvELp", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;3", "excitement": "4;4;3", "reproducibility": "4;4;3", "correctness": "3;3;2", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 2.6666666666666665, "replies_avg": 11, "authors#_avg": 10, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0001-8625-6864;;0000-0001-8433-5980;;0000-0001-5249-9702;;0000-0003-1534-2989;0000-0002-0762-6562;", "linkedin": ";lhkhiem28;;trungtinnguyen0/;;;chenghao-liu-40a62a56/;;li-xiaoli-41027ba/;", "aff_unique_index": "0;1;2;3;0;4;5;2;2;6", "aff_unique_norm": "University of Tennessee;VinUniversity;Agency for Science, Technology and Research;INRIA;Ho Chi Minh City University of Science;Salesforce;Singapore Management University", "aff_unique_dep": ";;;;;Salesforce AI Research;", "aff_unique_url": "https://www.utc.edu;https://vinuni.edu.vn;https://www.a-star.edu.sg;https://www.inria.fr;;https://www.salesforce.com;https://www.smu.edu.sg", "aff_unique_abbr": "UT Chattanooga;VinUni;A*STAR;INRIA;;Salesforce AI;SMU", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Chattanooga;", "aff_country_unique_index": "0;1;2;3;0;1;0;2;2;2", "aff_country_unique": "United States;Vietnam;Singapore;France" }, { "id": "fLJVvFGFEE", "title": "Representativeness as a Forgotten Lesson for Multilingual and Code-switched Data Collection and Preparation", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Multilingualism is widespread around the world and code-switching (CSW) is a common practice among different language pairs/tuples across locations and regions. However, there is still not much progress in building successful CSW systems, despite the recent advances in Massive Multilingual Language Models (MMLMs). We investigate the reasons behind this setback through a critical study about the existing CSW data sets (68) across language pairs in terms of the collection and preparation (e.g. transcription and annotation) stages. This in-depth analysis reveals that \\textbf{a)} most CSW data involves English ignoring other language pairs/tuples \\textbf{b)} there are flaws in terms of representativeness in data collection and preparation stages due to ignoring the location based, socio-demographic and register variation in CSW. In addition, lack of clarity on the data selection and filtering stages shadow the representativeness of CSW data sets. We conclude by providing a short check-list to improve the representativeness for forthcoming studies involving CSW data collection and preparation.", "keywords": "Multilingualism;Code-switching;representativeness;data preparation;annotation;transcription", "primary_area": "", "supplementary_material": "", "author": "A. 
Seza Do\u011fru\u00f6z;Sunayana Sitaram;Zheng Xin Yong", "authorids": "~A._Seza_Do\u011fru\u00f6z1;~Sunayana_Sitaram1;~Zheng_Xin_Yong1", "gender": ";F;M", "homepage": ";https://www.microsoft.com/en-us/research/people/susitara/;https://yongzx.github.io", "dblp": ";27/7642;266/0855", "google_scholar": ";PUxwYrkAAAAJ;https://scholar.google.com/citations?hl=en", "or_profile": "~A._Seza_Do\u011fru\u00f6z1;~Sunayana_Sitaram1;~Zheng_Xin_Yong1", "aff": ";Microsoft;Brown University", "aff_domain": ";microsoft.com;brown.edu", "position": ";Researcher;PhD student", "bibtex": "@inproceedings{\ndo{\\u{g}}ru{\\\"o}z2023representativeness,\ntitle={Representativeness as a Forgotten Lesson for Multilingual and Code-switched Data Collection and Preparation},\nauthor={A. Seza Do{\\u{g}}ru{\\\"o}z and Sunayana Sitaram and Zheng Xin Yong},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=fLJVvFGFEE}\n}", "github": "", "project": "", "reviewers": "5PvD;sZBB;65nB", "site": "https://openreview.net/forum?id=fLJVvFGFEE", "pdf_size": 0, "rating": "2;2;2", "confidence": "4;4;4", "excitement": "3;4;4", "reproducibility": "4;5;4", "correctness": "4;4;4", "rating_avg": 2.0, "confidence_avg": 4.0, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.333333333333333, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": ";;", "aff_unique_index": "0;1", "aff_unique_norm": "Microsoft;Brown University", "aff_unique_dep": "Microsoft Corporation;", "aff_unique_url": "https://www.microsoft.com;https://www.brown.edu", "aff_unique_abbr": "Microsoft;Brown", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "fM7x9Lvb9r", "title": "Controllable Contrastive Generation for Multilingual Biomedical Entity Linking", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Multilingual biomedical entity linking (MBEL) aims to map language-specific mentions in the biomedical text to standardized concepts in a multilingual knowledge base (KB) such as Unified Medical Language System (UMLS). In this paper, we propose Con2GEN, a prompt-based controllable contrastive generation framework for MBEL, which summarizes multidimensional information of the UMLS concept mentioned in biomedical text into a natural sentence following a predefined template. Instead of tackling the MBEL problem with a discriminative classifier, we formulate it as a sequence-to-sequence generation task, which better exploits the shared dependencies between source mentions and target entities. Moreover, Con2GEN matches against UMLS concepts in as many languages and types as possible, hence facilitating cross-information disambiguation. 
Extensive experiments show that our model achieves promising performance improvements compared with several state-of-the-art techniques on the XL-BEL and the Mantra GSC datasets spanning 12 typologically diverse languages.", "keywords": "Multilingual Biomedical Entity Linking;Controllable Generation;Contrastive Learning", "primary_area": "", "supplementary_material": "", "author": "Tiantian Zhu;Yang Qin;Qingcai Chen;Xin Mu;Changlong Yu;Yang Xiang", "authorids": "~Tiantian_Zhu1;~Yang_Qin5;~Qingcai_Chen2;~Xin_Mu1;~Changlong_Yu1;~Yang_Xiang4", "gender": "F;F;M;M;M;M", "homepage": "https://bdsc.szu.edu.cn/teachers/research/2bbc5636-8880-4f93-992d-25d0057c8e8d;http://cs.hitsz.edu.cn/info/1021/1884.htm;http://faculty.hitsz.edu.cn/chenqingcai1;;;", "dblp": ";;15/1052;180/5685;76/238;50/2192-3", "google_scholar": "LJ8M5ewAAAAJ;;7aR5D4sAAAAJ;flijGTkAAAAJ;;zDyL-NoAAAAJ", "or_profile": "~Tiantian_Zhu1;~Yang_Qin5;~Qingcai_Chen2;~Xin_Mu1;~Changlong_Yu1;~Yang_Xiang4", "aff": "Harbin Institute of Technology (Shenzhen);Harbin Institute of Technology;Harbin Institute of Technology (Shenzhen);Peng Cheng Laboratory;Department of Computer Science and Engineering, The Hong Kong University of Science and Technology;Peng Cheng Laboratory", "aff_domain": "hit.edu.cn;hit.edu.cn;hit.edu.cn;pcl.ac.cn;cse.ust.hk;pcl.ac", "position": "PhD student;Associate Professor;Full Professor;Researcher;PhD student;Researcher", "bibtex": "@inproceedings{\nzhu2023controllable,\ntitle={Controllable Contrastive Generation for Multilingual Biomedical Entity Linking},\nauthor={Tiantian Zhu and Yang Qin and Qingcai Chen and Xin Mu and Changlong Yu and Yang Xiang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=fM7x9Lvb9r}\n}", "github": "", "project": "", "reviewers": "iTH6;LBDQ;F782;4M3G", "site": "https://openreview.net/forum?id=fM7x9Lvb9r", "pdf_size": 0, "rating": "4;4;4;4", "confidence": "4;4;3;4", "excitement": "3;4;4;3", "reproducibility": "4;3;3;3", "correctness": "3;4;3;3", "rating_avg": 4.0, "confidence_avg": 3.75, "excitement_avg": 3.5, "reproducibility_avg": 3.25, "correctness_avg": 3.25, "replies_avg": 13, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-5470-8309;;;;;0000-0003-1395-6805", "linkedin": ";;;;;yang-xiang-7554b6195/", "aff_unique_index": "0;0;0;1;2;1", "aff_unique_norm": "Harbin Institute of Technology;Pengcheng Laboratory;Hong Kong University of Science and Technology", "aff_unique_dep": ";Peng Cheng Laboratory;Department of Computer Science and Engineering", "aff_unique_url": "http://en.hhit.edu.cn/;http://www.pcl.ac.cn;https://www.ust.hk", "aff_unique_abbr": "HIT;PCL;HKUST", "aff_campus_unique_index": "0;1;0;3", "aff_campus_unique": "Shenzhen;Harbin;;Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "fNi7eet4Qc", "title": "Prompt-Based Editing for Text Style Transfer", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Prompting approaches have been recently explored in text style transfer, where a textual prompt is used to query a pretrained language model (PLM) to generate style-transferred texts word by word in an autoregressive manner. However, such a generation process is less controllable and early prediction errors may affect future word predictions. In this paper, we propose a prompt-based editing approach to text style transfer. 
Specifically, we prompt a PLM for style classification and use the classification probability to compute a style score. Then, we perform discrete search with word-level editing to maximize a comprehensive scoring function for the style-transfer task. In this way, we transform a prompt-based generation problem into a classification one, which does not suffer from the error accumulation problem and is more controllable than the autoregressive generation of sentences. In our experiments, we performed both automatic and human evaluation on three style-transfer benchmark datasets, and show that our approach largely outperforms the existing systems that have 20 times more parameters. Additional empirical analyses further demonstrate the effectiveness of our approach.", "keywords": "Text style transfer; Prompt-based editing; Large language models", "primary_area": "", "supplementary_material": "", "author": "Guoqing Luo;Yu Tong Han;Lili Mou;Mauajama Firdaus", "authorids": "~Guoqing_Luo1;~Yu_Tong_Han1;~Lili_Mou1;~Mauajama_Firdaus1", "gender": "M;F;M;F", "homepage": ";;https://lili-mou.github.io/;", "dblp": "275/8764;;;223/8272", "google_scholar": "ggVTvKoAAAAJ;;https://scholar.google.com.hk/schhp?hl=en;https://scholar.google.co.in/citations?user=nVmB914AAAAJ", "or_profile": "~Guoqing_Luo1;~Yu_Tong_Han1;~Lili_Mou1;~Mauajama_Firdaus1", "aff": "University of Alberta;University of Alberta;University of Alberta;University of Alberta", "aff_domain": "ualberta.ca;ualberta.ca;ualberta.ca;ualberta.ca", "position": "MS student;MS student;Assistant Professor;Postdoc", "bibtex": "@inproceedings{\nluo2023promptbased,\ntitle={Prompt-Based Editing for Text Style Transfer},\nauthor={Guoqing Luo and Yu Tong Han and Lili Mou and Mauajama Firdaus},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=fNi7eet4Qc}\n}", "github": "", "project": "", "reviewers": "bmvG;xk33;PLiK", "site": "https://openreview.net/forum?id=fNi7eet4Qc", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;5;5", "excitement": "4;3;3", "reproducibility": "4;5;4", "correctness": "4;3;3", "rating_avg": 3.0, "confidence_avg": 4.666666666666667, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.333333333333333, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-4097-5051;;0000-0001-7485-5974", "linkedin": "guoqing-luo-5a2657198/;zorina-han-87ab48152/;;mauajama-firdaus-9b577a16a/", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Alberta", "aff_unique_dep": "", "aff_unique_url": "https://www.ualberta.ca", "aff_unique_abbr": "UAlberta", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Canada" }, { "id": "fNlSVIsbIT", "title": "HoneyBee: Progressive Instruction Finetuning of Large Language Models for Materials Science", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "We propose an instruction-based process for trustworthy data curation in materials science (MatSci-Instruct), which we then apply to finetune a LLaMa-based language model targeted for materials science (HoneyBee). MatSci-Instruct helps alleviate the scarcity of relevant, high-quality materials science textual data available in the open literature, and HoneyBee is the first billion-parameter language model specialized to materials science. 
In MatSci-Instruct we improve the trustworthiness of generated data by prompting multiple commercially available large language models for generation with an Instructor module (e.g. Chat-GPT) and verification from an independent Verifier module (e.g. Claude). Using MatSci-Instruct, we construct a dataset of multiple tasks and measure the quality of our dataset along multiple dimensions, including accuracy against known facts, relevance to materials science, as well as completeness and reasonableness of the data. Moreover, we iteratively generate more targeted instructions and instruction-data in a finetuning-evaluation-feedback loop leading to progressively better performance for our finetuned HoneyBee models. Our evaluation on the MatSci-NLP benchmark shows HoneyBee's outperformance of existing language models on materials science tasks and iterative improvement in successive stages of instruction-data refinement. We study the quality of HoneyBee's language modeling through automatic evaluation and analyze case studies to further understand the model's capabilities and limitations. Our code and relevant datasets are publicly available at https://github.com/BangLab-UdeM-Mila/NLP4MatSci-HoneyBee.", "keywords": "materials science;LLaMa;instructions based finetuning;progressive finetuning;feedback based instructions", "primary_area": "", "supplementary_material": "", "author": "Yu Song;Santiago Miret;Huan Zhang;Bang Liu", "authorids": "~Yu_Song4;~Santiago_Miret1;~Huan_Zhang11;~Bang_Liu1", "gender": "M;M;F;M", "homepage": "https://sites.google.com/view/yusong/home;https://www.intel.ai/bio/santiago-miret/;;http://www-labs.iro.umontreal.ca/~liubang/", "dblp": "54/1216-5;241/5030;;", "google_scholar": ";HLQ_te4AAAAJ;;lmfAnP4AAAAJ", "or_profile": "~Yu_Song4;~Santiago_Miret1;~Huan_Zhang11;~Bang_Liu1", "aff": ";Intel;University of Waterloo;University of Montreal", "aff_domain": ";intel.com;uwaterloo.ca;umontreal.ca", "position": ";Researcher;Undergrad student;Assistant Professor", "bibtex": "@inproceedings{\nsong2023honeybee,\ntitle={HoneyBee: Progressive Instruction Finetuning of Large Language Models for Materials Science},\nauthor={Yu Song and Santiago Miret and Huan Zhang and Bang Liu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=fNlSVIsbIT}\n}", "github": "", "project": "", "reviewers": "ztYe;RMWP;1nqS", "site": "https://openreview.net/forum?id=fNlSVIsbIT", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;3", "excitement": "3;3;3", "reproducibility": "3;2;2", "correctness": "3;3;3", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.0, "reproducibility_avg": 2.3333333333333335, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-5121-3853;0009-0001-0373-8241;0000-0002-9483-8984", "linkedin": ";santiago-miret/;;bang-liu-12b66789/?originalSubdomain=ca", "aff_unique_index": "0;1;2", "aff_unique_norm": "Intel;University of Waterloo;University of Montreal", "aff_unique_dep": "Intel Corporation;;", "aff_unique_url": "https://www.intel.com;https://uwaterloo.ca;https://wwwumontreal.ca", "aff_unique_abbr": "Intel;UW;UM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1", "aff_country_unique": "United States;Canada" }, { "id": "fONyQKyvsY", "title": "Analysing State-Backed Propaganda Websites: a New Dataset and Linguistic Study", "track": "main", 
"status": "Short Main", "tldr": "", "abstract": "This paper analyses two hitherto unstudied sites sharing state-backed disinformation, Reliable Recent News (rrn.world) and WarOnFakes (waronfakes.com), which publish content in Arabic, Chinese, English, French, German, and Spanish. We describe our content acquisition methodology and perform cross-site unsupervised topic clustering on the resulting multilingual dataset. We also perform linguistic and temporal analysis of the web page translations and topics over time, and investigate articles with false publication dates. We make publicly available this new dataset of 14,053 articles, annotated with each language version, and additional metadata such as links and images. The main contribution of this paper for the NLP community is in the novel dataset which enables studies of disinformation networks, and the training of NLP tools for disinformation detection.", "keywords": "misinformation;disinformation;social media;dataset;social science", "primary_area": "", "supplementary_material": "", "author": "Freddy Heppell;Kalina Bontcheva;Carolina Scarton", "authorids": "~Freddy_Heppell1;~Kalina_Bontcheva2;~Carolina_Scarton1", "gender": "M;F;", "homepage": "https://freddyheppell.com;;https://carolscarton.github.io", "dblp": "342/7278;https://dblp.uni-trier.de/pid/b/KalinaBontcheva.html;23/8672", "google_scholar": ";https://scholar.google.co.uk/citations?user=kUbDCnMAAAAJ;e6YOuiQAAAAJ", "or_profile": "~Freddy_Heppell1;~Kalina_Bontcheva2;~Carolina_Scarton1", "aff": "University of Sheffield;University of Sheffield;University of Sheffield", "aff_domain": "sheffield.ac.uk;shef.ac.uk;sheffield.ac.uk", "position": "PhD student;Full Professor;Lecturer", "bibtex": "@inproceedings{\nheppell2023analysing,\ntitle={Analysing State-Backed Propaganda Websites: a New Dataset and Linguistic Study},\nauthor={Freddy Heppell and Kalina Bontcheva and Carolina Scarton},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=fONyQKyvsY}\n}", "github": "", "project": "", "reviewers": "q5Fz;Rv3e;Bm7V", "site": "https://openreview.net/forum?id=fONyQKyvsY", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;3;2", "excitement": "4;4;4", "reproducibility": "4;5;4", "correctness": "3;3;4", "rating_avg": 4.0, "confidence_avg": 3.0, "excitement_avg": 4.0, "reproducibility_avg": 4.333333333333333, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0009-0003-7241-5846;0000-0001-6152-9600;0000-0002-0103-4072", "linkedin": ";;carolina-scarton/", "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Sheffield", "aff_unique_dep": "", "aff_unique_url": "https://www.sheffield.ac.uk", "aff_unique_abbr": "Sheffield", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United Kingdom" }, { "id": "fOoZipX9z3", "title": "Grounding Visual Illusions in Language: Do Vision-Language Models Perceive Illusions Like Humans?", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Vision-Language Models (VLMs) are trained on vast amounts of data captured by humans emulating our understanding of the world. However, known as visual illusions, human's perception of reality isn't always faithful to the physical world. 
This raises a key question: do VLMs have the similar kind of illusions as humans do, or do they faithfully learn to represent reality? To investigate this question, we build a dataset containing five types of visual illusions and formulate four tasks to examine visual illusions in state-of-the-art VLMs. Our findings have shown that although the overall alignment is low, larger models are closer to human perception and more susceptible to visual illusions. Our dataset and initial findings will promote a better understanding of visual illusions in humans and machines and provide a stepping stone for future computational models that can better align humans and machines in perceiving and communicating about the shared visual world. The code and data are available at [github.com/vl-illusion/dataset](https://github.com/vl-illusion/dataset).", "keywords": "vision-language model;visual illusion;grounding", "primary_area": "", "supplementary_material": "", "author": "Yichi Zhang;Jiayi Pan;Yuchen Zhou;Rui Pan;Joyce Chai", "authorids": "~Yichi_Zhang1;~Jiayi_Pan1;~Yuchen_Zhou5;~Rui_Pan6;~Joyce_Chai2", "gender": "M;M;F;F;F", "homepage": "https://594zyc.github.io/;https://www.jiayipan.me/;;https://www.kellogg.northwestern.edu/research/science-of-science/faculty-and-contributors.aspx;https://web.eecs.umich.edu/~chaijy/", "dblp": "86/7054-1;39/6476-2;;;c/JoyceYChai", "google_scholar": "xkBBhY8AAAAJ;n9Y_sQEAAAAJ;;;", "or_profile": "~Yichi_Zhang1;~Jiayi_Pan1;~Yuchen_Zhou5;~Rui_Pan6;~Joyce_Y_Chai1", "aff": "University of Michigan;Shanghai Jiaotong University;Shanghai Jiaotong University;Northwestern University;University of Michigan", "aff_domain": "umich.edu;sjtu.edu.cn;sjtu.edu.cn;northwestern.edu;umich.edu", "position": "PhD student;Undergrad student;Undergrad student;PhD student;Full Professor", "bibtex": "@inproceedings{\nzhang2023grounding,\ntitle={Grounding Visual Illusions in Language: Do Vision-Language Models Perceive Illusions Like Humans?},\nauthor={Yichi Zhang and Jiayi Pan and Yuchen Zhou and Rui Pan and Joyce Chai},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=fOoZipX9z3}\n}", "github": "", "project": "", "reviewers": "Zh5s;EWKP;2Xp9", "site": "https://openreview.net/forum?id=fOoZipX9z3", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;3;3", "excitement": "4;4;4", "reproducibility": "4;5;4", "correctness": "5;4;4", "rating_avg": 5.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 4.0, "reproducibility_avg": 4.333333333333333, "correctness_avg": 4.333333333333333, "replies_avg": 10, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0003-3214-1070;0000-0003-0817-4083;;;0000-0002-9658-2230", "linkedin": "yichi-zhang-354a83128/;;yuchen-zhou-b14864225;;", "aff_unique_index": "0;1;1;2;0", "aff_unique_norm": "University of Michigan;Shanghai Jiao Tong University;Northwestern University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.umich.edu;https://www.sjtu.edu.cn;https://www.northwestern.edu", "aff_unique_abbr": "UM;SJTU;NU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;0;0", "aff_country_unique": "United States;China" }, { "id": "fRpif5Sflc", "title": "Rumor Detection on Social Media with Crowd Intelligence and ChatGPT-Assisted Networks", "track": "main", "status": "Long Main", "tldr": "", "abstract": "In the era of widespread dissemination through social media, the task of rumor detection 
plays a pivotal role in establishing a trustworthy and reliable information environment. Nonetheless, existing research on rumor detection confronts several challenges: the limited expressive power of text encoding sequences, difficulties in domain knowledge coverage and effective information extraction with knowledge graph-based methods, and insufficient mining of semantic structural information. To address these issues, we propose a Crowd Intelligence and ChatGPT-Assisted Network(CICAN) for rumor classification. Specifically, we present a crowd intelligence-based semantic feature learning module to capture textual content's sequential and hierarchical features. Then, we design a knowledge-based semantic structural mining module that leverages ChatGPT for knowledge enhancement. Finally, we construct an entity-sentence heterogeneous graph and design Entity-Aware Heterogeneous Attention to effectively integrate diverse structural information meta-paths. Experimental results demonstrate that CICAN achieves performance improvement in rumor detection tasks, validating the effectiveness and rationality of using large language models as auxiliary tools.", "keywords": "Rumor Detection; Crowd Intelligence; Large Language Model; Heterogeneous Graph; Semantic Feature Learning", "primary_area": "", "supplementary_material": "", "author": "Chang Yang;Peng Zhang;Wenbo Qiao;Hui Gao;JiaMing Zhao", "authorids": "~Chang_Yang4;~Peng_Zhang17;~Wenbo_Qiao1;~Hui_Gao4;~JiaMing_Zhao1", "gender": ";M;;F;M", "homepage": ";http://cic.tju.edu.cn/faculty/zhangpeng/index.html;;https://github.com/TJUIRLAB/SIGIR20_QINM;https://github.com/GGorMM1", "dblp": ";21/1048-2%20;;;170/2589", "google_scholar": ";tvDb5_cAAAAJ;;;", "or_profile": "~Chang_Yang4;~Peng_Zhang17;~Wenbo_Qiao1;~Hui_Gao4;~JiaMing_Zhao1", "aff": ";Tianjin University;;Tianjin University;Tianjin University", "aff_domain": ";tju.edu.cn;;tju.edu.cn;tju.edu.cn", "position": ";Full Professor;;PhD student;MS student", "bibtex": "@inproceedings{\nyang2023rumor,\ntitle={Rumor Detection on Social Media with Crowd Intelligence and Chat{GPT}-Assisted Networks},\nauthor={Chang Yang and Peng Zhang and Wenbo Qiao and Hui Gao and JiaMing Zhao},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=fRpif5Sflc}\n}", "github": "", "project": "", "reviewers": "2v6J;oyJe;6ht6", "site": "https://openreview.net/forum?id=fRpif5Sflc", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;4;4", "excitement": "4;4;3", "reproducibility": "5;4;3", "correctness": "5;4;3", "rating_avg": 5.0, "confidence_avg": 4.0, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.0, "correctness_avg": 4.0, "replies_avg": 10, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0003-0228-9330;;;0009-0003-9613-4774", "linkedin": ";;;;", "aff_unique_index": "0;0;0", "aff_unique_norm": "Tianjin University", "aff_unique_dep": "", "aff_unique_url": "http://www.tju.edu.cn", "aff_unique_abbr": "TJU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "fXyoHAVffT", "title": "Unsupervised Candidate Answer Extraction through Differentiable Masker-Reconstructor Model", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Question generation is a widely used data augmentation approach with extensive applications, and extracting qualified candidate answers from context passages is a 
critical step for most question generation systems. However, existing methods for candidate answer extraction are reliant on linguistic rules or annotated data that face the partial annotation issue and challenges in generalization. To overcome these limitations, we propose a novel unsupervised candidate answer extraction approach that leverages the inherent structure of context passages through a Differentiable Masker-Reconstructor (DMR) Model with the enforcement of self-consistency for picking up salient information tokens. We curated two datasets with exhaustively-annotated answers and benchmark a comprehensive set of supervised and unsupervised candidate answer extraction methods. We demonstrate the effectiveness of the DMR model by showing its performance is superior among unsupervised methods and comparable to supervised methods.", "keywords": "Candidate Answer Extraction;Self-consitency Learning;Unsupervised Learning;Masker-Reconstructor Model", "primary_area": "", "supplementary_material": "", "author": "Zhuoer Wang;Yicheng Wang;Ziwei Zhu;James Caverlee", "authorids": "~Zhuoer_Wang1;~Yicheng_Wang1;~Ziwei_Zhu1;~James_Caverlee2", "gender": "M;M;M;M", "homepage": "https://edillower.github.io/;;https://zziwei.github.io/;https://people.engr.tamu.edu/caverlee/", "dblp": "276/1293;;159/9916;55/3697.html", "google_scholar": "bWd8-mEAAAAJ;https://scholar.google.com/citations?hl=en;3S6pM7wAAAAJ;LB1dq_sAAAAJ", "or_profile": "~Zhuoer_Wang1;~Yicheng_Wang1;~Ziwei_Zhu1;~James_Caverlee2", "aff": "Texas A&M University;Texas A&M;George Mason University;Google", "aff_domain": "tamu.edu;tamu.edu;gmu.edu;google.com", "position": "PhD student;PhD student;Assistant Professor;Researcher", "bibtex": "@inproceedings{\nwang2023unsupervised,\ntitle={Unsupervised Candidate Answer Extraction through Differentiable Masker-Reconstructor Model},\nauthor={Zhuoer Wang and Yicheng Wang and Ziwei Zhu and James Caverlee},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=fXyoHAVffT}\n}", "github": "", "project": "", "reviewers": "P2SC;HZLu;wx9Z", "site": "https://openreview.net/forum?id=fXyoHAVffT", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;3;3", "excitement": "3;3;3", "reproducibility": "4;4;4", "correctness": "3;3;3", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.0, "reproducibility_avg": 4.0, "correctness_avg": 3.0, "replies_avg": 9, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0002-3990-4774;0000-0001-8350-8528", "linkedin": ";;;", "aff_unique_index": "0;0;1;2", "aff_unique_norm": "Texas A&M University;George Mason University;Google", "aff_unique_dep": ";;Google", "aff_unique_url": "https://www.tamu.edu;https://www.gmu.edu;https://www.google.com", "aff_unique_abbr": "TAMU;GMU;Google", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "fbbbbfhAxC", "title": "Cross-lingual Transfer Can Worsen Bias in Sentiment Analysis", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Sentiment analysis (SA) systems are widely deployed in many of the world's languages, and there is well-documented evidence of demographic bias in these systems. In languages beyond English, scarcer training data is often supplemented with transfer learning using pre-trained models, including multilingual models trained on other languages. 
In some cases, even supervision data comes from other languages. Does cross-lingual transfer also import new biases? To answer this question, we use counterfactual evaluation to test whether gender or racial biases are imported when using cross-lingual transfer, compared to a monolingual transfer setting. Across five languages, we find that systems using cross-lingual transfer usually become more biased than their monolingual counterparts. We also find racial biases to be much more prevalent than gender biases. To spur further research on this topic, we release the sentiment models we used for this study, and the intermediate checkpoints throughout training, yielding 1,525 distinct models; we also release our evaluation code.", "keywords": "bias;fairness;multilingual;sentiment analysis;counterfactual;contrastive", "primary_area": "", "supplementary_material": "", "author": "Seraphina Goldfarb-Tarrant;Bj\u00f6rn Ross;Adam Lopez", "authorids": "~Seraphina_Goldfarb-Tarrant1;~Bj\u00f6rn_Ross1;~Adam_Lopez1", "gender": "F;M;Not Specified", "homepage": "http://seraphinatarrant.github.io;https://sweb.inf.ed.ac.uk/bross3/;https://alopez.github.io/", "dblp": "239/4080;194/2453.html;65/5274", "google_scholar": "ywWpui8AAAAJ;https://scholar.google.co.uk/citations?user=RQ2zK8QAAAAJ;https://scholar.google.co.uk/citations?user=u4sxKZwAAAAJ", "or_profile": "~Seraphina_Goldfarb-Tarrant1;~Bj\u00f6rn_Ross1;~Adam_Lopez1", "aff": "University of Edinburgh, University of Edinburgh;University of Edinburgh, University of Edinburgh;University of Edinburgh", "aff_domain": "ed.ac.uk;ed.ac.uk;ed.ac.uk", "position": "PhD student;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\ngoldfarb-tarrant2023crosslingual,\ntitle={Cross-lingual Transfer Can Worsen Bias in Sentiment Analysis},\nauthor={Seraphina Goldfarb-Tarrant and Bj{\\\"o}rn Ross and Adam Lopez},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=fbbbbfhAxC}\n}", "github": "", "project": "", "reviewers": "hiFd;S7SY;F94e", "site": "https://openreview.net/forum?id=fbbbbfhAxC", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;3;4", "excitement": "4;4;4", "reproducibility": "3;5;4", "correctness": "4;4;4", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 4.0, "reproducibility_avg": 4.0, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0003-2717-3705;0000-0002-1533-9424", "linkedin": ";;adam-lopez-nlp-researcher/", "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Edinburgh", "aff_unique_dep": "", "aff_unique_url": "https://www.ed.ac.uk", "aff_unique_abbr": "Edinburgh", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United Kingdom" }, { "id": "feiAVaSXdb", "title": "Measuring and Narrowing the Compositionality Gap in Language Models", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "We investigate the ability of language models to perform compositional reasoning tasks where the overall solution depends on correctly composing the answers to sub-problems. We measure how often models can correctly answer all sub-problems but not generate the overall solution, a ratio we call the compositionality gap. 
We evaluate this ratio by asking multi-hop questions with answers that require composing multiple facts unlikely to have been observed together during pretraining. In the GPT-3 family of models, we show that as model size increases, single-hop question answering performance improves faster than multi-hop performance does; therefore, the compositionality gap does not decrease. This surprising result suggests that while more powerful models memorize and recall more factual knowledge, they show no corresponding improvement in their ability to perform this kind of compositional reasoning.\nWe then demonstrate how elicitive prompting (such as chain of thought) narrows the compositionality gap by reasoning explicitly instead of implicitly. We present a new method, self-ask, that further improves on chain of thought. In our method, the model explicitly asks itself (and then answers) follow-up questions before answering the initial question. We finally show that self-ask's structured prompting lets us easily plug in a search engine to answer the follow-up questions, which additionally improves accuracy.", "keywords": "language modeling;prompting;question answering;language model tool use", "primary_area": "", "supplementary_material": "", "author": "Ofir Press;Muru Zhang;Sewon Min;Ludwig Schmidt;Noah A. Smith;Mike Lewis", "authorids": "~Ofir_Press1;~Muru_Zhang1;~Sewon_Min1;~Ludwig_Schmidt1;~Noah_A._Smith2;~Mike_Lewis1", "gender": "M;M;F;M;M;M", "homepage": "https://ofir.io/about;https://nanami18.github.io/;https://www.sewonmin.com;http://people.csail.mit.edu/ludwigs/;;https://homes.cs.washington.edu/~nasmith/", "dblp": "185/0577;325/4648.html;203/9401;141/2720;19/6214;90/5204.html", "google_scholar": "LeHa8psAAAAJ;OJIXk7wAAAAJ;https://scholar.google.ca/citations?user=jU4IZs4AAAAJ;SWMKy70AAAAJ;SnQnQicAAAAJ;https://scholar.google.com/citations?hl=en", "or_profile": "~Ofir_Press1;~Muru_Zhang1;~Sewon_Min1;~Ludwig_Schmidt1;~Mike_Lewis1;~Noah_Smith1", "aff": "University of Washington;University of Washington;Meta Facebook;Allen Institute for Artificial Intelligence;Facebook AI Research;Allen Institute for Artificial Intelligence", "aff_domain": "washington.edu;cs.washington.edu;fb.com;allenai.org;fb.com;allenai.org", "position": "PhD student;MS student;PhD student;Researcher;Research Scientist;Senior Director of NLP Research", "bibtex": "@inproceedings{\npress2023measuring,\ntitle={Measuring and Narrowing the Compositionality Gap in Language Models},\nauthor={Ofir Press and Muru Zhang and Sewon Min and Ludwig Schmidt and Noah A.
Smith and Mike Lewis},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=feiAVaSXdb}\n}", "github": "", "project": "", "reviewers": "gDuZ;cRLS;9cAE", "site": "https://openreview.net/forum?id=feiAVaSXdb", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;4", "excitement": "4;4;4", "reproducibility": "4;4;4", "correctness": "3;3;3", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 4.0, "reproducibility_avg": 4.0, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;0000-0002-2310-6380", "linkedin": ";muruzhang/;;ludwig-schmidt-87ba3612/;;", "aff_unique_index": "0;0;1;2;1;2", "aff_unique_norm": "University of Washington;Meta;Allen Institute for Artificial Intelligence", "aff_unique_dep": ";Meta Platforms, Inc.;", "aff_unique_url": "https://www.washington.edu;https://meta.com;https://allenai.org", "aff_unique_abbr": "UW;Meta;AI2", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "fgQ7JQoBIM", "title": "C-STS: Conditional Semantic Textual Similarity", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Semantic textual similarity (STS) has been a cornerstone task in NLP that measures the degree of similarity between a pair of sentences, with applications in information retrieval, question answering, and embedding methods. However, it is an inherently ambiguous task, with the sentence similarity depending on the specific aspect of interest. We resolve this ambiguity by proposing a novel task called conditional STS (C-STS) which measures similarity conditioned on an aspect elucidated in natural language (hereon, condition). As an example, the similarity between the sentences \"The NBA player shoots a three-pointer.\" and \"A man throws a tennis ball into the air to serve.\" is higher for the condition \"The motion of the ball.\" (both upward) and lower for \"The size of the ball.\" (one large and one small). C-STS's advantages are two-fold: (1) it reduces the subjectivity and ambiguity of STS, and (2) enables fine-grained similarity evaluation using diverse conditions. C-STS contains almost 20,000 instances from diverse domains and we evaluate several state-of-the-art models to demonstrate that even the most performant fine-tuning and in-context learning models (GPT-4, Flan, SimCSE) find it challenging, with Spearman correlation scores of <50. 
We encourage the community to evaluate their models on C-STS to provide a more holistic view of semantic similarity and natural language understanding.", "keywords": "Conditional similarity;Sentence similarity;Dataset", "primary_area": "", "supplementary_material": "", "author": "Ameet Deshpande;Carlos E Jimenez;Howard Chen;Vishvak Murahari;Victoria Graf;Tanmay Rajpurohit;Ashwin Kalyan;Danqi Chen;Karthik R Narasimhan", "authorids": "~Ameet_Deshpande1;~Carlos_E_Jimenez1;~Howard_Chen1;~Vishvak_Murahari1;~Victoria_Graf1;~Tanmay_Rajpurohit1;~Ashwin_Kalyan6;~Danqi_Chen1;~Karthik_R_Narasimhan1", "gender": "M;M;M;F;M;F;M;M;M", "homepage": "https://www.carlosejimenez.com;https://howard50b.github.io/;https://vishvakmurahari.com/;;;https://www.cs.princeton.edu/~danqic/;http://www.karthiknarasimhan.com;https://ameet-1997.github.io;http://ashwinkalyan.com/", "dblp": "153/0588;06/2061;249/5621;348/6122.html;;87/7949;147/0322;220/4337;173/5217", "google_scholar": "Ue4wghAAAAAJ;wsNa_W4AAAAJ;Y_NYX7MAAAAJ;0arBo88AAAAJ;B4NztA8AAAAJ;sVR8ktkAAAAJ;euc0GX4AAAAJ;332L1coAAAAJ;KYHL9aIAAAAJ", "or_profile": "~Carlos_E_Jimenez1;~Howard_Chen1;~Vishvak_Murahari1;~Victoria_Graf1;~Tanmay_Rajpurohit1;~Danqi_Chen1;~Karthik_R_Narasimhan1;~Ameet_S_Deshpande1;~Ashwin_Kalyan_Vijayakumar1", "aff": "Princeton University;Princeton University;Princeton University;Princeton University;Independent Researcher;Princeton University;Princeton University;Princeton University;Allen Institute for Artificial Intelligence", "aff_domain": "princeton.edu;princeton.edu;princeton.edu;princeton.edu;tanmay.one;cs.princeton.edu;princeton.edu;princeton.edu;allenai.org", "position": "PhD student;PhD student;PhD student;Undergrad student;Researcher;Assistant Professor;Assistant Professor;PhD student;Research Scientist", "bibtex": "@inproceedings{\ndeshpande2023csts,\ntitle={C-{STS}: Conditional Semantic Textual Similarity},\nauthor={Ameet Deshpande and Carlos E Jimenez and Howard Chen and Vishvak Murahari and Victoria Graf and Tanmay Rajpurohit and Ashwin Kalyan and Danqi Chen and Karthik R Narasimhan},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=fgQ7JQoBIM}\n}", "github": "", "project": "", "reviewers": "L1HG;4jPR;cVEM", "site": "https://openreview.net/forum?id=fgQ7JQoBIM", "pdf_size": 0, "rating": "5;5;5", "confidence": "3;3;3", "excitement": "4;4;4", "reproducibility": "4;4;5", "correctness": "4;4;4", "rating_avg": 5.0, "confidence_avg": 3.0, "excitement_avg": 4.0, "reproducibility_avg": 4.333333333333333, "correctness_avg": 4.0, "replies_avg": 10, "authors#_avg": 9, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0001-9370-3909;;;;;;;;", "linkedin": ";;;;tanmay-rajpurohit-b13942125/;;;;", "aff_unique_index": "0;0;0;0;1;0;0;0;2", "aff_unique_norm": "Princeton University;Independent Researcher;Allen Institute for Artificial Intelligence", "aff_unique_dep": ";;", "aff_unique_url": "https://www.princeton.edu;;https://allenai.org", "aff_unique_abbr": "Princeton;;AI2", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "United States;" }, { "id": "fhEkqMyvb0", "title": "Disentangling Structure and Style: Political Bias Detection in News by Inducing Document Hierarchy", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "We address an important gap in detecting political bias in news articles. 
Previous works that perform document classification can be influenced by the writing style of each news outlet, leading to overfitting and limited generalizability. Our approach overcomes this limitation by considering both the sentence-level semantics and the document-level rhetorical structure, resulting in a more robust and style-agnostic approach to detecting political bias in news articles. We introduce a novel multi-head hierarchical attention model that effectively encodes the structure of long documents through a diverse ensemble of attention heads. While journalism follows a formalized rhetorical structure, the writing style may vary by news outlet. We demonstrate that our method overcomes this domain dependency and outperforms previous approaches for robustness and accuracy. Further analysis and human evaluation demonstrate the ability of our model to capture common discourse structures in journalism.", "keywords": "Natural Language Processing;NLP Applications;Political Bias;Journalism", "primary_area": "", "supplementary_material": "", "author": "Jiwoo Hong;Yejin Cho;Jiyoung Han;Jaemin Jung;James Thorne", "authorids": "~Jiwoo_Hong2;~Yejin_Cho2;~Jiyoung_Han1;~Jaemin_Jung1;~James_Thorne1", "gender": "M;F;F;M;", "homepage": ";https://github.com/bodhitrii?tab=repositories;;https://futures.kaist.ac.kr/en/?c=274;https://jamesthorne.com", "dblp": ";;;09/5571.html;204/1380", "google_scholar": "aj3mLdcAAAAJ;;https://scholar.google.co.kr/citations?user=kE8dZhEAAAAJ;https://scholar.google.com/citations?hl=ko;hao9RrgAAAAJ", "or_profile": "~Jiwoo_Hong2;~Yejin_Cho2;~Jiyoung_Han1;~Jaemin_Jung1;~James_Thorne1", "aff": "Sung Kyun Kwan University;Korea Advanced Institute of Science & Technology;;KAIST;KAIST", "aff_domain": "skku.edu;kaist.edu;;kaist.ac.kr;kaist.ac.kr", "position": "Undergrad student;MS student;;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nhong2023disentangling,\ntitle={Disentangling Structure and Style: Political Bias Detection in News by Inducing Document Hierarchy},\nauthor={Jiwoo Hong and Yejin Cho and Jiyoung Han and Jaemin Jung and James Thorne},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=fhEkqMyvb0}\n}", "github": "", "project": "", "reviewers": "twSB;b9Zn;Nv3C", "site": "https://openreview.net/forum?id=fhEkqMyvb0", "pdf_size": 0, "rating": "2;2;2", "confidence": "4;3;4", "excitement": "2;2;4", "reproducibility": "5;3;4", "correctness": "3;3;3", "rating_avg": 2.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 2.6666666666666665, "reproducibility_avg": 4.0, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-6771-4302;;0000-0002-4177-2889;0000-0002-6815-7763;", "linkedin": "jiwoohong09/;;;;", "aff_unique_index": "0;1;1;1", "aff_unique_norm": "Sungkyunkwan University;Korea Advanced Institute of Science and Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.skku.edu;https://www.kaist.ac.kr", "aff_unique_abbr": "SKKU;KAIST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "South Korea" }, { "id": "fi90p5364y", "title": "Generating Commonsense Counterfactuals for Stable Relation Extraction", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Recent studies on counterfactual augmented data have achieved great success in the coarse-grained natural language processing 
tasks. However, existing methods encounter two major problems when dealing with the fine-grained relation extraction tasks. One is that they struggle to accurately identify causal terms under the invariant entity constraint. The other is that they ignore the commonsense constraint. To solve these problems, we propose a novel framework to generate commonsense counterfactuals for stable relation extraction. Specifically, to identify causal terms accurately, we introduce an intervention-based strategy and leverage a constituency parser for correction. To satisfy the commonsense constraint, we introduce the concept knowledge base WordNet and design a bottom-up relation expansion algorithm on it to uncover commonsense relations between entities. We conduct a series of comprehensive evaluations, including the low-resource, out-of-domain, and adversarial-attack settings. The results demonstrate that our framework significantly enhances the stability of base relation extraction models.", "keywords": "Counterfactual Data Augmentation;Commonsense-constrained Generation;Relation Extraction", "primary_area": "", "supplementary_material": "", "author": "Xin Miao;Yongqi Li;Tieyun Qian", "authorids": "~Xin_Miao4;~Yongqi_Li3;~Tieyun_Qian1", "gender": "M;M;", "homepage": "https://github.com/Double1203;https://liyongqi2002.github.io/;", "dblp": ";249/4156-2;17/5583", "google_scholar": ";2R_eMkkAAAAJ;MYTt4EwAAAAJ", "or_profile": "~Xin_Miao4;~Yongqi_Li3;~Tieyun_Qian1", "aff": "Wuhan University;Wuhan University;Wuhan University", "aff_domain": "whu.edu.cn;whu.edu.cn;whu.edu.cn", "position": "PhD student;Undergrad student;Full Professor", "bibtex": "@inproceedings{\nmiao2023generating,\ntitle={Generating Commonsense Counterfactuals for Stable Relation Extraction},\nauthor={Xin Miao and Yongqi Li and Tieyun Qian},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=fi90p5364y}\n}", "github": "", "project": "", "reviewers": "Ck7v;Uqod;fvyK", "site": "https://openreview.net/forum?id=fi90p5364y", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;4;3", "excitement": "3;3;4", "reproducibility": "3;4;5", "correctness": "4;4;4", "rating_avg": 3.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.0, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0003-4667-5794", "linkedin": ";;", "aff_unique_index": "0;0;0", "aff_unique_norm": "Wuhan University", "aff_unique_dep": "", "aff_unique_url": "http://www.whu.edu.cn/", "aff_unique_abbr": "WHU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "fkAKjbRvxj", "title": "Information Value: Measuring Utterance Predictability as Distance from Plausible Alternatives", "track": "main", "status": "Long Main", "tldr": "", "abstract": "We present information value, a measure which quantifies the predictability of an utterance relative to a set of plausible alternatives. We introduce a method to obtain interpretable estimates of information value using neural text generators, and exploit their psychometric predictive power to investigate the dimensions of predictability that drive human comprehension behaviour. 
Information value is a stronger predictor of utterance acceptability in written and spoken dialogue than aggregates of token-level surprisal and it is complementary to surprisal for predicting eye-tracked reading times.", "keywords": "surprisal;alternatives;acceptability;reading times;predictability", "primary_area": "", "supplementary_material": "", "author": "Mario Giulianelli;Sarenne Wallbridge;Raquel Fern\u00e1ndez", "authorids": "~Mario_Giulianelli1;~Sarenne_Wallbridge1;~Raquel_Fern\u00e1ndez1", "gender": "M;;F", "homepage": "https://glnmario.github.io;https://sarenne.github.io/;http://www.illc.uva.nl/~raquel", "dblp": "205/2569;;02/5384", "google_scholar": "https://scholar.google.it/citations?user=ABZghWYAAAAJ;flRw1JAAAAAJ;https://scholar.google.com/citations?hl=en", "or_profile": "~Mario_Giulianelli1;~Sarenne_Wallbridge1;~Raquel_Fern\u00e1ndez1", "aff": "University of Amsterdam;University of Edinburgh, University of Edinburgh;University of Amsterdam", "aff_domain": "uva.nl;ed.ac.uk;uva.nl", "position": "PhD student;PhD student;Associate Professor", "bibtex": "@inproceedings{\ngiulianelli2023information,\ntitle={Information Value: Measuring Utterance Predictability as Distance from Plausible Alternatives},\nauthor={Mario Giulianelli and Sarenne Wallbridge and Raquel Fern{\\'a}ndez},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=fkAKjbRvxj}\n}", "github": "", "project": "", "reviewers": "RsrJ;i1qv;TJwo;fDKN", "site": "https://openreview.net/forum?id=fkAKjbRvxj", "pdf_size": 0, "rating": "5;5;5;5", "confidence": "5;4;2;4", "excitement": "4;4;3;3", "reproducibility": "4;4;4;4", "correctness": "4;4;4;4", "rating_avg": 5.0, "confidence_avg": 3.75, "excitement_avg": 3.5, "reproducibility_avg": 4.0, "correctness_avg": 4.0, "replies_avg": 14, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0001-5540-5943", "linkedin": ";;raquel-fernandez-13578148/", "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Amsterdam;University of Edinburgh", "aff_unique_dep": ";", "aff_unique_url": "https://www.uva.nl;https://www.ed.ac.uk", "aff_unique_abbr": "UvA;Edinburgh", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "Netherlands;United Kingdom" }, { "id": "fkmSyrSjnq", "title": "Topic-Informed Dialogue Summarization using Topic Distribution and Prompt-based Modeling", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "Dealing with multiple topics should be considered an important issue in dialogue summarization, because dialogues, unlike documents, are prone to topic drift. Thus, we propose a new dialogue summarization model that reflects dialogue topic distribution to consider all topics present in the dialogue. First, the distribution of dialogue topics is estimated by an effective topic discovery model. Then topic-informed prompt transfers estimated topic distribution information to the output of encoder and decoder vectors. Finally, the topic extractor estimates the summary topic distribution from the output context vector of decoder to distinguish its difference from the dialogue topic distribution. To consider the proportion of each topic distribution appeared in the dialogue, the extractor is trained to reduce the difference between the distributions of the dialogue and the summary. 
The experimental results on SAMSum and DialogSum show that our model outperforms state-of-the-art methods on ROUGE scores. The human evaluation results also show that our framework generates comprehensive summaries well.", "keywords": "Dialogue Summarization;Topic Distribution;Topic-Informed Prompt", "primary_area": "", "supplementary_material": "", "author": "Jaeah You;Youngjoong Ko", "authorids": "~Jaeah_You1;~Youngjoong_Ko1", "gender": "F;M", "homepage": "https://nlplab-skku.github.io/index.html;", "dblp": ";29/1445", "google_scholar": ";", "or_profile": "~Jaeah_You1;~Youngjoong_Ko1", "aff": "Sung Kyun Kwan University;Sungkyunkwan University", "aff_domain": "skku.edu;skku.edu", "position": "MS student;Full Professor", "bibtex": "@inproceedings{\nyou2023topicinformed,\ntitle={Topic-Informed Dialogue Summarization using Topic Distribution and Prompt-based Modeling},\nauthor={Jaeah You and Youngjoong Ko},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=fkmSyrSjnq}\n}", "github": "", "project": "", "reviewers": "Fkky;KN1n;GoNj", "site": "https://openreview.net/forum?id=fkmSyrSjnq", "pdf_size": 0, "rating": "3;3;3", "confidence": "2;3;4", "excitement": "3;3;3", "reproducibility": "3;3;3", "correctness": "3;3;3", "rating_avg": 3.0, "confidence_avg": 3.0, "excitement_avg": 3.0, "reproducibility_avg": 3.0, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";", "linkedin": ";", "aff_unique_index": "0;0", "aff_unique_norm": "Sungkyunkwan University", "aff_unique_dep": "", "aff_unique_url": "https://www.skku.edu", "aff_unique_abbr": "SKKU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "South Korea" }, { "id": "flkXLt9WKn", "title": "Dialogue Chain-of-Thought Distillation for Commonsense-aware Conversational Agents", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Human-like chatbots necessitate the use of commonsense reasoning in order to effectively comprehend and respond to implicit information present within conversations. Achieving such coherence and informativeness in responses, however, is a non-trivial task. Even for large language models (LLMs), the task of identifying and aggregating key evidence within a single hop presents a substantial challenge. This complexity arises because such evidence is scattered across multiple turns in a conversation, thus necessitating integration over multiple hops. Hence, our focus is to facilitate such multi-hop reasoning over a dialogue context, namely dialogue chain-of-thought (CoT) reasoning. \nTo this end, we propose a knowledge distillation framework that leverages LLMs as unreliable teachers and selectively distills consistent and helpful rationales via alignment filters. \nWe further present DOCTOR, a DialOgue Chain-of-ThOught Reasoner that provides reliable CoT rationales for response generation.
\nWe conduct extensive experiments to show that enhancing dialogue agents with high-quality rationales from DOCTOR significantly improves the quality of their responses.", "keywords": "Commonsense Reasoning;Dialogue;Large Language Model", "primary_area": "", "supplementary_material": "", "author": "Hyungjoo Chae;Yongho Song;Kai Tzu-iunn Ong;Taeyoon Kwon;Minjin Kim;Youngjae Yu;Dongha Lee;Dongyeop Kang;Jinyoung Yeo", "authorids": "~Hyungjoo_Chae1;~Yongho_Song1;~Kai_Tzu-iunn_Ong1;~Taeyoon_Kwon1;~Minjin_Kim1;~Youngjae_Yu1;~Dongha_Lee1;~Dongyeop_Kang2;~Jinyoung_Yeo1", "gender": "M;;M;M;;M;M;M;M", "homepage": "https://hyungjoo-homepage.netlify.app/;https://kopf-yhs.github.io/;https://ktio89.weebly.com/;https://connoriginal.github.io;;https://yj-yu.github.io/home/;https://donalee.github.io;https://diyonsei.notion.site;https://dykang.github.io/", "dblp": "329/2385;;;;;188/6210;12/760-3;121/4335;69/9056", "google_scholar": "xNxPm-IAAAAJ;iJeuOHkAAAAJ;xF6qLHsAAAAJ;pkdCoTQAAAAJ;https://scholar.google.com/citations?hl=ko;https://scholar.google.co.kr/citations?user=WDO24ZYAAAAJ;driVwKwAAAAJ;rJBSLtAAAAAJ;https://scholar.google.co.kr/citations?user=fMKZOjwAAAAJ", "or_profile": "~Hyungjoo_Chae1;~Yongho_Song1;~Kai_Tzu-iunn_Ong1;~Taeyoon_Kwon1;~Minjin_Kim1;~Youngjae_Yu1;~Dongha_Lee1;~Jinyoung_Yeo1;~dongyeop_kang1", "aff": "Yonsei University;Yonsei University;Yonsei University;Yonsei University;Yonsei University;Allen Institute for Artificial Intelligence;University of Illinois, Urbana Champaign;Yonsei University;University of Minnesota", "aff_domain": "yonsei.ac.kr;yonsei.ac.kr;yonsei.ac.kr;yonsei.ac.kr;yonsei.ac.kr;allenai.org;illinois.edu;yonsei.ac.kr;umn.edu", "position": "Undergrad student;MS student;PhD student;MS student;MS student;Postdoc;Postdoc;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nchae2023dialogue,\ntitle={Dialogue Chain-of-Thought Distillation for Commonsense-aware Conversational Agents},\nauthor={Hyungjoo Chae and Yongho Song and Kai Tzu-iunn Ong and Taeyoon Kwon and Minjin Kim and Youngjae Yu and Dongha Lee and Dongyeop Kang and Jinyoung Yeo},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=flkXLt9WKn}\n}", "github": "", "project": "", "reviewers": "dhkb;kVfw;cf7H;FRrp", "site": "https://openreview.net/forum?id=flkXLt9WKn", "pdf_size": 0, "rating": "5;5;5;5", "confidence": "4;4;4;4", "excitement": "4;4;4;4", "reproducibility": "4;5;4;5", "correctness": "3;4;4;4", "rating_avg": 5.0, "confidence_avg": 4.0, "excitement_avg": 4.0, "reproducibility_avg": 4.5, "correctness_avg": 3.75, "replies_avg": 14, "authors#_avg": 9, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0001-7229-1255;;;;;0000-0003-2173-3476;0000-0003-3847-4917;0000-0002-9021-1789", "linkedin": "hyungjoo-chae-21236a204/;;;taeyoon-kwon-2349aa14a/;minjin-kim-035367260/;;;jinyoung-yeo-4623a3128/;dongyeop-kang-30ba0611/", "aff_unique_index": "0;0;0;0;0;1;2;0;3", "aff_unique_norm": "Yonsei University;Allen Institute for Artificial Intelligence;University of Illinois Urbana-Champaign;University of Minnesota", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.yonsei.ac.kr;https://allenai.org;https://illinois.edu;https://www.minnesota.edu", "aff_unique_abbr": "Yonsei;AI2;UIUC;UMN", "aff_campus_unique_index": "1", "aff_campus_unique": ";Urbana-Champaign", "aff_country_unique_index": "0;0;0;0;0;1;1;0;1", "aff_country_unique": "South Korea;United States" }, { "id": "fonxcS8gqM", "title": 
"Improving Long Document Topic Segmentation Models With Enhanced Coherence Modeling", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Topic segmentation is critical for obtaining structured documents and improving down- stream tasks such as information retrieval. Due to its ability of automatically exploring clues of topic shift from abundant labeled data, recent supervised neural models have greatly promoted the development of long document topic segmentation, but leaving the deeper relationship between coherence and topic segmentation underexplored. Therefore, this paper enhances the ability of supervised models to capture coherence from both logical structure and semantic similarity perspectives to further improve the topic segmentation performance, proposing Topic-aware Sentence Structure Prediction (TSSP) and Contrastive Semantic Similarity Learning (CSSL). Specifically, the TSSP task is proposed to force the model to comprehend structural information by learning the original relations between adjacent sentences in a disarrayed document, which is constructed by jointly disrupting the original document at topic and sentence levels. Moreover, we utilize inter- and intra-topic information to construct contrastive samples and design the CSSL objective to ensure that the sentences representations in the same topic have higher similarity, while those in different topics are less similar. Extensive experiments show that the Longformer with our approach significantly outperforms old state-of-the-art (SOTA) methods. Our approach improve $F_{1}$ of old SOTA by 3.42 (73.74 \u2192 77.16) and reduces $P_{k}$ by 1.11 points (15.0 \u2192 13.89) on WIKI-727K and achieves an average relative reduction of 4.3\\% on $P_{k}$ on WikiSection. The average relative $P_{k}$ drop of 8.38\\% on two out-of-domain datasets also demonstrates the robustness of our approach.", "keywords": "topic segmentation;text coherence;semantic similarity;sentence structure;contrastive learning", "primary_area": "", "supplementary_material": "", "author": "Hai Yu;Chong Deng;Qinglin Zhang;Jiaqing Liu;Qian Chen;Wen Wang", "authorids": "~Hai_Yu2;~Chong_Deng1;~Qinglin_Zhang1;~Jiaqing_Liu2;~Qian_Chen1;~Wen_Wang6", "gender": "M;M;M;M;M;", "homepage": "https://github.com/haiahaiah;;;;https://scholar.google.com/citations?user=8eosmSQAAAAJ&hl=en;https://scholar.google.com/citations?user=85Tj1OwAAAAJ&hl=en", "dblp": ";220/8430;67/4963;;11/1394-3;29/4680-1", "google_scholar": ";https://scholar.google.com/citations?view_op=list_works;6Q7NBaEAAAAJ;sQ7v9uUAAAAJ;8eosmSQAAAAJ;85Tj1OwAAAAJ", "or_profile": "~Hai_Yu2;~Chong_Deng1;~Qinglin_Zhang1;~Jiaqing_Liu2;~Qian_Chen1;~Wen_Wang6", "aff": "Alibaba Group;Alibaba Group;Alibaba Group;Alibaba Group;Alibaba Group;Alibaba Group", "aff_domain": "alibaba-inc.com;alibaba-inc.com;alibaba-inc.com;alibaba-inc.com;alibaba-inc.com;alibaba-inc.com", "position": "Researcher;Researcher;Researcher;Researcher;Researcher;Senior Staff Algorithm Engineer", "bibtex": "@inproceedings{\nyu2023improving,\ntitle={Improving Long Document Topic Segmentation Models With Enhanced Coherence Modeling},\nauthor={Hai Yu and Chong Deng and Qinglin Zhang and Jiaqing Liu and Qian Chen and Wen Wang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=fonxcS8gqM}\n}", "github": "", "project": "", "reviewers": "wwEk;5wiE;b6Np", "site": "https://openreview.net/forum?id=fonxcS8gqM", "pdf_size": 0, "rating": "3;3;3", "confidence": 
"3;3;3", "excitement": "3;3;3", "reproducibility": "4;5;3", "correctness": "4;3;2", "rating_avg": 3.0, "confidence_avg": 3.0, "excitement_avg": 3.0, "reproducibility_avg": 4.0, "correctness_avg": 3.0, "replies_avg": 10, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;0000-0001-6939-7438;0000-0002-0356-1968", "linkedin": ";;;;;wen-wang-414b548/", "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "Alibaba Group", "aff_unique_dep": "", "aff_unique_url": "https://www.alibaba.com", "aff_unique_abbr": "Alibaba", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "fqKoLPfCba", "title": "Large-Scale and Multi-Perspective Opinion Summarization with Diverse Review Subsets", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Opinion summarization is expected to digest larger review sets and provide summaries from different perspectives. However, most existing solutions are deficient in epitomizing extensive reviews and offering opinion summaries from various angles due to the lack of designs for information selection. To this end, we propose SubSumm, a supervised summarization framework for large-scale multi-perspective opinion summarization. SubSumm consists of a review sampling strategy set and a two-stage training scheme. The sampling strategies take sentiment orientation and contrastive information value into consideration, with which the review subsets from different perspectives and quality levels can be selected. Subsequently, the summarizer is encouraged to learn from the sub-optimal and optimal subsets successively in order to capitalize on the massive input. Experimental results on AmaSum and Rotten Tomatoes datasets demonstrate that SubSumm is adept at generating pros, cons, and verdict summaries from hundreds of input reviews. 
Furthermore, our in-depth analysis verifies that the advanced selection of review subsets and the two-stage training scheme are vital to boosting the summarization performance.", "keywords": "opinion summarization;multi-document summarization;large-scale;multi-perspective;contrastive learning", "primary_area": "", "supplementary_material": "", "author": "Han Jiang;Rui Wang;Zhihua Wei;Yu Li;Xinpeng Wang", "authorids": "~Han_Jiang2;~Rui_Wang27;~Zhihua_Wei1;~Yu_Li23;~Xinpeng_Wang2", "gender": ";M;F;M;", "homepage": "https://github.com/Salomeeeee;https://github.com/mechanicalsea;;https://victorup.github.io/;https://github.com/LizLizLi", "dblp": ";https://dblp.org/rec/journals/corr/abs-2103-13581;55/3674-1;156/1668-1.html;", "google_scholar": ";1rsbf7IAAAAJ;;https://scholar.google.com.hk/citations?user=2euMY5oAAAAJ;", "or_profile": "~Han_Jiang2;~Rui_Wang27;~Zhihua_Wei1;~Xinpeng_Wang2;~Yu_Li32", "aff": "Tongji University;Tongji University;Tongji University;Tongji University;University of Electronic Science and Technology of China", "aff_domain": "tongji.edu.cn;tongji.edu.cn;tongji.edu.cn;tongji.edu.cn;uestc.edu.cn", "position": "MS student;PhD student;Full Professor;PhD student;Undergrad student", "bibtex": "@inproceedings{\njiang2023largescale,\ntitle={Large-Scale and Multi-Perspective Opinion Summarization with Diverse Review Subsets},\nauthor={Han Jiang and Rui Wang and Zhihua Wei and Yu Li and Xinpeng Wang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=fqKoLPfCba}\n}", "github": "", "project": "", "reviewers": "AegH;DG5P;RBcE", "site": "https://openreview.net/forum?id=fqKoLPfCba", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;4", "excitement": "3;3;3", "reproducibility": "3;3;4", "correctness": "3;3;3", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 3.0, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-5211-2114;;0000-0003-1857-0346;", "linkedin": ";%E7%91%9E-%E7%8E%8B-b98085b8/;;;", "aff_unique_index": "0;0;0;0;1", "aff_unique_norm": "Tongji University;University of Electronic Science and Technology of China", "aff_unique_dep": ";", "aff_unique_url": "https://www.tongji.edu.cn;https://www.uestc.edu.cn", "aff_unique_abbr": "Tongji;UESTC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "fqWbXPX99P", "title": "CT-GAT: Cross-Task Generative Adversarial Attack based on Transferability", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Neural network models are vulnerable to adversarial examples, and adversarial transferability further increases the risk of adversarial attacks. Current methods based on transferability often rely on substitute models, which can be impractical and costly in real-world scenarios due to the unavailability of training data and the victim model's structural details. In this paper, we propose a novel approach that directly constructs adversarial examples by extracting transferable features across various tasks. Our key insight is that adversarial transferability can extend across different tasks. 
Specifically, we train a sequence-to-sequence generative model named CT-GAT (Cross-Task Generative Adversarial Attack) using adversarial sample data collected from multiple tasks to acquire universal adversarial features and generate adversarial examples for different tasks. We conduct experiments on ten distinct datasets, and the results demonstrate that our method achieves superior attack performance at a small cost.", "keywords": "Adversarial Attacks;Transferability;Generative Methods", "primary_area": "", "supplementary_material": "", "author": "Minxuan Lv;Chengwei Dai;Kun Li;Wei Zhou;Songlin Hu", "authorids": "~Minxuan_Lv1;~Chengwei_Dai1;~Kun_Li8;~Wei_Zhou5;~Songlin_Hu2", "gender": ";M;M;F;M", "homepage": "https://github.com/xiaoxuanNLP;https://github.com/C-W-D;;http://people.ucas.ac.cn/~iiezhouwei;http://people.ucas.ac.cn/~0000967?language=en", "dblp": "359/3555;;;69/5011-19;67/4108-1.html", "google_scholar": "https://scholar.google.com/citations?hl=en;;https://scholar.google.com/citations?hl=zh-CN;;", "or_profile": "~Minxuan_Lv1;~Chengwei_Dai1;~Kun_Li8;~Wei_Zhou5;~Songiln_Hu1", "aff": "Institute of Information Engineering,Chinese Academy of Sciences;University of Chinese Academy of Sciences;University of Chinese Academy of Sciences;;Institute of Information Engineering, Chinese Academy of Sciences", "aff_domain": "iie.ac.cn;ucas.ac.cn;ucas.edu.cn;;iie.ac.cn", "position": "MS student;MS student;PhD student;;Full Professor", "bibtex": "@inproceedings{\nlv2023ctgat,\ntitle={{CT}-{GAT}: Cross-Task Generative Adversarial Attack based on Transferability},\nauthor={Minxuan Lv and Chengwei Dai and Kun Li and Wei Zhou and Songlin Hu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=fqWbXPX99P}\n}", "github": "", "project": "", "reviewers": "sMHo;sPVz;UDzM;xQiS", "site": "https://openreview.net/forum?id=fqWbXPX99P", "pdf_size": 0, "rating": "4;4;4;4", "confidence": "3;4;2;3", "excitement": "4;2;4;2", "reproducibility": "3;1;5;3", "correctness": "3;2;4;4", "rating_avg": 4.0, "confidence_avg": 3.0, "excitement_avg": 3.0, "reproducibility_avg": 3.0, "correctness_avg": 3.25, "replies_avg": 14, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;", "linkedin": ";;;;", "aff_unique_index": "0;1;1;0", "aff_unique_norm": "Chinese Academy of Sciences;University of Chinese Academy of Sciences", "aff_unique_dep": "Institute of Information Engineering;", "aff_unique_url": "http://www.cas.cn;http://www.ucas.ac.cn", "aff_unique_abbr": "CAS;UCAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "fsGowIsscZ", "title": "A Diachronic Perspective on User Trust in AI under Uncertainty", "track": "main", "status": "Long Main", "tldr": "", "abstract": "In human-AI collaboration, users typically form a mental model of the AI system, which captures the user's beliefs about when the system performs well and when it does not.\nThe construction of this mental model is guided by both the system's veracity as well as the system output presented to the user, e.g., the system's confidence and an explanation for the prediction.\nHowever, modern NLP systems are seldom calibrated and are often confidently incorrect about their predictions, which violates users\u2019 mental model and erodes their trust.\nIn this work, we design a study where users bet on the correctness of an NLP system, and use it to study the
evolution of user trust as a response to these trust-eroding events and how the user trust is rebuilt as a function of time after these events.\nWe find that even a few highly inaccurate confidence estimation instances are enough to damage users' trust in the system and performance, which does not easily recover over time.\nWe further find that users are more forgiving to the NLP system if it is unconfidently correct rather than confidently incorrect, even though, from a game-theoretic perspective, their payoff is equivalent.\nFinally, we find that each user can entertain multiple mental models of the system based on the type of the question.\nThese results highlight the importance of confidence calibration in developing user-centered NLP applications to avoid damaging user trust and compromising the collaboration performance.", "keywords": "trust;calibration;confidence;collaboration", "primary_area": "", "supplementary_material": "", "author": "Shehzaad Zuzar Dhuliawala;Vil\u00e9m Zouhar;Mennatallah El-Assady;Mrinmaya Sachan", "authorids": "~Shehzaad_Zuzar_Dhuliawala3;~Vil\u00e9m_Zouhar1;~Mennatallah_El-Assady1;~Mrinmaya_Sachan3", "gender": "Not Specified;;M;M", "homepage": "https://vilda.net;;https://sites.google.com/site/mrinsachan/;https://shehzaadzd.github.io", "dblp": "254/1832;183/8957;86/10440.html;184/8733", "google_scholar": "2EUDwtkAAAAJ;;Tpp9ZjoAAAAJ;7O33ij4AAAAJ", "or_profile": "~Vil\u00e9m_Zouhar1;~Mennatallah_El-Assady1;~MRINMAYA_SACHAN2;~Shehzaad_Zuzar_Dhuliawala1", "aff": "Amazon;Department of Computer Science, ETHZ - ETH Zurich;Swiss Federal Institute of Technology;Swiss Federal Institute of Technology", "aff_domain": "amazon.com;inf.ethz.ch;ethz.ch;ethz.ch", "position": "Intern;Postdoc;Assistant Professor;PhD student", "bibtex": "@inproceedings{\ndhuliawala2023a,\ntitle={A Diachronic Perspective on User Trust in {AI} under Uncertainty},\nauthor={Shehzaad Zuzar Dhuliawala and Vil{\\'e}m Zouhar and Mennatallah El-Assady and Mrinmaya Sachan},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=fsGowIsscZ}\n}", "github": "", "project": "", "reviewers": "4Q7E;3PXu;Xqzv", "site": "https://openreview.net/forum?id=fsGowIsscZ", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;3;4", "excitement": "2;3;5", "reproducibility": "3;4;4", "correctness": "3;3;4", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0001-8526-2613;;", "linkedin": "vil%C3%A9m-zouhar-192988288/;;;", "aff_unique_index": "0;1;2;2", "aff_unique_norm": "Amazon;ETH Zurich;Swiss Federal Institute of Technology", "aff_unique_dep": "Amazon.com, Inc.;Department of Computer Science;", "aff_unique_url": "https://www.amazon.com;https://www.ethz.ch;https://www.ethz.ch", "aff_unique_abbr": "Amazon;ETHZ;ETH Zurich", "aff_campus_unique_index": "1", "aff_campus_unique": ";Zurich", "aff_country_unique_index": "0;1;1;1", "aff_country_unique": "United States;Switzerland" }, { "id": "ft0c1K3492", "title": "SciRepEval: A Multi-Format Benchmark for Scientific Document Representations", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Learned representations of scientific documents can serve as valuable input features for downstream tasks without further fine-tuning. 
However, existing benchmarks for evaluating these representations fail to capture the diversity of relevant tasks. In response, we introduce SciRepEval, the first comprehensive benchmark for training and evaluating scientific document representations. It includes 24 challenging and realistic tasks, 8 of which are new, across four formats: classification, regression, ranking and search. We then use this benchmark to study and improve the generalization ability of scientific document representation models. We show how state-of-the-art models like SPECTER and SciNCL struggle to generalize across the task formats, and that simple multi-task training fails to improve them. However, a new approach that learns multiple embeddings per document, each tailored to a different format, can improve performance. We experiment with task-format-specific control codes and adapters and find they outperform the existing single-embedding state-of-the-art by over 2 points absolute. We release the resulting family of multi-format models, called SPECTER2, for the community to use and build on.", "keywords": "Scientific tasks benchmark;multi task learning;task specific embeddings", "primary_area": "", "supplementary_material": "", "author": "Amanpreet Singh;Mike D'Arcy;Arman Cohan;Doug Downey;Sergey Feldman", "authorids": "~Amanpreet_Singh2;~Mike_D'Arcy1;~Arman_Cohan1;~Doug_Downey1;~Sergey_Feldman1", "gender": "M;M;M;M;", "homepage": "https://github.com/amanpreet692;https://mdarcy220.gitlab.io/;http://www.armancohan.com;https://www.cs.northwestern.edu/~ddowney/;http://www.data-cowboys.com", "dblp": ";;160/1727;57/5363;81/8052", "google_scholar": "rk0hg3YAAAAJ;q5WUx2AAAAAJ;https://scholar.google.com/citations?hl=en;E8evkcQAAAAJ;C6-OMDIAAAAJ", "or_profile": "~Amanpreet_Singh2;~Mike_D'Arcy1;~Arman_Cohan1;~Doug_Downey1;~Sergey_Feldman1", "aff": "Allen Institute for Artificial Intelligence;Northwestern University;Allen Institute for Artificial Intelligence;Northwestern University;Data Cowboys", "aff_domain": "allenai.org;northwestern.edu;allenai.org;northwestern.edu;data-cowboys.com", "position": "Researcher;PhD student;Research Scientist;Professor;Machine Learning Consultant", "bibtex": "@inproceedings{\nsingh2023scirepeval,\ntitle={SciRepEval: A Multi-Format Benchmark for Scientific Document Representations},\nauthor={Amanpreet Singh and Mike D'Arcy and Arman Cohan and Doug Downey and Sergey Feldman},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=ft0c1K3492}\n}", "github": "", "project": "", "reviewers": "U5Yz;Tbar;vkEw", "site": "https://openreview.net/forum?id=ft0c1K3492", "pdf_size": 0, "rating": "5;5;5", "confidence": "3;2;4", "excitement": "3;3;4", "reproducibility": "4;4;4", "correctness": "4;4;4", "rating_avg": 5.0, "confidence_avg": 3.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.0, "correctness_avg": 4.0, "replies_avg": 10, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0003-0355-7157;;;0000-0003-0386-0922", "linkedin": "amanpreet-singh-k/;;;;", "aff_unique_index": "0;1;0;1;2", "aff_unique_norm": "Allen Institute for Artificial Intelligence;Northwestern University;Data Cowboys", "aff_unique_dep": ";;", "aff_unique_url": "https://allenai.org;https://www.northwestern.edu;", "aff_unique_abbr": "AI2;NU;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States;" }, { "id": 
"fvGJOVkm0b", "title": "EntSUMv2: Dataset, Models and Evaluation for More Abstractive Entity-Centric Summarization", "track": "main", "status": "Short Main", "tldr": "", "abstract": "Entity-centric summarization is a form of controllable summarization that aims to generate a summary for a specific entity given a document. Concise summaries are valuable in various real-life applications, as they enable users to quickly grasp the main points of the document focusing on an entity of interest. This paper presents ENTSUMV2, a more abstractive version of the original entity-centric ENTSUM summarization dataset. In ENTSUMV2 the annotated summaries are intentionally made shorter to benefit more specific and useful entity-centric summaries for downstream users. We conduct extensive experiments on this dataset using multiple abstractive summarization approaches that employ supervised fine-tuning or large-scale instruction tuning. Additionally, we perform comprehensive human evaluation that incorporates metrics for measuring crucial facets. These metrics provide a more fine-grained interpretation of the current state-of-the-art systems and highlight areas for future improvement.", "keywords": "summarization;entity;dataset;evaluation", "primary_area": "", "supplementary_material": "", "author": "Dhruv Mehra;Lingjue Xie;Ella Hofmann-Coyle;Mayank Kulkarni;Daniel Preotiuc-Pietro", "authorids": "~Dhruv_Mehra2;~Lingjue_Xie1;~Ella_Hofmann-Coyle1;~Mayank_Kulkarni1;~Daniel_Preotiuc-Pietro2", "gender": ";F;F;M;M", "homepage": "http://dhruvmehra.me;;;;https://www.preotiuc.ro/", "dblp": ";;;160/0475;126/8668", "google_scholar": ";0fFMusUAAAAJ;p7Aac7kAAAAJ;ycBuNT0AAAAJ;7HSgxLEAAAAJ", "or_profile": "~Dhruv_Mehra2;~Lingjue_Xie1;~Ella_Hofmann-Coyle1;~Mayank_Kulkarni1;~Daniel_Preotiuc-Pietro2", "aff": "Bloomberg;Bloomberg;Bloomberg;Amazon;Bloomberg", "aff_domain": "bloomberg.com;bloomberg.net;bloomberg.com;amazon.com;bloomberg.com", "position": "Research Engineer;Researcher;Researcher;Researcher;Researcher", "bibtex": "@inproceedings{\nmehra2023entsumv,\ntitle={Ent{SUM}v2: Dataset, Models and Evaluation for More Abstractive Entity-Centric Summarization},\nauthor={Dhruv Mehra and Lingjue Xie and Ella Hofmann-Coyle and Mayank Kulkarni and Daniel Preotiuc-Pietro},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=fvGJOVkm0b}\n}", "github": "", "project": "", "reviewers": "fMbs;gpN3;d8Nz", "site": "https://openreview.net/forum?id=fvGJOVkm0b", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;2;4", "excitement": "4;3;4", "reproducibility": "3;3;3", "correctness": "4;4;4", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.0, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;", "linkedin": ";lingjuexie/;ellahofmanncoyle;mayank-kulkarni-a02732118/;danielpreotiuc/", "aff_unique_index": "0;0;0;1;0", "aff_unique_norm": "Bloomberg;Amazon", "aff_unique_dep": ";Amazon.com, Inc.", "aff_unique_url": "https://www.bloomberg.com;https://www.amazon.com", "aff_unique_abbr": "Bloomberg;Amazon", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "fwA8iKyIlk", "title": "Elaborative Simplification as Implicit Questions Under Discussion", "track": "main", "status": "Long Main", "tldr": "", "abstract": 
"Automated text simplification, a technique useful for making text more accessible to people such as children and emergent bilinguals, is often thought of as a monolingual translation task from complex sentences to simplified sentences using encoder-decoder models. This view fails to account for elaborative simplification, where new information is added into the simplified text. This paper proposes to view elaborative simplification through the lens of the Question Under Discussion (QUD) framework, providing a robust way to investigate what writers elaborate upon, how they elaborate, and how elaborations fit into the discourse context by viewing elaborations as explicit answers to implicit questions. We introduce ELABQUD, consisting of 1.3K elaborations accompanied with implicit QUDs, to study these phenomena. We show that explicitly modeling QUD (via question generation) not only provides essential understanding of elaborative simplification and how the elaborations connect with the rest of the discourse, but also substantially improves the quality of elaboration generation.", "keywords": "text simplification;elaborative simplification;questions under discussion", "primary_area": "", "supplementary_material": "", "author": "Yating Wu;William Berkeley Sheffield;Kyle Mahowald;Junyi Jessy Li", "authorids": "~Yating_Wu1;~William_Berkeley_Sheffield1;~Kyle_Mahowald1;~Junyi_Jessy_Li2", "gender": "Not Specified;;M;F", "homepage": "https://lingchensanwen.github.io/;;https://mahowak.github.io;https://jessyli.com", "dblp": "23/1500-2;;38/11196;148/9553", "google_scholar": "https://scholar.google.com/citations?view_op=list_works;;XUmFLVUAAAAJ;tJGm3-YAAAAJ", "or_profile": "~Yating_Wu1;~William_Berkeley_Sheffield1;~Kyle_Mahowald1;~Junyi_Jessy_Li2", "aff": "University of Texas at Austin;University of Texas at Austin;The University of Texas at Austin;University of Texas at Austin", "aff_domain": "utexas.edu;utexas.edu;utexas.edu;utexas.edu", "position": "PhD student;PhD student;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nwu2023elaborative,\ntitle={Elaborative Simplification as Implicit Questions Under Discussion},\nauthor={Yating Wu and William Berkeley Sheffield and Kyle Mahowald and Junyi Jessy Li},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=fwA8iKyIlk}\n}", "github": "", "project": "", "reviewers": "uFn6;qEyA;iTg6", "site": "https://openreview.net/forum?id=fwA8iKyIlk", "pdf_size": 0, "rating": "5;5;5", "confidence": "5;3;5", "excitement": "4;4;4", "reproducibility": "4;4;4", "correctness": "4;4;5", "rating_avg": 5.0, "confidence_avg": 4.333333333333333, "excitement_avg": 4.0, "reproducibility_avg": 4.0, "correctness_avg": 4.333333333333333, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;", "linkedin": "wuyating;william-sheffield-35546a226;;", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Texas at Austin", "aff_unique_dep": "", "aff_unique_url": "https://www.utexas.edu", "aff_unique_abbr": "UT Austin", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Austin", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "fwmZinFwgX", "title": "Enhancing Reasoning Capabilities by Instruction Learning and Chain-of-Thoughts for Implicit Discourse Relation Recognition", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "The aim of implicit 
discourse relation recognition is to comprehend the sense of connection between two arguments. In this work, we present a classification method that is solely based on generative models. Our proposed approach employs a combination of instruction templates and in-context learning to refine the generative model for effectively addressing the implicit discourse relation recognition task. Furthermore, we utilize Chain-of-Thoughts to partition the inference process into a sequence of three successive stages. This strategy enables us to fully utilize the autoregressive generative model's potential for knowledge acquisition and inference, ultimately leading to enhanced performance on this natural language understanding task. The results of our experiments, evaluated on benchmark datasets PDTB 2.0, PDTB 3.0, and the CoNLL16 shared task, demonstrate superior performance compared to previous state-of-the-art models.", "keywords": "Implicit discourse relation recognition;Instruction learning;In-context learning;Chain-of-Thoughts", "primary_area": "", "supplementary_material": "", "author": "Yuxiang Lu;Yu Hong;Zhipang Wang;Guodong Zhou", "authorids": "~Yuxiang_Lu2;~Yu_Hong1;~Zhipang_Wang2;~Guodong_Zhou1", "gender": "M;M;;M", "homepage": ";;https://zpwang-ai.github.io/;http://nlp.suda.edu.cn/~gdzhou/", "dblp": ";66/5306;;", "google_scholar": "Jge974gAAAAJ;;;", "or_profile": "~Yuxiang_Lu2;~Yu_Hong1;~Zhipang_Wang2;~Guodong_Zhou1", "aff": "Soochow University;Suzhou University;Soochow University;Soochow University, China", "aff_domain": "suda.edu.cn;suda.edu.cn;suda.edu.cn;suda.edu.cn", "position": "MS student;Full Professor;MS student;Full Professor", "bibtex": "@inproceedings{\nlu2023enhancing,\ntitle={Enhancing Reasoning Capabilities by Instruction Learning and Chain-of-Thoughts for Implicit Discourse Relation Recognition},\nauthor={Yuxiang Lu and Yu Hong and Zhipang Wang and Guodong Zhou},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=fwmZinFwgX}\n}", "github": "", "project": "", "reviewers": "YJWg;XvkT;znNK", "site": "https://openreview.net/forum?id=fwmZinFwgX", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;3;4", "excitement": "4;3;3", "reproducibility": "2;3;3", "correctness": "3;3;3", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 2.6666666666666665, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;", "linkedin": ";;;", "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Soochow University;Suzhou University", "aff_unique_dep": ";", "aff_unique_url": "https://www.soochow.edu.cn;https://www.suda.edu.cn", "aff_unique_abbr": "Soochow U;Suda", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "fxdvWG4rJe", "title": "Towards Making the Most of ChatGPT for Machine Translation", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "ChatGPT shows remarkable capabilities for machine translation (MT). Several prior studies have shown that it achieves comparable results to commercial systems for high-resource languages, but lags behind in complex tasks, e.g., low-resource and distant-language-pairs translation. However, they usually adopt simple prompts which cannot fully elicit the capability of ChatGPT.
In this report, we aim to further mine ChatGPT's translation ability by revisiting several aspects: temperature, task information, and domain information, and correspondingly propose two (simple but effective) prompts: Task-Specific Prompts (TSP) and Domain-Specific Prompts (DSP). We show that: 1) The performance of ChatGPT depends largely on temperature, and a lower temperature usually can achieve better performance; 2) Emphasizing the task information further improves ChatGPT's performance, particularly in complex MT tasks; 3) Introducing domain information can elicit ChatGPT's generalization ability and improve its performance in the specific domain; 4) ChatGPT tends to generate hallucinations for non-English-centric MT tasks, which can be partially addressed by our proposed prompts but still need to be highlighted for the MT/NLP community. We also explore the effects of advanced in-context learning strategies and find a (negative but interesting) observation: the powerful chain-of-thought prompt leads to word-by-word translation behavior, thus bringing significant translation degradation.", "keywords": "Machine Translation;Large Language Model;Prompt Engineering", "primary_area": "", "supplementary_material": "", "author": "keqin Peng;Liang Ding;Qihuang Zhong;Li Shen;Xuebo Liu;Min Zhang;Yuanxin Ouyang;Dacheng Tao", "authorids": "~keqin_Peng1;~Liang_Ding3;~Qihuang_Zhong1;~Li_Shen1;~Xuebo_Liu1;~Min_Zhang9;~Yuanxin_Ouyang1;~Dacheng_Tao1", "gender": "M;M;M;M;M;M;F;", "homepage": ";http://liamding.cc/;https://www.qihuangzhong.top/;https://sites.google.com/site/mathshenli/home;https://sunbowliu.github.io/;https://zhangmin-nlp-ai.github.io/;;", "dblp": ";88/3340-6.html;272/6439.html;91/3680-8;166/0029-2;83/5342-5;12/1640;", "google_scholar": "SKBL4okAAAAJ;lFCLvOAAAAAJ;https://scholar.google.com/citations?hl=zh-CN;yVhgENIAAAAJ;XkDl9aoAAAAJ;https://scholar.google.com/citations?hl=zh-CN;;", "or_profile": "~keqin_Peng1;~Liang_Ding3;~Qihuang_Zhong1;~Li_Shen1;~Xuebo_Liu1;~Min_Zhang9;~Yuanxin_Ouyang1;~Dacheng_Tao1", "aff": "Beihang University;JD Explore Academy, JD.com Inc.;Wuhan University;JD Explore Academy;Harbin Institute of Technolgy, Shenzhen;Harbin Institute of Technology, Shenzhen;BEIHANG UNIVERSITY;", "aff_domain": "buaa.edu.cn;jd.com;whu.edu.cn;jd.com;hit.edu.cn;hit.edu.cn;buaa.edu.cn;", "position": "PhD student;Research Scientist;PhD student;Researcher;Assistant Professor;Full Professor;Full Professor;", "bibtex": "@inproceedings{\npeng2023towards,\ntitle={Towards Making the Most of Chat{GPT} for Machine Translation},\nauthor={keqin Peng and Liang Ding and Qihuang Zhong and Li Shen and Xuebo Liu and Min Zhang and Yuanxin Ouyang and Dacheng Tao},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=fxdvWG4rJe}\n}", "github": "", "project": "", "reviewers": "4hHT;ZQYG;gZrL", "site": "https://openreview.net/forum?id=fxdvWG4rJe", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;5;4", "excitement": "4;2;3", "reproducibility": "3;4;4", "correctness": "4;2;3", "rating_avg": 3.0, "confidence_avg": 4.333333333333333, "excitement_avg": 3.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 8, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-5270-9439;;;;;;0000-0003-3329-3906;", "linkedin": ";;;;xuebo-liu-47877b195/;;;", "aff_unique_index": "0;1;2;3;4;4;0", "aff_unique_norm": "Beihang University;JD.com Inc.;Wuhan 
University;JD;Harbin Institute of Technology", "aff_unique_dep": ";JD Explore Academy;;JD Explore Academy;", "aff_unique_url": "http://www.buaa.edu.cn/;https://www.jd.com;http://www.whu.edu.cn/;;http://en.hhit.edu.cn/", "aff_unique_abbr": "BUAA;JD.com;WHU;;HIT", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Shenzhen", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China;" }, { "id": "fxotfo1j8T", "title": "Navigating the Grey Area: How Expressions of Uncertainty and Overconfidence Affect Language Models", "track": "main", "status": "Long Main", "tldr": "", "abstract": "The increased deployment of LMs for real-world tasks involving knowledge and facts makes it important to understand model epistemology: what LMs think they know, and how their attitudes toward that knowledge are affected by language use in their inputs. Here, we study an aspect of model epistemology: how epistemic markers of certainty, uncertainty, or evidentiality like \"I'm sure it's\", \"I think it's\", or \"Wikipedia says it's\" affect models, and whether they contribute to model failures. We develop a typology of epistemic markers and inject 50 markers into prompts for question answering. We find that LMs are highly sensitive to epistemic markers in prompts, with accuracies varying more than 80%. Surprisingly, we find that expressions of high certainty result in a 7% decrease in accuracy as compared to low certainty expressions; similarly, factive verbs hurt performance, while evidentials benefit performance. Our analysis of a popular pretraining dataset shows that these markers of uncertainty are associated with answers on question-answering websites, while markers of certainty are associated with questions. These associations may suggest that the behavior of LMs is based on mimicking observed language use, rather than truly reflecting epistemic uncertainty.", "keywords": "expressions of uncertainty;analysis of language models", "primary_area": "", "supplementary_material": "", "author": "Kaitlyn Zhou;Dan Jurafsky;Tatsunori Hashimoto", "authorids": "~Kaitlyn_Zhou1;~Dan_Jurafsky1;~Tatsunori_Hashimoto1", "gender": "F;M;M", "homepage": "https://cs.stanford.edu/~katezhou/;http://web.stanford.edu/~jurafsky/;https://thashim.github.io", "dblp": "179/4603;31/985;", "google_scholar": "SQAK2mwAAAAJ;uZg9l58AAAAJ;5ygiTwsAAAAJ", "or_profile": "~Kaitlyn_Zhou1;~Dan_Jurafsky1;~Tatsunori_Hashimoto1", "aff": "Stanford University;Stanford University;Stanford University", "aff_domain": "stanford.edu;stanford.edu;stanford.edu", "position": "PhD student;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nzhou2023navigating,\ntitle={Navigating the Grey Area: How Expressions of Uncertainty and Overconfidence Affect Language Models},\nauthor={Kaitlyn Zhou and Dan Jurafsky and Tatsunori Hashimoto},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=fxotfo1j8T}\n}", "github": "", "project": "", "reviewers": "SvKk;K4oh;JAsR", "site": "https://openreview.net/forum?id=fxotfo1j8T", "pdf_size": 0, "rating": "5;5;5", "confidence": "2;4;4", "excitement": "3;4;4", "reproducibility": "4;3;3", "correctness": "3;4;4", "rating_avg": 5.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.6666666666666665, "replies_avg": 10, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0001-8804-8161;;", 
"linkedin": ";;", "aff_unique_index": "0;0;0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "", "aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Stanford", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "fyza2OQ9NI", "title": "MathDial: A Dialogue Tutoring Dataset with Rich Pedagogical Properties Grounded in Math Reasoning Problems", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "While automatic dialogue tutors hold great potential in making education personalized and more accessible, research on such systems has been hampered by a lack of sufficiently large and high-quality datasets. Collecting such datasets remains challenging, as recording tutoring sessions raises privacy concerns and crowdsourcing leads to insufficient data quality. To address this, we propose a framework to generate such dialogues by pairing human teachers with a Large Language Model (LLM) prompted to represent common student errors.\nWe describe how we use this framework to collect MathDial, a dataset of 3k one-to-one teacher-student tutoring dialogues grounded in multi-step math reasoning problems. While models like GPT-3 are good problem solvers, they fail at tutoring because they generate factually incorrect feedback or are prone to revealing solutions to students too early. To overcome this, we let teachers provide learning opportunities to students by guiding them using various scaffolding questions according to a taxonomy of teacher moves. We demonstrate MathDial and its extensive annotations can be used to finetune models to be more effective tutors (and not just solvers). We confirm this by automatic and human evaluation, notably in an interactive setting that measures the trade-off between student solving success and telling solutions. 
The dataset is released publicly.", "keywords": "dialogue;dataset;collection;response generation;natural language generation;math;reasoning;tutoring", "primary_area": "", "supplementary_material": "", "author": "Jakub Macina;Nico Daheim;Sankalan Pal Chowdhury;Tanmay Sinha;Manu Kapur;Iryna Gurevych;Mrinmaya Sachan", "authorids": "~Jakub_Macina1;~Nico_Daheim1;~Sankalan_Pal_Chowdhury1;~Tanmay_Sinha1;~Manu_Kapur1;~Iryna_Gurevych1;~Mrinmaya_Sachan3", "gender": ";M;M;M;;;", "homepage": ";https://ndaheim.github.io;;https://lse.ethz.ch/people/postdocs/dr--tanmay-sinha.html;http://www.manukapur.com;;", "dblp": "204/6347;285/5587;250/9552.html;;;;", "google_scholar": "https://scholar.google.com/citations?hl=en;n6wJfqUAAAAJ;;W0amaYoAAAAJ;;;", "or_profile": "~Jakub_Macina1;~Nico_Daheim1;~Sankalan_Pal_Chowdhury1;~Tanmay_Sinha1;~Manu_Kapur1;~Iryna_Gurevych1;~Mrinmaya_Sachan3", "aff": "Department of Computer Science, ETHZ - ETH Zurich;Technische Universit\u00e4t Darmstadt;Swiss Federal Institute of Technology;ETH Zurich;ETHZ - ETH Zurich;;", "aff_domain": "inf.ethz.ch;tu-darmstadt.de;ethz.ch;gess.ethz.ch;ethz.ch;;", "position": "PhD student;PhD student;MS student;Postdoc;Full Professor;;", "bibtex": "@inproceedings{\nmacina2023mathdial,\ntitle={MathDial: A Dialogue Tutoring Dataset with Rich Pedagogical Properties Grounded in Math Reasoning Problems},\nauthor={Jakub Macina and Nico Daheim and Sankalan Pal Chowdhury and Tanmay Sinha and Manu Kapur and Iryna Gurevych and Mrinmaya Sachan},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=fyza2OQ9NI}\n}", "github": "", "project": "", "reviewers": "q15M;i8Bf;jaAZ;pp3K", "site": "https://openreview.net/forum?id=fyza2OQ9NI", "pdf_size": 0, "rating": "4;4;4;4", "confidence": "3;4;4;4", "excitement": "5;4;3;3", "reproducibility": "4;4;3;4", "correctness": "3;4;3;3", "rating_avg": 4.0, "confidence_avg": 3.75, "excitement_avg": 3.75, "reproducibility_avg": 3.75, "correctness_avg": 3.25, "replies_avg": 13, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0001-5063-0543;;;;;;", "linkedin": "jakubmacina;;sankalan-palchowdhury-343a7760/;;;;", "aff_unique_index": "0;1;2;0;0", "aff_unique_norm": "ETH Zurich;Technische Universit\u00e4t Darmstadt;Swiss Federal Institute of Technology", "aff_unique_dep": "Department of Computer Science;;", "aff_unique_url": "https://www.ethz.ch;https://www.tu-darmstadt.de;https://www.ethz.ch", "aff_unique_abbr": "ETHZ;TUD;ETH Zurich", "aff_campus_unique_index": "0", "aff_campus_unique": "Zurich;", "aff_country_unique_index": "0;1;0;0;0", "aff_country_unique": "Switzerland;Germany" }, { "id": "fzb2sxexWN", "title": "Semi-automatic Data Enhancement for Document-Level Relation Extraction with Distant Supervision from Large Language Models", "track": "main", "status": "Short Main", "tldr": "", "abstract": "Document-level Relation Extraction (DocRE), which aims to extract relations from a long context, is a critical challenge in achieving fine-grained structural comprehension and generating interpretable document representations.\nInspired by recent advances in in-context learning capabilities emergent from large language models (LLMs), such as ChatGPT, we aim to design an automated annotation method for DocRE with minimum human effort.\nUnfortunately, vanilla in-context learning is infeasible for DocRE due to the plenty of predefined fine-grained relation types and the uncontrolled generations of LLMs. 
To tackle this issue, we propose a method integrating an LLM and a natural language inference (NLI) module to generate relation triples, thereby augmenting document-level relation datasets.\nWe demonstrate the effectiveness of our approach by introducing an enhanced dataset known as DocGNRE, which excels in re-annotating numerous long-tail relation types. We are confident that our method holds the potential for broader applications in domain-specific relation type definitions and offers tangible benefits in advancing generalized language semantic comprehension.", "keywords": "Long Context;Document-level;Relation Extraction;Large Language Model", "primary_area": "", "supplementary_material": "", "author": "Junpeng Li;Zixia Jia;Zilong Zheng", "authorids": "~Junpeng_Li2;~Zixia_Jia1;~Zilong_Zheng1", "gender": "M;F;M", "homepage": ";;http://zilongzheng.github.io", "dblp": "https://dblp.org/rec/conf/www/ZhangLMM18;257/1724.html;218/5234", "google_scholar": ";FdwGDyoAAAAJ;9sDx70IAAAAJ", "or_profile": "~Junpeng_Li2;~Zixia_Jia1;~Zilong_Zheng1", "aff": ";ShanghaiTech University;Beijing Institute for General Artificial Intelligence", "aff_domain": ";shanghaitech.edu.cn;bigai.ai", "position": ";PhD student;Researcher", "bibtex": "@inproceedings{\nli2023semiautomatic,\ntitle={Semi-automatic Data Enhancement for Document-Level Relation Extraction with Distant Supervision from Large Language Models},\nauthor={Junpeng Li and Zixia Jia and Zilong Zheng},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=fzb2sxexWN}\n}", "github": "", "project": "", "reviewers": "rie9;y51e;KqzM", "site": "https://openreview.net/forum?id=fzb2sxexWN", "pdf_size": 0, "rating": "5;5;5", "confidence": "3;4;3", "excitement": "3;3;3", "reproducibility": "3;2;3", "correctness": "4;3;4", "rating_avg": 5.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.0, "reproducibility_avg": 2.6666666666666665, "correctness_avg": 3.6666666666666665, "replies_avg": 10, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": ";;", "aff_unique_index": "0;1", "aff_unique_norm": "ShanghaiTech University;Beijing Institute for General Artificial Intelligence", "aff_unique_dep": ";", "aff_unique_url": "https://www.shanghaitech.edu.cn;http://www.bigaiai.org/", "aff_unique_abbr": "ShanghaiTech;BIGAI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "id": "g04NBFnIxb", "title": "ViPE: Visualise Pretty-much Everything", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Figurative and non-literal expressions are profoundly integrated in human communication. Visualising such expressions allows us to convey our creative thoughts, and evoke nuanced emotions. Recent text-to-image models like Stable Diffusion, on the other hand, struggle to depict non-literal expressions. Recent works primarily deal with this issue by compiling humanly annotated datasets on a small scale, which not only demands specialized expertise but also proves highly inefficient. To address this issue, we introduce ViPE: Visualise Pretty-much Everything. ViPE offers a series of lightweight and robust language models that have been trained on a large-scale set of lyrics with noisy visual descriptions that represent their implicit meaning. The synthetic visual descriptions are generated by GPT3.5 relying on neither human annotations nor images. 
ViPE effectively expresses any arbitrary piece of text into a visualisable description, enabling meaningful and high-quality image generation. We provide compelling evidence that ViPE is more robust than GPT3.5 in synthesising visual elaborations. ViPE also exhibits an understanding of figurative expressions comparable to human experts, providing a powerful and open-source backbone to many downstream applications such as music video and caption generation.", "keywords": "Visual metaphors;music video generation;text-to-image synthesis;abstract visualization;diffusion models for abstract art;synthetic data generation;unsupervised label generation", "primary_area": "", "supplementary_material": "", "author": "Hassan Shahmohammadi;Adhiraj Ghosh;Hendrik Lensch", "authorids": "~Hassan_Shahmohammadi1;~Adhiraj_Ghosh2;~Hendrik_Lensch2", "gender": "M;M;M", "homepage": "https://fittar.me/;http://adhirajghosh.github.io/;https://www.graphics.uni-tuebingen.de", "dblp": ";304/2904;99/6552.html", "google_scholar": "dvmM-3QAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.de/citations?hl=de", "or_profile": "~Hassan_Shahmohammadi1;~Adhiraj_Ghosh2;~Hendrik_Lensch2", "aff": "Eberhard-Karls-Universit\u00e4t T\u00fcbingen;Eberhard-Karls-Universit\u00e4t T\u00fcbingen;University of T\u00fcbingen", "aff_domain": "uni-tuebingen.de;uni-tuebingen.de;uni-tuebingen.de", "position": "PhD student;MS student;Professor", "bibtex": "@inproceedings{\nshahmohammadi2023vipe,\ntitle={Vi{PE}: Visualise Pretty-much Everything},\nauthor={Hassan Shahmohammadi and Adhiraj Ghosh and Hendrik Lensch},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=g04NBFnIxb}\n}", "github": "", "project": "", "reviewers": "jo7U;Wr1n;eBD7;9QUY", "site": "https://openreview.net/forum?id=g04NBFnIxb", "pdf_size": 0, "rating": "5;5;5;5", "confidence": "4;3;4;2", "excitement": "4;4;5;4", "reproducibility": "4;3;4;5", "correctness": "3;3;5;3", "rating_avg": 5.0, "confidence_avg": 3.25, "excitement_avg": 4.25, "reproducibility_avg": 4.0, "correctness_avg": 3.5, "replies_avg": 14, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": ";adhiraj-ghosh/;", "aff_unique_index": "0;0;1", "aff_unique_norm": "Eberhard Karls University of T\u00fcbingen;University of T\u00fcbingen", "aff_unique_dep": ";", "aff_unique_url": "https://www.uni-tuebingen.de/;https://www.uni-tuebingen.de/", "aff_unique_abbr": "Uni T\u00fcbingen;Uni T\u00fcbingen", "aff_campus_unique_index": "0;0", "aff_campus_unique": "T\u00fcbingen;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Germany" }, { "id": "g0wzziJSmN", "title": "The Effect of Scaling, Retrieval Augmentation and Form on the Factual Consistency of Language Models", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Large Language Models (LLMs) make natural interfaces to factual knowledge, but their usefulness is limited by their tendency to deliver inconsistent answers to semantically equivalent questions. For example, a model might supply the answer \"Edinburgh\" to \"Anne Redpath passed away in X.\" and \"London\" to \"Anne Redpath's life ended in X.\". \nIn this work, we identify potential causes of inconsistency and evaluate the effectiveness of two mitigation strategies: up-scaling and augmenting the LM with a passage retrieval database. 
Our results on the LLaMA and Atlas models show that both strategies reduce inconsistency but that retrieval augmentation is considerably more efficient. We further consider and disentangle the consistency contributions of different components of Atlas. For all LMs evaluated we find that syntactical form and task artifacts impact consistency. Taken together, our results provide a better understanding of the factors affecting the factual consistency of language models.", "keywords": "consistency;evaluation;large language models;retrieval-augmentation;causal analysis", "primary_area": "", "supplementary_material": "", "author": "Lovisa Hagstr\u00f6m;Denitsa Saynova;Tobias Norlund;Moa Johansson;Richard Johansson", "authorids": "~Lovisa_Hagstr\u00f6m1;~Denitsa_Saynova1;~Tobias_Norlund1;~Moa_Johansson1;~Richard_Johansson1", "gender": "F;;;F;M", "homepage": "https://lovhag.github.io/;;http://tobias.norlund.se;https://www.cse.chalmers.se/~jomoa/;http://www.cse.chalmers.se/~richajo/", "dblp": "294/2054.html;;;02/452.html;56/3767", "google_scholar": "iRYBEYsAAAAJ;;;https://scholar.google.se/citations?hl=en;https://scholar.google.se/citations?user=FvhWYU8AAAAJ", "or_profile": "~Lovisa_Hagstr\u00f6m1;~Denitsa_Saynova1;~Tobias_Norlund1;~Moa_Johansson1;~Richard_Johansson1", "aff": "Chalmers University of Technology;Chalmers University of Technology;Chalmers University of Technology;Chalmers University of Technology;University of Gothenburg", "aff_domain": "chalmers.se;chalmers.se;chalmers.se;chalmers.se;gu.se", "position": "PhD student;PhD student;PhD student;Associate Professor;Associate Professor", "bibtex": "@inproceedings{\nhagstr{\\\"o}m2023the,\ntitle={The Effect of Scaling, Retrieval Augmentation and Form on the Factual Consistency of Language Models},\nauthor={Lovisa Hagstr{\\\"o}m and Denitsa Saynova and Tobias Norlund and Moa Johansson and Richard Johansson},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=g0wzziJSmN}\n}", "github": "", "project": "", "reviewers": "Moz1;Urmf;91nZ", "site": "https://openreview.net/forum?id=g0wzziJSmN", "pdf_size": 0, "rating": "5;5;5", "confidence": "3;4;3", "excitement": "4;4;3", "reproducibility": "4;4;4", "correctness": "3;3;3", "rating_avg": 5.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.0, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-1268-8020;0000-0002-5974-8094;;0000-0002-1097-8278;0000-0002-9429-4884", "linkedin": "lovisa-hagstrom?lipi=urn%3Ali%3Apage%3Ad_flagship3_profile_view_base_contact_details%3B6PdtqlgFS9mBSGp95rIY3g%3D%3D;;;;", "aff_unique_index": "0;0;0;0;1", "aff_unique_norm": "Chalmers University of Technology;University of Gothenburg", "aff_unique_dep": ";", "aff_unique_url": "https://www.chalmers.se;https://www.gu.se", "aff_unique_abbr": "Chalmers;GU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "Sweden" }, { "id": "g1LLeiHX0P", "title": "Representative Demonstration Selection for In-Context Learning with Two-Stage Determinantal Point Process", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Although In-Context Learning has proven effective across a broad array of tasks, its efficiency is noticeably influenced by the selection of demonstrations. 
Existing methods tend to select different demonstrations for each test instance, which is time-consuming and poses limitations in practical scenarios. Therefore, this study aims to address the challenge of selecting a representative subset of in-context demonstrations that can effectively prompt different test instances in a specific task. We propose that this representative subset should be of high quality and diversity. Our empirical analyses confirm that demonstrations that meet these criteria can indeed bolster model performance. To satisfy these criteria, this paper further introduces a two-stage Determinantal Point Process (DPP) method designed to incorporate both quality and diversity in the process of demonstration selection, thereby obtaining representative in-context demonstrations. Through comprehensive experimentation, we have confirmed the efficacy of our proposed method, paving the way for more practical and effective In-Context Learning.", "keywords": "In-Context Learning;Representative Subset Selection;Determinantal Point Process", "primary_area": "", "supplementary_material": "", "author": "Zhao Yang;Yuanzhe Zhang;Dianbo Sui;Cao Liu;Jun Zhao;Kang Liu", "authorids": "~Zhao_Yang2;~Yuanzhe_Zhang1;~Dianbo_Sui1;~Cao_Liu1;~Jun_Zhao4;~Kang_Liu1", "gender": "M;M;M;M;M;M", "homepage": ";https://yuanzhe-zhang.github.io/;;;http://nlpr-web.ia.ac.cn/cip/english/~junzhao/index.html;http://www.nlpr.ia.ac.cn/cip/~liukang/index.html", "dblp": "21/2326-4;141/4448;254/8270;26/6730;https://dblp.uni-trier.de/pid/47/2026-1.html;42/4903.html", "google_scholar": "tp5Ez4AAAAAJ;H4GYRx8AAAAJ;yi639zEAAAAJ;;https://scholar.google.com.hk/citations?user=HljRttwAAAAJ;DtZCfl0AAAAJ", "or_profile": "~Zhao_Yang2;~Yuanzhe_Zhang1;~Dianbo_Sui1;~Cao_Liu1;~Jun_Zhao4;~Kang_Liu1", "aff": "Institute of Automation, Chinese Academy of Sciences;Institute of Automation, Chinese Academy of Sciences;Harbin Institute of Technology;;Institute of automation, Chinese academy of science;Institute of Automation, Chinese Academy of Sciences", "aff_domain": "ia.ac.cn;ia.ac.cn;hit.edu.cn;;nlpr.ia.ac.cn;ia.ac.cn", "position": "PhD student;Associate Professor;Lecturer;;Full Professor;Professor", "bibtex": "@inproceedings{\nyang2023representative,\ntitle={Representative Demonstration Selection for In-Context Learning with Two-Stage Determinantal Point Process},\nauthor={Zhao Yang and Yuanzhe Zhang and Dianbo Sui and Cao Liu and Jun Zhao and Kang Liu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=g1LLeiHX0P}\n}", "github": "", "project": "", "reviewers": "twH8;4SPy;3tF3", "site": "https://openreview.net/forum?id=g1LLeiHX0P", "pdf_size": 0, "rating": "4;4;4", "confidence": "2;4;3", "excitement": "3;4;3", "reproducibility": "2;3;3", "correctness": "3;4;4", "rating_avg": 4.0, "confidence_avg": 3.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 2.6666666666666665, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;", "linkedin": ";;;;;", "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "Chinese Academy of Sciences;Harbin Institute of Technology", "aff_unique_dep": "Institute of Automation;", "aff_unique_url": "http://www.ia.cas.cn;http://www.hit.edu.cn/", "aff_unique_abbr": "CAS;HIT", "aff_campus_unique_index": "1", "aff_campus_unique": ";Harbin", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": 
"China" }, { "id": "g1Q9Uu8lCp", "title": "xDial-Eval: A Multilingual Open-Domain Dialogue Evaluation Benchmark", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Recent advancements in reference-free learned metrics for open-domain dialogue evaluation have been driven by the progress in pre-trained language models and the availability of dialogue data with high-quality human annotations. However, current studies predominantly concentrate on English dialogues, and the generalization of these metrics to other languages has not been fully examined. This is largely due to the absence of a multilingual dialogue evaluation benchmark. To address the issue, we introduce xDial-Eval, built on top of open-source English dialogue evaluation datasets. xDial-Eval includes 12 turn-level and 6 dialogue-level English datasets, comprising 14930 annotated turns and 8691 annotated dialogues respectively. The English dialogue data are extended to nine other languages with commercial machine translation systems. On xDial-Eval, we conduct comprehensive analyses of previous BERT-based metrics and the recently-emerged large language models. Lastly, we establish strong self-supervised and multilingual baselines. In terms of average Pearson correlations over all datasets and languages, the best baseline outperforms OpenAI's ChatGPT by absolute improvements of 6.5% and 4.6% at the turn and dialogue levels respectively, albeit with much fewer parameters. The data and code are publicly available at https://github.com/e0397123/xDial-Eval.", "keywords": "Automatic Dialogue Evaluation;Multilingual Dialogue;Large Language Models", "primary_area": "", "supplementary_material": "", "author": "Chen Zhang;Luis Fernando D'Haro;chengguang tang;Ke Shi;Guohua Tang;Haizhou Li", "authorids": "~Chen_Zhang8;~Luis_Fernando_D'Haro2;~chengguang_tang1;~Ke_Shi2;~Guohua_Tang1;~Haizhou_Li3", "gender": "M;M;M;M;M;M", "homepage": "https://chen-zhang-shehong.github.io/;https://blogs.upm.es/gthau/luis-fernando-dharo/;https://github.com/chengguangtang;;http://ir.hit.edu.cn/~tangguohua/;https://colips.org/~eleliha/", "dblp": ";57/1419.html;264/5495.html;;;36/4118", "google_scholar": "XOj25XAAAAAJ;https://scholar.google.com.sg/citations?user=SCFRL80AAAAJ;9dMZi6_pT94C;FL-KoM8AAAAJ;;https://scholar.google.com.sg/citations?user=z8_x7C8AAAAJ", "or_profile": "~Chen_Zhang8;~Luis_Fernando_D'Haro2;~chengguang_tang1;~Ke_Shi2;~Guohua_Tang1;~Haizhou_Li3", "aff": "National University of Singapore;Universidad Polit\u00e9cnica de Madrid;Tencent AI Lab;Tencent AI Lab;Tencent AI Lab;National University of Singapore", "aff_domain": "u.nus.edu;upm.es;tencent.com;tencent.com;tencent.com;nus.edu.sg", "position": "PhD student;Associate Professor;Researcher;Researcher;Researcher;Full Professor", "bibtex": "@inproceedings{\nzhang2023xdialeval,\ntitle={xDial-Eval: A Multilingual Open-Domain Dialogue Evaluation Benchmark},\nauthor={Chen Zhang and Luis Fernando D'Haro and chengguang tang and Ke Shi and Guohua Tang and Haizhou Li},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=g1Q9Uu8lCp}\n}", "github": "", "project": "", "reviewers": "8yTD;SAJN;sT3H", "site": "https://openreview.net/forum?id=g1Q9Uu8lCp", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;4;3", "excitement": "3;3;2", "reproducibility": "3;2;3", "correctness": "3;3;2", "rating_avg": 3.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 2.6666666666666665, "reproducibility_avg": 
2.6666666666666665, "correctness_avg": 2.6666666666666665, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-2406-8734;0000-0002-3411-7384;;;;0000-0001-9158-9401", "linkedin": "czhang017/;lfdharo/?originalSubdomain=es;;;;haizhou-li-4ba74b6/", "aff_unique_index": "0;1;2;2;2;0", "aff_unique_norm": "National University of Singapore;Universidad Polit\u00e9cnica de Madrid;Tencent", "aff_unique_dep": ";;Tencent AI Lab", "aff_unique_url": "https://www.nus.edu.sg;https://www.upm.es;https://ai.tencent.com", "aff_unique_abbr": "NUS;UPM;Tencent AI Lab", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2;2;2;0", "aff_country_unique": "Singapore;Spain;China" }, { "id": "g3VOQpuqlF", "title": "Adapting Pretrained Text-to-Text Models for Long Text Sequences", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "We present an empirical study of adapting an existing pretrained text-to-text model for long-sequence inputs. Through a comprehensive study along three axes of the pretraining pipeline -- model architecture, optimization objective, and pretraining corpus, we propose an effective recipe to build long-context models from existing short-context models. Specifically, we replace the full attention in transformers with \\textit{pooling-augmented blockwise attention}, and pretrain the model with a masked-span prediction task with spans of varying lengths. In terms of the pretraining corpus, we find that using randomly concatenated short-documents from a large open-domain corpus results in better performance than using existing long document corpora, which are typically limited in their domain coverage. With these findings, we build a long-context model that achieves competitive performance on long-text QA tasks and establishes the new state of the art on \\emph{five} long-text summarization datasets, often outperforming previous methods with larger model sizes.", "keywords": "long context;summarization", "primary_area": "", "supplementary_material": "", "author": "Wenhan Xiong;Anchit Gupta;Shubham Toshniwal;Yashar Mehdad;Scott Yih", "authorids": "~Wenhan_Xiong1;~Anchit_Gupta2;~Shubham_Toshniwal1;~Yashar_Mehdad2;~Scott_Yih1", "gender": "M;;;;", "homepage": "https://xwhan.github.io;;;;", "dblp": "203/8542;176/5620;;;", "google_scholar": ";https://scholar.google.co.in/citations?user=L5y3GwgAAAAJ;;;", "or_profile": "~Wenhan_Xiong1;~Anchit_Gupta2;~Shubham_Toshniwal1;~Yashar_Mehdad2;~Scott_Yih1", "aff": "Meta Facebook;Meta Facebook;;;", "aff_domain": "fb.com;fb.com;;;", "position": "Researcher;Researcher;;;", "bibtex": "@inproceedings{\nxiong2023adapting,\ntitle={Adapting Pretrained Text-to-Text Models for Long Text Sequences},\nauthor={Wenhan Xiong and Anchit Gupta and Shubham Toshniwal and Yashar Mehdad and Scott Yih},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=g3VOQpuqlF}\n}", "github": "", "project": "", "reviewers": "kAsi;eLL5;Z5kL", "site": "https://openreview.net/forum?id=g3VOQpuqlF", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;3;4", "excitement": "3;4;4", "reproducibility": "3;1;4", "correctness": "3;3;3", "rating_avg": 3.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 2.6666666666666665, "correctness_avg": 3.0, "replies_avg": 5, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": 
";;;;", "linkedin": ";;;;", "aff_unique_index": "0;0", "aff_unique_norm": "Meta", "aff_unique_dep": "Meta Platforms, Inc.", "aff_unique_url": "https://meta.com", "aff_unique_abbr": "Meta", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "g3faCfrwm7", "title": "Just Ask for Calibration: Strategies for Eliciting Calibrated Confidence Scores from Language Models Fine-Tuned with Human Feedback", "track": "main", "status": "Short Main", "tldr": "", "abstract": "A trustworthy real-world prediction system should produce well-calibrated confidence scores; that is, its confidence in an answer should be indicative of the likelihood that the answer is correct, enabling deferral to an expert in cases of low-confidence predictions. Recent studies have shown that unsupervised pre-training produces large language models (LMs) whose conditional probabilities are remarkably well-calibrated. However, the most widely-used LMs are fine-tuned with reinforcement learning from human feedback (RLHF-LMs), and some studies have suggested that RLHF-LMs produce conditional probabilities that are very poorly calibrated. In light of this perceived weakness, we conduct a broad evaluation of methods for extracting confidence scores from RLHF-LMs. For RLHF-LMs such as ChatGPT, GPT-4, and Claude, we find that verbalized confidences emitted as output tokens are typically better-calibrated than the model's conditional probabilities on the TriviaQA, SciQ, and TruthfulQA benchmarks, often reducing the expected calibration error by a relative 50%.", "keywords": "calibration;RLHF;language model;verbalized probability", "primary_area": "", "supplementary_material": "", "author": "Katherine Tian;Eric Mitchell;Allan Zhou;Archit Sharma;Rafael Rafailov;Huaxiu Yao;Chelsea Finn;Christopher D Manning", "authorids": "~Katherine_Tian1;~Eric_Mitchell1;~Allan_Zhou1;~Archit_Sharma1;~Rafael_Rafailov1;~Huaxiu_Yao1;~Chelsea_Finn1;~Christopher_D_Manning1", "gender": "F;M;;M;M;M;F;M", "homepage": ";https://ericmitchell.ai;http://bland.website;;https://rmrafailov.github.io/;http://huaxiuyao.mystrikingly.com;https://ai.stanford.edu/~cbfinn/;https://nlp.stanford.edu/~manning/", "dblp": ";238/0419;195/6907;220/3163.html;272/5358;197/1635;131/1783;m/ChristopherDManning", "google_scholar": ";q77J4fgAAAAJ;;_0IIzxgAAAAJ;TwABcRgAAAAJ;A20BZnQAAAAJ;vfPE6hgAAAAJ;1zmDOdwAAAAJ", "or_profile": "~Katherine_Tian1;~Eric_Mitchell1;~Allan_Zhou1;~Archit_Sharma1;~Rafael_Rafailov1;~Huaxiu_Yao1;~Chelsea_Finn1;~Christopher_D_Manning1", "aff": "Harvard University;Stanford University;Google Deepmind;Stanford University;Stanford University;Computer Science Department, Stanford University;Google;Computer Science Department, Stanford University", "aff_domain": "harvard.edu;stanford.edu;google.com;stanford.edu;stanford.edu;cs.stanford.edu;google.com;cs.stanford.edu", "position": "Undergrad student;PhD student;Intern;Graduate Student;PhD student;Postdoc;Research Scientist;Full Professor", "bibtex": "@inproceedings{\ntian2023just,\ntitle={Just Ask for Calibration: Strategies for Eliciting Calibrated Confidence Scores from Language Models Fine-Tuned with Human Feedback},\nauthor={Katherine Tian and Eric Mitchell and Allan Zhou and Archit Sharma and Rafael Rafailov and Huaxiu Yao and Chelsea Finn and Christopher D Manning},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=g3faCfrwm7}\n}", "github": "", 
"project": "", "reviewers": "B7ZR;P3om;M6yR;qYcs", "site": "https://openreview.net/forum?id=g3faCfrwm7", "pdf_size": 0, "rating": "4;4;4;4", "confidence": "3;4;4;4", "excitement": "3;4;3;4", "reproducibility": "4;4;3;4", "correctness": "3;4;3;4", "rating_avg": 4.0, "confidence_avg": 3.75, "excitement_avg": 3.5, "reproducibility_avg": 3.75, "correctness_avg": 3.5, "replies_avg": 14, "authors#_avg": 8, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-7487-1744;;;;;;0000-0001-6155-649X", "linkedin": "katherine-tian/;;;;;huaxiuyao/;;christopher-manning-011575/", "aff_unique_index": "0;1;2;1;1;1;3;1", "aff_unique_norm": "Harvard University;Stanford University;DeepMind;Google", "aff_unique_dep": ";;DeepMind;Google", "aff_unique_url": "https://www.harvard.edu;https://www.stanford.edu;https://deepmind.com;https://www.google.com", "aff_unique_abbr": "Harvard;Stanford;DeepMind;Google", "aff_campus_unique_index": "1;1;1;1;2;1", "aff_campus_unique": ";Stanford;Mountain View", "aff_country_unique_index": "0;0;1;0;0;0;0;0", "aff_country_unique": "United States;United Kingdom" }, { "id": "g4FAvRcSuf", "title": "Self-Supervised Behavior Cloned Transformers are Path Crawlers for Text Games", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "In this work, we introduce a self-supervised behavior cloning transformer for text games, which are challenging benchmarks for multi-step reasoning in virtual environments. Traditionally, Behavior Cloning Transformers excel in such tasks but rely on supervised training data. Our approach auto-generates training data by exploring trajectories (defined by common macro-action sequences) that lead to reward within the games, while determining the generality and utility of these trajectories by rapidly training small models then evalauating their performance on unseen development games. 
Through empirical analysis, we show our method consistently uncovers generalizable training data, achieving about 90\\% performance of supervised systems across three benchmark text games.", "keywords": "text games;reinforcement learning;behavior cloning;self-supervision", "primary_area": "", "supplementary_material": "", "author": "Ruoyao Wang;Peter Jansen", "authorids": "~Ruoyao_Wang1;~Peter_Jansen1", "gender": "M;", "homepage": "https://wsxzwps.github.io/;http://www.cognitiveai.org", "dblp": ";72/5962", "google_scholar": ";wc1Hbl8AAAAJ", "or_profile": "~Ruoyao_Wang1;~Peter_Jansen1", "aff": "University of Arizona;University of Arizona", "aff_domain": "arizona.edu;arizona.edu", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\nwang2023selfsupervised,\ntitle={Self-Supervised Behavior Cloned Transformers are Path Crawlers for Text Games},\nauthor={Ruoyao Wang and Peter Jansen},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=g4FAvRcSuf}\n}", "github": "", "project": "", "reviewers": "TBi2;AHeS;U234", "site": "https://openreview.net/forum?id=g4FAvRcSuf", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;3;2", "excitement": "3;2;2", "reproducibility": "4;4;4", "correctness": "4;3;3", "rating_avg": 3.0, "confidence_avg": 2.6666666666666665, "excitement_avg": 2.3333333333333335, "reproducibility_avg": 4.0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";", "linkedin": ";", "aff_unique_index": "0;0", "aff_unique_norm": "University of Arizona", "aff_unique_dep": "", "aff_unique_url": "https://www.arizona.edu", "aff_unique_abbr": "UA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "g84UrdUwBA", "title": "Harnessing Black-Box Control to Boost Commonsense in LM's Generation", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Large language models (LLMs) such as GPT-3 have demonstrated a strong capability to generate coherent and contextually relevant text. However, amidst their successes, a crucial issue persists: their generated outputs still lack commonsense at times. \nMoreover, fine-tuning the entire LLM towards more commonsensical outputs is computationally expensive if not infeasible.\nIn this paper, we present a computation-efficient framework that steers a frozen Pre-Trained Language Model (PTLM) towards more commonsensical generation (i.e., producing a plausible output that incorporates a list of concepts in a meaningful way). Specifically, we first construct a reference-free evaluator that assigns a sentence with a commonsensical score by grounding the sentence to a dynamic commonsense knowledge base from four different relational aspects. We then use the scorer as the oracle for commonsense knowledge, and extend the controllable generation method called NADO to train an auxiliary head that guides a fixed PTLM to better satisfy the oracle.\nWe test our framework on a series of GPT-2-, Flan-T5-, and Alpaca-based language models (LMs) on two constrained concept-to-sentence benchmarks. 
Human evaluation results demonstrate that our method consistently leads to the most commonsensical outputs.", "keywords": "controllable text generation;generative commonsense reasoning", "primary_area": "", "supplementary_material": "", "author": "Yufei Tian;Felix Zhang;Nanyun Peng", "authorids": "~Yufei_Tian1;~Felix_Zhang2;~Nanyun_Peng1", "gender": ";;F", "homepage": ";;https://violetpeng.github.io/", "dblp": ";;117/4036", "google_scholar": ";;XxRXvX0AAAAJ", "or_profile": "~Yufei_Tian1;~Felix_Zhang2;~Nanyun_Peng1", "aff": ";University of California, Los Angeles;University of California, Los Angeles", "aff_domain": ";ucla.edu;ucla.edu", "position": ";MS student;Assistant Professor", "bibtex": "@inproceedings{\ntian2023harnessing,\ntitle={Harnessing Black-Box Control to Boost Commonsense in {LM}'s Generation},\nauthor={Yufei Tian and Felix Zhang and Nanyun Peng},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=g84UrdUwBA}\n}", "github": "", "project": "", "reviewers": "bDkz;DEwH;Kqgt", "site": "https://openreview.net/forum?id=g84UrdUwBA", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;3;3", "excitement": "3;4;3", "reproducibility": "4;4;2", "correctness": "4;4;3", "rating_avg": 4.0, "confidence_avg": 3.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": ";~fe/;", "aff_unique_index": "0;0", "aff_unique_norm": "University of California, Los Angeles", "aff_unique_dep": "", "aff_unique_url": "https://www.ucla.edu", "aff_unique_abbr": "UCLA", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Los Angeles", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "g8OqNaz6dY", "title": "Efficient Algorithms for Recognizing Weighted Tree-Adjoining Languages", "track": "main", "status": "Long Main", "tldr": "", "abstract": "The class of tree-adjoining languages can be characterized by various two-level formalisms, consisting of a context-free grammar (CFG) or pushdown automaton (PDA) controlling another CFG or PDA. These four formalisms are equivalent to tree-adjoining grammars (TAG), linear indexed grammars (LIG), pushdown-adjoining automata (PAA), and embedded pushdown automata (EPDA). We define semiring-weighted versions of the above two-level formalisms, and we design new algorithms for computing their stringsums (the weight of all derivations of a string) and allsums (the weight of all derivations). From these, we also immediately obtain stringsum and allsum algorithms for TAG, LIG, PAA, and EPDA. For LIG, our algorithm is more time-efficient by a factor of $\\mathcal{O}(n|\\mathcal{N}|)$ (where $n$ is the string length and $|\\mathcal{N}|$ is the size of the nonterminal set) and more space-efficient by a factor of $\\mathcal{O}(|\\Gamma|)$ (where $\\Gamma$ is the size of the stack alphabet) than the algorithm of Vijay-Shanker and Weir (1989). For EPDA, our algorithm is both more space-efficient and time-efficient than the algorithm of Alonso et al. (2001) by factors of $\\mathcal{O}(|\\Gamma|^2)$ and $\\mathcal{O}(|\\Gamma|^3)$, respectively. 
Finally, we give the first PAA stringsum and allsum algorithms.", "keywords": "algorithms;parsing;semirings;tree adjoining grammars;linear indexed grammars;embedded pushdown automata", "primary_area": "", "supplementary_material": "", "author": "Alexandra Butoi;Tim Vieira;Ryan Cotterell;David Chiang", "authorids": "~Alexandra_Butoi1;~Tim_Vieira1;~Ryan_Cotterell1;~David_Chiang1", "gender": "F;M;M;Not Specified", "homepage": ";http://timvieira.github.io;https://nd.edu/~dchiang;https://rycolab.io/", "dblp": "331/2281;127/0214;https://dblp.org/pers/hd/c/Chiang_0001:David;146/4361.html", "google_scholar": "rlgyxLMAAAAJ;Avtv7FkAAAAJ;dok0514AAAAJ;DexOqtoAAAAJ", "or_profile": "~Alexandra_Butoi1;~Tim_Vieira1;~David_Chiang1;~Ryan_D_Cotterell1", "aff": "ETHZ - ETH Zurich;Johns Hopkins University;University of Notre Dame;Swiss Federal Institute of Technology", "aff_domain": "ethz.ch;johnshopkins.edu;nd.edu;ethz.ch", "position": "MS student;PhD student;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\nbutoi2023efficient,\ntitle={Efficient Algorithms for Recognizing Weighted Tree-Adjoining Languages},\nauthor={Alexandra Butoi and Tim Vieira and Ryan Cotterell and David Chiang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=g8OqNaz6dY}\n}", "github": "", "project": "", "reviewers": "VNxE;1qtB;F4Ew", "site": "https://openreview.net/forum?id=g8OqNaz6dY", "pdf_size": 0, "rating": "5;5;5", "confidence": "3;4;4", "excitement": "4;4;3", "reproducibility": "0;0;5", "correctness": "4;5;3", "rating_avg": 5.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 1.6666666666666667, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-2043-1073;0000-0002-0435-4864;", "linkedin": ";tim-vieira-608b0396/;;", "aff_unique_index": "0;1;2;3", "aff_unique_norm": "ETH Zurich;Johns Hopkins University;University of Notre Dame;Swiss Federal Institute of Technology", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.ethz.ch;https://www.jhu.edu;https://www.nd.edu;https://www.ethz.ch", "aff_unique_abbr": "ETHZ;JHU;Notre Dame;ETH Zurich", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;0", "aff_country_unique": "Switzerland;United States" }, { "id": "gAzBhetShk", "title": "Exploring Chain of Thought Style Prompting for Text-to-SQL", "track": "main", "status": "Long Main", "tldr": "", "abstract": "In-context learning with large language models (LLMs) has recently caught increasing attention due to its superior few-shot performance on various tasks. However, its performance on text-to-SQL parsing still has much room for improvement. In this paper, we hypothesize that a crucial aspect of LLMs to improve for text-to-SQL parsing is their multi-step reasoning ability. Thus, we systematically study how to enhance LLMs' reasoning ability through chain of thought (CoT) style prompting, including the original chain-of-thought prompting and least-to-most prompting. Our experiments demonstrate that iterative prompting as in least-to-most prompting may be unnecessary for text-to-SQL parsing, and using detailed reasoning steps tends to have more error propagation issues. Based on these findings, we propose a new CoT-style prompting method for text-to-SQL parsing. 
It brings 5.2 and 6.5 point absolute gains on the Spider development set and the Spider Realistic set, respectively, compared to the standard prompting method without reasoning steps; 2.4 and 1.5 point absolute gains, compared to the least-to-most prompting method.", "keywords": "Text-to-Sql;Prompt engineering;Chain-of-Thought;Least-to-Most;Question Decomposition.", "primary_area": "", "supplementary_material": "", "author": "Chang-Yu Tai;Ziru Chen;TIANSHU ZHANG;Xiang Deng;Huan Sun", "authorids": "~Chang-Yu_Tai1;~Ziru_Chen1;~TIANSHU_ZHANG1;~Xiang_Deng2;~Huan_Sun1", "gender": "M;M;;M;F", "homepage": "https://www.semanticscholar.org/author/Chang-You-Tai/77145310;https://ronch99.github.io/;;https://xiang-deng.github.io/;https://u.osu.edu/ihudas/people/", "dblp": "247/1322.html;200/8335;118/6742-1.html;95/4545-1;33/2952-1.html", "google_scholar": ";1-pt7zMAAAAJ;;d-qpndsAAAAJ;wIFkulcAAAAJ", "or_profile": "~Chang-Yu_Tai1;~Ziru_Chen1;~TIANSHU_ZHANG1;~Xiang_Deng2;~Huan_Sun1", "aff": "The Ohio StateUniversity;Ohio State University, Columbus;Ohio State University, Columbus;Ohio State University;The Ohio State University, Columbus", "aff_domain": "cse.ohio-state.edu;osu.edu;osu.edu;osu.edu;osu.edu", "position": "MS student;PhD student;PhD student;PhD student;Associate Professor", "bibtex": "@inproceedings{\ntai2023exploring,\ntitle={Exploring Chain of Thought Style Prompting for Text-to-{SQL}},\nauthor={Chang-Yu Tai and Ziru Chen and TIANSHU ZHANG and Xiang Deng and Huan Sun},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=gAzBhetShk}\n}", "github": "", "project": "", "reviewers": "vQpJ;Cceb;DzXr", "site": "https://openreview.net/forum?id=gAzBhetShk", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;3;3", "excitement": "4;3;2", "reproducibility": "4;4;3", "correctness": "4;3;3", "rating_avg": 4.0, "confidence_avg": 3.0, "excitement_avg": 3.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;", "linkedin": ";;;;huan-sun-81527924/?originalSubdomain=cn", "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Ohio State University", "aff_unique_dep": "", "aff_unique_url": "https://www.osu.edu", "aff_unique_abbr": "OSU", "aff_campus_unique_index": "1;1;1", "aff_campus_unique": ";Columbus", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "gBI7thSo0X", "title": "Values, Ethics, Morals? On the Use of Moral Concepts in NLP Research", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "With language technology increasingly affecting individuals' lives, many recent works have investigated the ethical aspects of NLP. Among other topics, researchers focused on the notion of morality, investigating, for example, which moral judgements language models make. \nHowever, there has been little to no discussion of the terminology and the theories underpinning those efforts and their implications. This lack is highly problematic, as it hides the works' underlying assumptions and hinders a thorough and targeted scientific debate of morality in NLP. In this work, we address this research gap by (a)\u00a0providing an overview of some important ethical concepts stemming from philosophy and (b) systematically surveying the existing literature on moral NLP w.r.t. their philosophical foundation, terminology, and data basis. 
For instance, we analyse what ethical theory an approach is based on, how this decision is justified, and what implications it entails. Our findings surveying 92 papers show that, for instance, most papers neither provide a clear definition of the terms they use nor adhere to definitions from philosophy. Finally, (c) we give three recommendations for future research in the field. We hope our work will lead to a more informed, careful, and sound discussion of morality in language technology.", "keywords": "Ethics;Morality;Moral Values;Natural Language Processing;Survey", "primary_area": "", "supplementary_material": "", "author": "Karina Vida;Judith Simon;Anne Lauscher", "authorids": "~Karina_Vida1;~Judith_Simon2;~Anne_Lauscher1", "gender": ";;", "homepage": ";https://www.inf.uni-hamburg.de/en/inst/ab/eit.html;", "dblp": ";;209/6857", "google_scholar": ";;https://scholar.google.it/citations?user=IbJS3UEAAAAJ", "or_profile": "~Karina_Vida1;~Judith_Simon2;~Anne_Lauscher1", "aff": "Universit\u00e4t Hamburg;Universit\u00e4t Hamburg;Universit\u00e4t Hamburg", "aff_domain": "uni-hamburg.de;uni-hamburg.de;uni-hamburg.de", "position": "PhD student;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nvida2023values,\ntitle={Values, Ethics, Morals? On the Use of Moral Concepts in {NLP} Research},\nauthor={Karina Vida and Judith Simon and Anne Lauscher},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=gBI7thSo0X}\n}", "github": "", "project": "", "reviewers": "fP8o;DdBe;gSJ6", "site": "https://openreview.net/forum?id=gBI7thSo0X", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;4", "excitement": "2;4;3", "reproducibility": "0;4;3", "correctness": "3;4;3", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 3.0, "reproducibility_avg": 2.3333333333333335, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-0550-5505;;", "linkedin": ";;", "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Hamburg", "aff_unique_dep": "", "aff_unique_url": "https://www.uni-hamburg.de", "aff_unique_abbr": "UHH", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Germany" }, { "id": "gGmccVXoy2", "title": "Decomposing Complex Queries for Tip-of-the-tongue Retrieval", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "When re-finding items, users who forget or are uncertain about identifying details often rely on creative strategies for expressing their information needs---complex queries that describe content elements (e.g., book characters or events), information beyond the document text (e.g., descriptions of book covers), or personal context (e.g., when they read a book). Standard retrieval models that rely on lexical or semantic overlap between query and document text are challenged in such retrieval settings, known as tip-of-the-tongue (TOT) retrieval.\nWe introduce a simple but effective framework for handling such complex queries by decomposing the query with an LLM into individual clues, routing those as subqueries to specialized retrievers, and ensembling the results.\nOur approach takes advantage of off-the-shelf retrievers (e.g., CLIP for retrieving images of book covers) or incorporates retriever-specific logic (e.g., date constraints). 
We show that our framework incorporating query decomposition into retrievers can improve gold book recall up to 6\\% absolute gain for Recall@5 on a new collection of 14,441 real-world query-book pairs from an online community for resolving TOT inquiries.", "keywords": "information retrieval; large language models; query decomposition", "primary_area": "", "supplementary_material": "", "author": "Kevin Lin;Kyle Lo;Joseph E. Gonzalez;Dan Klein", "authorids": "~Kevin_Lin4;~Kyle_Lo1;~Joseph_E._Gonzalez1;~Dan_Klein1", "gender": "Not Specified;;M;", "homepage": "https://people.eecs.berkeley.edu/~kevinlin/;https://kyleclo.github.io/;http://eecs.berkeley.edu/~jegonzal;http://people.eecs.berkeley.edu/~klein/", "dblp": ";220/2020;61/8262;", "google_scholar": "InQnNGIAAAAJ;VJS12uMAAAAJ;https://scholar.google.com.tw/citations?user=gM2WW9UAAAAJ;", "or_profile": "~Kevin_Lin4;~Kyle_Lo1;~Joseph_E._Gonzalez1;~Dan_Klein1", "aff": "University of California, Berkeley;Allen Institute for Artificial Intelligence;University of California, Berkeley;University of California, Berkeley", "aff_domain": "berkeley.edu;allenai.org;berkeley.edu;berkeley.edu", "position": "PhD student;Researcher;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nlin2023decomposing,\ntitle={Decomposing Complex Queries for Tip-of-the-tongue Retrieval},\nauthor={Kevin Lin and Kyle Lo and Joseph E. Gonzalez and Dan Klein},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=gGmccVXoy2}\n}", "github": "", "project": "", "reviewers": "fUsA;Tnm2;hexP", "site": "https://openreview.net/forum?id=gGmccVXoy2", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;4", "excitement": "3;3;4", "reproducibility": "4;4;4", "correctness": "3;3;4", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0003-2921-956X;", "linkedin": ";kylelo/;;dan-klein/", "aff_unique_index": "0;1;0;0", "aff_unique_norm": "University of California, Berkeley;Allen Institute for Artificial Intelligence", "aff_unique_dep": ";", "aff_unique_url": "https://www.berkeley.edu;https://allenai.org", "aff_unique_abbr": "UC Berkeley;AI2", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Berkeley;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "gI11vXg1W4", "title": "PRCA: Fitting Black-Box Large Language Models for Retrieval Question Answering via Pluggable Reward-Driven Contextual Adapter", "track": "main", "status": "Long Main", "tldr": "", "abstract": "The Retrieval Question Answering (ReQA) task employs the retrieval-augmented framework, composed of a retriever and generator. The generators formulate the answer based on the documents retrieved by the retriever. Incorporating Large Language Models (LLMs) as generators is beneficial due to their advanced QA capabilities, but they are typically too large to be fine-tuned with budget constraints while some of them are only accessible via APIs. To tackle this issue and further improve ReQA performance, we propose a trainable Pluggable Reward-Driven Contextual Adapter (PRCA), keeping the generator as a black box. 
Positioned between the retriever and generator in a Pluggable manner, PRCA refines the retrieved information by operating in a token-autoregressive strategy via maximizing rewards of the reinforcement learning phase. Our experiments validate PRCA's effectiveness in enhancing ReQA performance on three datasets by up to 20% improvement to fit black-box LLMs into existing frameworks, demonstrating its considerable potential in the LLMs era.", "keywords": "Retrieval Question Answering;Black-Box LLMs;Retrieval Augmentation;Pluggable Reward-Driven Contextual Adapter", "primary_area": "", "supplementary_material": "", "author": "Haoyan Yang;Zhitao Li;Yong Zhang;Jianzong Wang;Ning Cheng;Ming Li;Jing Xiao", "authorids": "~Haoyan_Yang1;~Zhitao_Li1;~Yong_Zhang13;~Jianzong_Wang2;~Ning_Cheng2;~Ming_Li18;~Jing_Xiao7", "gender": "M;M;;M;M;M;", "homepage": "https://joyyang158.github.io;http://pingan.com;;https://largeaudiomodel.com/author/jianzong-wang/;https://largeaudiomodel.com/author/ning-cheng/;https://mingliiii.github.io/;", "dblp": ";;;70/8380;86/797-1;;", "google_scholar": "q8sCVVcAAAAJ;;;https://scholar.google.co.uk/citations?user=noi4qcUAAAAJ;;MpEoJegAAAAJ;", "or_profile": "~Haoyan_Yang1;~Zhitao_Li1;~Yong_Zhang13;~Jianzong_Wang2;~Ning_Cheng2;~Ming_Li18;~Jing_Xiao7", "aff": "BNU-HKBU United International College;Pingan Technology;;Pingan Technology;Pingan Technology;Texas A&M University - College Station;", "aff_domain": "uic.edu.cn;pingan.com.cn;;pingan.com.cn;pingan.com.cn;tamu.edu;", "position": "Undergrad student;Researcher;;Researcher;Principal Researcher;MS student;", "bibtex": "@inproceedings{\nyang2023prca,\ntitle={{PRCA}: Fitting Black-Box Large Language Models for Retrieval Question Answering via Pluggable Reward-Driven Contextual Adapter},\nauthor={Haoyan Yang and Zhitao Li and Yong Zhang and Jianzong Wang and Ning Cheng and Ming Li and Jing Xiao},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=gI11vXg1W4}\n}", "github": "", "project": "", "reviewers": "JYC9;N8VF;bnUG", "site": "https://openreview.net/forum?id=gI11vXg1W4", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;3;3", "excitement": "3;3;4", "reproducibility": "3;2;3", "correctness": "4;3;4", "rating_avg": 5.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 2.6666666666666665, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0000-0002-9237-4231;;0009-0001-6491-4827;", "linkedin": "haoyan-yang;;;;;;", "aff_unique_index": "0;1;1;1;2", "aff_unique_norm": "United International College;PingAn Technology;Texas A&M University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.uic.edu.hk;https://www.pingan.com;https://www.tamu.edu", "aff_unique_abbr": "UIC;;TAMU", "aff_campus_unique_index": "1", "aff_campus_unique": ";College Station", "aff_country_unique_index": "0;0;0;0;1", "aff_country_unique": "China;United States" }, { "id": "gJXydPLBkt", "title": "QUDeval: The Evaluation of Questions Under Discussion Discourse Parsing", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Questions Under Discussion (QUD) is a versatile linguistic framework in which discourse progresses as continuously asking questions and answering them. 
Automatic parsing of a discourse to produce a QUD structure thus entails a complex question generation task: given a document and an answer sentence, generate a question that satisfies linguistic constraints of QUD and can be grounded in an anchor sentence in prior context. These questions are known to be curiosity-driven and open-ended. This work introduces the first framework for the automatic evaluation of QUD parsing, instantiating the theoretical constraints of QUD in a concrete protocol. We present QUDeval, a dataset of fine-grained evaluation of 2,190 QUD questions generated from both fine-tuned systems and LLMs. Using QUDeval, we show that satisfying all constraints of QUD is still challenging for modern LLMs, and that existing evaluation metrics poorly approximate parser quality. Encouragingly, human-authored QUDs are scored highly by our human evaluators, suggesting that there is headroom for further progress on language modeling to improve both QUD parsing and QUD evaluation.", "keywords": "discourse; questions under discussion; QUD; evaluation", "primary_area": "", "supplementary_material": "", "author": "Yating Wu;Ritika Rajesh Mangla;Greg Durrett;Junyi Jessy Li", "authorids": "~Yating_Wu1;~Ritika_Rajesh_Mangla1;~Greg_Durrett1;~Junyi_Jessy_Li2", "gender": "Not Specified;;M;F", "homepage": "https://lingchensanwen.github.io/;;http://www.cs.utexas.edu/~gdurrett/;https://jessyli.com", "dblp": "23/1500-2;;69/7968;148/9553", "google_scholar": "https://scholar.google.com/citations?view_op=list_works;https://scholar.google.com/citations?hl=en;https://scholar.google.com.tw/citations?user=EpQ_sDEAAAAJ;tJGm3-YAAAAJ", "or_profile": "~Yating_Wu1;~Ritika_Rajesh_Mangla1;~Greg_Durrett1;~Junyi_Jessy_Li2", "aff": "University of Texas at Austin;University of Texas at Austin;University of Texas, Austin;University of Texas at Austin", "aff_domain": "utexas.edu;utexas.edu;utexas.edu;utexas.edu", "position": "PhD student;MS student;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nwu2023qudeval,\ntitle={{QUD}eval: The Evaluation of Questions Under Discussion Discourse Parsing},\nauthor={Yating Wu and Ritika Rajesh Mangla and Greg Durrett and Junyi Jessy Li},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=gJXydPLBkt}\n}", "github": "", "project": "", "reviewers": "ttgK;WebN;FkHr", "site": "https://openreview.net/forum?id=gJXydPLBkt", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;3;4", "excitement": "4;2;4", "reproducibility": "3;4;3", "correctness": "3;4;3", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;", "linkedin": "wuyating;ritika-mangla/;;", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Texas at Austin", "aff_unique_dep": "", "aff_unique_url": "https://www.utexas.edu", "aff_unique_abbr": "UT Austin", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Austin", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "gJZqSRfV21", "title": "ReLM: Leveraging Language Models for Enhanced Chemical Reaction Prediction", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "Predicting chemical reactions, a fundamental challenge in chemistry, involves forecasting the 
resulting products from a given reaction process. Conventional techniques, notably those employing Graph Neural Networks (GNNs), are often limited by insufficient training data and their inability to utilize textual information, undermining their applicability in real-world applications. In this work, we propose **ReLM**, a novel framework that leverages the chemical knowledge encoded in language models (LMs) to assist GNNs, thereby enhancing the accuracy of real-world chemical reaction predictions. To further enhance the model's robustness and interpretability, we incorporate the confidence score strategy, enabling the LMs to self-assess the reliability of their predictions. Our experimental results demonstrate that ReLM improves the performance of state-of-the-art GNN-based methods across various chemical reaction datasets, especially in out-of-distribution settings. Codes are available at https://github.com/syr-cn/ReLM.", "keywords": "graph neural networks;chemical reaction", "primary_area": "", "supplementary_material": "", "author": "Yaorui Shi;An Zhang;Enzhi Zhang;Zhiyuan Liu;Xiang Wang", "authorids": "~Yaorui_Shi2;~An_Zhang2;~Enzhi_Zhang1;~Zhiyuan_Liu5;~Xiang_Wang6", "gender": "M;M;M;M;F", "homepage": ";;https://acharkq.github.io/;https://github.com/xiangwang1223;https://github.com/anzhang314", "dblp": ";;53/3245-10;31/2864-10;78/5581-3", "google_scholar": "EWU3rdIAAAAJ;;https://scholar.google.com.sg/citations?user=zF0AH64AAAAJ;https://scholar.google.com.sg/citations?user=HdhaQB0AAAAJ;https://scholar.google.com.sg/citations?user=BcX7GJcAAAAJ", "or_profile": "~Yaorui_Shi2;~Enzhi_Zhang1;~Zhiyuan_Liu5;~Xiang_Wang6;~AN_ZHANG1", "aff": "Xi'an Jiaotong University;Hokkaido University;National University of Singapore;University of Science and Technology of China;National University of Singapore", "aff_domain": "xjtu.edu.cn;hokudai.ac.jp;nus.edu.sg;ustc.edu.cn;nus.edu.sg", "position": "Undergrad student;PhD student;PhD student;Full Professor;Postdoc", "bibtex": "@inproceedings{\nshi2023relm,\ntitle={Re{LM}: Leveraging Language Models for Enhanced Chemical Reaction Prediction},\nauthor={Yaorui Shi and An Zhang and Enzhi Zhang and Zhiyuan Liu and Xiang Wang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=gJZqSRfV21}\n}", "github": "", "project": "", "reviewers": "i7LA;ZhrP;cAqR", "site": "https://openreview.net/forum?id=gJZqSRfV21", "pdf_size": 0, "rating": "4;4;4", "confidence": "5;4;5", "excitement": "3;3;4", "reproducibility": "5;3;4", "correctness": "4;3;4", "rating_avg": 4.0, "confidence_avg": 4.666666666666667, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.0, "correctness_avg": 3.6666666666666665, "replies_avg": 10, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-6421-0192;;0000-0002-6148-6329;", "linkedin": "https://www.linkedin.cn/incareer/in/ACoAADX8m7MBB85jekmcqEP6gMuGa_pp35cLmbo;;;;", "aff_unique_index": "0;1;2;3;2", "aff_unique_norm": "Xi'an Jiao Tong University;Hokkaido University;National University of Singapore;University of Science and Technology of China", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.xjtu.edu.cn;https://www.hokudai.ac.jp;https://www.nus.edu.sg;http://www.ustc.edu.cn", "aff_unique_abbr": "XJTU;Hokkaido U;NUS;USTC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2;0;2", "aff_country_unique": "China;Japan;Singapore" }, { "id": "gQUDsNE3Lh", "title": 
"HARE: Explainable Hate Speech Detection with Step-by-Step Reasoning", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "With the proliferation of social media, accurate detection of hate speech has become critical to ensure safety online.\nTo combat nuanced forms of hate speech, it is important to identify and thoroughly explain hate speech to help users understand its harmful effects. Recent benchmarks have attempted to tackle this issue by training generative models on free-text annotations of implications in hateful text. However, we find significant reasoning gaps in the existing annotations schemes, which may hinder the supervision of detection models. In this paper, we introduce a hate speech detection framework, **HARE**, which harnesses the reasoning capabilities of large language models (LLMs) to fill these gaps in explanations of hate speech, thus enabling effective supervision of detection models. Experiments on SBIC and Implicit Hate benchmarks show that our method, using model-generated data, consistently outperforms baselines, using existing free-text human annotations. Analysis demonstrates that our method enhances the explanation quality of trained models and improves generalization to unseen datasets. Our code is available at https://github.com/joonkeekim/hare-hate-speech.git.", "keywords": "Hate Speech Detection;Large Language Models", "primary_area": "", "supplementary_material": "", "author": "Yongjin Yang;Joonkee Kim;Yujin Kim;Namgyu Ho;James Thorne;Se-Young Yun", "authorids": "~Yongjin_Yang1;~Joonkee_Kim1;~Yujin_Kim2;~Namgyu_Ho1;~James_Thorne1;~Se-Young_Yun1", "gender": "M;M;F;M;;M", "homepage": "https://yangyongjin.github.io/;;https://github.com/kimyuji;http://namgyu.com;https://jamesthorne.com;https://fbsqkd.github.io", "dblp": "159/8412;323/4661;128/3542;313/1580;204/1380;23/8862", "google_scholar": "qGVZm3sAAAAJ;LL9Yj54AAAAJ;17yTpxsAAAAJ;https://scholar.google.com/citations?view_op=list_works;hao9RrgAAAAJ;X_IAjb8AAAAJ", "or_profile": "~Yongjin_Yang1;~Joonkee_Kim1;~Yujin_Kim2;~Namgyu_Ho1;~James_Thorne1;~Se-Young_Yun1", "aff": "Seoul National University;Korea Advanced Institute of Science & Technology;KAIST, Graduate School of AI;Korea Advanced Institute of Science & Technology;KAIST;KAIST", "aff_domain": "snu.ac.kr;kaist.ac.kr;kaist.ac.kr;kaist.ac.kr;kaist.ac.kr;kaist.ac.kr", "position": "Undergrad student;MS student;MS student;MS student;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nyang2023hare,\ntitle={{HARE}: Explainable Hate Speech Detection with Step-by-Step Reasoning},\nauthor={Yongjin Yang and Joonkee Kim and Yujin Kim and Namgyu Ho and James Thorne and Se-Young Yun},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=gQUDsNE3Lh}\n}", "github": "", "project": "", "reviewers": "dPPe;KSxq;gh8s", "site": "https://openreview.net/forum?id=gQUDsNE3Lh", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;3", "excitement": "4;3;2", "reproducibility": "4;3;5", "correctness": "3;4;4", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.0, "reproducibility_avg": 4.0, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0009-0005-3293-1904;;;;;", "linkedin": "yongjin-yang-0195a6184/;%EC%A4%80%EA%B8%B0-%EA%B9%80-5171831b3/;;itsnamgyu/;;seyoung-yun-395130ab/", "aff_unique_index": "0;1;2;1;1;1", "aff_unique_norm": "Seoul 
National University;Korea Advanced Institute of Science and Technology;KAIST", "aff_unique_dep": ";;Graduate School of AI", "aff_unique_url": "https://www.snu.ac.kr;https://www.kaist.ac.kr;https://www.kaist.edu", "aff_unique_abbr": "SNU;KAIST;KAIST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "South Korea" }, { "id": "gQeZoe2j3v", "title": "Mulan: A Multi-Level Alignment Model for Video Question Answering", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Video Question Answering (VideoQA) aims to answer questions about the visual content of a video. Current methods mainly focus on improving joint representations of video and text. However, these methods pay little attention to the fine-grained semantic interaction between video and text. In this paper, we propose Mulan: a Multi-Level Alignment Model for Video Question Answering, which establishes alignment between visual and textual modalities at the object-level, frame-level, and video-level. Specifically, for object-level alignment, we propose a mask-guided visual feature encoding method and a visual-guided text description method to learn fine-grained spatial information. For frame-level alignment, we introduce the use of visual features from individual frames, combined with a caption generator, to learn overall spatial information within the scene. For video-level alignment, we propose an expandable ordinal prompt for textual descriptions, combined with visual features, to learn temporal information. Experimental results show that our method outperforms the state-of-the-art methods, even when utilizing the smallest amount of extra visual-language pre-training data and a reduced number of trainable parameters.", "keywords": "Video Question Answering;Multi-Level Alignment;Contrastive Learning", "primary_area": "", "supplementary_material": "", "author": "Yu Fu;Cong Cao;Yuling Yang;Yuhai Lu;Fangfang Yuan;Dakui Wang;Yanbing Liu", "authorids": "~Yu_Fu8;~Cong_Cao2;~Yuling_Yang2;~Yuhai_Lu1;~Fangfang_Yuan1;~Dakui_Wang1;~Yanbing_Liu1", "gender": "M;M;;M;F;M;M", "homepage": ";https://people.ucas.ac.cn/~caocong;;;;;https://people.ucas.edu.cn/~liuyanbing", "dblp": ";342/1223;;224/0755.html;07/10311.html;142/0190;", "google_scholar": ";;;;;;", "or_profile": "~Yu_Fu8;~Cong_Cao2;~Yuling_Yang2;~Yuhai_Lu1;~Fangfang_Yuan1;~Dakui_Wang1;~Yanbing_Liu1", "aff": "University of Chinese Academy of Sciences;Institute of Information Engineering\uff0cChinese Academy of Sciences;University of Chinese Academy of Sciences; Institute of Information Engineering,Chinese Academy of Sciences;;Institute of Information Engineering, Chinese Academy of Sciences;Institute of Information Engineering\uff0cChinese Academy of Sciences", "aff_domain": "ucas.ac.cn;iie.ac.cn;ucas.edu;iie.ac.cn;;iie.ac.cn;iie.ac.cn", "position": "MS student;Associate Professor;PhD student;Associate Professor;;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nfu2023mulan,\ntitle={Mulan: A Multi-Level Alignment Model for Video Question Answering},\nauthor={Yu Fu and Cong Cao and Yuling Yang and Yuhai Lu and Fangfang Yuan and Dakui Wang and Yanbing Liu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=gQeZoe2j3v}\n}", "github": "", "project": "", "reviewers": "eqr8;1gR7;YRUS;yuTo", "site": "https://openreview.net/forum?id=gQeZoe2j3v", "pdf_size": 0, "rating": "3;3;3;3", "confidence": "4;4;4;4", 
"excitement": "2;4;3;3", "reproducibility": "4;2;2;3", "correctness": "2;4;3;4", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 3.0, "reproducibility_avg": 2.75, "correctness_avg": 3.25, "replies_avg": 13, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0003-1881-1947;;;0000-0002-6368-8784;0009-0005-0000-5260;0000-0002-9653-073X", "linkedin": "%E7%85%9C-%E4%BB%98-6a3b99200;;https://www.linkedin.cn/incareer/in/ACoAAERuK0YB2PGdRKQBeAxjKmUaO0nEzGCzeBA;;;;", "aff_unique_index": "0;1;0;1;1;1", "aff_unique_norm": "University of Chinese Academy of Sciences;Chinese Academy of Sciences", "aff_unique_dep": ";Institute of Information Engineering", "aff_unique_url": "http://www.ucas.ac.cn;http://www.cas.cn", "aff_unique_abbr": "UCAS;CAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "gUKVyjoQBG", "title": "COHESENTIA: A Novel Benchmark of Incremental versus Holistic Assessment of Coherence in Generated Texts", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Coherence is a linguistic term that refers to the relations between small textual units (sentences, propositions), which make the text logically consistent and meaningful to the reader. With the advances of generative foundational models in NLP, there is a pressing need to automatically assess the human-perceived coherence of automatically generated texts.\nUp until now, little work has been done on explicitly assessing the coherence of generated texts and analyzing the factors contributing to (in)coherence. \nPrevious work on the topic used other tasks, e.g., sentence reordering, as proxies of coherence, rather than approaching coherence detection heads on. In this paper, we introduce {\\sc CoheSentia}, a novel benchmark of human-perceived coherence of automatically generated texts. \nOur annotation protocol reflects two perspectives; one is global, assigning a single coherence score, and the other is incremental, scoring sentence by sentence. The incremental method produces an (in)coherence score for each text fragment and also pinpoints reasons for incoherence at that point. Our benchmark contains 500 automatically-generated and human-annotated paragraphs, each annotated in both methods, by multiple raters. Our analysis shows that the inter-annotator agreement in the incremental mode is higher than in the holistic alternative, and our experiments show that standard LMs fine-tuned for coherence detection show varied performance on the different factors contributing to (in)coherence. 
All in all, these models yield unsatisfactory performance, emphasizing the need for developing more reliable methods for coherence assessment.", "keywords": "coherence;benchmark;cohesion;consistency;relevance;linguistic theory;gpt;nlp applications", "primary_area": "", "supplementary_material": "", "author": "Aviya Maimon;Reut Tsarfaty", "authorids": "~Aviya_Maimon1;~Reut_Tsarfaty1", "gender": "F;F", "homepage": ";", "dblp": ";21/3716", "google_scholar": ";", "or_profile": "~Aviya_Maimon1;~Reut_Tsarfaty1", "aff": "Bar-Ilan University;Bar-Ilan University, Technion", "aff_domain": "biu.ac.il;biu.ac.il", "position": "MS student;Associate Professor", "bibtex": "@inproceedings{\nmaimon2023cohesentia,\ntitle={{COHESENTIA}: A Novel Benchmark of Incremental versus Holistic Assessment of Coherence in Generated Texts},\nauthor={Aviya Maimon and Reut Tsarfaty},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=gUKVyjoQBG}\n}", "github": "", "project": "", "reviewers": "8DsP;8yo1;1hwA", "site": "https://openreview.net/forum?id=gUKVyjoQBG", "pdf_size": 0, "rating": "5;5;5", "confidence": "3;4;5", "excitement": "4;3;4", "reproducibility": "4;4;5", "correctness": "4;4;5", "rating_avg": 5.0, "confidence_avg": 4.0, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.333333333333333, "correctness_avg": 4.333333333333333, "replies_avg": 10, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";", "linkedin": "aviya-maimon;", "aff_unique_index": "0;0", "aff_unique_norm": "Bar-Ilan University", "aff_unique_dep": "", "aff_unique_url": "https://www.biu.ac.il", "aff_unique_abbr": "BIU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Israel" }, { "id": "gVTtkPJbRq", "title": "GPT-4 as an Effective Zero-Shot Evaluator for Scientific Figure Captions", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "There is growing interest in systems that generate captions for scientific figures.\nHowever, assessing these systems' output poses a significant challenge.\nHuman evaluation requires academic expertise and is costly, while automatic evaluation depends on often low-quality author-written captions.\nThis paper investigates using large language models (LLMs) as a cost-effective, reference-free method for evaluating figure captions.\nWe first constructed SCICAP-EVAL, a human evaluation dataset that contains human judgments for 3,600 scientific figure captions, both original and machine-made, for 600 arXiv figures.\nWe then prompted LLMs like GPT-4 and GPT-3 to score (1-6) each caption based on its potential to aid reader understanding, given relevant context such as figure-mentioning paragraphs.\nResults show that GPT-4, used as a zero-shot evaluator, outperformed all other models and even surpassed assessments made by computer science undergraduates, achieving a Kendall correlation score of 0.401 with Ph.D. students' rankings.", "keywords": "text generation;scientific figure caption;caption evaluation", "primary_area": "", "supplementary_material": "", "author": "Ting-Yao Hsu;Chieh-Yang Huang;Ryan A. Rossi;Sungchul Kim;C. 
Lee Giles;Ting-Hao Kenneth Huang", "authorids": "~Ting-Yao_Hsu1;~Chieh-Yang_Huang1;~Ryan_A._Rossi2;~Sungchul_Kim1;~C._Lee_Giles1;~Ting-Hao_Kenneth_Huang1", "gender": "M;M;M;M;M;M", "homepage": "https://tingyaohsu.github.io;https://appleternity.github.io/chieh-yang/;https://sites.google.com/site/subright;https://clgiles.ist.psu.edu/;http://ryanrossi.com;http://kennethhuang.cc/", "dblp": ";190/5294;61/1573;g/CLeeGiles;17/5085;215/4581", "google_scholar": "rewH-4oAAAAJ;WSVl1oQAAAAJ;v8ISLgIAAAAJ;https://scholar.google.com.tw/citations?user=sAkg9T8AAAAJ;_Dc6lbQAAAAJ;https://scholar.google.com/citations?hl=en", "or_profile": "~Ting-Yao_Hsu1;~Chieh-Yang_Huang1;~Sungchul_Kim1;~C._Lee_Giles1;~Ryan_Rossi1;~Ting-Hao_Huang1", "aff": "Pennsylvania State University;Pennsylvania State University;Adobe Systems;Pennsylvania State University;Adobe Research;Pennsylvania State University", "aff_domain": "psu.edu;psu.edu;adobe.com;ist.psu.edu;adobe.com;psu.edu", "position": "PhD student;PhD student;Researcher;Full Professor;Senior Research Scientist;Assistant Professor", "bibtex": "@inproceedings{\nhsu2023gpt,\ntitle={{GPT}-4 as an Effective Zero-Shot Evaluator for Scientific Figure Captions},\nauthor={Ting-Yao Hsu and Chieh-Yang Huang and Ryan A. Rossi and Sungchul Kim and C. Lee Giles and Ting-Hao Kenneth Huang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=gVTtkPJbRq}\n}", "github": "", "project": "", "reviewers": "whcN;Tt8o;EmJc", "site": "https://openreview.net/forum?id=gVTtkPJbRq", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;3", "excitement": "3;3;3", "reproducibility": "4;3;4", "correctness": "3;3;3", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.0, "replies_avg": 10, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0009-0008-9082-6039;0009-0001-6736-9959;0000-0003-3580-5290;0000-0002-1931-585X;0000-0001-9758-0635;0000-0001-7021-4627", "linkedin": "tingyao-hsu/;chieh-yang-huang-4796a510b;;;;", "aff_unique_index": "0;0;1;0;1;0", "aff_unique_norm": "Pennsylvania State University;Adobe", "aff_unique_dep": ";Adobe Systems Incorporated", "aff_unique_url": "https://www.psu.edu;https://www.adobe.com", "aff_unique_abbr": "PSU;Adobe", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "gWWjz9NBo9", "title": "PromptMix: A Class Boundary Augmentation Method for Large Language Model Distillation", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Data augmentation is a widely used technique to address the problem of text classification when there is a limited amount of training data. Recent work often tackles this problem using large language models (LLMs) like GPT3 that can generate new examples given already available ones. In this work, we propose a method to generate more helpful augmented data by utilizing the LLM's abilities to follow instructions and perform few-shot classifications. Our specific PromptMix method consists of two steps: 1) generate challenging text augmentations near class boundaries; however, generating borderline examples increases the risk of false positives in the dataset, so we 2) relabel the text augmentations using a prompting-based LLM classifier to enhance the correctness of labels in the generated data. 
We evaluate the proposed method in challenging 2-shot and zero-shot settings on four text classification datasets: Banking77, TREC6, Subjectivity (SUBJ), and Twitter Complaints. Our experiments show that generating and, crucially, relabeling borderline examples facilitates the transfer of knowledge of a massive LLM like GPT3.5-turbo into smaller and cheaper classifiers like DistilBERT-base and BERT-base. Furthermore, 2-shot PromptMix outperforms multiple 5-shot data augmentation methods on the four datasets. Our code is available at https://github.com/ServiceNow/PromptMix-EMNLP-2023.", "keywords": "large language models;knowledge distillation;text classification;few-shot learning", "primary_area": "", "supplementary_material": "", "author": "Gaurav Sahu;Olga Vechtomova;Dzmitry Bahdanau;Issam H. Laradji", "authorids": "~Gaurav_Sahu2;~Olga_Vechtomova1;~Dzmitry_Bahdanau1;~Issam_H._Laradji1", "gender": "M;F;M;M", "homepage": "https://demfier.github.io;https://ov-research.uwaterloo.ca;;https://issamlaradji.github.io/", "dblp": "227/2467;64/3140;151/6504;142/0043", "google_scholar": "nMAt7UMAAAAJ;https://scholar.google.ca/citations?user=Ln1-_JIAAAAJ;https://scholar.google.ca/citations?user=Nq0dVMcAAAAJ;https://scholar.google.ca/citations?user=8vRS7F0AAAAJ", "or_profile": "~Gaurav_Sahu2;~Olga_Vechtomova1;~Dzmitry_Bahdanau1;~Issam_H._Laradji1", "aff": "University of Waterloo;University of Waterloo;ServiceNow Research;ServiceNow", "aff_domain": "uwaterloo.ca;uwaterloo.ca;servicenow.com;servicenow.com", "position": "PhD student;Full Professor;Research Scientist;Researcher", "bibtex": "@inproceedings{\nsahu2023promptmix,\ntitle={PromptMix: A Class Boundary Augmentation Method for Large Language Model Distillation},\nauthor={Gaurav Sahu and Olga Vechtomova and Dzmitry Bahdanau and Issam H. Laradji},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=gWWjz9NBo9}\n}", "github": "", "project": "", "reviewers": "ivSt;XgUZ;Z33a", "site": "https://openreview.net/forum?id=gWWjz9NBo9", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;5;3", "excitement": "3;4;4", "reproducibility": "3;4;4", "correctness": "4;4;4", "rating_avg": 5.0, "confidence_avg": 4.0, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;", "linkedin": ";olga-vechtomova-134ba91b7/;;issam-laradji-67ba1a99/", "aff_unique_index": "0;0;1;1", "aff_unique_norm": "University of Waterloo;ServiceNow", "aff_unique_dep": ";Research", "aff_unique_url": "https://uwaterloo.ca;https://www.servicenow.com", "aff_unique_abbr": "UW;ServiceNow", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;1", "aff_country_unique": "Canada;United States" }, { "id": "gXq1cwkUZc", "title": "Query Rewriting in Retrieval-Augmented Large Language Models", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Large Language Models (LLMs) play powerful, black-box readers in the retrieve-then-read pipeline, making remarkable progress in knowledge-intensive tasks. This work introduces a new framework, Rewrite-Retrieve-Read instead of the previous retrieve-then-read for the retrieval-augmented LLMs from the perspective of the query rewriting. 
Unlike prior studies focusing on adapting either the retriever or the reader, our approach pays attention to the adaptation of the search query itself, for there is inevitably a gap between the input text and the needed knowledge in retrieval. We first prompt an LLM to generate the query, then use a web search engine to retrieve contexts. Furthermore, to better align the query to the frozen modules, we propose a trainable scheme for our pipeline. A small language model is adopted as a trainable rewriter to cater to the black-box LLM reader. The rewriter is trained using the feedback of the LLM reader by reinforcement learning. Evaluation is conducted on downstream tasks, open-domain QA and multiple-choice QA. Experiments results show consistent performance improvement, indicating that our framework is proven effective and scalable, and brings a new framework for retrieval-augmented LLM.", "keywords": "large language model;retrieval augmentation;query rewriting.", "primary_area": "", "supplementary_material": "", "author": "Xinbei Ma;Yeyun Gong;Pengcheng He;hai zhao;Nan Duan", "authorids": "~Xinbei_Ma1;~Yeyun_Gong2;~Pengcheng_He2;~hai_zhao1;~Nan_Duan1", "gender": ";M;M;M;M", "homepage": ";;;http://bcmi.sjtu.edu.cn/~zhaohai/;https://nanduan.github.io/", "dblp": "301/8959;06/10400.html;116/8665;25/1145-1.html;", "google_scholar": "LpUi3EgAAAAJ;piUkwMYAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com.tw/citations?user=4dU5KS0AAAAJ;Qaa6OxIAAAAJ", "or_profile": "~Xinbei_Ma1;~Yeyun_Gong2;~Pengcheng_He2;~hai_zhao1;~Nan_Duan1", "aff": "Shanghai Jiaotong University;Microsoft;Microsoft;Shanghai Jiaotong University;Microsoft Research Asia", "aff_domain": "sjtu.edu.cn;microsoft.com;microsoft.com;sjtu.edu.cn;microsoft.com", "position": "PhD student;Researcher;Principal Researcher;Full Professor;Principal Researcher", "bibtex": "@inproceedings{\nma2023query,\ntitle={Query Rewriting in Retrieval-Augmented Large Language Models},\nauthor={Xinbei Ma and Yeyun Gong and Pengcheng He and hai zhao and Nan Duan},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=gXq1cwkUZc}\n}", "github": "", "project": "", "reviewers": "JBWk;rmM6;GP32;ksRg", "site": "https://openreview.net/forum?id=gXq1cwkUZc", "pdf_size": 0, "rating": "5;5;5;5", "confidence": "4;3;4;3", "excitement": "2;3;3;3", "reproducibility": "2;4;4;2", "correctness": "2;3;4;4", "rating_avg": 5.0, "confidence_avg": 3.5, "excitement_avg": 2.75, "reproducibility_avg": 3.0, "correctness_avg": 3.25, "replies_avg": 12, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0003-1505-8603;;;;", "linkedin": ";;;;", "aff_unique_index": "0;1;1;0;1", "aff_unique_norm": "Shanghai Jiao Tong University;Microsoft", "aff_unique_dep": ";Microsoft Corporation", "aff_unique_url": "https://www.sjtu.edu.cn;https://www.microsoft.com", "aff_unique_abbr": "SJTU;Microsoft", "aff_campus_unique_index": "1", "aff_campus_unique": ";Asia", "aff_country_unique_index": "0;1;1;0;0", "aff_country_unique": "China;United States" }, { "id": "gZhvtIRu7i", "title": "MILDSum: A Novel Benchmark Dataset for Multilingual Summarization of Indian Legal Case Judgments", "track": "main", "status": "Short Main", "tldr": "", "abstract": "Automatic summarization of legal case judgments is a practically important problem that has attracted substantial research efforts in many countries. 
In the context of the Indian judiciary, there is an additional complexity -- Indian legal case judgments are mostly written in complex English, but a significant portion of India's population lacks command of the English language. Hence, it is crucial to summarize the legal documents in Indian languages to ensure equitable access to justice. While prior research primarily focuses on summarizing legal case judgments in their source languages, this study presents a pioneering effort toward cross-lingual summarization of English legal documents into Hindi, the most frequently spoken Indian language. We construct the first high-quality legal corpus comprising of 3,122 case judgments from prominent Indian courts in English, along with their summaries in both English and Hindi, drafted by legal practitioners. We benchmark the performance of several diverse summarization approaches on our corpus and demonstrate the need for further research in cross-lingual summarization in the legal domain.", "keywords": "Cross-Lingual Summarization;Multilingual corpus for Summarization;Summarization-Translation Pipeline;Legal NLP", "primary_area": "", "supplementary_material": "", "author": "Debtanu Datta;Shubham Soni;Rajdeep Mukherjee;Saptarshi Ghosh", "authorids": "~Debtanu_Datta1;~Shubham_Soni1;~Rajdeep_Mukherjee1;~Saptarshi_Ghosh1", "gender": "M;M;M;M", "homepage": "https://sites.google.com/view/debtanudatta;https://www.linkedin.com/in/shubhamsonidev/;https://rajdeep345.github.io/;http://cse.iitkgp.ac.in/~saptarshi", "dblp": "359/0549.html;;124/3803;06/900-1", "google_scholar": "XCIFyoMAAAAJ;;https://scholar.google.com/citations?hl=en;https://scholar.google.co.in/citations?user=7TmKZv0AAAAJ", "or_profile": "~Debtanu_Datta1;~Shubham_Soni1;~Rajdeep_Mukherjee1;~Saptarshi_Ghosh1", "aff": "Indian Institute of Technology Kharagpur;Indian Institute of Technology, Kharagpur;Indian Institute of Technology Kharagpur;Indian Institute of Technology Kharagpur", "aff_domain": "iitkgp.ac.in;iitkgp.ac.in;iitkgp.ac.in;iitkgp.ac.in", "position": "PhD student;MS student;PhD student;Associate Professor", "bibtex": "@inproceedings{\ndatta2023mildsum,\ntitle={{MILDS}um: A Novel Benchmark Dataset for Multilingual Summarization of Indian Legal Case Judgments},\nauthor={Debtanu Datta and Shubham Soni and Rajdeep Mukherjee and Saptarshi Ghosh},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=gZhvtIRu7i}\n}", "github": "", "project": "", "reviewers": "eZYq;XjyQ;egHn", "site": "https://openreview.net/forum?id=gZhvtIRu7i", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;3;4", "excitement": "3;3;4", "reproducibility": "3;3;4", "correctness": "4;3;4", "rating_avg": 5.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0009-0008-9568-2062;;0000-0002-2267-1695;", "linkedin": "debtanu-datta-56398a18b/;;rajdeepmukherjee89;saptarshi-ghosh-2aab3123/", "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Indian Institute of Technology Kharagpur;Indian Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.iitkgp.ac.in;https://www.iitkgp.ac.in", "aff_unique_abbr": "IIT Kharagpur;IIT Kharagpur", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Kharagpur", "aff_country_unique_index": "0;0;0;0", 
"aff_country_unique": "India" }, { "id": "gZykO63OUh", "title": "DREAM: Deployment of Recombination and Ensembles in Argument Mining", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Current approaches to Argument Mining (AM) tend to take a holistic or black-box view of the overall pipeline. This paper, in contrast, aims to provide a solution to achieve increased performance based on current components instead of independent all-new solutions. To that end, it presents the Deployment of Recombination and Ensemble methods for Argument Miners (DREAM) framework that allows for the (automated) combination of AM components. Using ensemble methods, DREAM combines sets of AM systems to improve accuracy for the four tasks in the AM pipeline. Furthermore, it leverages recombination by using different argument miners elements throughout the pipeline. Experiments with five systems previously included in a benchmark show that the systems combined with DREAM can outperform the previous best single systems in terms of accuracy measured by an AM benchmark.", "keywords": "Argument Mining;Recombination;Ensemble Methods", "primary_area": "", "supplementary_material": "", "author": "Florian Ruosch;Cristina Sarasua;Abraham Bernstein", "authorids": "~Florian_Ruosch1;~Cristina_Sarasua2;~Abraham_Bernstein1", "gender": "M;;", "homepage": "https://www.ifi.uzh.ch/en/ddis/people/ruosch.html;;https://www.ifi.uzh.ch/en/ddis/people/bernstein.html", "dblp": "225/8177;91/7572;b/AbrahamBernstein", "google_scholar": "https://scholar.google.com/citations?hl=de;https://scholar.google.com/citations?hl=en;ZYNjTykAAAAJ", "or_profile": "~Florian_Ruosch1;~Cristina_Sarasua2;~Abraham_Bernstein1", "aff": "Department of Informatics, University of Zurich, University of Zurich;Koblenz University;Department of Informatics, University of Zurich, University of Zurich", "aff_domain": "ifi.uzh.ch;uni-koblenz.de;ifi.uzh.ch", "position": "PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nruosch2023dream,\ntitle={{DREAM}: Deployment of Recombination and Ensembles in Argument Mining},\nauthor={Florian Ruosch and Cristina Sarasua and Abraham Bernstein},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=gZykO63OUh}\n}", "github": "", "project": "", "reviewers": "9Pww;tpwx;1CYs", "site": "https://openreview.net/forum?id=gZykO63OUh", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;2", "excitement": "4;3;3", "reproducibility": "5;5;3", "correctness": "4;4;3", "rating_avg": 3.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.333333333333333, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-0257-3318;;", "linkedin": ";;", "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Zurich;Koblenz University", "aff_unique_dep": "Department of Informatics;", "aff_unique_url": "https://www.uzh.ch;https://www.uni-koblenz-landau.de", "aff_unique_abbr": "UZH;Uni Koblenz", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "Switzerland;Germany" }, { "id": "gccSE5vDZ7", "title": "Multilingual Simplification of Medical Texts", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Automated text simplification aims to produce simple versions of complex texts. 
This task is especially useful in the medical domain, where the latest medical findings are typically communicated via complex and technical articles. This creates barriers for laypeople seeking access to up-to-date medical findings, consequently impeding progress on health literacy. Most existing work on medical text simplification has focused on monolingual settings, with the result that such evidence would be available only in just one language (most often, English). This work addresses this limitation via multilingual simplification, i.e., directly simplifying complex texts into simplified texts in multiple languages. We introduce MultiCochrane, the first sentence-aligned multilingual text simplification dataset for the medical domain in four languages: English, Spanish, French, and Farsi. We evaluate fine-tuned and zero-shot models across these languages with extensive human assessments and analyses. Although models can generate viable simplified texts, we identify several outstanding challenges that this dataset might be used to address.", "keywords": "Simplification;Medical Simplification;Multilingual", "primary_area": "", "supplementary_material": "", "author": "Sebastian Antony Joseph;Kathryn Kazanas;Keziah Reina;Vishnesh J Ramanathan;Wei Xu;Byron C Wallace;Junyi Jessy Li", "authorids": "~Sebastian_Antony_Joseph1;~Kathryn_Kazanas1;~Keziah_Reina1;~Vishnesh_J_Ramanathan1;~Wei_Xu5;~Byron_C_Wallace1;~Junyi_Jessy_Li2", "gender": "M;;;M;F;M;F", "homepage": "https://sebajoe.github.io;;;;https://cocoxu.github.io/;http://www.byronwallace.com/;https://jessyli.com", "dblp": "347/2026;;;;32/1213-4.html;00/8247;148/9553", "google_scholar": ";;;YNc31qMAAAAJ;BfOdG-oAAAAJ;KTzRHmwAAAAJ;tJGm3-YAAAAJ", "or_profile": "~Sebastian_Antony_Joseph1;~Kathryn_Kazanas1;~Keziah_Reina1;~Vishnesh_J_Ramanathan1;~Wei_Xu5;~Byron_C_Wallace1;~Junyi_Jessy_Li2", "aff": "University of Texas at Austin;University of Texas at Austin;University of Texas at Austin;Georgia Institute of Technology;Georgia Institute of Technology;Northeastern University;University of Texas at Austin", "aff_domain": "utexas.edu;utexas.edu;utexas.edu;gatech.edu;gatech.edu;northeastern.edu;utexas.edu", "position": "Undergrad student;Undergrad student;Undergrad student;Undergrad student;Associate Professor;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\njoseph2023multilingual,\ntitle={Multilingual Simplification of Medical Texts},\nauthor={Sebastian Antony Joseph and Kathryn Kazanas and Keziah Reina and Vishnesh J Ramanathan and Wei Xu and Byron C Wallace and Junyi Jessy Li},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=gccSE5vDZ7}\n}", "github": "", "project": "", "reviewers": "dMiC;gRpt;XHER", "site": "https://openreview.net/forum?id=gccSE5vDZ7", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;4;3", "excitement": "4;3;4", "reproducibility": "2;4;5", "correctness": "4;4;4", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;;", "linkedin": ";kathryn-kazanas-310bba239/;keziah-reina-985b8424b/;;;;", "aff_unique_index": "0;0;0;1;1;2;0", "aff_unique_norm": "University of Texas at Austin;Georgia Institute of Technology;Northeastern University", "aff_unique_dep": ";;", "aff_unique_url": 
"https://www.utexas.edu;https://www.gatech.edu;https://www.northeastern.edu", "aff_unique_abbr": "UT Austin;Georgia Tech;NEU", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Austin;", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "gd8TxhKoLv", "title": "PROTEGE: Prompt-based Diverse Question Generation from Web Articles", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Rich and diverse knowledge bases (KB) are foundational building blocks for online knowledge sharing communities such as StackOverflow and Quora, and applications such as conversational assistants (aka chatbots). A popular format for knowledge bases is question-answer pairs (or FAQs), where questions are designed to accurately match a multitude of queries. In this paper, we address the problem of automatic creation of such Q\\&A-based knowledge bases from domain-specific, long-form textual content (e.g., web articles). Specifically, we consider the problem of question generation, which is the task of generating questions given a paragraph of text as input, with a goal to achieve both diversity and fidelity of the generated questions. Towards this goal we propose PROTEGE, a diverse question generation framework which consists of (1) a novel encoder-decoder based Large Language Model (LLM) architecture which can take a variety of prompts and generate a diverse set of candidate questions, and (2) a hill-climbing algorithm that maximizes a sub-modular objective function to balance diversity with fidelity. Through our experiments on three popular public Q\\&A datasets, we demonstrate that PROTEGE improves diversity by +16% and fidelity by +8% over diverse beam search and prompt-based baselines.", "keywords": "Large Language Models;Question Generation;Question Answering;Diversity;Fidelity", "primary_area": "", "supplementary_material": "", "author": "Vinayak S Puranik;Anirban Majumder;Vineet Chaoji", "authorids": "~Vinayak_S_Puranik1;~Anirban_Majumder2;~Vineet_Chaoji1", "gender": "M;;M", "homepage": ";;", "dblp": ";;88/995", "google_scholar": ";;https://scholar.google.co.in/citations?user=sXPYZ-IAAAAJ", "or_profile": "~Vinayak_S_Puranik1;~Anirban_Majumder2;~Vineet_Chaoji1", "aff": "Amazon;;", "aff_domain": "amazon.com;;", "position": "Researcher;;", "bibtex": "@inproceedings{\npuranik2023protege,\ntitle={{PROTEGE}: Prompt-based Diverse Question Generation from Web Articles},\nauthor={Vinayak S Puranik and Anirban Majumder and Vineet Chaoji},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=gd8TxhKoLv}\n}", "github": "", "project": "", "reviewers": "aVs7;ypQJ;MjJZ", "site": "https://openreview.net/forum?id=gd8TxhKoLv", "pdf_size": 0, "rating": "4;4;4", "confidence": "2;3;4", "excitement": "3;4;2", "reproducibility": "3;4;2", "correctness": "4;4;3", "rating_avg": 4.0, "confidence_avg": 3.0, "excitement_avg": 3.0, "reproducibility_avg": 3.0, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": "VINAYAK-PURANIK-3636971/;;vineetchaoji/", "aff_unique_index": "0", "aff_unique_norm": "Amazon", "aff_unique_dep": "Amazon.com, Inc.", "aff_unique_url": "https://www.amazon.com", "aff_unique_abbr": "Amazon", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "id": "gdUBK65fwn", "title": "LLM-Adapters: An Adapter Family for 
Parameter-Efficient Fine-Tuning of Large Language Models", "track": "main", "status": "Long Main", "tldr": "", "abstract": "The success of large language models (LLMs), like GPT-4 and ChatGPT, has led to the development of numerous cost-effective and accessible alternatives that are created by finetuning open-access LLMs with task-specific data (e.g., ChatDoctor) or instruction data (e.g., Alpaca). Among the various fine-tuning methods, adapter-based parameter-efficient fine-tuning (PEFT) is undoubtedly one of the most attractive topics, as it only requires fine-tuning a few external parameters instead of the entire LLMs while achieving comparable or even better performance. To enable further research on PEFT methods of LLMs, this paper presents LLM-Adapters, an easy-to-use framework that integrates various adapters into LLMs and can execute these adapter-based PEFT methods of LLMs for different tasks. The framework includes state-of-the-art open-access LLMs such as LLaMA, BLOOM, and GPT-J, as well as widely used adapters such as Series adapters, Parallel adapter, Prompt-based learning and Reparametrization-based methods. Moreover, we conduct extensive empirical studies on the impact of adapter types, placement locations, and hyper-parameters to the best design for each adapter-based methods. We evaluate the effectiveness of the adapters on fourteen datasets from two different reasoning tasks, Arithmetic Reasoning and Commonsense Reasoning. The results demonstrate that using adapter-based PEFT in smaller-scale LLMs (7B) with few extra trainable parameters yields comparable, and in some cases superior, performance to powerful LLMs (175B) in zero-shot inference on simple math reasoning datasets.", "keywords": "Parameter-Efficient Fine-Tuning;Large Language Models", "primary_area": "", "supplementary_material": "", "author": "Zhiqiang Hu;Lei Wang;Yihuai Lan;Wanyu Xu;Ee-Peng Lim;Lidong Bing;Xing Xu;Soujanya Poria;Roy Ka-Wei Lee", "authorids": "~Zhiqiang_Hu3;~Lei_Wang28;~Yihuai_Lan1;~Wanyu_Xu1;~Ee-Peng_Lim1;~Lidong_Bing2;~Xing_Xu3;~Soujanya_Poria1;~Roy_Ka-Wei_Lee1", "gender": ";M;M;F;M;M;M;M;", "homepage": "https://hzq950419.github.io/HomePage/;https://demoleiwang.github.io/HomePage/;;;https://sis.smu.edu.sg/faculty/profile/9626;;https://soujanyaporia.github.io;https://www.socialai.studio/team;https://lidongbing.github.io", "dblp": ";;301/8144;318/2933;l/EePengLim.html;76/834-1.html;116/4904;139/2266;53/6625", "google_scholar": "vjQQUnwAAAAJ;VidA02oAAAAJ;;;https://scholar.google.com.tw/citations?user=r0wOAikAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.co.in/citations?user=oS6gRc4AAAAJ;https://scholar.google.com.sg/citations?user=uQxdOlsAAAAJ;https://scholar.google.com/citations?hl=en", "or_profile": "~Zhiqiang_Hu3;~Lei_Wang28;~Yihuai_Lan1;~Wanyu_Xu1;~Ee-Peng_Lim1;~Xing_Xu3;~Soujanya_Poria1;~Roy_Ka-Wei_Lee1;~Lidong_Bing3", "aff": "Singapore University of Technology and Design;Singapore Management University;;Xihua University;Singapore Management University;University of Electronic Science and Technology of China;Singapore University of Technology and Design;Singapore University of Technology and Design;Alibaba Group", "aff_domain": "sutd.edu.sg;smu.edu.sg;;xhu.edu.cn;smu.edu.sg;uestc.edu.cn;sutd.edu.sg;sutd.edu.sg;alibaba-inc.com", "position": "PhD student;PhD student;;Undergrad student;Full Professor;Researcher;Associate Professor;Assistant Professor;Scientist", "bibtex": "@inproceedings{\nhu2023llmadapters,\ntitle={{LLM}-Adapters: An Adapter Family for 
Parameter-Efficient Fine-Tuning of Large Language Models},\nauthor={Zhiqiang Hu and Lei Wang and Yihuai Lan and Wanyu Xu and Ee-Peng Lim and Lidong Bing and Xing Xu and Soujanya Poria and Roy Ka-Wei Lee},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=gdUBK65fwn}\n}", "github": "", "project": "", "reviewers": "Rt96;XzQ1;s54V;FcDg", "site": "https://openreview.net/forum?id=gdUBK65fwn", "pdf_size": 0, "rating": "5;5;5;5", "confidence": "3;4;4;4", "excitement": "4;4;4;4", "reproducibility": "3;3;3;3", "correctness": "4;4;4;4", "rating_avg": 5.0, "confidence_avg": 3.75, "excitement_avg": 4.0, "reproducibility_avg": 3.0, "correctness_avg": 4.0, "replies_avg": 13, "authors#_avg": 9, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;0000-0003-0065-8665;;;0000-0002-1986-7750;", "linkedin": ";;;;;;;;", "aff_unique_index": "0;1;2;1;3;0;0;4", "aff_unique_norm": "Singapore University of Technology and Design;Singapore Management University;Xihua University;University of Electronic Science and Technology of China;Alibaba Group", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.sutd.edu.sg;https://www.smu.edu.sg;http://www.xihua.edu.cn;https://www.uestc.edu.cn;https://www.alibaba.com", "aff_unique_abbr": "SUTD;SMU;;UESTC;Alibaba", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;1;0;0;1", "aff_country_unique": "Singapore;China" }, { "id": "ggTNeg2fem", "title": "Multimodal Automated Fact-Checking: A Survey", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Misinformation is often conveyed in multiple modalities, e.g. a miscaptioned image. Multimodal misinformation is perceived as more credible by humans, and spreads faster than its text-only counterparts. While an increasing body of research investigates automated \nfact-checking (AFC), previous surveys mostly focus on text. In this survey, we conceptualise a framework for AFC including subtasks unique to multimodal misinformation. Furthermore, we discuss related terms used in different communities and map them to our framework. We focus on four modalities prevalent in real-world fact-checking: text, image, audio, and video. 
We survey benchmarks and models, and discuss limitations and promising directions for future research", "keywords": "fact checking;multimodality;survey", "primary_area": "", "supplementary_material": "", "author": "Mubashara Akhtar;Michael Sejr Schlichtkrull;Zhijiang Guo;Oana Cocarascu;Elena Simperl;Andreas Vlachos", "authorids": "~Mubashara_Akhtar1;~Michael_Sejr_Schlichtkrull1;~Zhijiang_Guo2;~Oana_Cocarascu2;~Elena_Simperl1;~Andreas_Vlachos1", "gender": "F;M;M;;;M", "homepage": "https://www.mubasharaakhtar.com/;http://michschli.github.io/;https://cartus.github.io/;;;http://andreasvlachos.github.io/", "dblp": "324/3336;186/7091;43/6147;185/7576;p/ElenaPaslaruBontasSimperl;18/1071-1", "google_scholar": "x8K6TisAAAAJ;z8YvWyEAAAAJ;8b-u3icAAAAJ;https://scholar.google.co.uk/citations?hl=en;;https://scholar.google.es/citations?user=XjWnyM4AAAAJ", "or_profile": "~Mubashara_Akhtar1;~Michael_Sejr_Schlichtkrull1;~Zhijiang_Guo2;~Oana_Cocarascu2;~Elena_Simperl1;~Andreas_Vlachos1", "aff": "King's College London;University of Cambridge;University of Cambridge;King's College London;King's College London;University of Cambridge", "aff_domain": "kcl.ac.uk;cam.ac.uk;cam.ac.uk;kcl.ac.uk;kcl.ac.uk;cam.ac.uk", "position": "PhD student;Postdoc;Postdoc;Assistant Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nakhtar2023multimodal,\ntitle={Multimodal Automated Fact-Checking: A Survey},\nauthor={Mubashara Akhtar and Michael Sejr Schlichtkrull and Zhijiang Guo and Oana Cocarascu and Elena Simperl and Andreas Vlachos},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=ggTNeg2fem}\n}", "github": "", "project": "", "reviewers": "jMRp;4pKp;p4Yf", "site": "https://openreview.net/forum?id=ggTNeg2fem", "pdf_size": 0, "rating": "3;3;3", "confidence": "5;3;3", "excitement": "4;4;2", "reproducibility": "4;4;0", "correctness": "3;2;2", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 2.6666666666666665, "correctness_avg": 2.3333333333333335, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;0000-0003-2123-5071", "linkedin": ";;;;;andreas-vlachos-70ab391", "aff_unique_index": "0;1;1;0;0;1", "aff_unique_norm": "King's College London;University of Cambridge", "aff_unique_dep": ";", "aff_unique_url": "https://www.kcl.ac.uk;https://www.cam.ac.uk", "aff_unique_abbr": "KCL;Cambridge", "aff_campus_unique_index": "1;1;1", "aff_campus_unique": ";Cambridge", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United Kingdom" }, { "id": "ghF1EB6APx", "title": "Cross-Modal Conceptualization in Bottleneck Models", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Concept Bottleneck Models (CBMs) assume that training examples (e.g., x-ray images) are annotated with high-level concepts (e.g., types of abnormalities), and perform classification by first predicting the concepts, followed by predicting the label relying on these concepts. However, the primary challenge in employing CBMs lies in the requirement of defining concepts predictive of the label and annotating training examples with these concepts. In our approach, we adopt a more moderate assumption and instead use text descriptions (e.g., radiology reports), accompanying the images, to guide the induction of concepts. 
Our crossmodal approach treats concepts as discrete latent variables and promotes concepts that (1) are predictive of the label, and (2) can be predicted reliably from both the image and text. Through experiments conducted on datasets ranging from synthetic datasets (e.g., synthetic images with generated descriptions) to realistic medical imaging datasets, we demonstrate that crossmodal learning encourages the induction of interpretable concepts while also facilitating disentanglement.", "keywords": "interpretability;cross-modal learning;concept-based models;cross-attention mechanism;robustness", "primary_area": "", "supplementary_material": "", "author": "Danis Alukaev;Semen Kiselev;Ilya Pershin;Bulat Ibragimov;Vladimir V. Ivanov;Alexey Kornaev;Ivan Titov", "authorids": "~Danis_Alukaev1;~Semen_Kiselev1;~Ilya_Pershin1;~Bulat_Ibragimov3;~Vladimir_V._Ivanov1;~Alexey_Kornaev1;~Ivan_Titov1", "gender": "M;M;;M;M;;", "homepage": ";;;;;;http://ivan-titov.org", "dblp": ";;;;89/4816-1;;08/5391", "google_scholar": ";;;https://scholar.google.ru/citations?hl=ru;16AyxX0AAAAJ;;https://scholar.google.nl/citations?user=FKUc3vsAAAAJ", "or_profile": "~Danis_Alukaev1;~Semen_Kiselev1;~Ilya_Pershin1;~Bulat_Ibragimov3;~Vladimir_V._Ivanov1;~Alexey_Kornaev1;~Ivan_Titov1", "aff": "Innopolis University;Innopolis University;;University of Copenhagen;Innopolis University;;University of Amsterdam", "aff_domain": "innopolis.ru;innopolis.ru;;ku.dk;innopolis.ru;;uva.nl", "position": "Undergrad student;Researcher;;Associate Professor;Associate Professor;;Associate Professor", "bibtex": "@inproceedings{\nalukaev2023crossmodal,\ntitle={Cross-Modal Conceptualization in Bottleneck Models},\nauthor={Danis Alukaev and Semen Kiselev and Ilya Pershin and Bulat Ibragimov and Vladimir V. Ivanov and Alexey Kornaev and Ivan Titov},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=ghF1EB6APx}\n}", "github": "", "project": "", "reviewers": "RnA7;K3Jd;UqZr", "site": "https://openreview.net/forum?id=ghF1EB6APx", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;4;3", "excitement": "4;4;3", "reproducibility": "4;4;3", "correctness": "4;4;4", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;0000-0003-3289-8188;;", "linkedin": "danis-alukaev/;semen-kiselev-1640b3184/;;;nomemm;;", "aff_unique_index": "0;0;1;0;2", "aff_unique_norm": "Innopolis University;University of Copenhagen;University of Amsterdam", "aff_unique_dep": ";;", "aff_unique_url": "https://innopolis.ru/en;https://www.ku.dk;https://www.uva.nl", "aff_unique_abbr": "Innopolis;UCPH;UvA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;2", "aff_country_unique": "Russian Federation;Denmark;Netherlands" }, { "id": "gjrs5oF8TC", "title": "INVITE: a Testbed of Automatically Generated Invalid Questions to Evaluate Large Language Models for Hallucinations", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "Recent advancements in Large language models (LLMs) have enabled them to hold free form conversations over multiple turns, but they exhibit a tendency to make unfounded and incorrect statements, commonly known as hallucinations. \nIn particular, LLMs hallucinate frequently when given invalid questions, i.e. 
ones with incorrect assumptions. \nThe most common approach to evaluate LLMs on hallucinations is to test them on Question Answering (QA) test sets such as TruthfulQA. \nHowever, LLMs are increasingly pretrained on massive text corpora scraped from the Internet, which may inevitably expose these test sets to the model during training, leading eventually to an overestimation of model performances on these test sets. \nIn this work, we present an alternative framework to address this risk and to foster further research towards making LLMs robust against invalid questions.\nWe name our framework INVITE: a testbed of automatically generated INValId questions to evaluaTE large language models for hallucinations. In each instantiation, our framework is set up to create a fresh batch of invalid questions by distorting valid facts in which subjects or objects are replaced by similar entities. We evaluate several state of the art LLMs against a testset generated by our framework and highlight its capacity to trigger hallucinations in these models.", "keywords": "Large Language Models;Hallucinations;Question Answering", "primary_area": "", "supplementary_material": "", "author": "Anil Ramakrishna;Rahul Gupta;Jens Lehmann;Morteza Ziyadi", "authorids": "~Anil_Ramakrishna1;~Rahul_Gupta3;~Jens_Lehmann3;~Morteza_Ziyadi1", "gender": ";M;M;M", "homepage": ";;http://jens-lehmann.org;", "dblp": "135/6428;;71/4882.html;", "google_scholar": "KNu_OpsAAAAJ;1CFrm2YAAAAJ;https://scholar.google.de/citations?user=sEaQ5rgAAAAJ;", "or_profile": "~Anil_Ramakrishna1;~Rahul_Gupta3;~Jens_Lehmann3;~Morteza_Ziyadi1", "aff": "Amazon;Amazon;Fraunhofer IAIS;", "aff_domain": "amazon.com;amazon.com;iais.fraunhofer.de;", "position": "Researcher;Researcher;Lead Scientist;", "bibtex": "@inproceedings{\nramakrishna2023invite,\ntitle={{INVITE}: a Testbed of Automatically Generated Invalid Questions to Evaluate Large Language Models for Hallucinations},\nauthor={Anil Ramakrishna and Rahul Gupta and Jens Lehmann and Morteza Ziyadi},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=gjrs5oF8TC}\n}", "github": "", "project": "", "reviewers": "5D6c;obFN;E6J6", "site": "https://openreview.net/forum?id=gjrs5oF8TC", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;4", "excitement": "3;4;2", "reproducibility": "5;4;5", "correctness": "3;3;3", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 3.0, "reproducibility_avg": 4.666666666666667, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0001-9108-4278;", "linkedin": ";;jenslehmann82/;morteza-ziyadi-a3818ba9/", "aff_unique_index": "0;0;1", "aff_unique_norm": "Amazon;Fraunhofer Institute for Applied Information Technology", "aff_unique_dep": "Amazon.com, Inc.;", "aff_unique_url": "https://www.amazon.com;https://www.iais.fraunhofer.de/", "aff_unique_abbr": "Amazon;Fraunhofer IAIS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1", "aff_country_unique": "United States;Germany" }, { "id": "gkQo3CoPLd", "title": "GeoLM: Empowering Language Models for Geospatially Grounded Language Understanding", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Humans subconsciously engage in geospatial reasoning when reading articles. We recognize place names and their spatial relations in text and mentally associate them with their physical locations on Earth. 
Although pretrained language models can mimic this cognitive process using linguistic context, they do not utilize valuable geospatial information in large, widely available geographical databases, e.g., OpenStreetMap. This paper introduces GeoLM, a geospatially grounded language model that enhances the understanding of geo-entities in natural language. GeoLM leverages geo-entity mentions as anchors to connect linguistic information in text corpora with geospatial information extracted from geographical databases. GeoLM connects the two types of context through contrastive learning and masked language modeling. It also incorporates a spatial coordinate embedding mechanism to encode distance and direction relations to capture geospatial context. In the experiment, we demonstrate that GeoLM exhibits promising capabilities in supporting toponym recognition, toponym linking, relation extraction, and geo-entity typing, which bridge the gap between natural language processing and geospatial sciences. The code is publicly available at https://github.com/knowledge-computing/geolm.", "keywords": "geospatial grounding;language model", "primary_area": "", "supplementary_material": "", "author": "Zekun Li;Wenxuan Zhou;Yao-Yi Chiang;Muhao Chen", "authorids": "~Zekun_Li6;~Wenxuan_Zhou2;~Yao-Yi_Chiang1;~Muhao_Chen1", "gender": ";M;;M", "homepage": "https://zekun-li.github.io/;https://wzhouad.github.io/;https://yaoyichi.github.io/;https://muhaochen.github.io/", "dblp": ";;39/2145.html;173/2608", "google_scholar": "7oD0aCcAAAAJ;https://scholar.google.com/citations?hl=en;Xf3M93cAAAAJ;k79yEZkAAAAJ", "or_profile": "~Zekun_Li6;~Wenxuan_Zhou2;~Yao-Yi_Chiang1;~Muhao_Chen1", "aff": "University of Minnesota - Twin Cities;University of Southern California;University of Minnesota, Minneapolis;University of Southern California", "aff_domain": "umn.edu;usc.edu;umn.edu;usc.edu", "position": "PhD student;PhD student;Associate Professor;Assistant Research Professor", "bibtex": "@inproceedings{\nli2023geolm,\ntitle={Geo{LM}: Empowering Language Models for Geospatially Grounded Language Understanding},\nauthor={Zekun Li and Wenxuan Zhou and Yao-Yi Chiang and Muhao Chen},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=gkQo3CoPLd}\n}", "github": "", "project": "", "reviewers": "TG2x;HVHc;zGPk", "site": "https://openreview.net/forum?id=gkQo3CoPLd", "pdf_size": 0, "rating": "4;4;4", "confidence": "5;4;4", "excitement": "3;3;3", "reproducibility": "5;3;3", "correctness": "3;4;3", "rating_avg": 4.0, "confidence_avg": 4.333333333333333, "excitement_avg": 3.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0002-8923-0130;0000-0003-0118-3147", "linkedin": ";;;", "aff_unique_index": "0;1;0;1", "aff_unique_norm": "University of Minnesota;University of Southern California", "aff_unique_dep": ";", "aff_unique_url": "https://www.minnesota.edu;https://www.usc.edu", "aff_unique_abbr": "UMN;USC", "aff_campus_unique_index": "0;1;2;1", "aff_campus_unique": "Twin Cities;Los Angeles;Minneapolis", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "glxrubmH91", "title": "RAPL: A Relation-Aware Prototype Learning Approach for Few-Shot Document-Level Relation Extraction", "track": "main", "status": "Long Main", "tldr": "", "abstract": "How to identify semantic relations 
among entities in a document when only a few labeled documents are available? Few-shot document-level relation extraction (FSDLRE) is crucial for addressing the pervasive data scarcity problem in real-world scenarios. Metric-based meta-learning is an effective framework widely adopted for FSDLRE, which constructs class prototypes for classification. However, existing works often struggle to obtain class prototypes with accurate relational semantics: 1) To build prototype for a target relation type, they aggregate the representations of all entity pairs holding that relation, while these entity pairs may also hold other relations, thus disturbing the prototype. 2) They use a set of generic NOTA (none-of-the-above) prototypes across all tasks, neglecting that the NOTA semantics differs in tasks with different target relation types. In this paper, we propose a relation-aware prototype learning method for FSDLRE to strengthen the relational semantics of prototype representations. By judiciously leveraging the relation descriptions and realistic NOTA instances as guidance, our method effectively refines the relation prototypes and generates task-specific NOTA prototypes. Extensive experiments demonstrate that our method outperforms state-of-the-art approaches by average 2.61\\% $F_1$ across various settings of two FSDLRE benchmarks.", "keywords": "Document-Level Relation Extraction;Few-Shot Learning;Metric-Based Meta-Learning;Relation-Aware Prototype Learning", "primary_area": "", "supplementary_material": "", "author": "Shiao Meng;Xuming Hu;Aiwei Liu;Shuang Li;Fukun Ma;Yawen Yang;Lijie Wen", "authorids": "~Shiao_Meng1;~Xuming_Hu1;~Aiwei_Liu1;~Shuang_Li11;~Fukun_Ma1;~Yawen_Yang1;~Lijie_Wen1", "gender": "M;M;M;M;M;M;M", "homepage": "https://github.com/msa30;https://xuminghu.github.io/;https://exlaw.github.io/;;;https://www.thss.tsinghua.edu.cn/en/faculty/lijiewen.htm;https://thulishuang.github.io/", "dblp": "333/0475;262/3664;321/4365;277/5603;273/6340;36/172-1;43/6294-15", "google_scholar": "https://scholar.google.com.hk/citations?user=2rd5iDIAAAAJ;dbBKbXoAAAAJ;UCOOmcEAAAAJ;;;https://scholar.google.com.tw/citations?user=f3C0jUIAAAAJ;LSTOX04AAAAJ", "or_profile": "~Shiao_Meng1;~Xuming_Hu1;~Aiwei_Liu1;~Fukun_Ma1;~Yawen_Yang1;~Lijie_Wen1;~Shu'ang_Li1", "aff": "Tsinghua University;Tsinghua University;Chinese University of Hong Kong;Tsinghua University;Tsinghua University;Tsinghua University;Tsinghua University", "aff_domain": "tsinghua.edu.cn;tsinghua.edu.cn;cuhk.hk;tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn", "position": "PhD student;PhD student;Visiting Scholar;PhD student;PhD student;Associate Professor;PhD student", "bibtex": "@inproceedings{\nmeng2023rapl,\ntitle={{RAPL}: A Relation-Aware Prototype Learning Approach for Few-Shot Document-Level Relation Extraction},\nauthor={Shiao Meng and Xuming Hu and Aiwei Liu and Shuang Li and Fukun Ma and Yawen Yang and Lijie Wen},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=glxrubmH91}\n}", "github": "", "project": "", "reviewers": "e9xn;rUUw;cDEF", "site": "https://openreview.net/forum?id=glxrubmH91", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;4;2", "excitement": "3;4;4", "reproducibility": "3;4;3", "correctness": "4;4;4", "rating_avg": 4.0, "confidence_avg": 3.0, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 7, "corr_rating_confidence": 0.0, 
"corr_rating_correctness": 0.0, "orcid": ";0000-0001-6075-4224;;;;0000-0003-0358-3160;0000-0003-0794-8091", "linkedin": ";;%E7%91%B7%E7%8E%AE-%E5%88%98-0722731a6/;;;;", "aff_unique_index": "0;0;1;0;0;0;0", "aff_unique_norm": "Tsinghua University;Chinese University of Hong Kong", "aff_unique_dep": ";", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.cuhk.edu.hk", "aff_unique_abbr": "THU;CUHK", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "gmVEVn0Qi5", "title": "InterroLang: Exploring NLP Models and Datasets through Dialogue-based Explanations", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "While recently developed NLP explainability methods let us open the black box in various ways (Madsen et al., 2022), a missing ingredient in this endeavor is an interactive tool offering a conversational interface. Such a dialogue system can help users explore datasets and models with explanations in a contextualized manner, e.g. via clarification or follow-up questions, and through a natural language interface. We adapt the conversational explanation framework TalkToModel (Slack et al., 2022) to the NLP domain, add new NLP-specific operations such as free-text rationalization, and illustrate its generalizability on three NLP tasks (dialogue act classification, question answering, hate speech detection). To recognize user queries for explanations, we evaluate fine-tuned and few-shot prompting models and implement a novel adapter-based approach. We then conduct two user studies on (1) the perceived correctness and helpfulness of the dialogues, and (2) the simulatability, i.e. how objectively helpful dialogical explanations are for humans in figuring out the model's predicted label when it's not shown. We found rationalization and feature attribution were helpful in explaining the model behavior. 
Moreover, users could more reliably predict the model outcome based on an explanation dialogue rather than one-off explanations.", "keywords": "explainability;dialogue;interpretability;dataset analysis;conversational ai;simulatability", "primary_area": "", "supplementary_material": "", "author": "Nils Feldhus;Qianli Wang;Tatiana Anikina;Sahil Chopra;Cennet Oguz;Sebastian M\u00f6ller", "authorids": "~Nils_Feldhus1;~Qianli_Wang1;~Tatiana_Anikina1;~Sahil_Chopra1;~Cennet_Oguz1;~Sebastian_M\u00f6ller1", "gender": "M;M;;M;F;M", "homepage": "https://nfelnlp.github.io/;https://qiaw99.github.io;https://www.dfki.de/en/web/about-us/employee/person/taan01;https://schopra6.github.io/;;", "dblp": "263/2666;217/4934;https://dblp.uni-trier.de/pid/337/1856;;;37/5849", "google_scholar": "nM50iv8AAAAJ;dKmUzp4AAAAJ;;;xMnsNJoAAAAJ;", "or_profile": "~Nils_Feldhus1;~Qianli_Wang1;~Tatiana_Anikina1;~Sahil_Chopra1;~Cennet_Oguz1;~Sebastian_M\u00f6ller1", "aff": "German Research Center for AI;Technische Universit\u00e4t Berlin;German Research Center for AI;Universit\u00e4t des Saarlandes;German Research Center for AI;Technische Universit\u00e4t Berlin", "aff_domain": "dfki.de;tu-berlin.de;dfki.de;uni-saarland.de;dfki.de;tu-berlin.de", "position": "PhD student;MS student;PhD student;MS student;PhD student;Full Professor", "bibtex": "@inproceedings{\nfeldhus2023interrolang,\ntitle={InterroLang: Exploring {NLP} Models and Datasets through Dialogue-based Explanations},\nauthor={Nils Feldhus and Qianli Wang and Tatiana Anikina and Sahil Chopra and Cennet Oguz and Sebastian M{\\\"o}ller},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=gmVEVn0Qi5}\n}", "github": "", "project": "", "reviewers": "VaJL;wK4Z;4uXs", "site": "https://openreview.net/forum?id=gmVEVn0Qi5", "pdf_size": 0, "rating": "2;2;2", "confidence": "4;3;4", "excitement": "3;4;4", "reproducibility": "5;4;4", "correctness": "3;4;4", "rating_avg": 2.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.333333333333333, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0009-0008-7408-7483;;;;0000-0002-6817-4133;", "linkedin": ";qianliwang/;;;;", "aff_unique_index": "0;1;0;2;0;1", "aff_unique_norm": "German Research Center for Artificial Intelligence;Technische Universit\u00e4t Berlin;Universit\u00e4t des Saarlandes", "aff_unique_dep": ";;", "aff_unique_url": "https://www.dfki.de/;https://www.tu-berlin.de;https://www.uni-saarland.de", "aff_unique_abbr": "DFKI;TU Berlin;UDS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "Germany" }, { "id": "goH9e5Vd44", "title": "Licon: A Diverse, Controllable and Challenging Linguistic Concept Learning Benchmark", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Concept Learning requires learning the definition of a general category from given training examples. Most of the existing methods focus on learning concepts from images. However, the visual information cannot present abstract concepts exactly, which struggles the introduction of novel concepts related to known concepts (e.g., \u2018Plant\u2019\u2192\u2018Asteroids\u2019). 
In this paper, inspired by the fact that humans learn most concepts through linguistic description, we introduce Linguistic Concept Learning benchmark (Licon), where concepts in diverse forms (e.g., plain attributes, images, and text) are defined by linguistic descriptions. The difficulty to learn novel concepts can be controlled by the number of attributes or the hierarchical relationships between concepts. The diverse and controllable concepts are used to support challenging evaluation tasks, including concept classification, attribute prediction, and concept relationship recognition. In addition, we design an entailment-based concept learning method (EnC) to model the relationship among concepts. Extensive experiments demonstrate the effectiveness of EnC. The benchmark will be released to the public soon.", "keywords": "Concept Learning;Zero-shot Learning;Linguistic Description", "primary_area": "", "supplementary_material": "", "author": "Shenglong Yu;Ying Zhang;wenya guo;Zhengkun Zhang;Ru Zhou;Xiaojie Yuan", "authorids": "~Shenglong_Yu2;~Ying_Zhang7;~wenya_guo1;~Zhengkun_Zhang1;~Ru_Zhou1;~Xiaojie_Yuan1", "gender": "M;F;F;M;F;", "homepage": "https://dbis.nankai.edu.cn;https://dbis.nankai.edu.cn/2023/0322/c12139a506904/page.htm;https://dbis.nankai.edu.cn/2023/0322/c12139a506909/page.htm;;https://github.com/SilyRab;https://dbis.nankai.edu.cn/2023/0322/c12139a506919/page.htm", "dblp": ";13/6769-15;234/4615;218/0107;74/10618;79/2280", "google_scholar": ";;;;eDjQ4O0AAAAJ;", "or_profile": "~Shenglong_Yu2;~Ying_Zhang7;~wenya_guo1;~Zhengkun_Zhang1;~Ru_Zhou1;~Xiaojie_Yuan1", "aff": "Nankai University;Nankai University;Nankai University;Baidu;Nankai University;Nankai University", "aff_domain": "nankai.edu.cn;nankai.edu.cn;nankai.edu.cn;baidu.com;nankai.edu.cn;nankai.edu.cn", "position": "MS student;Full Professor;Lecturer;Researcher;MS student;Full Professor", "bibtex": "@inproceedings{\nyu2023licon,\ntitle={Licon: A Diverse, Controllable and Challenging Linguistic Concept Learning Benchmark},\nauthor={Shenglong Yu and Ying Zhang and wenya guo and Zhengkun Zhang and Ru Zhou and Xiaojie Yuan},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=goH9e5Vd44}\n}", "github": "", "project": "", "reviewers": "rgen;y4Hw;1ug8", "site": "https://openreview.net/forum?id=goH9e5Vd44", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;3;3", "excitement": "4;3;3", "reproducibility": "4;1;3", "correctness": "4;3;3", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 2.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 10, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0003-4906-5828;0000-0001-5609-194X;;0009-0001-4525-1992;0000-0002-5876-6856", "linkedin": ";;;;;", "aff_unique_index": "0;0;0;1;0;0", "aff_unique_norm": "Nankai University;Baidu", "aff_unique_dep": ";Baidu, Inc.", "aff_unique_url": "http://www.nankai.edu.cn;https://www.baidu.com", "aff_unique_abbr": "NKU;Baidu", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "gqkg54QNDY", "title": "ViSoBERT: A Pre-Trained Language Model for Vietnamese Social Media Text Processing", "track": "main", "status": "Long Main", "tldr": "", "abstract": "English and Chinese, known as resource-rich languages, have witnessed the strong development of 
transformer-based language models for natural language processing tasks. Although Vietnam has approximately 100M people speaking Vietnamese, several pre-trained models, e.g., PhoBERT, ViBERT, and vELECTRA, performed well on general Vietnamese NLP tasks, including POS tagging and named entity recognition. These pre-trained language models are still limited to Vietnamese social media tasks. In this paper, we present the first monolingual pre-trained language model for Vietnamese social media texts, ViSoBERT, which is pre-trained on a large-scale corpus of high-quality and diverse Vietnamese social media texts using XLM-R architecture. Moreover, we explored our pre-trained model on five important natural language downstream tasks on Vietnamese social media texts: emotion recognition, hate speech detection, sentiment analysis, spam reviews detection, and hate speech spans detection. Our experiments demonstrate that ViSoBERT, with far fewer parameters, surpasses the previous state-of-the-art models on multiple Vietnamese social media tasks. Our ViSoBERT model is available only for research purposes. Disclaimer: This paper contains actual comments on social networks that might be construed as abusive, offensive, or obscene.", "keywords": "Language Models;Social Media Processing;Low resource", "primary_area": "", "supplementary_material": "", "author": "Nam Quoc Nguyen;Thang Chau Phan;Duc-Vu Nguyen;Kiet Van Nguyen", "authorids": "~Nam_Quoc_Nguyen1;~Thang_Chau_Phan1;~Duc-Vu_Nguyen1;~Kiet_Van_Nguyen1", "gender": "M;M;M;M", "homepage": ";;;https://sites.google.com/uit.edu.vn/kietnv", "dblp": ";338/8674;242/4583;174/4526", "google_scholar": "Lwy-MJIAAAAJ;;ThWzZL4AAAAJ;https://scholar.google.com.vn/citations?user=v3RSwOkAAAAJ", "or_profile": "~Nam_Quoc_Nguyen1;~Thang_Chau_Phan1;~Duc-Vu_Nguyen1;~Kiet_Van_Nguyen1", "aff": "University of Information Technology;University of Information Technology;VNUHCM - University of Information Technology;University of Information Technology, VNU-HCM", "aff_domain": "uit.edu.vn;uit.edu.vn;uit.edu.vn;uit.edu.vn", "position": "Undergrad student;Undergrad student;Researcher;PhD student", "bibtex": "@inproceedings{\nnguyen2023visobert,\ntitle={ViSo{BERT}: A Pre-Trained Language Model for Vietnamese Social Media Text Processing},\nauthor={Nam Quoc Nguyen and Thang Chau Phan and Duc-Vu Nguyen and Kiet Van Nguyen},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=gqkg54QNDY}\n}", "github": "", "project": "", "reviewers": "117r;mcGf;iNVu", "site": "https://openreview.net/forum?id=gqkg54QNDY", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;3;3", "excitement": "5;3;4", "reproducibility": "4;2;2", "correctness": "4;3;3", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 4.0, "reproducibility_avg": 2.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 10, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0009-0009-1468-7740;0009-0004-0844-6151;;0000-0002-8456-2742", "linkedin": ";;;kiet-nguyen-14907215a/", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Information Technology", "aff_unique_dep": "", "aff_unique_url": "", "aff_unique_abbr": "", "aff_campus_unique_index": "1", "aff_campus_unique": ";HCM", "aff_country_unique_index": "1;1", "aff_country_unique": ";Vietnam" }, { "id": "gslZifaE3t", "title": "How to Determine the Most Powerful Pre-trained Language Model without Brute Force 
Fine-tuning? An Empirical Survey", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Transferability estimation has been attached to great attention in the computer vision fields. Researchers try to estimate with low computational cost the performance of a model when transferred from a source task to a given target task. \nConsidering the effectiveness of such estimations, the communities of natural language processing also began to study similar problems for the selection of pre-trained language models. \nHowever, there is a lack of a comprehensive comparison between these estimation methods yet.\nAlso, the differences between vision and language scenarios make it doubtful whether previous conclusions can be established across fields. \nIn this paper, we first conduct a thorough survey of existing transferability estimation methods being able to find the most suitable model, \nthen we conduct a detailed empirical study for the surveyed methods based on the GLUE benchmark.\nFrom qualitative and quantitative analyses, we demonstrate the strengths and weaknesses of existing methods and show that H-Score generally performs well with superiorities in effectiveness and efficiency.\nWe also outline the difficulties of consideration of training details, applicability to text generation, and consistency to certain metrics which shed light on future directions.", "keywords": "Pre-trained Language Models;Transfer Learning;Transferability Estimation;Model Selection", "primary_area": "", "supplementary_material": "", "author": "Jun Bai;Xiaofeng Zhang;Chen Li;Hanhua Hong;Xi Xu;Chenghua Lin;Wenge Rong", "authorids": "~Jun_Bai1;~Xiaofeng_Zhang2;~Chen_Li36;~Hanhua_Hong1;~Xi_Xu1;~Chenghua_Lin1;~Wenge_Rong1", "gender": "M;;M;M;M;;M", "homepage": ";;;;https://orcid.org/0000-0002-5269-0651;;", "dblp": ";;;;;;18/5572.html", "google_scholar": "https://scholar.google.com.hk/citations?user=D4WEfiEAAAAJ;;;;;;", "or_profile": "~Jun_Bai1;~Xiaofeng_Zhang2;~Chen_Li36;~Hanhua_Hong1;~Xi_Xu1;~Chenghua_Lin1;~Wenge_Rong1", "aff": "Beihang University;;Beihang University;Beihang University;School of Software Engineering;;Beihang University", "aff_domain": "buaa.edu.cn;;buaa.edu.cn;buaa.edu.cn;bjut.edu;;buaa.edu.cn", "position": "PhD student;;PhD student;Undergrad student;Lecturer;;Full Professor", "bibtex": "@inproceedings{\nbai2023how,\ntitle={How to Determine the Most Powerful Pre-trained Language Model without Brute Force Fine-tuning? 
An Empirical Survey},\nauthor={Jun Bai and Xiaofeng Zhang and Chen Li and Hanhua Hong and Xi Xu and Chenghua Lin and Wenge Rong},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=gslZifaE3t}\n}", "github": "", "project": "", "reviewers": "35ZS;oqBj;JTER", "site": "https://openreview.net/forum?id=gslZifaE3t", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;4;4", "excitement": "4;2;2", "reproducibility": "4;4;4", "correctness": "3;2;3", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 2.6666666666666665, "reproducibility_avg": 4.0, "correctness_avg": 2.6666666666666665, "replies_avg": 11, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-5500-3976;;0000-0002-7508-7222;0009-0005-2106-5641;0000-0002-5269-0651;;", "linkedin": ";;;%E7%BF%B0%E5%8D%8E-%E6%B4%AA-60163727b/;;;", "aff_unique_index": "0;0;0;1;0", "aff_unique_norm": "Beihang University;School of Software Engineering", "aff_unique_dep": ";Software Engineering", "aff_unique_url": "http://www.buaa.edu.cn/;", "aff_unique_abbr": "BUAA;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China;" }, { "id": "gybvlVXT6z", "title": "Black-Box Tuning of Vision-Language Models with Effective Gradient Approximation", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Parameter-efficient fine-tuning (PEFT) methods have provided an effective way for adapting large vision-language models to specific tasks or scenarios. Typically, they learn a very small scale of parameters for pre-trained models in a white-box formulation, which assumes model architectures to be known and parameters to be accessible. However, large models are often not open-source due to considerations of preventing abuse or commercial factors, hence posing a barrier to the deployment of white-box PEFT methods. To alleviate the dependence on model accessibility, we introduce collaborative black-box tuning (CBBT) for both textual prompt optimization and output feature adaptation for black-box models. Specifically, considering that the backpropagation gradients are blocked, we approximate the gradients of textual prompts by analyzing the predictions with perturbed prompts. Secondly, a lightweight adapter is deployed over the output feature of the inaccessible model, further facilitating the model adaptation process. Empowered with these designs, our CBBT is extensively evaluated on eleven downstream benchmarks and achieves remarkable improvements compared to existing black-box VL adaptation methods. 
Our code will be made publicly available.", "keywords": "Prompt Tuning;Black-box Model;Vision-language Model", "primary_area": "", "supplementary_material": "", "author": "Zixian Guo;Yuxiang Wei;Ming Liu;Zhilong Ji;Jinfeng Bai;Yiwen Guo;Wangmeng Zuo", "authorids": "~Zixian_Guo1;~Yuxiang_Wei1;~Ming_Liu10;~Zhilong_Ji1;~Jinfeng_Bai1;~Yiwen_Guo1;~Wangmeng_Zuo3", "gender": "M;M;M;M;M;;M", "homepage": "https://github.com/guozix;;;;;;", "dblp": "247/3282;47/8871-1;20/2039-18;263/6772.html;120/7270.html;;93/2671", "google_scholar": ";hORhL7YAAAAJ;7PMGvggAAAAJ;;;;rUOpCEYAAAAJ", "or_profile": "~Zixian_Guo1;~Yuxiang_Wei1;~Ming_Liu10;~Zhilong_Ji1;~Jinfeng_Bai1;~Yiwen_Guo1;~Wangmeng_Zuo3", "aff": "Harbin Institute of Technology;Harbin Institute of Technology;Harbin Institute of Technology;Tomorrow Advancing Life;TAL;;Harbin Institute of Technology", "aff_domain": "hit.edu.cn;hit.edu.cn;hit.edu.cn;tal.com;tal.com;;hit.edu.cn", "position": "MS student;PhD student;PhD student;Researcher;Researcher;;Full Professor", "bibtex": "@inproceedings{\nguo2023blackbox,\ntitle={Black-Box Tuning of Vision-Language Models with Effective Gradient Approximation},\nauthor={Zixian Guo and Yuxiang Wei and Ming Liu and Zhilong Ji and Jinfeng Bai and Yiwen Guo and Wangmeng Zuo},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=gybvlVXT6z}\n}", "github": "", "project": "", "reviewers": "ztat;ya24;QPix", "site": "https://openreview.net/forum?id=gybvlVXT6z", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;4;4", "excitement": "3;4;3", "reproducibility": "4;4;5", "correctness": "3;3;3", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.333333333333333, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-8993-7195;0000-0001-9136-8481;;;;0000-0002-3330-783X", "linkedin": ";;;;;;", "aff_unique_index": "0;0;0;1;2;0", "aff_unique_norm": "Harbin Institute of Technology;Tomorrow Advancing Life;TAL", "aff_unique_dep": ";;", "aff_unique_url": "http://www.hit.edu.cn/;;", "aff_unique_abbr": "HIT;;", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Harbin;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China;" }, { "id": "gzRBs4gIbz", "title": "Non-autoregressive Streaming Transformer for Simultaneous Translation", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Simultaneous machine translation (SiMT) models are trained to strike a balance between latency and translation quality. However, training these models to achieve high quality while maintaining low latency often leads to a tendency for aggressive anticipation. We argue that such issue stems from the autoregressive architecture upon which most existing SiMT models are built. To address those issues, we propose non-autoregressive streaming Transformer (NAST) which comprises a unidirectional encoder and a non-autoregressive decoder with intra-chunk parallelism. We enable NAST to generate the blank token or repetitive tokens to adjust its READ/WRITE strategy flexibly, and train it to maximize the non-monotonic latent alignment with an alignment-based latency loss. 
Experiments on various SiMT benchmarks demonstrate that NAST outperforms previous strong autoregressive SiMT baselines.", "keywords": "simultaneous translation;non-autoregressive generation", "primary_area": "", "supplementary_material": "", "author": "Zhengrui Ma;Shaolei Zhang;Shoutao Guo;Chenze Shao;Min Zhang;Yang Feng", "authorids": "~Zhengrui_Ma1;~Shaolei_Zhang1;~Shoutao_Guo1;~Chenze_Shao1;~Min_Zhang9;~Yang_Feng4", "gender": "M;M;M;M;M;", "homepage": "http://nlp.ict.ac.cn/~mazhengrui;https://zhangshaolei1998.github.io/;;;https://zhangmin-nlp-ai.github.io/;http://people.ucas.edu.cn/~yangfeng?language=en", "dblp": "276/3133;;331/5767;227/3123;83/5342-5;07/6095-4.html", "google_scholar": "dUgq6tEAAAAJ;https://scholar.google.com.hk/citations?user=gWwAWo4AAAAJ;;LH_rZf8AAAAJ;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com/citations?hl=en", "or_profile": "~Zhengrui_Ma1;~Shaolei_Zhang1;~Shoutao_Guo1;~Chenze_Shao1;~Min_Zhang9;~Yang_Feng4", "aff": "Institute of Computing Technology, Chinese Academy of Sciences;Key Laboratory of Intelligent Information Processing Institute of Computing Technology, Chinese Academy of Sciences;Institute of computing technology, Chinese Academy of Sciences;Institute of Computing Technology, Chinese Academy of Sciences;Harbin Institute of Technology, Shenzhen;Institute of Computing Technology, Chinese Academy of Sciences", "aff_domain": "ict.ac.cn;ict.ac.cn;ict.ac.cn;ict.ac.cn;hit.edu.cn;ict.ac.cn", "position": "PhD student;PhD student;PhD student;PhD student;Full Professor;Full Professor", "bibtex": "@inproceedings{\nma2023nonautoregressive,\ntitle={Non-autoregressive Streaming Transformer for Simultaneous Translation},\nauthor={Zhengrui Ma and Shaolei Zhang and Shoutao Guo and Chenze Shao and Min Zhang and Yang Feng},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=gzRBs4gIbz}\n}", "github": "", "project": "", "reviewers": "JCCF;yohv;N9PX", "site": "https://openreview.net/forum?id=gzRBs4gIbz", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;5;2", "excitement": "4;4;3", "reproducibility": "3;4;3", "correctness": "4;4;3", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.6666666666666665, "replies_avg": 10, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-7254-9380;;;;", "linkedin": ";;;;;", "aff_unique_index": "0;0;0;0;1;0", "aff_unique_norm": "Chinese Academy of Sciences;Harbin Institute of Technology", "aff_unique_dep": "Institute of Computing Technology;", "aff_unique_url": "http://www.ict.ac.cn;http://en.hhit.edu.cn/", "aff_unique_abbr": "CAS;HIT", "aff_campus_unique_index": "1", "aff_campus_unique": ";Shenzhen", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "h00GHjWDEp", "title": "LINC: A Neurosymbolic Approach for Logical Reasoning by Combining Language Models with First-Order Logic Provers", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Logical reasoning, i.e., deductively inferring the truth value of a conclusion from a set of premises, is an important task for artificial intelligence with wide potential impacts on science, mathematics, and society. 
While many prompting-based strategies have been proposed to enable Large Language Models (LLMs) to do such reasoning more effectively, they still appear unsatisfactory, often failing in subtle and unpredictable ways. In this work, we investigate the validity of instead reformulating such tasks as modular neurosymbolic programming, which we call LINC: Logical Inference via Neurosymbolic Computation. In LINC, the LLM acts as a semantic parser, translating premises and conclusions from natural language to expressions in first-order logic. These expressions are then offloaded to an external theorem prover, which symbolically performs deductive inference. Leveraging this approach, we observe significant performance gains on FOLIO and a balanced subset of ProofWriter for three different models in nearly all experimental conditions we evaluate. On ProofWriter, augmenting the comparatively small open-source StarCoder+ (15.5B parameters) with LINC even outperforms GPT-3.5 and GPT-4 with Chain-of-Thought (CoT) prompting by an absolute 38% and 10%, respectively. When used with GPT-4, LINC scores 26% higher than CoT on ProofWriter while performing comparatively on FOLIO. Further analysis reveals that although both methods on average succeed roughly equally often on this dataset, they exhibit distinct and complementary failure modes. We thus provide promising evidence for how logical reasoning over natural language can be tackled through jointly leveraging LLMs alongside symbolic provers. All corresponding code is publicly available.", "keywords": "large language models;logical reasoning;neuro-symbolic AI", "primary_area": "", "supplementary_material": "", "author": "Theo X. Olausson;Alex Gu;Ben Lipkin;Cedegao E. Zhang;Armando Solar-Lezama;Joshua B. Tenenbaum;Roger P. Levy", "authorids": "~Theo_X._Olausson1;~Alex_Gu1;~Ben_Lipkin1;~Cedegao_E._Zhang1;~Armando_Solar-Lezama1;~Joshua_B._Tenenbaum1;~Roger_P._Levy1", "gender": "M;M;M;M;;M;M", "homepage": "https://minimario.github.io/;https://benlipkin.github.io/;https://cedzhang.com;https://people.csail.mit.edu/asolar/;;http://www.mit.edu/~rplevy;https://people.csail.mit.edu/theoxo/", "dblp": "285/4734;346/0247;245/7546.html;95/6919;t/JoshuaBTenenbaum;23/90;334/7669", "google_scholar": "jRQtBp0AAAAJ;zN6vxGUAAAAJ;b6arhCEAAAAJ;https://scholar.google.com.tw/citations?user=8BX3BokAAAAJ;;i86O0SAAAAAJ;e7K3ZagAAAAJ", "or_profile": "~Alex_Gu1;~Ben_Lipkin1;~Cedegao_E._Zhang1;~Armando_Solar-Lezama1;~Joshua_B._Tenenbaum1;~Roger_Levy1;~Theo_X_Olausson1", "aff": "Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology", "aff_domain": "mit.edu;mit.edu;mit.edu;mit.edu;mit.edu;mit.edu;mit.edu", "position": "PhD student;PhD student;PhD student;Full Professor;Professor;Full Professor;PhD student", "bibtex": "@inproceedings{\nolausson2023linc,\ntitle={{LINC}: A Neurosymbolic Approach for Logical Reasoning by Combining Language Models with First-Order Logic Provers},\nauthor={Theo X. Olausson and Alex Gu and Ben Lipkin and Cedegao E. Zhang and Armando Solar-Lezama and Joshua B. Tenenbaum and Roger P. 
Levy},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=h00GHjWDEp}\n}", "github": "", "project": "", "reviewers": "HxCp;iSNf;1NfW", "site": "https://openreview.net/forum?id=h00GHjWDEp", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;4;4", "excitement": "4;3;4", "reproducibility": "3;4;4", "correctness": "4;4;4", "rating_avg": 5.0, "confidence_avg": 4.0, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0001-7465-5315;;;;0000-0002-4493-8864;0000-0001-6653-2227", "linkedin": "alex-gu-8b7664175/;;;;;roger-levy-502a6011/;", "aff_unique_index": "0;0;0;0;0;0;0", "aff_unique_norm": "Massachusetts Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://web.mit.edu", "aff_unique_abbr": "MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "h1YhUpPKEq", "title": "Non-Programmers Can Label Programs Indirectly via Active Examples: A Case Study with Text-to-SQL", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Can non-programmers annotate natural language utterances with complex programs that represent their meaning? We introduce APEL, a framework in which non-programmers select among candidate programs generated by a seed semantic parser (e.g., Codex). Since they cannot understand the candidate programs, we ask them to select indirectly by examining the programs' input-ouput examples. For each utterance, APEL actively searches for a simple input on which the candidate programs tend to produce different outputs. It then asks the non-programmers only to choose the appropriate output, thus allowing us to infer which program is correct and could be used to fine-tune the parser. As a first case study, we recruited human non-programmers to use APEL to re-annotate SPIDER, a text-to-SQL dataset. 
Our approach achieved the same annotation accuracy as the original expert annotators (75%) and exposed many subtle errors in the original annotations.", "keywords": "Semantic Parsing; Annotation; Code generation", "primary_area": "", "supplementary_material": "", "author": "Ruiqi Zhong;Charlie Victor Snell;Dan Klein;Jason Eisner", "authorids": "~Ruiqi_Zhong1;~Charlie_Victor_Snell1;~Dan_Klein1;~Jason_Eisner1", "gender": "M;M;;M", "homepage": "https://ruiqi-zhong.github.io;https://sea-snell.github.io;http://people.eecs.berkeley.edu/~klein/;http://cs.jhu.edu/~jason", "dblp": "222/3024;;;37/3263", "google_scholar": "GskOShAAAAAJ;dD7EpwQAAAAJ;;tjb2UccAAAAJ", "or_profile": "~Ruiqi_Zhong1;~Charlie_Victor_Snell1;~Dan_Klein1;~Jason_Eisner1", "aff": "University of California, Berkeley;University of California, Berkeley;University of California, Berkeley;Microsoft", "aff_domain": "berkeley.edu;berkeley.edu;berkeley.edu;microsoft.com", "position": "PhD student;PhD student;Full Professor;Director of Research ", "bibtex": "@inproceedings{\nzhong2023nonprogrammers,\ntitle={Non-Programmers Can Label Programs Indirectly via Active Examples: A Case Study with Text-to-{SQL}},\nauthor={Ruiqi Zhong and Charlie Victor Snell and Dan Klein and Jason Eisner},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=h1YhUpPKEq}\n}", "github": "", "project": "", "reviewers": "uyi9;EWXL;Vovf", "site": "https://openreview.net/forum?id=h1YhUpPKEq", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;4;4", "excitement": "4;4;5", "reproducibility": "5;5;3", "correctness": "5;5;4", "rating_avg": 5.0, "confidence_avg": 4.0, "excitement_avg": 4.333333333333333, "reproducibility_avg": 4.333333333333333, "correctness_avg": 4.666666666666667, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0000-0002-8861-0772", "linkedin": ";;dan-klein/;", "aff_unique_index": "0;0;0;1", "aff_unique_norm": "University of California, Berkeley;Microsoft", "aff_unique_dep": ";Microsoft Corporation", "aff_unique_url": "https://www.berkeley.edu;https://www.microsoft.com", "aff_unique_abbr": "UC Berkeley;Microsoft", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Berkeley;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "h1nUUpmvpf", "title": "Cross-Cultural Analysis of Human Values, Morals, and Biases in Folk Tales", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Folk tales are strong cultural and social influences in children's lives, and they are known to teach morals and values. However, existing studies on folk tales are largely limited to European tales. In our study, we compile a large corpus of over 1,900 tales originating from 27 diverse cultures across six continents. Using a range of lexicons and correlation analyses, we examine how human values, morals, and gender biases are expressed in folk tales across cultures. We discover differences between cultures in prevalent values and morals, as well as cross-cultural trends in problematic gender biases. Furthermore, we find trends of reduced value expression when examining public-domain fiction stories, extrinsically validate our analyses against the multicultural Schwartz Survey of Cultural Values and the Global Gender Gap Report, and find traditional gender biases associated with values, morals, and agency. 
This large-scale cross-cultural study of folk tales paves the way towards future studies on how literature influences and reflects cultural norms.", "keywords": "values;morality;bias;folk tales", "primary_area": "", "supplementary_material": "", "author": "Winston Wu;Lu Wang;Rada Mihalcea", "authorids": "~Winston_Wu1;~Lu_Wang9;~Rada_Mihalcea1", "gender": ";F;F", "homepage": ";https://web.eecs.umich.edu/~wangluxy/;https://web.eecs.umich.edu/~mihalcea/", "dblp": ";49/3800-8;m/RadaMihalcea", "google_scholar": ";uczqEdUAAAAJ;https://scholar.google.com.tw/citations?user=UetM7FgAAAAJ", "or_profile": "~Winston_Wu1;~Lu_Wang9;~Rada_Mihalcea1", "aff": ";University of Michigan;University of Michigan", "aff_domain": ";umich.edu;umich.edu", "position": ";Associate Professor;Full Professor", "bibtex": "@inproceedings{\nwu2023crosscultural,\ntitle={Cross-Cultural Analysis of Human Values, Morals, and Biases in Folk Tales},\nauthor={Winston Wu and Lu Wang and Rada Mihalcea},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=h1nUUpmvpf}\n}", "github": "", "project": "", "reviewers": "Vntz;irgT;bzwS", "site": "https://openreview.net/forum?id=h1nUUpmvpf", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;3;5", "excitement": "4;3;4", "reproducibility": "4;4;4", "correctness": "4;4;4", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.0, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0002-0767-6703", "linkedin": ";;", "aff_unique_index": "0;0", "aff_unique_norm": "University of Michigan", "aff_unique_dep": "", "aff_unique_url": "https://www.umich.edu", "aff_unique_abbr": "UM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "h4NNcIZUHT", "title": "DiSTRICT: Dialogue State Tracking with Retriever Driven In-Context Tuning", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Dialogue State Tracking (DST), a key component of task-oriented conversation systems, represents user intentions by determining the values of pre-defined slots in an ongoing dialogue. \nExisting approaches use hand-crafted templates and additional slot information to fine-tune and prompt large pre-trained language models and elicit slot values from the dialogue context. Significant manual effort and domain knowledge is required to design effective prompts, limiting the generalizability of these approaches to new domains and tasks. \nIn this work, we propose DiSTRICT, a generalizable in-context tuning approach for DST that retrieves highly relevant training examples for a given dialogue to fine-tune the model without any hand-crafted templates. 
\nExperiments with the MultiWOZ benchmark datasets show that DiSTRICT outperforms existing approaches in various zero-shot and few-shot settings using a much smaller model, thereby providing an important advantage for real-world deployments that often have limited resource availability.", "keywords": "Dialogue state tracking;in-context tuning;semantic retrieval", "primary_area": "", "supplementary_material": "", "author": "Praveen Venkateswaran;Evelyn Duesterwald;Vatche Isahagian", "authorids": "~Praveen_Venkateswaran1;~Evelyn_Duesterwald1;~Vatche_Isahagian1", "gender": "M;;", "homepage": ";;", "dblp": "177/7837;;28/10038.html", "google_scholar": "jJI7sRgAAAAJ;hlrCF3YAAAAJ;VYN4CfEAAAAJ", "or_profile": "~Praveen_Venkateswaran1;~Evelyn_Duesterwald1;~Vatche_Isahagian1", "aff": "International Business Machines;International Business Machines;International Business Machines", "aff_domain": "ibm.com;ibm.com;ibm.com", "position": "Researcher;Researcher;Researcher", "bibtex": "@inproceedings{\nvenkateswaran2023district,\ntitle={Di{STRICT}: Dialogue State Tracking with Retriever Driven In-Context Tuning},\nauthor={Praveen Venkateswaran and Evelyn Duesterwald and Vatche Isahagian},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=h4NNcIZUHT}\n}", "github": "", "project": "", "reviewers": "LNYL;nejq;sWot", "site": "https://openreview.net/forum?id=h4NNcIZUHT", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;5;5", "excitement": "3;4;3", "reproducibility": "4;4;5", "correctness": "4;4;4", "rating_avg": 5.0, "confidence_avg": 4.666666666666667, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.333333333333333, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": ";;", "aff_unique_index": "0;0;0", "aff_unique_norm": "International Business Machines Corporation", "aff_unique_dep": "", "aff_unique_url": "https://www.ibm.com", "aff_unique_abbr": "IBM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "h5gum6ximf", "title": "Don't waste a single annotation: improving single-label classifiers through soft labels", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "In this paper, we address the limitations of the common data annotation and training methods for objective single-label classification tasks. Typically, when annotating such tasks annotators are only asked to provide a single label for each sample and annotator disagreement is discarded when a final hard label is decided through majority voting. We challenge this traditional approach, acknowledging that determining the appropriate label can be difficult due to the ambiguity and lack of context in the data samples. Rather than discarding the information from such ambiguous annotations, our soft label method makes use of them for training. Our findings indicate that additional annotator information, such as confidence, secondary label and disagreement, can be used to effectively generate soft labels. 
Training classifiers with these soft labels then leads to improved performance and calibration on the hard label test set.", "keywords": "Don't Waste a Single Annotation: Improving Single-Label Classifiers Through Soft Labels", "primary_area": "", "supplementary_material": "", "author": "Ben Peng Wu;Yue Li;Yida Mu;Carolina Scarton;Kalina Bontcheva;Xingyi Song", "authorids": "~Ben_Peng_Wu1;~Yue_Li9;~Yida_Mu1;~Carolina_Scarton1;~Kalina_Bontcheva2;~Xingyi_Song1", "gender": ";;;;F;M", "homepage": ";;;https://carolscarton.github.io;;https://www.sheffield.ac.uk/dcs/people/academic/xingyi-song", "dblp": "168/2621;;;23/8672;https://dblp.uni-trier.de/pid/b/KalinaBontcheva.html;185/5566", "google_scholar": "R7PZv1kAAAAJ;;;e6YOuiQAAAAJ;https://scholar.google.co.uk/citations?user=kUbDCnMAAAAJ;7seaj48AAAAJ", "or_profile": "~Ben_Peng_Wu1;~Yue_Li9;~Yida_Mu1;~Carolina_Scarton1;~Kalina_Bontcheva2;~Xingyi_Song1", "aff": "University of Sheffield;;;University of Sheffield;University of Sheffield;University of Sheffield", "aff_domain": "shef.ac.uk;;;sheffield.ac.uk;shef.ac.uk;sheffield.ac.uk", "position": "PhD student;;;Lecturer;Full Professor;Lecturer", "bibtex": "@inproceedings{\nwu2023dont,\ntitle={Don't waste a single annotation: improving single-label classifiers through soft labels},\nauthor={Ben Peng Wu and Yue Li and Yida Mu and Carolina Scarton and Kalina Bontcheva and Xingyi Song},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=h5gum6ximf}\n}", "github": "", "project": "", "reviewers": "qiyT;MMN4;N6xf", "site": "https://openreview.net/forum?id=h5gum6ximf", "pdf_size": 0, "rating": "3;3;3", "confidence": "5;4;3", "excitement": "3;3;3", "reproducibility": "4;5;3", "correctness": "4;3;3", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 3.0, "reproducibility_avg": 4.0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0009-0002-0918-526X;;;0000-0002-0103-4072;0000-0001-6152-9600;0000-0002-4188-6974", "linkedin": ";;;carolina-scarton/;;xingyi-song-230257b4/", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Sheffield", "aff_unique_dep": "", "aff_unique_url": "https://www.sheffield.ac.uk", "aff_unique_abbr": "Sheffield", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United Kingdom" }, { "id": "h96N32OkAx", "title": "CoLT5: Faster Long-Range Transformers with Conditional Computation", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Many natural language processing tasks benefit from long inputs, but processing long documents with Transformers is expensive -- not only due to quadratic attention complexity but also from applying feedforward and projection layers to every token. However, not all tokens are equally important, especially for longer documents. We propose CoLT5, a long-input Transformer model that builds on this intuition by employing conditional computation, devoting more resources to important tokens in both feedforward and attention layers. We show that CoLT5 achieves stronger performance than LongT5 with much faster training and inference, achieving SOTA on the long-input SCROLLS benchmark. 
Moreover, CoLT5 can effectively and tractably make use of extremely long inputs, showing strong gains up to 64k input length.", "keywords": "long context;conditional computation;efficient nlp", "primary_area": "", "supplementary_material": "", "author": "Joshua Ainslie;Tao Lei;Michiel de Jong;Santiago Ontanon;Siddhartha Brahma;Yury Zemlyanskiy;David Uthus;Mandy Guo;James Lee-Thorp;Yi Tay;Yun-Hsuan Sung;Sumit Sanghai", "authorids": "~Joshua_Ainslie1;~Tao_Lei1;~Michiel_de_Jong1;~Santiago_Ontanon1;~Siddhartha_Brahma1;~Yury_Zemlyanskiy1;~David_Uthus1;~Mandy_Guo2;~James_Lee-Thorp1;~Yi_Tay1;~Yun-Hsuan_Sung1;~Sumit_Sanghai1", "gender": ";M;M;;M;M;;M;M;M;M;F", "homepage": ";;;https://sites.google.com/site/santiagoontanonvillar/;;https://urikz.github.io/;;;http://yitay.net;https://research.google/people/105458/;;", "dblp": "263/3363;;223/0153;https://dblp.org/pers/o/Onta=ntilde==oacute=n:Santiago.html;;225/5302;09/2971.html;;;;;", "google_scholar": ";g2uay50AAAAJ;R7wXId8AAAAJ;aS-DrOwAAAAJ;OZj382cAAAAJ;fkkxyJUAAAAJ;9k31iVQAAAAJ;qsPv098AAAAJ;VBclY_cAAAAJ;JFr53PEAAAAJ;;qOiCKewAAAAJ", "or_profile": "~Joshua_Ainslie1;~Tao_Lei1;~Michiel_de_Jong1;~Santiago_Ontanon1;~Siddhartha_Brahma1;~Yury_Zemlyanskiy1;~David_Uthus1;~James_Lee-Thorp1;~Yi_Tay1;~Yun-Hsuan_Sung1;~Sumit_Sanghai1;~Xiaoyue_Guo1", "aff": "Google;Google;University of Southern California;Drexel University;Research, Google;;Google;Google;Google;Google;Research, Google;", "aff_domain": "google.com;google.com;usc.edu;drexel.edu;research.google.com;;google.com;google.com;google.com;google.com;research.google.com;", "position": "Software Engineer;Research scientist;PhD student;Associate Professor;Researcher;;Software Engineer;Researcher;Research Scientist;Researcher;Researcher;", "bibtex": "@inproceedings{\nainslie2023colt,\ntitle={Co{LT}5: Faster Long-Range Transformers with Conditional Computation},\nauthor={Joshua Ainslie and Tao Lei and Michiel de Jong and Santiago Ontanon and Siddhartha Brahma and Yury Zemlyanskiy and David Uthus and Mandy Guo and James Lee-Thorp and Yi Tay and Yun-Hsuan Sung and Sumit Sanghai},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=h96N32OkAx}\n}", "github": "", "project": "", "reviewers": "gBi6;gfnU;tPfd", "site": "https://openreview.net/forum?id=h96N32OkAx", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;3;4", "excitement": "4;4;4", "reproducibility": "4;4;4", "correctness": "4;4;4", "rating_avg": 5.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 4.0, "reproducibility_avg": 4.0, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 12, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;;;0000-0001-6445-7155;;;;", "linkedin": ";;;;sidbrahma;yury-zemlyanskiy/;;;;yhsung/;sumit-sanghai-90961a5/;", "aff_unique_index": "0;0;1;2;0;0;0;0;0;0", "aff_unique_norm": "Google;University of Southern California;Drexel University", "aff_unique_dep": "Google;;", "aff_unique_url": "https://www.google.com;https://www.usc.edu;https://www.drexel.edu", "aff_unique_abbr": "Google;USC;Drexel", "aff_campus_unique_index": "0;0;1;0;0;0;0;0;0", "aff_campus_unique": "Mountain View;Los Angeles;", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "hA8h2KtSv2", "title": "Stop Uploading Test Data in Plain Text: Practical Strategies for Mitigating Data Contamination by Evaluation Benchmarks", "track": "main", "status": "Short Main", "tldr": "", "abstract": "Data 
contamination has become prevalent and challenging with the rise of models pretrained on large automatically-crawled corpora. \nFor closed models, the training data becomes a trade secret, and even for open models, it is not trivial to detect contamination.\nStrategies such as leaderboards with hidden answers, or using test data which is guaranteed to be unseen, are expensive and become fragile with time. Assuming that all relevant actors value clean test data and will cooperate to mitigate data contamination, what can be done? \nWe propose three strategies that can make a difference: \n(1) Test data made public should be encrypted with a public key and licensed\nto disallow derivative distribution;\n(2) demand training exclusion controls from closed API holders, and protect your test data by refusing to evaluate without them; \n(3) avoid data which appears with its solution on the internet, and release the web-page context of internet-derived data along with the data.\nThese strategies are practical and can be effective in preventing data contamination.", "keywords": "data contamination;contamination;evaluation;test data;benchmarks;closed models;pretraining", "primary_area": "", "supplementary_material": "", "author": "Alon Jacovi;Avi Caciularu;Omer Goldman;Yoav Goldberg", "authorids": "~Alon_Jacovi1;~Avi_Caciularu1;~Omer_Goldman1;~Yoav_Goldberg1", "gender": "M;M;;M", "homepage": "https://alonjacovi.github.io/;http://aviclu.github.io/;;https://www.cs.biu.ac.il/~yogo", "dblp": "218/5900;https://dblp.uni-trier.de/pid/207/8509;;68/5296", "google_scholar": "cX9TtloAAAAJ;https://scholar.google.co.il/citations?user=fPG_0aQAAAAJ;;https://scholar.google.co.il/citations?user=0rskDKgAAAAJ", "or_profile": "~Alon_Jacovi1;~Avi_Caciularu1;~Omer_Goldman1;~Yoav_Goldberg1", "aff": "Bar Ilan University;Google;;Allen Institute for Artificial Intelligence", "aff_domain": "biu.ac.il;google.com;;allenai.org", "position": "PhD student;Researcher;;Principal Researcher", "bibtex": "@inproceedings{\njacovi2023stop,\ntitle={Stop Uploading Test Data in Plain Text: Practical Strategies for Mitigating Data Contamination by Evaluation Benchmarks},\nauthor={Alon Jacovi and Avi Caciularu and Omer Goldman and Yoav Goldberg},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=hA8h2KtSv2}\n}", "github": "", "project": "", "reviewers": "Vde9;Hv9v;AUDd", "site": "https://openreview.net/forum?id=hA8h2KtSv2", "pdf_size": 0, "rating": "", "confidence": "4;4;3", "excitement": "3;5;4", "reproducibility": "4;0;0", "correctness": "3;5;4", "rating_avg": 0, "confidence_avg": 3.6666666666666665, "excitement_avg": 4.0, "reproducibility_avg": 1.3333333333333333, "correctness_avg": 4.0, "replies_avg": 10, "authors#_avg": 4, "corr_rating_confidence": 0, "corr_rating_correctness": 0, "orcid": ";;;", "linkedin": ";avicaciularu/;;", "aff_unique_index": "0;1;2", "aff_unique_norm": "Bar-Ilan University;Google;Allen Institute for Artificial Intelligence", "aff_unique_dep": ";Google;", "aff_unique_url": "https://www.biu.ac.il;https://www.google.com;https://allenai.org", "aff_unique_abbr": "BIU;Google;AI2", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;1;1", "aff_country_unique": "Israel;United States" }, { "id": "hCsppacsqS", "title": "MuG: A Multimodal Classification Benchmark on Game Data with Tabular, Textual, and Visual Fields", "track": "main", "status": "Long Findings", "tldr": "", "abstract": 
"Previous research has demonstrated the advantages of integrating data from multiple sources over traditional unimodal data, leading to the emergence of numerous novel multimodal applications. We propose a multimodal classification benchmark MuG with eight datasets that allows researchers to evaluate and improve their models. These datasets are collected from four various genres of games that cover tabular, textual, and visual modalities. We conduct multi-aspect data analysis to provide insights into the benchmark, including label balance ratios, percentages of missing features, distributions of data within each modality, and the correlations between labels and input modalities. We further present experimental results obtained by several state-of-the-art unimodal classifiers and multimodal classifiers, which demonstrate the challenging and multimodal-dependent properties of the benchmark. MuG is released at https://github.com/lujiaying/MUG-Bench with the data, tutorials, and implemented baselines.", "keywords": "multimodal classification benchmark;resources and evaluation;multimodal graph neural network", "primary_area": "", "supplementary_material": "", "author": "Jiaying Lu;Yongchen Qian;Shifan Zhao;Yuanzhe Xi;Carl Yang", "authorids": "~Jiaying_Lu1;~Yongchen_Qian1;~Shifan_Zhao1;~Yuanzhe_Xi1;~Carl_Yang1", "gender": ";M;M;M;M", "homepage": "https://lujiaying.github.io/;https://qyccc3.github.io/;;http://www.math.emory.edu/~yxi26/;https://cs.emory.edu/~jyang71/", "dblp": "61/9803-1;;244/1502;;305/0254", "google_scholar": "wS9maWYAAAAJ;;;;mOINlwcAAAAJ", "or_profile": "~Jiaying_Lu1;~Yongchen_Qian1;~Shifan_Zhao1;~Yuanzhe_Xi1;~Carl_Yang1", "aff": "Emory University;Emory University;Emory University;;Emory University", "aff_domain": "emory.edu;emory.edu;emory.edu;;emory.edu", "position": "PhD student;Undergrad student;PhD student;;Assistant Professor", "bibtex": "@inproceedings{\nlu2023mug,\ntitle={MuG: A Multimodal Classification Benchmark on Game Data with Tabular, Textual, and Visual Fields},\nauthor={Jiaying Lu and Yongchen Qian and Shifan Zhao and Yuanzhe Xi and Carl Yang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=hCsppacsqS}\n}", "github": "", "project": "", "reviewers": "L86v;SX92;TBbD", "site": "https://openreview.net/forum?id=hCsppacsqS", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;4;3", "excitement": "3;3;4", "reproducibility": "4;4;3", "correctness": "3;4;4", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0001-9052-6951;;;;0000-0001-9145-4531", "linkedin": ";yongchen-qian-578367141;%E4%B8%96%E5%87%A1-%E8%B5%B5-b35742ba/;;", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Emory University", "aff_unique_dep": "", "aff_unique_url": "https://www.emory.edu", "aff_unique_abbr": "Emory", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "hDzfqmLrol", "title": "DeltaScore: Fine-Grained Story Evaluation with Perturbations", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Numerous evaluation metrics have been developed for natural language generation tasks, but their effectiveness in evaluating stories is limited as they are not 
specifically tailored to assess intricate aspects of storytelling, such as fluency and interestingness.\nIn this paper, we introduce DeltaScore, a novel methodology that uses perturbation techniques for the evaluation of nuanced story aspects. We posit that the extent to which a story excels in a specific aspect (e.g., fluency) correlates with the magnitude of its susceptibility to particular perturbations (e.g., the introduction of typos). Given this, we measure the quality of an aspect by calculating the likelihood difference between pre- and post-perturbation states using pre-trained language models. We compare DeltaScore with existing metrics on storytelling datasets from two domains in five fine-grained story aspects:\nfluency, coherence, relatedness, logicality, and interestingness. DeltaScore demonstrates strong performance, revealing a surprising finding that one specific perturbation proves highly effective in capturing multiple aspects.\nSource code is available on our GitHub repository.", "keywords": "story;evaluation;metric;PLM", "primary_area": "", "supplementary_material": "", "author": "Zhuohan Xie;Miao Li;Trevor Cohn;Jey Han Lau", "authorids": "~Zhuohan_Xie1;~Miao_Li2;~Trevor_Cohn1;~Jey_Han_Lau2", "gender": "M;M;M;", "homepage": "https://www.linkedin.com/in/zhuohanxie/;https://oaimli.github.io/;https://people.eng.unimelb.edu.au/tcohn/;https://jeyhan.my/", "dblp": "220/7055;;66/4613;32/9014.html", "google_scholar": "W9mk-R4AAAAJ;ySkFXwoAAAAJ;https://scholar.google.com.au/citations?user=FCom398AAAAJ;https://scholar.google.com.au/citations?user=MFi65f4AAAAJ", "or_profile": "~Zhuohan_Xie1;~Miao_Li2;~Trevor_Cohn1;~Jey_Han_Lau2", "aff": "University of Melbourne;The University of Melbourne;The University of Melbourne;The University of Melbourne", "aff_domain": "unimelb.edu;unimelb.edu.au;unimelb.edu.au;unimelb.edu.au", "position": "PhD student;PhD student;Professor;Senior Lecturer", "bibtex": "@inproceedings{\nxie2023deltascore,\ntitle={DeltaScore: Fine-Grained Story Evaluation with Perturbations},\nauthor={Zhuohan Xie and Miao Li and Trevor Cohn and Jey Han Lau},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=hDzfqmLrol}\n}", "github": "", "project": "", "reviewers": "cnBq;pCUx;5YuF", "site": "https://openreview.net/forum?id=hDzfqmLrol", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;3;2", "excitement": "4;4;3", "reproducibility": "3;4;3", "correctness": "4;4;3", "rating_avg": 4.0, "confidence_avg": 2.6666666666666665, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0009-0008-2650-2857;0000-0002-1669-7063;;0000-0002-1647-4628", "linkedin": "zhuohanxie/;oaimli/;;", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Melbourne", "aff_unique_dep": "", "aff_unique_url": "https://www.unimelb.edu.au", "aff_unique_abbr": "UniMelb", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Australia" }, { "id": "hEWgNQF1TM", "title": "Parameter-Efficient Language Model Tuning with Active Learning in Low-Resource Settings", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Pre-trained language models (PLMs) have ignited a surge in demand for effective fine-tuning techniques, particularly in low-resource domains and languages. 
Active learning (AL), a set of algorithms designed to decrease labeling costs by minimizing label complexity, has shown promise in confronting the labeling bottleneck. In parallel, adapter modules designed for parameter-efficient fine-tuning (PEFT) have demonstrated notable potential in low-resource settings. However, the interplay between AL and adapter-based PEFT remains unexplored. We present an empirical study of PEFT behavior with AL in low-resource settings for text classification tasks. Our findings affirm the superiority of PEFT over full-fine tuning (FFT) in low-resource settings and demonstrate that this advantage persists in AL setups.\nWe further examine the properties of PEFT and FFT through the lens of forgetting dynamics and instance-level representations, where we find that PEFT yields more stable representations of early and middle layers compared to FFT. Our research underscores the synergistic potential of AL and PEFT in low-resource settings, paving the way for advancements in efficient and effective fine-tuning.", "keywords": "parameter-efficient fine-tuning;active learning;low-resource settings", "primary_area": "", "supplementary_material": "", "author": "Josip Juki\u0107;Jan Snajder", "authorids": "~Josip_Juki\u01071;~Jan_Snajder1", "gender": ";M", "homepage": ";http://www.zemris.fer.hr/~jan/", "dblp": "333/0711;34/5404", "google_scholar": "0NzyWBoAAAAJ;https://scholar.google.hr/citations?user=7h0lKgIAAAAJ", "or_profile": "~Josip_Juki\u01071;~Jan_Snajder1", "aff": "Faculty of Electrical Engineering and Computing, University of Zagreb;UniZg-FER, University of Zagreb", "aff_domain": "fer.hr;fer.unizg.hr", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\njuki{\\'c}2023parameterefficient,\ntitle={Parameter-Efficient Language Model Tuning with Active Learning in Low-Resource Settings},\nauthor={Josip Juki{\\'c} and Jan Snajder},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=hEWgNQF1TM}\n}", "github": "", "project": "", "reviewers": "qrFB;iSq3;bhGY", "site": "https://openreview.net/forum?id=hEWgNQF1TM", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;2;3", "excitement": "3;2;3", "reproducibility": "4;3;4", "correctness": "3;3;3", "rating_avg": 4.0, "confidence_avg": 2.6666666666666665, "excitement_avg": 2.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";", "linkedin": ";", "aff_unique_index": "0;0", "aff_unique_norm": "University of Zagreb", "aff_unique_dep": "Faculty of Electrical Engineering and Computing", "aff_unique_url": "https://www.unizg.hr", "aff_unique_abbr": "UNIZG", "aff_campus_unique_index": "1", "aff_campus_unique": ";Zagreb", "aff_country_unique_index": "0;0", "aff_country_unique": "Croatia" }, { "id": "hEglNMGeqj", "title": "Better Together: Enhancing Generative Knowledge Graph Completion with Language Models and Neighborhood Information", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "Real-world Knowledge Graphs (KGs) often suffer from incompleteness, which limits their potential performance. Knowledge Graph Completion (KGC) techniques aim to address this issue. However, traditional KGC methods are computationally intensive and impractical for large-scale KGs, necessitating the learning of dense node embeddings and computing pairwise distances. 
Generative transformer-based language models (e.g., T5 and recent KGT5) offer a promising solution as they can predict the tail nodes directly. In this study, we propose to include node neighborhoods as additional information to improve KGC methods based on language models. We examine the effects of this imputation and show that, on both inductive and transductive Wikidata subsets, our method outperforms KGT5 and conventional KGC approaches. We also provide an extensive analysis of the impact of neighborhood on model prediction and show its importance. Furthermore, we point the way to significantly improve KGC through more effective neighborhood selection.", "keywords": "Knowledge graphs;Knowledge graph completion;Language models;Graph neighborhood;Transformer", "primary_area": "", "supplementary_material": "", "author": "Alla Chepurova;Aydar Bulatov;Yuri Kuratov;Mikhail Burtsev", "authorids": "~Alla_Chepurova1;~Aydar_Bulatov1;~Yuri_Kuratov2;~Mikhail_Burtsev1", "gender": "F;M;M;", "homepage": ";;;", "dblp": ";324/5232;95/11265;222/9309", "google_scholar": "NwE8OmQAAAAJ;UvN4mlEAAAAJ;t_PLQakAAAAJ;BsDK7zIAAAAJ", "or_profile": "~Alla_Chepurova1;~Aydar_Bulatov1;~Mikhail_Burtsev1;~Yury_Kuratov1", "aff": ";Moscow Institute of Physics and Technology;London Institute for Mathematical Sciences;Moscow Institute of Physics and Technology", "aff_domain": ";phystech.edu;lims.ac.uk;phystech.edu", "position": ";PhD student;Researcher;Researcher", "bibtex": "@inproceedings{\nchepurova2023better,\ntitle={Better Together: Enhancing Generative Knowledge Graph Completion with Language Models and Neighborhood Information},\nauthor={Alla Chepurova and Aydar Bulatov and Yuri Kuratov and Mikhail Burtsev},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=hEglNMGeqj}\n}", "github": "", "project": "", "reviewers": "Xgyv;ht6W;12WZ", "site": "https://openreview.net/forum?id=hEglNMGeqj", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;3;3", "excitement": "3;2;2", "reproducibility": "4;4;4", "correctness": "4;3;4", "rating_avg": 3.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 2.3333333333333335, "reproducibility_avg": 4.0, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;", "linkedin": ";booydar/;;", "aff_unique_index": "0;1;0", "aff_unique_norm": "Moscow Institute of Physics and Technology;London Institute for Mathematical Sciences", "aff_unique_dep": ";Mathematical Sciences", "aff_unique_url": "https://www.mipt.ru/en;https://www.lims.ac.uk", "aff_unique_abbr": "MIPT;LIMS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "Russian Federation;United Kingdom" }, { "id": "hGUu750pcx", "title": "Exploring Large Language Models for Multi-Modal Out-of-Distribution Detection", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Out-of-distribution (OOD) detection is essential for reliable and trustworthy machine learning.\nRecent multi-modal OOD detection leverages textual information from in-distribution (ID) class names for visual OOD detection, yet it currently neglects the rich contextual information of ID classes.\nLarge language models (LLMs) encode a wealth of world knowledge and can be prompted to generate descriptive features for each class.\nIndiscriminately using such knowledge causes catastrophic damage to OOD detection due to 
LLMs' hallucinations, as is observed by our analysis.\nIn this paper, we propose to apply world knowledge to enhance OOD detection performance through selective generation from LLMs.\nSpecifically, we introduce a consistency-based uncertainty calibration method to estimate the confidence score of each generation.\nWe further extract visual objects from each image to fully capitalize on the aforementioned world knowledge.\nExtensive experiments demonstrate that our method consistently outperforms the state-of-the-art.", "keywords": "OOD Detection", "primary_area": "", "supplementary_material": "", "author": "Yi Dai;Hao Lang;Kaisheng Zeng;Fei Huang;Yongbin Li", "authorids": "~Yi_Dai2;~Hao_Lang1;~Kaisheng_Zeng1;~Fei_Huang1;~Yongbin_Li2", "gender": "M;M;M;M;M", "homepage": ";https://github.com/langhaobeijing;https://github.com/alpc43;https://yongbin-li.github.io/;https://sites.google.com/view/fei-huang", "dblp": "97/3173;71/6934.html;199/8788.html;;h/FeiHuang.html", "google_scholar": "Tc6C26sAAAAJ;0UGQL9QAAAAJ;https://scholar.google.com/citations?view_op=list_works;xF5VrokAAAAJ;9r98PpoAAAAJ", "or_profile": "~Yi_Dai2;~Hao_Lang1;~Kaisheng_Zeng1;~Yongbin_Li2;~Fei_Huang2", "aff": "Tsinghua University;Tongyi Lab, Alibaba Group;Tsinghua University;Alibaba Group;Alibaba Group US", "aff_domain": "tsinghua.edu.cn;alibaba-inc.com;tsinghua.edu.cn;alibaba-inc.com;alibaba-inc.com", "position": "PhD student;Researcher;PhD student;Researcher;Senior Research Director", "bibtex": "@inproceedings{\ndai2023exploring,\ntitle={Exploring Large Language Models for Multi-Modal Out-of-Distribution Detection},\nauthor={Yi Dai and Hao Lang and Kaisheng Zeng and Fei Huang and Yongbin Li},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=hGUu750pcx}\n}", "github": "", "project": "", "reviewers": "qF8f;ReNH;QiKW", "site": "https://openreview.net/forum?id=hGUu750pcx", "pdf_size": 0, "rating": "2;2;2", "confidence": "4;3;4", "excitement": "3;4;4", "reproducibility": "4;4;3", "correctness": "2;4;3", "rating_avg": 2.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0003-1219-2436;;0000-0002-8104-9652;;", "linkedin": ";;https://cn.linkedin.com/in/%E5%BC%80%E8%83%9C-%E6%9B%BE-496566107;;fei-huang-cas-cmu", "aff_unique_index": "0;1;0;1;1", "aff_unique_norm": "Tsinghua University;Alibaba Group", "aff_unique_dep": ";Tongyi Lab", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.alibaba.com", "aff_unique_abbr": "THU;Alibaba", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;1", "aff_country_unique": "China;United States" }, { "id": "hInB4JIQ5P", "title": "CoEdIT: Text Editing by Task-Specific Instruction Tuning", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "We introduce CoEdIT, a state-of-the-art text editing system for writing assistance. CoEdIT takes instructions from the user specifying the attributes of the desired text, such as \"Make the sentence simpler\" or \"Write it in a more neutral style,\" and outputs the edited text. 
We\npresent a large language model fine-tuned on a diverse collection of task-specific instructions for text editing (a total of 82K instructions).\nOur model (1) achieves state-of-the-art performance on various text editing benchmarks, (2) is competitive with publicly available largest-\nsized LLMs trained on instructions while being \u223c60x smaller, (3) is capable of generalizing to unseen edit instructions, and (4) exhibits abilities to generalize to composite instructions containing different combinations of edit actions. Through extensive qualitative and quantitative analysis, we show that writers prefer the edits suggested by CoEdIT relative to other state-of-the-art text editing models. Our code, data, and models are publicly available at https://github.com/vipulraheja/coedit.", "keywords": "Text Editing;Instruction Tuning;Large Language Models", "primary_area": "", "supplementary_material": "", "author": "Vipul Raheja;Dhruv Kumar;Ryan Koo;Dongyeop Kang", "authorids": "~Vipul_Raheja1;~Dhruv_Kumar2;~Ryan_Koo1;~Dongyeop_Kang2", "gender": ";M;;", "homepage": ";https://ddhruvkr.github.io/;;", "dblp": ";159/9419-5;;", "google_scholar": ";IiMW328AAAAJ;;", "or_profile": "~Vipul_Raheja1;~Dhruv_Kumar2;~Ryan_Koo1;~Dongyeop_Kang2", "aff": ";;;", "aff_domain": ";;;", "position": ";;;", "bibtex": "@inproceedings{\nraheja2023coedit,\ntitle={CoEd{IT}: Text Editing by Task-Specific Instruction Tuning},\nauthor={Vipul Raheja and Dhruv Kumar and Ryan Koo and Dongyeop Kang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=hInB4JIQ5P}\n}", "github": "", "project": "", "reviewers": "UcMe;MrqV;NJjA", "site": "https://openreview.net/forum?id=hInB4JIQ5P", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;4;4", "excitement": "3;3;3", "reproducibility": "3;4;4", "correctness": "2;3;3", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 2.6666666666666665, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-8191-0123;;", "linkedin": ";dhruv-kumar-1a519383/;;" }, { "id": "hMqRphmoM9", "title": "Prompting is not a substitute for probability measurements in large language models", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Prompting is now a dominant method for evaluating the linguistic knowledge of large language models (LLMs). While other methods directly read out models' probability distributions over strings, prompting requires models to access this internal information by processing linguistic input, thereby implicitly testing a new type of emergent ability: metalinguistic judgment. In this study, we compare metalinguistic prompting and direct probability measurements as ways of measuring models' linguistic knowledge. Broadly, we find that LLMs' metalinguistic judgments are inferior to quantities directly derived from representations. Furthermore, consistency gets worse as the prompt query diverges from direct measurements of next-word probabilities. Our findings suggest that negative results relying on metalinguistic prompts cannot be taken as conclusive evidence that an LLM lacks a particular linguistic generalization. 
Our results also highlight the value that is lost with the move to closed APIs where access to probability distributions is limited.", "keywords": "Large Language Models;Behavioral Testing of Language Models;Metalinguistic Judgment;Prompting;Minimal Pairs", "primary_area": "", "supplementary_material": "", "author": "Jennifer Hu;Roger P. Levy", "authorids": "~Jennifer_Hu1;~Roger_P._Levy1", "gender": ";M", "homepage": "https://jennhu.github.io/;http://www.mit.edu/~rplevy", "dblp": "217/1862;23/90", "google_scholar": ";i86O0SAAAAAJ", "or_profile": "~Jennifer_Hu1;~Roger_Levy1", "aff": "Massachusetts Institute of Technology;Massachusetts Institute of Technology", "aff_domain": "mit.edu;mit.edu", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\nhu2023prompting,\ntitle={Prompting is not a substitute for probability measurements in large language models},\nauthor={Jennifer Hu and Roger P. Levy},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=hMqRphmoM9}\n}", "github": "", "project": "", "reviewers": "3kd5;FmJA;vZ47", "site": "https://openreview.net/forum?id=hMqRphmoM9", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;3;4", "excitement": "4;4;4", "reproducibility": "5;5;3", "correctness": "4;4;4", "rating_avg": 5.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 4.0, "reproducibility_avg": 4.333333333333333, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0003-4075-6876;0000-0002-4493-8864", "linkedin": ";roger-levy-502a6011/", "aff_unique_index": "0;0", "aff_unique_norm": "Massachusetts Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://web.mit.edu", "aff_unique_abbr": "MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "hNSbSaD1WC", "title": "The Cost of Compression: Investigating the Impact of Compression on Parametric Knowledge in Language Models", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Compressing large language models (LLMs), often consisting of billions of parameters, provides faster inference, smaller memory footprints, and enables local deployment. \nThe standard compression techniques are pruning and quantization, with the former eliminating redundant connections in model layers and the latter representing model parameters with as little as 4 bits. The key tradeoff is between the degree of compression and the impact on the quality of the compressed model. Existing research on LLM compression primarily focuses on performance in terms of general metrics like perplexity or downstream task accuracy. More fine-grained metrics, such as those measuring parametric knowledge, remain significantly underexplored. To help bridge this gap, we present a comprehensive analysis across multiple model families using the LAMA and LM-Harness benchmarks in order to systematically quantify the effect of commonly employed compression techniques on model performance. 
A particular focus is on tradeoffs involving parametric knowledge, with the goal of providing practitioners with practical insights to make informed decisions on compression.", "keywords": "Pruning;Quantization;Language Models", "primary_area": "", "supplementary_material": "", "author": "Satya Sai Srinath Namburi GNVV;Makesh Narsimhan Sreedhar;Srinath Srinivasan;Frederic Sala", "authorids": "~Satya_Sai_Srinath_Namburi_GNVV1;~Makesh_Narsimhan_Sreedhar1;~Srinath_Srinivasan1;~Frederic_Sala1", "gender": "M;;M;M", "homepage": ";;;https://pages.cs.wisc.edu/~fredsala/", "dblp": "362/5934;;;133/3602", "google_scholar": "brolZJEAAAAJ;;;9KhIkNkAAAAJ", "or_profile": "~Satya_Sai_Srinath_Namburi_GNVV1;~Makesh_Narsimhan_Sreedhar1;~Srinath_Srinivasan1;~Frederic_Sala1", "aff": "University of Wisconsin - Madison;;University of Wisconsin - Madison;University of Wisconsin, Madison", "aff_domain": "wisc.edu;;wisc.edu;wisc.edu", "position": "MS student;;Undergrad student;Assistant Professor", "bibtex": "@inproceedings{\ngnvv2023the,\ntitle={The Cost of Compression: Investigating the Impact of Compression on Parametric Knowledge in Language Models},\nauthor={Satya Sai Srinath Namburi GNVV and Makesh Narsimhan Sreedhar and Srinath Srinivasan and Frederic Sala},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=hNSbSaD1WC}\n}", "github": "", "project": "", "reviewers": "E2eZ;xMEC;syfP", "site": "https://openreview.net/forum?id=hNSbSaD1WC", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;3;4", "excitement": "2;3;2", "reproducibility": "3;3;4", "correctness": "3;2;3", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 2.3333333333333335, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 2.6666666666666665, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;", "linkedin": "namburi-gnvv-satya-sai-srinath/;;srinath-srinivasan-a0153b172/;", "aff_unique_index": "0;0;1", "aff_unique_norm": "University of Wisconsin-Madison;University of Wisconsin", "aff_unique_dep": ";", "aff_unique_url": "https://www.wisc.edu;https://www.wisc.edu", "aff_unique_abbr": "UW-Madison;UW", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Madison", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "hPr1QC623H", "title": "Is Probing All You Need? Indicator Tasks as an Alternative to Probing Embedding Spaces", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "The ability to identify and control different kinds of linguistic information encoded in vector representations of words has many use cases, especially for explainability and bias removal. This is usually done via a set of simple classification tasks, termed \\textit{probes}, to evaluate the information encoded in the embedding space. However, the involvement of a trainable classifier leads to entanglement between the probe\u2019s results and the classifier\u2019s nature. As a result, contemporary works on probing include tasks that do not involve training of auxiliary models. In this work we introduce the term \\textit{indicator tasks} for non-trainable tasks which are used to query embedding spaces for the existence of certain properties, and claim that this kind of tasks may point to a direction opposite to probes, and that this contradiction complicates the decision on whether a property exists in an embedding space. 
We demonstrate our claims with two test cases, one dealing with gender debiasing and another with the erasure of morphological information from embedding spaces. We show that the application of a suitable indicator provides a more accurate picture of the information captured and removed compared to probes. We thus conclude that indicator tasks should be implemented and taken into consideration when eliciting information from embedded representations.", "keywords": "Probing;Probe;Indicator;Word Representations;Embedding Space;Interpretability;Context;Social Bias;Morphology;Semantics;Gender;Concept Erasure", "primary_area": "", "supplementary_material": "", "author": "Tal Levy;Omer Goldman;Reut Tsarfaty", "authorids": "~Tal_Levy1;~Omer_Goldman1;~Reut_Tsarfaty1", "gender": "M;;F", "homepage": ";;", "dblp": ";;21/3716", "google_scholar": ";;", "or_profile": "~Tal_Levy1;~Omer_Goldman1;~Reut_Tsarfaty1", "aff": "Bar-Ilan University;;Bar-Ilan University, Technion", "aff_domain": "biu.ac.il;;biu.ac.il", "position": "MS student;;Associate Professor", "bibtex": "@inproceedings{\nlevy2023is,\ntitle={Is Probing All You Need? Indicator Tasks as an Alternative to Probing Embedding Spaces},\nauthor={Tal Levy and Omer Goldman and Reut Tsarfaty},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=hPr1QC623H}\n}", "github": "", "project": "", "reviewers": "8oKZ;p7Cp;VEsr", "site": "https://openreview.net/forum?id=hPr1QC623H", "pdf_size": 0, "rating": "3;3;3", "confidence": "2;2;3", "excitement": "3;4;2", "reproducibility": "3;2;3", "correctness": "3;3;3", "rating_avg": 3.0, "confidence_avg": 2.3333333333333335, "excitement_avg": 3.0, "reproducibility_avg": 2.6666666666666665, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": "www.linkedin.com/in/tal-levy-b467ab15b;;", "aff_unique_index": "0;0", "aff_unique_norm": "Bar-Ilan University", "aff_unique_dep": "", "aff_unique_url": "https://www.biu.ac.il", "aff_unique_abbr": "BIU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Israel" }, { "id": "hRJZIsC9VU", "title": "Introducing Rhetorical Parallelism Detection: A New Task with Datasets, Metrics, and Baselines", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Rhetoric, both spoken and written, involves not only content but also style. One common stylistic tool is $\\textit{parallelism}$: the juxtaposition of phrases which have the same sequence of linguistic ($\\textit{e.g.}$, phonological, syntactic, semantic) features. Despite the ubiquity of parallelism, the field of natural language processing has seldom investigated it, missing a chance to better understand the nature of the structure, meaning, and intent that humans convey. To address this, we introduce the task of $\\textit{rhetorical parallelism detection}$. We construct a formal definition of it; we provide one new Latin dataset and one adapted Chinese dataset for it; we establish a family of metrics to evaluate performance on it; and, lastly, we create baseline systems and novel sequence labeling schemes to capture it. 
On our strictest metric, we attain F$_1$ scores of $0.40$ and $0.43$ on our Latin and Chinese datasets, respectively.", "keywords": "rhetorical parallelism;sequence labeling;NLP;Latin;Chinese;resource", "primary_area": "", "supplementary_material": "", "author": "Stephen Bothwell;Justin DeBenedetto;Theresa Crnkovich;Hildegund Muller;David Chiang", "authorids": "~Stephen_Bothwell1;~Justin_DeBenedetto1;~Theresa_Crnkovich1;~Hildegund_Muller1;~David_Chiang1", "gender": ";;F;M;M", "homepage": "http://www.csc.villanova.edu/~jdeben/;https://classics.nd.edu/graduate-students/ma-in-early-christian-studies/alumni/;https://classics.nd.edu/faculty/hildegund-muller/;https://nd.edu/~dchiang;https://github.com/Mythologos", "dblp": ";;;https://dblp.org/pers/hd/c/Chiang_0001:David;259/4199", "google_scholar": "FRqYgtIAAAAJ;;;dok0514AAAAJ;9J8ntjEAAAAJ", "or_profile": "~Justin_DeBenedetto1;~Theresa_Crnkovich1;~Hildegund_Muller1;~David_Chiang1;~Stephen_Lawrence_Bothwell1", "aff": "Villanova University;University of Notre Dame;Universit\u00e4t Vienna;University of Notre Dame;University of Notre Dame", "aff_domain": "villanova.edu;nd.edu;univie.ac.at;nd.edu;nd.edu", "position": "Assistant Professor;MS student;PhD student;Associate Professor;PhD student", "bibtex": "@inproceedings{\nbothwell2023introducing,\ntitle={Introducing Rhetorical Parallelism Detection: A New Task with Datasets, Metrics, and Baselines},\nauthor={Stephen Bothwell and Justin DeBenedetto and Theresa Crnkovich and Hildegund Muller and David Chiang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=hRJZIsC9VU}\n}", "github": "", "project": "", "reviewers": "MynM;FFhL;J4Zn", "site": "https://openreview.net/forum?id=hRJZIsC9VU", "pdf_size": 0, "rating": "5;5;5", "confidence": "3;3;4", "excitement": "4;4;4", "reproducibility": "5;4;5", "correctness": "5;4;4", "rating_avg": 5.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 4.0, "reproducibility_avg": 4.666666666666667, "correctness_avg": 4.333333333333333, "replies_avg": 10, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0000-0002-0435-4864;0000-0002-0507-9032", "linkedin": ";;;;stephen-bothwell-a6770b167/", "aff_unique_index": "0;1;2;1;1", "aff_unique_norm": "Villanova University;University of Notre Dame;University of Vienna", "aff_unique_dep": ";;", "aff_unique_url": "https://www.villanova.edu;https://www.nd.edu;https://univie.ac.at", "aff_unique_abbr": "Villanova;Notre Dame;UV", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0", "aff_country_unique": "United States;Austria" }, { "id": "hTLIAYTi5w", "title": "ClozEx: A Task toward Generation of English Cloze Explanation", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Providing explanations for cloze questions in language assessment (LA) has been recognized as a valuable approach to enhancing the language proficiency of learners. \nHowever, there is a noticeable absence of dedicated tasks and datasets specifically designed for generating language learner explanations.\nIn response to this gap, this paper introduces a novel task ClozEx of generating explanations for cloze questions in LA, with a particular focus on English as a Second Language (ESL) learners. \nTo support this task, we present a meticulously curated dataset comprising cloze questions paired with corresponding explanations. 
\nThis dataset aims to assess language proficiency and facilitates language learning by offering informative and accurate explanations.\nTo tackle the task, we fine-tuned various baseline models with our training data, including encoder-decoder and decoder-only architectures. \nWe also explored whether large language models (LLMs) are able to generate good explanations without fine-tuning, just using pre-defined prompts.\nThe evaluation results demonstrate that encoder-decoder models have the potential to deliver fluent and valid explanations when trained on our dataset.", "keywords": "language assessment;english education;text generation;dataset", "primary_area": "", "supplementary_material": "", "author": "Zizheng Zhang;Masato Mita;Mamoru Komachi", "authorids": "~Zizheng_Zhang1;~Masato_Mita1;~Mamoru_Komachi1", "gender": "M;M;M", "homepage": ";https://chemicaltree.github.io/;", "dblp": "227/1836;213/1183;88/2433", "google_scholar": "_6yfk60AAAAJ;fkWeWrwAAAAJ;dFPPwwIAAAAJ", "or_profile": "~Zizheng_Zhang1;~Masato_Mita1;~Mamoru_Komachi1", "aff": "Tokyo Metropolitan University;CyberAgent Inc.;Tokyo Metropolitan University, Japan", "aff_domain": "tmu.ac.jp;co.jp;tmu.ac.jp", "position": "PhD student;Researcher;Full Professor", "bibtex": "@inproceedings{\nzhang2023clozex,\ntitle={ClozEx: A Task toward Generation of English Cloze Explanation},\nauthor={Zizheng Zhang and Masato Mita and Mamoru Komachi},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=hTLIAYTi5w}\n}", "github": "", "project": "", "reviewers": "zBsN;xPm4;SEmd", "site": "https://openreview.net/forum?id=hTLIAYTi5w", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;5;3", "excitement": "3;2;4", "reproducibility": "4;4;5", "correctness": "4;3;4", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.0, "reproducibility_avg": 4.333333333333333, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": ";masato-mita-479113105/;mamorlis/", "aff_unique_index": "0;1;0", "aff_unique_norm": "Tokyo Metropolitan University;CyberAgent Inc.", "aff_unique_dep": ";", "aff_unique_url": "https://www.tmuc.ac.jp;https://www.cyberagent.co.jp", "aff_unique_abbr": "TMU;CyberAgent", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Japan" }, { "id": "hUWrmo7nNh", "title": "Bi-Drop: Enhancing Fine-tuning Generalization via Synchronous sub-net Estimation and Optimization", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Pretrained language models have achieved remarkable success in natural language understanding. However, fine-tuning pretrained models on limited training data tends to overfit and thus diminish performance. This paper presents Bi-Drop, a fine-tuning strategy that selectively updates model parameters using gradients from various sub-nets dynamically generated by dropout. The sub-net estimation of Bi-Drop is performed in an in-batch manner, so it overcomes the problem of hysteresis in sub-net updating, which is possessed by previous methods that perform asynchronous sub-net estimation. Also, Bi-Drop needs only one mini-batch to estimate the sub-net so it achieves higher utility of training data. Experiments on the GLUE benchmark demonstrate that Bi-Drop consistently outperforms previous fine-tuning methods. 
Furthermore, empirical results also show that Bi-Drop exhibits excellent generalization ability and robustness for domain transfer, data imbalance, and low-resource scenarios.", "keywords": "pretrained language model;adaptive sub-net Optimization", "primary_area": "", "supplementary_material": "", "author": "Shoujie Tong;Heming Xia;Damai Dai;Runxin Xu;Tianyu Liu;Binghuai Lin;Yunbo Cao;Zhifang Sui", "authorids": "~Shoujie_Tong1;~Heming_Xia1;~Damai_Dai1;~Runxin_Xu2;~Tianyu_Liu3;~Binghuai_Lin1;~Yunbo_Cao3;~Zhifang_Sui1", "gender": ";M;M;M;M;;M;F", "homepage": ";https://hemingkx.github.io/;;;;;;http://eecs.pku.edu.cn/EN/People/Faculty/Detail/?ID=6024", "dblp": "292/7305;278/2940;199/2097;267/5291.html;134/1099-1;146/2946;33/4066.html;", "google_scholar": "LnQoWxEAAAAJ;6r2ESKkAAAAJ;8b-ysf0NWVoC;dRp21l4AAAAJ;https://scholar.google.com.hk/citations?user=6hHbBwwAAAAJ;;nNVDLb4AAAAJ;", "or_profile": "~Shoujie_Tong1;~Heming_Xia1;~Damai_Dai1;~Runxin_Xu2;~Tianyu_Liu3;~Binghuai_Lin1;~Yunbo_Cao3;~Zhifang_Sui1", "aff": ";Peking University;Peking University;Peking University;Tencent Cloud AI (LLM);Tencent;Tencent;Peking University", "aff_domain": ";pku.edu.cn;pku.edu.cn;pku.edu.cn;tencent.com;tencent.com;tencent.com;pku.edu.cn", "position": ";MS student;PhD student;MS student;Senior Researcher;Principal Researcher;Principal Researcher;Full Professor", "bibtex": "@inproceedings{\ntong2023bidrop,\ntitle={Bi-Drop: Enhancing Fine-tuning Generalization via Synchronous sub-net Estimation and Optimization},\nauthor={Shoujie Tong and Heming Xia and Damai Dai and Runxin Xu and Tianyu Liu and Binghuai Lin and Yunbo Cao and Zhifang Sui},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=hUWrmo7nNh}\n}", "github": "", "project": "", "reviewers": "NTTU;uYzG;TJ6o", "site": "https://openreview.net/forum?id=hUWrmo7nNh", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;3;2", "excitement": "3;3;3", "reproducibility": "5;3;4", "correctness": "4;4;3", "rating_avg": 3.0, "confidence_avg": 2.6666666666666665, "excitement_avg": 3.0, "reproducibility_avg": 4.0, "correctness_avg": 3.6666666666666665, "replies_avg": 10, "authors#_avg": 8, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0001-5074-3441;;;;;;", "linkedin": ";;;;;;;", "aff_unique_index": "0;0;0;1;1;1;0", "aff_unique_norm": "Peking University;Tencent", "aff_unique_dep": ";LLM", "aff_unique_url": "http://www.pku.edu.cn;https://cloud.tencent.com", "aff_unique_abbr": "Peking U;Tencent AI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "hWNsvpWfhy", "title": "Localizing Active Objects from Egocentric Vision with Symbolic World Knowledge", "track": "main", "status": "Long Main", "tldr": "", "abstract": "The ability to actively ground task instructions from an egocentric view is crucial for AI agents to accomplish tasks or assist humans virtually.\nOne important step towards this goal is to localize and track key active objects that undergo major state change as a consequence of human actions/interactions to the environment without being told exactly what/where to ground (e.g., localizing and tracking the `sponge` in video from the instruction \"Dip the `sponge` into the bucket.\").\nWhile existing works approach this problem from a pure vision perspective,\nwe investigate to which extent the textual modality (i.e., task instructions) and their interaction with 
visual modality can be beneficial.\nSpecifically, we propose to improve phrase grounding models' ability on localizing the active objects by:\n(1) learning the role of `objects undergoing change` and extracting them accurately from the instructions,\n(2) leveraging pre- and post-conditions of the objects during actions,\nand (3) recognizing the objects more robustly with descriptional knowledge.\nWe leverage large language models (LLMs) to extract the aforementioned action-object knowledge,\nand design a per-object aggregation masking technique to effectively perform joint inference on object phrases and symbolic knowledge.\nWe evaluate our framework on Ego4D and Epic-Kitchens datasets.\nExtensive experiments demonstrate the effectiveness of our proposed framework, which leads to>54% improvements in all standard metrics on the TREK-150-OPE-Det localization + tracking task, >7% improvements in all standard metrics on the TREK-150-OPE tracking task, and >3% improvements in average precision (AP) on the Ego4D SCOD task.", "keywords": "Object state change;Pre-conditions;Post-conditions;Egocentric videos;Active grounding;Multimodal", "primary_area": "", "supplementary_material": "", "author": "Te-Lin Wu;Yu Zhou;Nanyun Peng", "authorids": "~Te-Lin_Wu1;~Yu_Zhou20;~Nanyun_Peng1", "gender": "M;M;F", "homepage": "https://telin0411.github.io/;https://yu-bryan-zhou.github.io/;https://violetpeng.github.io/", "dblp": "166/3298;36/2728-30.html;117/4036", "google_scholar": "Q5aezXQAAAAJ;61rJc-YAAAAJ;XxRXvX0AAAAJ", "or_profile": "~Te-Lin_Wu1;~Yu_Zhou20;~Nanyun_Peng1", "aff": "University of California, Los Angeles;University of California, Los Angeles;University of California, Los Angeles", "aff_domain": "cs.ucla.edu;ucla.edu;ucla.edu", "position": "PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nwu2023localizing,\ntitle={Localizing Active Objects from Egocentric Vision with Symbolic World Knowledge},\nauthor={Te-Lin Wu and Yu Zhou and Nanyun Peng},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=hWNsvpWfhy}\n}", "github": "", "project": "", "reviewers": "h4o5;5jwp;MyHn", "site": "https://openreview.net/forum?id=hWNsvpWfhy", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;5;3", "excitement": "3;3;4", "reproducibility": "4;4;4", "correctness": "4;3;4", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.0, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-2593-9167;", "linkedin": "telinwu/;yu-zhou-997359178/;", "aff_unique_index": "0;0;0", "aff_unique_norm": "University of California, Los Angeles", "aff_unique_dep": "", "aff_unique_url": "https://www.ucla.edu", "aff_unique_abbr": "UCLA", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Los Angeles", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "hXXyBtlo4D", "title": "MMNMT: Modularizing Multilingual Neural Machine Translation with Flexibly Assembled MoE and Dense Blocks", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Mixture-of-Experts (MoE) based sparse architectures can significantly increase model capacity with sublinear computational overhead, which are hence widely used in massively multilingual neural machine translation (MNMT). However, they are prone to overfitting on low-resource language translation. 
\nIn this paper, we propose a modularized MNMT framework that is able to flexibly assemble dense and MoE-based sparse modules to achieve the best of both worlds. The training strategy of the modularized MNMT framework consists of three stages: (1) Pre-training basic MNMT models with different training objectives or model structures, (2) Initializing modules of the framework with pre-trained counterparts (e.g., encoder, decoder and embedding layers) from the basic models and (3) Fine-tuning the modularized MNMT framework to fit modules from different models together. We pre-train three basic MNMT models from scratch: a dense model, an MoE-based sparse model and a new MoE model, termed as MoE-LGR that explores multiple Language-Group-specific Routers to incorporate language group knowledge into MNMT. The strengths of these pre-trained models are either on low-resource language translation, high-resource language translation or zero-shot translation. Our modularized MNMT framework attempts to incorporate these advantages into a single model with reasonable initialization and fine-tuning. Experiments on widely-used benchmark datasets demonstrate that the proposed modularized MNMT framework substantially outperforms both MoE and dense models on high- and low-resource language translation as well as zero-shot translation. Our framework facilitates the combination of different methods with their own strengths and recycling off-the-shelf models for multilingual neural machine translation. Codes are available at https://github.com/lishangjie1/MMNMT.", "keywords": "MoE;Multilingual Machine Translation;Modularizing", "primary_area": "", "supplementary_material": "", "author": "Shangjie Li;Xiangpeng Wei;shaolin Zhu;Jun Xie;Baosong Yang;Deyi Xiong", "authorids": "~Shangjie_Li1;~Xiangpeng_Wei1;~shaolin_Zhu1;~Jun_Xie9;~Baosong_Yang1;~Deyi_Xiong2", "gender": ";M;M;M;M;Not Specified", "homepage": ";https://pemywei.github.io/;https://zsl-nlp.github.io/;https://baosongyang.site/;https://dyxiong.github.io;", "dblp": ";220/9947;206/8937;203/8245;55/6548;", "google_scholar": ";KnLk78UAAAAJ;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com.tw/citations?user=fXsHJXkAAAAJ;QPLO3myO5PkC;YjuM2GsAAAAJ", "or_profile": "~Shangjie_Li1;~Xiangpeng_Wei1;~shaolin_Zhu1;~Baosong_Yang1;~Deyi_Xiong2;~jun_xie5", "aff": "Tianjin University;Alibaba Group;Tianjin University;Alibaba Group;Tianjin University;Alibaba DAMO Academy", "aff_domain": "tju.edu.cn;alibaba-inc.com;tju.edu.cn;alibaba-inc.com;tju.edu.cn;alibaba-inc.com", "position": "MS student;Researcher;Assistant Professor;Researcher;Full Professor;Principal Researcher", "bibtex": "@inproceedings{\nli2023mmnmt,\ntitle={{MMNMT}: Modularizing Multilingual Neural Machine Translation with Flexibly Assembled MoE and Dense Blocks},\nauthor={Shangjie Li and Xiangpeng Wei and shaolin Zhu and Jun Xie and Baosong Yang and Deyi Xiong},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=hXXyBtlo4D}\n}", "github": "", "project": "", "reviewers": "E1x3;bR2A;yF46", "site": "https://openreview.net/forum?id=hXXyBtlo4D", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;3;3", "excitement": "3;4;4", "reproducibility": "3;3;4", "correctness": "4;4;3", "rating_avg": 4.0, "confidence_avg": 3.0, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, 
"corr_rating_correctness": 0.0, "orcid": ";;;;0000-0002-2353-5038;", "linkedin": "shangjie-li-b130a5226;;;;;", "aff_unique_index": "0;1;0;1;0;1", "aff_unique_norm": "Tianjin University;Alibaba Group", "aff_unique_dep": ";", "aff_unique_url": "http://www.tju.edu.cn;https://www.alibaba.com", "aff_unique_abbr": "TJU;Alibaba", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "haPIkA8aOk", "title": "Adaptation with Self-Evaluation to Improve Selective Prediction in LLMs", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Large language models (LLMs) have recently shown great advances in a variety of tasks, including natural language understanding and generation. However, their use in high-stakes decision-making scenarios is still limited due to the potential for errors. *Selective prediction* is a technique that can be used to improve the reliability of the LLMs by allowing them to abstain from making predictions when they are unsure of the answer. In this work, we propose a novel framework for adaptation with self-evaluation to improve the selective prediction performance of LLMs. Our framework is based on the idea of using parameter-efficient tuning to adapt the LLM to the specific task at hand while improving its ability to perform self-evaluation. We evaluate our method on a variety of question-answering (QA) datasets and show that it outperforms state-of-the-art selective prediction methods. For example, on the CoQA benchmark, our method improves the AUACC from 91.23\\% to 92.63\\% and improves the AUROC from 74.61\\% to 80.25\\%.", "keywords": "large language models;selective prediction;adaptation with self-evaluation", "primary_area": "", "supplementary_material": "", "author": "Jiefeng Chen;Jinsung Yoon;Sayna Ebrahimi;Sercan O Arik;Tomas Pfister;Somesh Jha", "authorids": "~Jiefeng_Chen2;~Jinsung_Yoon1;~Sayna_Ebrahimi1;~Sercan_O_Arik1;~Tomas_Pfister1;~Somesh_Jha1", "gender": "M;M;F;M;M;M", "homepage": "https://jfc43.github.io/;https://sites.google.com/corp/view/jinsungyoon;https://saynaebrahimi.github.io/;https://www.sercanarik.com/;http://tomas.pfister.fi;", "dblp": "199/3381;173/5409.html;207/7584;;14/8360;j/SomeshJha", "google_scholar": "5mOfQfAAAAAJ;kiFd6A8AAAAJ;wRyjJfMAAAAJ;;ahSpJOAAAAAJ;BaI7l8QAAAAJ", "or_profile": "~Jiefeng_Chen2;~Jinsung_Yoon1;~Sayna_Ebrahimi1;~Sercan_O_Arik1;~Tomas_Pfister1;~Somesh_Jha1", "aff": "University of Wisconsin, Madison;Google;Google;Google;Google;Department of Computer Science, University of Wisconsin, Madison", "aff_domain": "wisc.edu;google.com;google.com;google.com;google.com;cs.wisc.edu", "position": "PhD student;Research Scientist;Research Scientist;Research Scientist;Head of Research @ Cloud AI;Full Professor", "bibtex": "@inproceedings{\nchen2023adaptation,\ntitle={Adaptation with Self-Evaluation to Improve Selective Prediction in {LLM}s},\nauthor={Jiefeng Chen and Jinsung Yoon and Sayna Ebrahimi and Sercan O Arik and Tomas Pfister and Somesh Jha},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=haPIkA8aOk}\n}", "github": "", "project": "", "reviewers": "7xz9;x6AJ;1jxs", "site": "https://openreview.net/forum?id=haPIkA8aOk", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;4", "excitement": "2;3;3", "reproducibility": "4;4;3", "correctness": "3;4;3", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 2.6666666666666665, 
"reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0000-0001-6333-1729;0009-0004-4088-8718;", "linkedin": "jiefeng-chen-aa1769122/;jinsung-yoon-bb7751b8;saynaebrahimi/;;;", "aff_unique_index": "0;1;1;1;1;2", "aff_unique_norm": "University of Wisconsin;Google;University of Wisconsin-Madison", "aff_unique_dep": ";Google;Department of Computer Science", "aff_unique_url": "https://www.wisc.edu;https://www.google.com;https://www.wisc.edu", "aff_unique_abbr": "UW;Google;UW-Madison", "aff_campus_unique_index": "0;1;1;1;1;0", "aff_campus_unique": "Madison;Mountain View", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "hcDE6sOEfu", "title": "Ignore This Title and HackAPrompt: Exposing Systemic Vulnerabilities of LLMs Through a Global Prompt Hacking Competition", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Large Language Models (LLMs) are increasingly being deployed in interactive contexts that involve direct user engagement, such as chatbots and writing assistants. These deployments are increasingly plagued by prompt injection and jailbreaking (collectively, prompt hacking), in which models are manipulated to ignore their original instructions and instead follow potentially malicious ones. Although widely acknowledged as a significant security threat, there is a dearth of a large-scale resource and quantitative study on prompt hacking. To address this lacuna, we launch a global prompt hacking competition, which allows for free-form human input attacks. We elicit 600K+ adversarial prompts against three state-of-the-art LLMs. We describe the dataset, which empirically verifies that current LLMs can indeed be manipulated via prompt hacking. 
We also present a comprehensive ontology of the types of adversarial prompts.", "keywords": "Ignore This Title: Expose Systemic Vulnerabilities of LLMs through a Global Scale Prompt Hacking Competition", "primary_area": "", "supplementary_material": "", "author": "Sander V Schulhoff;Jeremy Pinto;Anaum Khan;Louis-Fran\u00e7ois Bouchard;Chenglei Si;Svetlina Anati;Valen Tagliabue;Anson Liu Kost;Christopher R Carnahan;Jordan Lee Boyd-Graber", "authorids": "~Sander_V_Schulhoff1;~Jeremy_Pinto1;~Anaum_Khan1;~Louis-Fran\u00e7ois_Bouchard1;~Chenglei_Si1;~Svetlina_Anati1;~Valen_Tagliabue1;~Anson_Liu_Kost1;~Christopher_R_Carnahan1;~Jordan_Lee_Boyd-Graber1", "gender": "M;M;F;M;M;F;;;M;M", "homepage": "https://trigaten.github.io;https://www.jerpint.io/;;https://www.louisbouchard.ai;https://noviscl.github.io/;;;http://GitHub.com/schrodingers-turtle;https://github.com/realSpaceKangaroo;http://boydgraber.org", "dblp": ";;;;251/8778;;;;;57/5950", "google_scholar": ";;;45O9knIAAAAJ;https://scholar.google.com.sg/citations?user=CyKr1q8AAAAJ;;;;;BT4XTP4AAAAJ", "or_profile": "~Sander_V_Schulhoff1;~Jeremy_Pinto1;~Anaum_Khan1;~Louis-Fran\u00e7ois_Bouchard1;~Chenglei_Si1;~Svetlina_Anati1;~Valen_Tagliabue1;~Anson_Liu_Kost1;~Christopher_R_Carnahan1;~Jordan_Lee_Boyd-Graber1", "aff": "University of Maryland, College Park;Montreal Institute for Learning Algorithms, University of Montreal, University of Montreal;University of Maryland, College Park;Montreal Institute for Learning Algorithms, University of Montreal, Universit\u00e9 de Montr\u00e9al;Stanford University;;;New York University;;University of Maryland, College Park", "aff_domain": "umd.edu;mila.umontreal.ca;umd.edu;mila.umontreal.ca;stanford.edu;;;nyu.edu;;umd.edu", "position": "Undergrad student;Applied Research Scientist;Undergrad student;PhD student;PhD student;;;PhD student;;Associate Professor", "bibtex": "@inproceedings{\nschulhoff2023ignore,\ntitle={Ignore This Title and Hack{AP}rompt: Exposing Systemic Vulnerabilities of {LLM}s Through a Global Prompt Hacking Competition},\nauthor={Sander V Schulhoff and Jeremy Pinto and Anaum Khan and Louis-Fran{\\c{c}}ois Bouchard and Chenglei Si and Svetlina Anati and Valen Tagliabue and Anson Liu Kost and Christopher R Carnahan and Jordan Lee Boyd-Graber},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=hcDE6sOEfu}\n}", "github": "", "project": "", "reviewers": "iFTb;Tp3Q;UoYj", "site": "https://openreview.net/forum?id=hcDE6sOEfu", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;5;4", "excitement": "4;5;5", "reproducibility": "4;0;3", "correctness": "4;4;5", "rating_avg": 5.0, "confidence_avg": 4.333333333333333, "excitement_avg": 4.666666666666667, "reproducibility_avg": 2.3333333333333335, "correctness_avg": 4.333333333333333, "replies_avg": 11, "authors#_avg": 10, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;;;;;0000-0002-7770-4431", "linkedin": ";jeremy-pinto/;anaumkhan/;whats-ai/;;svetlina-anati;valen-tagliabue-7a1870228/;;;jordan-boyd-graber-99a83994", "aff_unique_index": "0;1;0;1;2;3;0", "aff_unique_norm": "University of Maryland;University of Montreal;Stanford University;New York University", "aff_unique_dep": ";Montreal Institute for Learning Algorithms;;", "aff_unique_url": "https://www/umd.edu;https://www.umontreal.ca;https://www.stanford.edu;https://www.nyu.edu", "aff_unique_abbr": "UMD;UM;Stanford;NYU", "aff_campus_unique_index": "0;1;0;1;2;0", "aff_campus_unique": "College 
Park;Montreal;Stanford;", "aff_country_unique_index": "0;1;0;1;0;0;0", "aff_country_unique": "United States;Canada" }, { "id": "hdxMdgKddK", "title": "DocTrack: A Visually-Rich Document Dataset Really Aligned with Human Eye Movement for Machine Reading", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "The use of visually-rich documents in various fields has created a demand for Document AI models that can read and comprehend documents like humans, which requires the overcoming of technical, linguistic, and cognitive barriers. Unfortunately, the lack of appropriate datasets has significantly hindered advancements in the field. To address this issue, we introduce DocTrack, a visually-rich document dataset really aligned with human eye-movement information using eye-tracking technology. This dataset can be used to investigate the challenges mentioned above. Additionally, we explore the impact of human reading order on document understanding tasks and examine what would happen if a machine reads in the same order as a human. Our results suggest that although Document AI models have made significant progress, they still have a long way to go before they can read visually richer documents as accurately, continuously, and flexibly as humans do. These findings have potential implications for future research and development of document intelligence.", "keywords": "visually-rich document;dataset;eye tracking;human reading order;preordering", "primary_area": "", "supplementary_material": "", "author": "Hao Wang;Qingxuan Wang;Yue Li;Changqing Wang;Chenhui Chu;Rui Wang", "authorids": "~Hao_Wang23;~Qingxuan_Wang1;~Yue_Li13;~Changqing_Wang1;~Chenhui_Chu1;~Rui_Wang10", "gender": "M;M;;M;M;M", "homepage": "https://hint-lab.github.io/people/wang_hao;https://xuan-0612.github.io/;https://github.com/yli0623;https://wcq1744352243.github.io/wcq111.github.io/;http://researchmap.jp/chu/?lang=en;https://wangruinlp.github.io/", "dblp": "181/2812;;;;126/8755;w/RuiWang15", "google_scholar": ";;;;https://scholar.google.co.jp/citations?user=6ef0qbgAAAAJ;oTU0v5IAAAAJ", "or_profile": "~Hao_Wang23;~Qingxuan_Wang1;~Yue_Li13;~Changqing_Wang1;~Chenhui_Chu1;~Rui_Wang7", "aff": "Shanghai University;Shanghai University;Shanghai University;Shanghai University;Kyoto University;Shanghai Jiaotong University", "aff_domain": "shu.edu.cn;shu.edu.cn;shu.edu.cn;shu.edu.cn;kyoto-u.ac.jp;sjtu.edu.cn", "position": "Assistant Professor;MS student;Undergrad student;MS student;Associate Professor;Associate Professor", "bibtex": "@inproceedings{\nwang2023doctrack,\ntitle={DocTrack: A Visually-Rich Document Dataset Really Aligned with Human Eye Movement for Machine Reading},\nauthor={Hao Wang and Qingxuan Wang and Yue Li and Changqing Wang and Chenhui Chu and Rui Wang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=hdxMdgKddK}\n}", "github": "", "project": "", "reviewers": "sdSf;hWn3;2oER;ku22", "site": "https://openreview.net/forum?id=hdxMdgKddK", "pdf_size": 0, "rating": "4;4;4;4", "confidence": "2;3;4;4", "excitement": "4;4;3;3", "reproducibility": "3;4;4;5", "correctness": "3;3;4;3", "rating_avg": 4.0, "confidence_avg": 3.25, "excitement_avg": 3.5, "reproducibility_avg": 4.0, "correctness_avg": 3.25, "replies_avg": 14, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-1089-9828;;;;0000-0001-9848-6384;0000-0001-8007-2503", "linkedin": ";;;;;", "aff_unique_index": "0;0;0;0;1;2", 
"aff_unique_norm": "Shanghai University;Kyoto University;Shanghai Jiao Tong University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.shu.edu.cn;https://www.kyoto-u.ac.jp;https://www.sjtu.edu.cn", "aff_unique_abbr": "SHU;Kyoto U;SJTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;1;0", "aff_country_unique": "China;Japan" }, { "id": "hfZKiBh4zS", "title": "MPrompt: Exploring Multi-level Prompt Tuning for Machine Reading Comprehension", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "The large language models have achieved superior performance on various natural language tasks. One major drawback of such approaches is they are resource-intensive in fine-tuning new datasets. Soft-prompt tuning presents a resource-efficient solution to fine-tune the pre-trained language models (PLMs) while keeping their weight frozen. Existing soft prompt methods mainly focus on designing the input-independent prompts that steer the model to fit the domain of the new dataset. Those methods often ignore the fine-grained information about the task and context of the text. In this paper, we propose a multi-level prompt tuning (MPrompt) method for machine reading comprehension. It utilizes prompts at task-specific, domain-specific, and context-specific levels to enhance the comprehension of input semantics at different granularities. We also propose an independence constraint to steer each domain-specific prompt to focus on information within its domain to avoid redundancy. Moreover, we present a prompt generator that incorporates context-related knowledge in the prompt generation to enhance contextual relevancy. We conducted extensive experiments on 12 benchmarks of various QA formats and achieved an average improvement of 1.94\\% over the state-of-the-art methods.", "keywords": "Question Answering;Prompt Learning;Soft Prompt;Machine Reading Comprehension", "primary_area": "", "supplementary_material": "", "author": "Guoxin Chen;Yiming Qian;Bowen Wang;Liangzhi Li", "authorids": "~Guoxin_Chen1;~Yiming_Qian6;~Bowen_Wang1;~Liangzhi_Li1", "gender": ";M;M;M", "homepage": ";;https://www.bowen-wang.com/home;", "dblp": ";;64/4732;169/4123", "google_scholar": "I6EjtN0AAAAJ;gmpm0a8AAAAJ;hB4K5UMAAAAJ;JIRw_tMAAAAJ", "or_profile": "~Guoxin_Chen1;~Yiming_Qian6;~Bowen_Wang1;~Liangzhi_Li1", "aff": "Institute of Computing Technology, Chinese Academy of Sciences;Institute of High Performance Computing, Singapore, A*STAR;Osaka University;Osaka University", "aff_domain": "ict.ac.cn;ihpc.a-star.edu.sg;osaka-u.ac.jp;osaka-u.ac.jp", "position": "MS student;Researcher;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nchen2023mprompt,\ntitle={{MP}rompt: Exploring Multi-level Prompt Tuning for Machine Reading Comprehension},\nauthor={Guoxin Chen and Yiming Qian and Bowen Wang and Liangzhi Li},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=hfZKiBh4zS}\n}", "github": "", "project": "", "reviewers": "Pw7P;2zHj;Jx92", "site": "https://openreview.net/forum?id=hfZKiBh4zS", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;5", "excitement": "3;4;3", "reproducibility": "4;5;4", "correctness": "3;4;4", "rating_avg": 4.0, "confidence_avg": 4.333333333333333, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.333333333333333, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, 
"corr_rating_correctness": 0.0, "orcid": "0000-0001-9000-4782;0000-0002-1795-2038;0000-0002-2911-5595;", "linkedin": ";;;", "aff_unique_index": "0;1;2;2", "aff_unique_norm": "Chinese Academy of Sciences;Institute of High Performance Computing;Osaka University", "aff_unique_dep": "Institute of Computing Technology;;", "aff_unique_url": "http://www.ict.ac.cn;https://www.ihpc.a-star.edu.sg;https://www.osaka-u.ac.jp", "aff_unique_abbr": "CAS;IHPC;Osaka U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2;2", "aff_country_unique": "China;Singapore;Japan" }, { "id": "hfmmVWJecp", "title": "Non-Compositionality in Sentiment: New Data and Analyses", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "When natural language phrases are combined, their meaning is often more than the sum of their parts.\nIn the context of NLP tasks such as sentiment analysis, where the meaning of a phrase is its sentiment, that still applies.\nMany NLP studies on sentiment analysis, however, focus on the fact that sentiment computations are largely compositional.\nWe, instead, set out to obtain non-compositionality ratings for phrases with respect to their sentiment.\nOur contributions are as follows: a) a methodology for obtaining those non-compositionality ratings, b) a resource of ratings for 259 phrases \u2013 NonCompSST \u2013 along with an analysis of that resource, and c) an evaluation of computational models for sentiment analysis using this new resource.", "keywords": "sentiment analysis;compositionality;data annotation", "primary_area": "", "supplementary_material": "", "author": "Verna Dankers;Christopher G. Lucas", "authorids": "~Verna_Dankers1;~Christopher_G._Lucas1", "gender": "F;", "homepage": "https://vernadankers.com;http://christopherglucas.com", "dblp": "242/7711;69/3093", "google_scholar": "https://scholar.google.nl/citations?hl=en;", "or_profile": "~Verna_Dankers1;~Christopher_G._Lucas1", "aff": "University of Edinburgh;University of Edinburgh, University of Edinburgh", "aff_domain": "ed.ac.uk;ed.ac.uk", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\ndankers2023noncompositionality,\ntitle={Non-Compositionality in Sentiment: New Data and Analyses},\nauthor={Verna Dankers and Christopher G. 
Lucas},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=hfmmVWJecp}\n}", "github": "", "project": "", "reviewers": "btm9;SRwS;zamx", "site": "https://openreview.net/forum?id=hfmmVWJecp", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;3;4", "excitement": "4;3;4", "reproducibility": "3;5;5", "correctness": "3;3;5", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.333333333333333, "correctness_avg": 3.6666666666666665, "replies_avg": 9, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";", "linkedin": "verna-dankers-27396511b/;", "aff_unique_index": "0;0", "aff_unique_norm": "University of Edinburgh", "aff_unique_dep": "", "aff_unique_url": "https://www.ed.ac.uk", "aff_unique_abbr": "Edinburgh", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United Kingdom" }, { "id": "hgF8In32gL", "title": "Text encoders bottleneck compositionality in contrastive vision-language models", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Performant vision-language (VL) models like CLIP represent captions using a single vector. How much information about language is lost in this bottleneck? We first curate CompPrompts, a set of increasingly compositional image captions that VL models should be able to capture (e.g., single object, to object+property, to multiple interacting objects). Then, we train text-only recovery probes that aim to reconstruct captions from single-vector text representations produced by several VL models. This approach does not require images, allowing us to test on a broader range of scenes compared to prior work. We find that: 1) CLIP\u2019s text encoder falls short on more compositional inputs, including object relationships, attribute-object association, counting, and negations; 2) some text encoders work significantly better than others; and 3) text-only recovery performance predicts multimodal matching performance on ControlledImCaps: a new evaluation benchmark we collect and release consisting of fine-grained compositional images and captions. Specifically, our results suggest text-only recoverability is a necessary (but not sufficient) condition for modeling compositional factors in contrastive VL models. 
We release our datasets and code.", "keywords": "vision-language;text encoders;interpretability", "primary_area": "", "supplementary_material": "", "author": "Amita Kamath;Jack Hessel;Kai-Wei Chang", "authorids": "~Amita_Kamath1;~Jack_Hessel1;~Kai-Wei_Chang1", "gender": "F;M;M", "homepage": "https://amitakamath.github.io/;https://www.jmhessel.com;http://kwchang.net", "dblp": "267/9823;https://dblp.uni-trier.de/pid/132/5250.html;18/2428", "google_scholar": "B_ek5IIAAAAJ;SxQQ1msAAAAJ;fqDBtzYAAAAJ", "or_profile": "~Amita_Kamath1;~Jack_Hessel1;~Kai-Wei_Chang1", "aff": "UCLA Computer Science Department, University of California, Los Angeles;Allen Institute for Artificial Intelligence;Amazon", "aff_domain": "cs.ucla.edu;allenai.org;amazon.com", "position": "PhD student;Researcher;Researcher", "bibtex": "@inproceedings{\nkamath2023text,\ntitle={Text encoders bottleneck compositionality in contrastive vision-language models},\nauthor={Amita Kamath and Jack Hessel and Kai-Wei Chang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=hgF8In32gL}\n}", "github": "", "project": "", "reviewers": "n92c;uEYG;iU94", "site": "https://openreview.net/forum?id=hgF8In32gL", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;4;3", "excitement": "4;4;3", "reproducibility": "4;5;4", "correctness": "4;4;3", "rating_avg": 5.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.333333333333333, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-4012-8979;0000-0001-5365-0072", "linkedin": ";;kai-wei-chang-41239040", "aff_unique_index": "0;1;2", "aff_unique_norm": "University of California, Los Angeles;Allen Institute for Artificial Intelligence;Amazon", "aff_unique_dep": "Computer Science Department;;Amazon.com, Inc.", "aff_unique_url": "https://www.ucla.edu;https://allenai.org;https://www.amazon.com", "aff_unique_abbr": "UCLA;AI2;Amazon", "aff_campus_unique_index": "0", "aff_campus_unique": "Los Angeles;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "hiJ2hzwghq", "title": "BiasX: \u201cThinking Slow\u201d in Toxic Content Moderation with Explanations of Implied Social Biases", "track": "main", "status": "Short Main", "tldr": "", "abstract": "Toxicity annotators and content moderators often default to mental shortcuts when making decisions. This can lead to subtle toxicity being missed, and seemingly toxic but harmless content being over-detected. We introduce BiasX, a framework that enhances content moderation setups with free-text explanations of statements' implied social biases, and explore its effectiveness through a large-scale crowdsourced user study. We show that indeed, participants substantially benefit from explanations for correctly identifying subtly (non-)toxic content. The quality of explanations is critical: imperfect machine-generated explanations (+2.4% on hard toxic examples) help less compared to expert-written human explanations (+7.2%). 
Our results showcase the promise of using free-text explanations to encourage more thoughtful toxicity moderation.", "keywords": "Social biases;Toxicity moderation;Human-AI collaboration;Free-text explanations", "primary_area": "", "supplementary_material": "", "author": "Yiming Zhang;Sravani Uttara Nanduri;Liwei Jiang;Tongshuang Wu;Maarten Sap", "authorids": "~Yiming_Zhang5;~Sravani_Uttara_Nanduri1;~Liwei_Jiang2;~Tongshuang_Wu1;~Maarten_Sap1", "gender": "M;F;F;F;M", "homepage": "http://y0mingzhang.github.io/;https://github.com/nandsra21/;https://liweijiang.me;http://cs.cmu.edu/~sherryw;http://maartensap.com", "dblp": "76/5416-22;;;179/3791;153/9519", "google_scholar": ";;lcPsDgUAAAAJ;CeQd_DsAAAAJ;gFN4QUYAAAAJ", "or_profile": "~Yiming_Zhang5;~Sravani_Uttara_Nanduri1;~Liwei_Jiang2;~Tongshuang_Wu1;~Maarten_Sap1", "aff": "School of Computer Science, Carnegie Mellon University;University of Washington;University of Washington;School of Computer Science, Carnegie Mellon University;Carnegie Mellon University", "aff_domain": "cs.cmu.edu;uw.edu;washington.edu;cs.cmu.edu;cmu.edu", "position": "PhD student;Undergrad student;PhD student;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nzhang2023biasx,\ntitle={BiasX: {\\textquotedblleft}Thinking Slow{\\textquotedblright} in Toxic Content Moderation with Explanations of Implied Social Biases},\nauthor={Yiming Zhang and Sravani Uttara Nanduri and Liwei Jiang and Tongshuang Wu and Maarten Sap},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=hiJ2hzwghq}\n}", "github": "", "project": "", "reviewers": "rNFj;5KD6;7QKF", "site": "https://openreview.net/forum?id=hiJ2hzwghq", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;4;3", "excitement": "4;3;4", "reproducibility": "4;4;3", "correctness": "3;5;4", "rating_avg": 5.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;", "linkedin": ";sravani-nanduri-issaquah-2021/;;;", "aff_unique_index": "0;1;1;0;0", "aff_unique_norm": "Carnegie Mellon University;University of Washington", "aff_unique_dep": "School of Computer Science;", "aff_unique_url": "https://www.cmu.edu;https://www.washington.edu", "aff_unique_abbr": "CMU;UW", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Pittsburgh;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "hjEnagXGYV", "title": "Time-Considerable Dialogue Models via Reranking by Time Dependency", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "In the last few years, generative dialogue models have shown excellent performance and have been used for various applications. As chatbots become more prevalent in our daily lives, more and more people expect them to behave more like humans, but existing dialogue models do not consider the time information that people are constantly aware of. In this paper, we aim to construct a time-considerable dialogue model that actively utilizes time information. First, we categorize responses by their naturalness at different times and introduce a new metric to classify responses into our categories. 
Then, we propose a new reranking method to make the existing dialogue model time-considerable using the proposed metric and subjectively evaluate the performances of the obtained time-considerable dialogue models by humans.", "keywords": "dialogue model;dialogue system;response generation;time information", "primary_area": "", "supplementary_material": "", "author": "Yuiko Tsunomori;Masakazu Ishihata;Hiroaki Sugiyama", "authorids": "~Yuiko_Tsunomori1;~Masakazu_Ishihata1;~Hiroaki_Sugiyama1", "gender": "F;M;M", "homepage": ";https://sites.google.com/site/masakazuishihata;", "dblp": "195/4988;16/7441;87/9254", "google_scholar": ";https://scholar.google.co.jp/citations?user=XpszkVIAAAAJ;https://scholar.google.co.jp/citations?user=hDyEfPYAAAAJ", "or_profile": "~Yuiko_Tsunomori1;~Masakazu_Ishihata1;~Hiroaki_Sugiyama1", "aff": "Nagoya University;NTT Communication Science Laboratories;NTT, Japan", "aff_domain": "nagoya-u.ac.jp;ntt.com;ntt.co.jp", "position": "PhD student;Researcher;Researcher", "bibtex": "@inproceedings{\ntsunomori2023timeconsiderable,\ntitle={Time-Considerable Dialogue Models via Reranking by Time Dependency},\nauthor={Yuiko Tsunomori and Masakazu Ishihata and Hiroaki Sugiyama},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=hjEnagXGYV}\n}", "github": "", "project": "", "reviewers": "GNUY;eL5Z;Ytjk", "site": "https://openreview.net/forum?id=hjEnagXGYV", "pdf_size": 0, "rating": "3;3;3", "confidence": "2;3;3", "excitement": "3;3;4", "reproducibility": "2;3;3", "correctness": "3;3;3", "rating_avg": 3.0, "confidence_avg": 2.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 2.6666666666666665, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": ";;", "aff_unique_index": "0;1;2", "aff_unique_norm": "Nagoya University;NTT Communication Science Laboratories;NTT", "aff_unique_dep": ";;", "aff_unique_url": "https://www.nagoya-u.ac.jp;https://www.ntt-csl.com;https://www.ntt.co.jp", "aff_unique_abbr": "Nagoya U;NTT CSL;NTT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Japan" }, { "id": "hl6TVdQjeh", "title": "Robustness Tests for Automatic Machine Translation Metrics with Adversarial Attacks", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "We investigate MT evaluation metric performance on adversarially-synthesized texts, to shed light on metric robustness. We experiment with word- and character-level attacks on three popular machine translation metrics: BERTScore, BLEURT, and COMET. Our human experiments validate that automatic metrics tend to overpenalize adversarially-degraded translations. We also identify inconsistencies in BERTScore ratings, where it judges the original sentence and the adversarially-degraded one as similar, while judging the degraded translation as notably worse than the original with respect to the reference. 
We identify patterns of brittleness that motivate more robust metric development.", "keywords": "Automatic metric;textual adversarial attack;machine translation", "primary_area": "", "supplementary_material": "", "author": "Yichen Huang;Timothy Baldwin", "authorids": "~Yichen_Huang1;~Timothy_Baldwin1", "gender": "M;", "homepage": "https://www.yichenwilliamhuang.com/;https://eltimster.github.io/www/", "dblp": ";65/4863", "google_scholar": ";wjBD1dkAAAAJ", "or_profile": "~Yichen_Huang1;~Timothy_Baldwin1", "aff": "Mohamed bin Zayed University of Artificial Intelligence;The University of Melbourne", "aff_domain": "mbzuai.ac.ae;unimelb.edu.au", "position": "MS student;Full Professor", "bibtex": "@inproceedings{\nhuang2023robustness,\ntitle={Robustness Tests for Automatic Machine Translation Metrics with Adversarial Attacks},\nauthor={Yichen Huang and Timothy Baldwin},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=hl6TVdQjeh}\n}", "github": "", "project": "", "reviewers": "JiPk;1kEg;cxZu", "site": "https://openreview.net/forum?id=hl6TVdQjeh", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;4", "excitement": "3;3;3", "reproducibility": "2;4;4", "correctness": "3;3;3", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 3.0, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0003-4525-6950", "linkedin": ";", "aff_unique_index": "0;1", "aff_unique_norm": "Mohamed bin Zayed University of Artificial Intelligence;University of Melbourne", "aff_unique_dep": ";", "aff_unique_url": "https://mbzuai.ac.ae;https://www.unimelb.edu.au", "aff_unique_abbr": "MBZUAI;UniMelb", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "United Arab Emirates;Australia" }, { "id": "hlqIu07ics", "title": "Towards a Mechanistic Interpretation of Multi-Step Reasoning Capabilities of Language Models", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Recent work has shown that language models (LMs) have strong multi-step (i.e., procedural) reasoning capabilities. However, it is unclear whether LMs perform these tasks by cheating with answers memorized from pretraining corpus, or, via a multi-step reasoning mechanism. In this paper, we try to answer this question by exploring a mechanistic interpretation of LMs for multi-step reasoning tasks. Concretely, we hypothesize that the LM implicitly embeds a reasoning tree resembling the correct reasoning process within it. We test this hypothesis by introducing a new probing approach (called MechanisticProbe) that recovers the reasoning tree from the model\u2019s attention patterns. We use our probe to analyze two LMs: GPT-2 on a synthetic task (k-th smallest element), and LLaMA on two simple language-based reasoning tasks (ProofWriter \\& AI2 Reasoning Challenge). 
We show that MechanisticProbe is able to detect the information of the reasoning tree from the model\u2019s attentions for most examples, suggesting that the LM indeed is going through a process of multi-step reasoning within its architecture in many cases.", "keywords": "model interpretation;reasoning;attention mechanism;large language model", "primary_area": "", "supplementary_material": "", "author": "Yifan Hou;Jiaoda Li;Yu Fei;Alessandro Stolfo;Wangchunshu Zhou;Guangtao Zeng;Antoine Bosselut;Mrinmaya Sachan", "authorids": "~Yifan_Hou1;~Jiaoda_Li1;~Yu_Fei2;~Alessandro_Stolfo1;~Wangchunshu_Zhou1;~Guangtao_Zeng1;~Antoine_Bosselut1;~Mrinmaya_Sachan3", "gender": "M;M;M;M;M;M;M;M", "homepage": "https://yifan-h.github.io/;https://ai.ethz.ch/people/jiaoda-li.html;;https://alestolfo.github.io;https://michaelzhouwang.github.io;https://scholar.google.com/citations?user=ENYVQLQAAAAJ&hl=en;https://atcbosselut.github.io/;https://sites.google.com/site/mrinsachan/", "dblp": ";299/1900;;329/3838;245/8640.html;264/9714;184/3742;86/10440.html", "google_scholar": "Bm23WyIAAAAJ;;3UvXMKcAAAAJ;Fx50TZQAAAAJ;UebIjuQAAAAJ;ENYVQLQAAAAJ;XD9hkJwAAAAJ;Tpp9ZjoAAAAJ", "or_profile": "~Yifan_Hou1;~Jiaoda_Li1;~Yu_Fei2;~Alessandro_Stolfo1;~Wangchunshu_Zhou1;~Guangtao_Zeng1;~Antoine_Bosselut1;~MRINMAYA_SACHAN2", "aff": "Department of Computer Science, Swiss Federal Institute of Technology;ETHZ - ETH Zurich;University of California, Irvine;ETHZ - ETH Zurich;Department of Computer Science, ETHZ - ETH Zurich;Singapore University of Technology and Design;Swiss Federal Institute of Technology Lausanne;Swiss Federal Institute of Technology", "aff_domain": "inf.ethz.ch;ethz.ch;uci.edu;ethz.ch;inf.ethz.ch;sutd.edu.sg;epfl.ch;ethz.ch", "position": "PhD student;PhD student;PhD student;PhD student;PhD student;PhD student;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nhou2023towards,\ntitle={Towards a Mechanistic Interpretation of Multi-Step Reasoning Capabilities of Language Models},\nauthor={Yifan Hou and Jiaoda Li and Yu Fei and Alessandro Stolfo and Wangchunshu Zhou and Guangtao Zeng and Antoine Bosselut and Mrinmaya Sachan},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=hlqIu07ics}\n}", "github": "", "project": "", "reviewers": "F7aK;3L4M;jwyS", "site": "https://openreview.net/forum?id=hlqIu07ics", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;3;4", "excitement": "4;3;4", "reproducibility": "3;3;2", "correctness": "4;4;4", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 2.6666666666666665, "correctness_avg": 4.0, "replies_avg": 10, "authors#_avg": 8, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-3197-4460;0000-0002-7691-4269;;;;;;", "linkedin": "yifanhou;;;alessandrostolfo/;;;;", "aff_unique_index": "0;1;2;1;1;3;4;0", "aff_unique_norm": "Swiss Federal Institute of Technology;ETH Zurich;University of California, Irvine;Singapore University of Technology and Design;Swiss Federal Institute of Technology Lausanne", "aff_unique_dep": "Department of Computer Science;;;;", "aff_unique_url": "https://www.ethz.ch;https://www.ethz.ch;https://www.uci.edu;https://www.sutd.edu.sg;https://www.epfl.ch", "aff_unique_abbr": "ETH Zurich;ETHZ;UCI;SUTD;EPFL", "aff_campus_unique_index": "1;2;3", "aff_campus_unique": ";Irvine;Zurich;Lausanne", "aff_country_unique_index": "0;0;1;0;0;2;0;0", "aff_country_unique": 
"Switzerland;United States;Singapore" }, { "id": "hmOwOZWzYE", "title": "GQA: Training Generalized Multi-Query Transformer Models from Multi-Head Checkpoints", "track": "main", "status": "Short Main", "tldr": "", "abstract": "Multi-query attention (MQA), which only uses a single key-value head, drastically speeds up decoder inference. However, MQA can lead to quality degradation, and moreover it may not be desirable to train a separate model just for faster inference. We (1) propose a recipe for uptraining existing multi-head language model checkpoints into models with MQA using 5\\% of original pre-training compute, and (2) introduce grouped-query attention (GQA), a generalization of multi-query attention which uses an intermediate (more than one, less than number of query heads) number of key-value heads. We show that uptrained GQA achieves quality close to multi-head attention with comparable speed to MQA.", "keywords": "efficient nlp;multi-query attention;fast inference", "primary_area": "", "supplementary_material": "", "author": "Joshua Ainslie;James Lee-Thorp;Michiel de Jong;Yury Zemlyanskiy;Federico Lebron;Sumit Sanghai", "authorids": "~Joshua_Ainslie1;~James_Lee-Thorp1;~Michiel_de_Jong1;~Yury_Zemlyanskiy1;~Federico_Lebron1;~Sumit_Sanghai1", "gender": ";M;M;M;M;M", "homepage": ";;;https://urikz.github.io/;https://fedelebron.com;", "dblp": "263/3363;;223/0153;225/5302;;", "google_scholar": ";qsPv098AAAAJ;R7wXId8AAAAJ;fkkxyJUAAAAJ;;", "or_profile": "~Joshua_Ainslie1;~James_Lee-Thorp1;~Michiel_de_Jong1;~Yury_Zemlyanskiy1;~Federico_Lebron1;~Sumit_Sanghai1", "aff": "Google;Google;University of Southern California;;;Research, Google", "aff_domain": "google.com;google.com;usc.edu;;;research.google.com", "position": "Software Engineer;Researcher;PhD student;;;Researcher", "bibtex": "@inproceedings{\nainslie2023gqa,\ntitle={{GQA}: Training Generalized Multi-Query Transformer Models from Multi-Head Checkpoints},\nauthor={Joshua Ainslie and James Lee-Thorp and Michiel de Jong and Yury Zemlyanskiy and Federico Lebron and Sumit Sanghai},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=hmOwOZWzYE}\n}", "github": "", "project": "", "reviewers": "B4s9;KbGZ;5bUv", "site": "https://openreview.net/forum?id=hmOwOZWzYE", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;2;2", "excitement": "3;4;3", "reproducibility": "4;3;4", "correctness": "4;4;2", "rating_avg": 4.0, "confidence_avg": 2.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0001-6445-7155;;;;", "linkedin": ";;;yury-zemlyanskiy/;;sumit-sanghai-90961a5/", "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Google;University of Southern California", "aff_unique_dep": "Google;", "aff_unique_url": "https://www.google.com;https://www.usc.edu", "aff_unique_abbr": "Google;USC", "aff_campus_unique_index": "0;0;1;0", "aff_campus_unique": "Mountain View;Los Angeles", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "hn0B3jTlwE", "title": "Goodtriever: Adaptive Toxicity Mitigation with Retrieval-augmented Models", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Considerable effort has been dedicated to mitigating toxicity, but existing methods often require drastic modifications to model parameters or 
the use of computationally intensive auxiliary models. Furthermore, previous approaches have often neglected the crucial factor of language's evolving nature over time. In this work, we present a comprehensive perspective on toxicity mitigation that takes into account its changing nature. We introduce Goodtriever, a flexible methodology that matches the current state-of-the-art toxicity mitigation while achieving 43% relative latency reduction during inference and being more computationally efficient. By incorporating a retrieval-based approach at decoding time, Goodtriever enables toxicity-controlled text generation. Our research advocates for an increased focus on adaptable mitigation techniques, which better reflect the data drift models face when deployed in the wild.", "keywords": "toxicity mitigation;retrieval-augmented;continual learning", "primary_area": "", "supplementary_material": "", "author": "Luiza Amador Pozzobon;Beyza Ermis;Patrick Lewis;Sara Hooker", "authorids": "~Luiza_Amador_Pozzobon1;~Beyza_Ermis1;~Patrick_Lewis2;~Sara_Hooker2", "gender": "F;F;M;", "homepage": ";https://www.cmpe.boun.edu.tr/people/beyza.ermi%C5%9F;https://patricklewis.io;https://www.sarahooker.me/", "dblp": ";117/9290;227/3197;210/2611", "google_scholar": "vaCOqncAAAAJ;v2cMiCAAAAAJ;JN7Zg-kAAAAJ;2xy6h3sAAAAJ", "or_profile": "~Luiza_Amador_Pozzobon1;~Beyza_Ermis1;~Patrick_Lewis2;~Sara_Hooker1", "aff": "Cohere For AI;Cohere AI;Cohere;Cohere For AI", "aff_domain": "cohere.com;cohere.com;cohere.ai;cohere.com", "position": "Researcher;Researcher;Research Scientist;Principal Researcher", "bibtex": "@inproceedings{\npozzobon2023goodtriever,\ntitle={Goodtriever: Adaptive Toxicity Mitigation with Retrieval-augmented Models},\nauthor={Luiza Amador Pozzobon and Beyza Ermis and Patrick Lewis and Sara Hooker},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=hn0B3jTlwE}\n}", "github": "", "project": "", "reviewers": "47Kk;9NcL;NexP", "site": "https://openreview.net/forum?id=hn0B3jTlwE", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;4;3", "excitement": "3;4;3", "reproducibility": "4;4;4", "correctness": "4;4;3", "rating_avg": 3.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.0, "correctness_avg": 3.6666666666666665, "replies_avg": 10, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0002-2192-9543;", "linkedin": "luizapozzobon/;;patrick-s-h-lewis/;", "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Cohere;Cohere AI", "aff_unique_dep": "Cohere AI;", "aff_unique_url": "https://cohere.ai;https://cohere.ai", "aff_unique_abbr": "Cohere;Cohere AI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "United States;Canada" }, { "id": "hoO5anfnRk", "title": "EDIS: Entity-Driven Image Search over Multimodal Web Content", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Making image retrieval methods practical for real-world search applications requires significant progress in dataset scales, entity comprehension, and multimodal information fusion. In this work, we introduce Entity-Driven Image Search (EDIS), a challenging dataset for cross-modal image search in the news domain. EDIS consists of 1 million web images from actual search engine results and curated datasets, with each image paired with a textual description. 
Unlike datasets that assume a small set of single-modality candidates, EDIS reflects real-world web image search scenarios by including a million multimodal image-text pairs as candidates. EDIS encourages the development of retrieval models that simultaneously address cross-modal information fusion and matching. To achieve accurate ranking results, a model must: 1) understand named entities and events from text queries, 2) ground entities onto images or text descriptions, and 3) effectively fuse textual and visual representations. Our experimental results show that EDIS challenges state-of-the-art methods with dense entities and the large-scale candidate set. The ablation study also proves that fusing textual features with visual features is critical in improving retrieval results.", "keywords": "Image search; Cross-modal retrieval; Multimodality fusion", "primary_area": "", "supplementary_material": "", "author": "Siqi Liu;Weixi Feng;Tsu-Jui Fu;Wenhu Chen;William Yang Wang", "authorids": "~Siqi_Liu7;~Weixi_Feng2;~Tsu-Jui_Fu2;~Wenhu_Chen3;~William_Yang_Wang2", "gender": "F;M;M;M;M", "homepage": ";https://weixi-feng.github.io/;https://tsujuifu.github.io;https://wenhuchen.github.io/;https://www.cs.ucsb.edu/~william/", "dblp": ";322/1026;218/5366.html;136/0957.html;08/9282", "google_scholar": ";https://scholar.google.com/citations?hl=en;https://scholar.google.com.tw/citations?user=7QRDcC0AAAAJ;https://scholar.google.co.jp/citations?user=U8ShbhUAAAAJ;gf8Ms_8AAAAJ", "or_profile": "~Siqi_Liu7;~Weixi_Feng2;~Tsu-Jui_Fu2;~wenhu_chen1;~William_Wang1", "aff": "Cornell University;University of California, Santa Barbara;UC Santa Barbara;University of Waterloo;UC Santa Barbara", "aff_domain": "cornell.edu;ucsb.edu;ucsb.edu;uwaterloo.ca;ucsb.edu", "position": "Undergrad student;PhD student;PhD student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nliu2023edis,\ntitle={{EDIS}: Entity-Driven Image Search over Multimodal Web Content},\nauthor={Siqi Liu and Weixi Feng and Tsu-Jui Fu and Wenhu Chen and William Yang Wang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=hoO5anfnRk}\n}", "github": "", "project": "", "reviewers": "QYbK;q3bm;eaas", "site": "https://openreview.net/forum?id=hoO5anfnRk", "pdf_size": 0, "rating": "5;5;5", "confidence": "3;3;3", "excitement": "3;4;3", "reproducibility": "5;4;3", "correctness": "4;4;3", "rating_avg": 5.0, "confidence_avg": 3.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.0, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-7201-5688;;;", "linkedin": "emeraldliu/;weixifeng/;tsujuifu1996;;", "aff_unique_index": "0;1;1;2;1", "aff_unique_norm": "Cornell University;University of California, Santa Barbara;University of Waterloo", "aff_unique_dep": ";;", "aff_unique_url": "https://www.cornell.edu;https://www.ucsb.edu;https://uwaterloo.ca", "aff_unique_abbr": "Cornell;UCSB;UW", "aff_campus_unique_index": "1;1;1", "aff_campus_unique": ";Santa Barbara", "aff_country_unique_index": "0;0;0;1;0", "aff_country_unique": "United States;Canada" }, { "id": "hpUNou0UaJ", "title": "impact of sample selection on in-context learning for entity extraction from scientific writing", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Prompt-based usage of Large Language Models (LLMs) is an increasingly popular way to tackle many well-known 
natural language problems. This trend is due, in part, to the appeal of the In-Context Learning (ICL) prompt set-up, in which a few selected training examples are provided along with the inference request. ICL, a type of few-shot learning, is especially attractive for natural language processing (NLP) tasks defined for specialised domains, such as entity extraction from scientific documents, where the annotation is very costly due to expertise requirements for the annotators. In this paper, we present a comprehensive analysis of in-context sample selection methods for entity extraction from scientific documents using GPT-3.5 and compare these results against a fully supervised transformer-based baseline. Our results indicate that the effectiveness of the in-context sample selection methods is heavily domain-dependent, but the improvements are more notable for problems with a larger number of entity types. More in-depth analysis shows that ICL is more effective for low-resource set-ups of scientific information extraction", "keywords": "GPT-3.5;in-context learning;sample selection;entity;scientific", "primary_area": "", "supplementary_material": "", "author": "Necva B\u00f6l\u00fcc\u00fc;Maciej Rybinski;Stephen Wan", "authorids": "~Necva_B\u00f6l\u00fcc\u00fc1;~Maciej_Rybinski1;~Stephen_Wan1", "gender": "F;;", "homepage": ";;https://people.csiro.au/W/S/Stephen-Wan", "dblp": "200/8444;157/9497.html;w/StephenWan", "google_scholar": "Fuu0gaAAAAAJ;https://scholar.google.pl/citations?user=r_ztTJYAAAAJ;https://scholar.google.com.au/citations?user=YMRsSGcAAAAJ", "or_profile": "~Necva_B\u00f6l\u00fcc\u00fc1;~Maciej_Rybinski1;~Stephen_Wan1", "aff": "CSIRO;Commonwealth Scientific and Industrial Research Organisation, CSIRO;CSIRO", "aff_domain": "csiro.au;data61.csiro.au;csiro.au", "position": "Postdoc;Researcher;Principal Researcher", "bibtex": "@inproceedings{\nb{\\\"o}l{\\\"u}c{\\\"u}2023impact,\ntitle={impact of sample selection on in-context learning for entity extraction from scientific writing},\nauthor={Necva B{\\\"o}l{\\\"u}c{\\\"u} and Maciej Rybinski and Stephen Wan},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=hpUNou0UaJ}\n}", "github": "", "project": "", "reviewers": "89Vn;v6zx;JH68", "site": "https://openreview.net/forum?id=hpUNou0UaJ", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;3;3", "excitement": "4;2;3", "reproducibility": "3;3;2", "correctness": "3;3;3", "rating_avg": 5.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.0, "reproducibility_avg": 2.6666666666666665, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0001-8121-3048;;0000-0001-7505-1417", "linkedin": ";;stephen-wan-a489631/", "aff_unique_index": "0;0;0", "aff_unique_norm": "Commonwealth Scientific and Industrial Research Organisation", "aff_unique_dep": "", "aff_unique_url": "https://www.csiro.au", "aff_unique_abbr": "CSIRO", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Australia" }, { "id": "hsjQHAM8MV", "title": "Can We Edit Factual Knowledge by In-Context Learning?", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Previous studies have shown that large language models (LLMs) like GPTs store massive factual knowledge in their parameters. However, the stored knowledge could be false or outdated. 
Traditional knowledge editing methods refine LLMs via fine-tuning on texts containing specific knowledge. However, with the increasing scales of LLMs, these gradient-based approaches bring large computation costs. The trend of model-as-a-service also makes it impossible to modify knowledge in black-box LMs. Inspired by in-context learning (ICL), a new paradigm based on demonstration contexts without parameter updating, we explore whether ICL can edit factual knowledge. To answer this question, we give a comprehensive empirical study of ICL strategies. Experiments show that in-context knowledge editing (IKE), without any gradient and parameter updating, achieves a competitive success rate compared to gradient-based methods on GPT-J (6B) but with much fewer side effects, including less over-editing on similar but unrelated facts and less knowledge forgetting on previously stored knowledge. We also apply the method to larger LMs with tens or hundreds of billions of parameters like OPT-175B, which shows the scalability of our method. The code is available at \\url{https://github.com/pkunlp-icler/IKE}.", "keywords": "In-Context Learning;Knowledge Editing;Large Language Models", "primary_area": "", "supplementary_material": "", "author": "Ce Zheng;Lei Li;Qingxiu Dong;Yuxuan Fan;Zhiyong Wu;Jingjing Xu;Baobao Chang", "authorids": "~Ce_Zheng2;~Lei_Li14;~Qingxiu_Dong1;~Yuxuan_Fan1;~Zhiyong_Wu3;~Jingjing_Xu1;~Baobao_Chang1", "gender": "M;;F;;;F;M", "homepage": ";;https://dqxiu.github.io/;;;;http://eecs.pku.edu.cn/EN/People/Faculty/Detail/?ID=6027", "dblp": "99/6967;;284/0673;227/4963;;25/624;91/6051", "google_scholar": "r7qFs7UAAAAJ;;ibcR7VkAAAAJ;kc7uuLoAAAAJ;;;LaKNyhQAAAAJ", "or_profile": "~Ce_Zheng2;~Lei_Li14;~Qingxiu_Dong1;~Yuxuan_Fan1;~Zhiyong_Wu3;~Jingjing_Xu1;~Baobao_Chang1", "aff": "Peking University;;Peking University;Peking University;;;Peking University", "aff_domain": "pku.edu.cn;;pku.edu.cn;pku.edu.cn;;;pku.edu.cn", "position": "MS student;;PhD student;PhD student;;;Associate Professor", "bibtex": "@inproceedings{\nzheng2023can,\ntitle={Can We Edit Factual Knowledge by In-Context Learning?},\nauthor={Ce Zheng and Lei Li and Qingxiu Dong and Yuxuan Fan and Zhiyong Wu and Jingjing Xu and Baobao Chang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=hsjQHAM8MV}\n}", "github": "", "project": "", "reviewers": "3Kzz;i8Yq;fQT9", "site": "https://openreview.net/forum?id=hsjQHAM8MV", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;4", "excitement": "4;4;3", "reproducibility": "5;3;4", "correctness": "3;3;3", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.0, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;;0000-0003-2824-6750", "linkedin": ";;qingxiu-dong-a3758a199/;;;;", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Peking University", "aff_unique_dep": "", "aff_unique_url": "http://www.pku.edu.cn", "aff_unique_abbr": "Peking U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "hsptWISmi6", "title": "Post-hoc Utterance Refining Method by Entity Mining for Faithful Knowledge Grounded Conversations", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Despite the striking advances in recent language generation performance, model-generated responses have suffered 
from the chronic problem of hallucinations that are either untrue or unfaithful to a given source. Especially in the task of knowledge grounded conversation, the models are required to generate informative responses, but hallucinated utterances lead to miscommunication. In particular, entity-level hallucination that causes critical misinformation and undesirable conversation is one of the major concerns. To address this issue, we propose a post-hoc refinement method called REM. It aims to enhance the quality and faithfulness of hallucinated utterances by refining them based on the source knowledge. If the generated utterance has a low source-faithfulness score with the given knowledge, REM mines the key entities in the knowledge and implicitly uses them for refining the utterances. We verify that our method reduces entity hallucination in the utterance. Also, we show the adaptability and efficacy of REM with extensive experiments and generative results. Our code is available at https://github.com/YOONNAJANG/REM.", "keywords": "Utterance Refining;Knowledge Grounded Conversation;Entity Mining;Entity-level Hallucination", "primary_area": "", "supplementary_material": "", "author": "Yoonna Jang;Suhyune Son;Jeongwoo Lee;Junyoung Son;Yuna Hur;Jungwoo Lim;Hyeonseok Moon;Kisu Yang;Heuiseok Lim", "authorids": "~Yoonna_Jang1;~Suhyune_Son1;~Jeongwoo_Lee2;~Junyoung_Son1;~Yuna_Hur1;~Jungwoo_Lim1;~Hyeonseok_Moon1;~Kisu_Yang1;~Heuiseok_Lim1", "gender": "F;F;;M;F;F;M;M;M", "homepage": "https://yoonnajang.github.io/;;;https://rgop13.github.io/;https://scholar.google.com/citations?user=A0zJLEMAAAAJ&hl=en;https://dlawjddn803.github.io/;;https://github.com/kisuyang;http://nlp.korea.ac.kr", "dblp": "277/9316;309/6326;;243/9058;291/4254;277/9191;295/3184.html;243/7106;127/4881", "google_scholar": "https://scholar.google.com/citations?hl=ko;ay-JrF8AAAAJ;dGnIZfIAAAAJ;d6b5a34AAAAJ;A0zJLEMAAAAJ;ubIxtk8AAAAJ;queGQ5UAAAAJ;MVDopVwAAAAJ;HMTkz7oAAAAJ", "or_profile": "~Yoonna_Jang1;~Suhyune_Son1;~Jeongwoo_Lee2;~Junyoung_Son1;~Yuna_Hur1;~Jungwoo_Lim1;~Hyeonseok_Moon1;~Kisu_Yang1;~Heuiseok_Lim1", "aff": "Korea University;Korea University;Korea University;Korea University;Korea University;Korea University;Korea University;VAIV Company;Korea University", "aff_domain": "korea.ac.kr;korea.ac.kr;korea.ac.kr;korea.ac.kr;korea.ac.kr;korea.ac.kr;korea.ac.kr;vaiv.kr;korea.ac.kr", "position": "PhD student;PhD student;MS student;MS student;Postdoc;PhD student;PhD student;Researcher;Full Professor", "bibtex": "@inproceedings{\njang2023posthoc,\ntitle={Post-hoc Utterance Refining Method by Entity Mining for Faithful Knowledge Grounded Conversations},\nauthor={Yoonna Jang and Suhyune Son and Jeongwoo Lee and Junyoung Son and Yuna Hur and Jungwoo Lim and Hyeonseok Moon and Kisu Yang and Heuiseok Lim},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=hsptWISmi6}\n}", "github": "", "project": "", "reviewers": "9ska;uiSe;fxJP", "site": "https://openreview.net/forum?id=hsptWISmi6", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;4;4", "excitement": "3;3;4", "reproducibility": "4;4;4", "correctness": "3;4;4", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.0, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 9, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": 
";;0000-0002-5450-6686;0000-0002-4142-6927;;0000-0001-8988-2270;0000-0002-0841-4262;0000-0003-4983-0307;", "linkedin": ";;jeongwoo-lee-b55723222/;junyoung-son-2836a2183/;;jungwoo-lim-3a5124202/;;kisu-yang/;", "aff_unique_index": "0;0;0;0;0;0;0;1;0", "aff_unique_norm": "Korea University;VAIV Company", "aff_unique_dep": ";", "aff_unique_url": "https://www.korea.ac.kr;", "aff_unique_abbr": "KU;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "South Korea;" }, { "id": "htulPWUheU", "title": "Cross-Document Event Coreference Resolution on Discourse Structure", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Cross-document event coreference resolution (CD-ECR) is a task of clustering event mentions across multiple documents that refer to the same real-world events. Previous studies usually model the CD-ECR task as a pairwise similarity comparison problem by using different event mention features, and consider the highly similar event mention pairs in the same cluster as coreferent. In general, most of them only consider the local context of event mentions and ignore their implicit global information, thus failing to capture the interactions of long-distance event mentions. To address the above issue, we regard discourse structure as global information to further improve CD-ECR. First, we use a discourse rhetorical structure constructor to construct tree structures to represent documents. Then, we obtain shortest dependency paths from the tree structures to represent interactions between event mention pairs. Finally, we feed the above information to a multi-layer perceptron to capture the similarities of event mention pairs for resolving coreferent events. Experimental results on the ECB+ dataset show that our proposed model outperforms several baselines and achieves the competitive performance with the start-of-the-art baselines.", "keywords": "Event Coreference Resolution;Discourse Structure;Shortest Dependency Path", "primary_area": "", "supplementary_material": "", "author": "Xinyu Chen;Sheng Xu;PEIFENG LI;Qiaoming Zhu", "authorids": "~Xinyu_Chen10;~Sheng_Xu9;~PEIFENG_LI2;~Qiaoming_Zhu1", "gender": "M;;M;M", "homepage": "https://blog.csdn.net/weixin_45327971?type=blog;https://xiaosheng.blog/;http://web.suda.edu.cn/pfli/;https://scst.suda.edu.cn/0f/a2/c11250a528290/page.htm", "dblp": ";10/1887-6.html;00/1996.html;28/1279", "google_scholar": ";kEcZZPAAAAAJ;NY3GrVIAAAAJ;6BXGJK8AAAAJ", "or_profile": "~Xinyu_Chen10;~Sheng_Xu9;~PEIFENG_LI2;~Qiaoming_Zhu1", "aff": "Soochow University;Soochow University;Soochow University, China;Soochow University", "aff_domain": "suda.edu.cn;suda.edu.cn;suda.edu.cn;suda.edu.cn", "position": "PhD student;PhD student;Full Professor;Full Professor", "bibtex": "@inproceedings{\nchen2023crossdocument,\ntitle={Cross-Document Event Coreference Resolution on Discourse Structure},\nauthor={Xinyu Chen and Sheng Xu and PEIFENG LI and Qiaoming Zhu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=htulPWUheU}\n}", "github": "", "project": "", "reviewers": "48CY;T5Xb;fuqf", "site": "https://openreview.net/forum?id=htulPWUheU", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;4;4", "excitement": "3;4;4", "reproducibility": "3;3;4", "correctness": "4;4;4", "rating_avg": 5.0, "confidence_avg": 4.0, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 
4.0, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0003-4850-3128;0000-0002-2708-8976", "linkedin": ";;;", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Soochow University", "aff_unique_dep": "", "aff_unique_url": "https://www.soochow.edu.cn", "aff_unique_abbr": "Soochow U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "hv3VpXDIh8", "title": "CodeTransOcean: A Comprehensive Multilingual Benchmark for Code Translation", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Recent code translation techniques exploit neural machine translation models to translate source code from one programming language to another to satisfy production compatibility or to improve efficiency of codebase maintenance. Most existing code translation datasets only focus on a single pair of popular programming languages. To advance research on code translation and meet diverse requirements of real-world applications, we construct **CodeTransOcean**, a large-scale comprehensive benchmark that supports the largest variety of programming languages for code translation. CodeTransOcean consists of three novel multilingual datasets, namely, **MultilingualTrans** supporting translations between multiple popular programming languages, **NicheTrans** for translating between niche programming languages and popular ones, and **LLMTrans** for evaluating executability of translated code by large language models (LLMs). CodeTransOcean also includes a novel cross-framework dataset, **DLTrans**, for translating deep learning code across different frameworks. We develop multilingual modeling approaches for code translation and demonstrate their great potential in improving the translation quality of both low-resource and high-resource language pairs and boosting the training efficiency. We also propose a novel evaluation metric **Debugging Success Rate@K** for program-level code translation. Last but not least, we evaluate LLM ChatGPT on our datasets and investigate its potential for fuzzy execution predictions. We build baselines for CodeTransOcean and analyze challenges of code translation for guiding future research. 
The CodeTransOcean datasets and code are publicly available at https://github.com/WeixiangYAN/CodeTransOcean.", "keywords": "Code translation;Multilingual datasets;Multilingual modeling;Large language models", "primary_area": "", "supplementary_material": "", "author": "Weixiang Yan;Yuchen Tian;Yunzhe Li;Qian Chen;Wen Wang", "authorids": "~Weixiang_Yan1;~Yuchen_Tian3;~Yunzhe_Li2;~Qian_Chen1;~Wen_Wang6", "gender": "M;;M;M;", "homepage": "https://weixiangyan.github.io/;;;https://scholar.google.com/citations?user=8eosmSQAAAAJ&hl=en;https://scholar.google.com/citations?user=85Tj1OwAAAAJ&hl=en", "dblp": "313/5975;;;11/1394-3;29/4680-1", "google_scholar": "SsCHL58AAAAJ;;;8eosmSQAAAAJ;85Tj1OwAAAAJ", "or_profile": "~Weixiang_Yan1;~Yuchen_Tian3;~Yunzhe_Li2;~Qian_Chen1;~Wen_Wang6", "aff": "University of California, Santa Barbara;University of Hong Kong;University of Illinois, Urbana-Champaign;Alibaba Group;Alibaba Group", "aff_domain": "ucsb.edu;hku.hk;cs.illinois.edu;alibaba-inc.com;alibaba-inc.com", "position": "MS student;MS student;PhD student;Researcher;Senior Staff Algorithm Engineer", "bibtex": "@inproceedings{\nyan2023codetransocean,\ntitle={CodeTransOcean: A Comprehensive Multilingual Benchmark for Code Translation},\nauthor={Weixiang Yan and Yuchen Tian and Yunzhe Li and Qian Chen and Wen Wang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=hv3VpXDIh8}\n}", "github": "", "project": "", "reviewers": "ebpA;XoXb;RZux", "site": "https://openreview.net/forum?id=hv3VpXDIh8", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;3;5", "excitement": "3;3;3", "reproducibility": "4;4;3", "correctness": "3;4;3", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0003-4146-8542;0000-0001-6939-7438;0000-0002-0356-1968", "linkedin": "weixiang-yan/;yuchen-tian-006209274/;;;wen-wang-414b548/", "aff_unique_index": "0;1;2;3;3", "aff_unique_norm": "University of California, Santa Barbara;University of Hong Kong;University of Illinois;Alibaba Group", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.ucsb.edu;https://www.hku.hk;https://illinois.edu;https://www.alibaba.com", "aff_unique_abbr": "UCSB;HKU;UIUC;Alibaba", "aff_campus_unique_index": "0;1;2", "aff_campus_unique": "Santa Barbara;Hong Kong SAR;Urbana-Champaign;", "aff_country_unique_index": "0;1;0;1;1", "aff_country_unique": "United States;China" }, { "id": "hxExXDMwcc", "title": "PlugMed: Improving Specificity in Patient-Centered Medical Dialogue Generation using In-Context Learning", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "The patient-centered medical dialogue systems strive to offer diagnostic interpretation services to users who are less knowledgeable about medical knowledge, through emphasizing the importance of providing responses specific to the patients. It is difficult for the large language models (LLMs) to guarantee the specificity of responses in spite of its promising performance even in some tasks in medical field.\nInspired by in-context learning, we propose PlugMed, a Plug-and-Play Medical Dialogue System, for addressing this challenge. 
PlugMed is equipped with two modules, the prompt generation (PG) module and the response ranking (RR) module, to enhance LLMs' dialogue strategies for improving the specificity of the dialogue. The PG module is designed to stimulate the imitative ability of LLMs by providing them with real dialogues from similar patients as prompts. The RR module incorporates a fine-tuned small model as a response filter to enable the selection of appropriate responses generated by LLMs. Furthermore, we introduce a new evaluation method based on matching both the user's intent and high-frequency medical terms to effectively assess the specificity of the responses. We conduct experimental evaluations on three medical dialogue datasets, and the results, including both automatic and human evaluation, demonstrate the effectiveness of our approach.", "keywords": "Medical;Dialogue Generation;In-context Learning", "primary_area": "", "supplementary_material": "", "author": "Chengfeng Dou;Zhi Jin;Wenpin Jiao;Haiyan Zhao;Yongqiang Zhao;Zhengwei Tao", "authorids": "~Chengfeng_Dou1;~Zhi_Jin1;~Wenpin_Jiao1;~Haiyan_Zhao1;~Yongqiang_Zhao2;~Zhengwei_Tao1", "gender": "M;F;M;F;M;M", "homepage": ";http://faculty.pku.edu.cn/zhijin/en/index.htm;https://cs.pku.edu.cn/info/1234/2124.htm;http://sei.pku.edu.cn/~zhhy/;;", "dblp": "273/2149;22/3510;96/6443;23/2644-1.html;;245/4700", "google_scholar": "vkfjflUAAAAJ;https://scholar.google.com.tw/citations?user=ZC7SObAAAAAJ;;https://scholar.google.com.tw/citations?user=z0bu0kIAAAAJ;;", "or_profile": "~Chengfeng_Dou1;~Zhi_Jin1;~Wenpin_Jiao1;~Haiyan_Zhao1;~Yongqiang_Zhao2;~Zhengwei_Tao1", "aff": "Peking University;Peking University;Peking University;Peking University;Peking University;Peking University", "aff_domain": "pku.edu.cn;pku.edu.cn;pku.edu.cn;pku.edu.cn;pku.edu.cn;pku.edu.cn", "position": "PhD student;Full Professor;Full Professor;Associate Professor;PhD student;PhD student", "bibtex": "@inproceedings{\ndou2023plugmed,\ntitle={PlugMed: Improving Specificity in Patient-Centered Medical Dialogue Generation using In-Context Learning},\nauthor={Chengfeng Dou and Zhi Jin and Wenpin Jiao and Haiyan Zhao and Yongqiang Zhao and Zhengwei Tao},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=hxExXDMwcc}\n}", "github": "", "project": "", "reviewers": "X2S1;V8yn;nsEF", "site": "https://openreview.net/forum?id=hxExXDMwcc", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;4", "excitement": "4;3;3", "reproducibility": "5;4;3", "correctness": "4;3;3", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0001-8135-0421;0000-0003-1087-226X;0000-0001-9374-3900;0000-0002-3600-8923;0000-0002-4121-246X;", "linkedin": ";;;;;", "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "Peking University", "aff_unique_dep": "", "aff_unique_url": "http://www.pku.edu.cn", "aff_unique_abbr": "Peking U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "hyBwGem8OS", "title": "InteMATs: Integrating Granularity-Specific Multilingual Adapters for Cross-Lingual Transfer", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Multilingual language models (MLLMs) have achieved remarkable success in various cross-lingual 
transfer tasks. However, they suffer poor performance in zero-shot low-resource languages, particularly when dealing with longer contexts. Existing research mainly relies on full-model fine-tuning on large parallel datasets to enhance the cross-lingual alignment of MLLMs, which is computationally expensive. In this paper, we propose InteMATs, a novel approach that integrates multilingual adapters trained on texts of different levels of granularity. To achieve this, we curate a multilingual parallel dataset comprising 42 languages to pre-train sentence-level and document-level adapters under the contrastive learning framework. Extensive experiments demonstrate the effectiveness of InteMATs in improving the cross-lingual transfer performance of MLLMs, especially on low-resource languages. Finally, our comprehensive analyses and ablation studies provide a deep understanding of the high-quality representations derived by InteMATs.", "keywords": "Multilingual Language Model Enhancement;Cross-lingual Transfer;Parameter-Efficient Method", "primary_area": "", "supplementary_material": "", "author": "Meizhen Liu;Xu Guo;He Jiakai;Jianye Chen;Fengyu Zhou;Siu Cheung Hui", "authorids": "~Meizhen_Liu1;~Xu_Guo2;~He_Jiakai1;~Jianye_Chen1;~Fengyu_Zhou2;~Siu_Cheung_Hui1", "gender": "F;F;;M;M;M", "homepage": ";https://guoxuxu.github.io/;;;http://www.cirobot.org/;http://research.ntu.edu.sg/expertise/academicprofile/Pages/StaffProfile.aspx?ST_EMAILID=ASSCHUI&CategoryDescription=mathematics/", "dblp": "174/7546-1.html;46/5508-2;;;;65/3225", "google_scholar": ";https://scholar.google.com/citations?hl=en;;;;https://scholar.google.com.tw/citations?user=d4ZYx6gAAAAJ", "or_profile": "~Meizhen_Liu1;~Xu_Guo2;~He_Jiakai1;~Jianye_Chen1;~zhou_Fengyu1;~Siu_Hui1", "aff": "Shandong University;Nanyang Technological University;Shandong University;Shandong University;;Nanyang Technological University", "aff_domain": "sdu.edu.cn;ntu.edu.sg;sdu.edu.cn;sdu.edu.cn;;ntu.edu.sg", "position": "PhD student;PhD student;PhD student;MS student;;Associate Professor", "bibtex": "@inproceedings{\nliu2023intemats,\ntitle={Inte{MAT}s: Integrating Granularity-Specific Multilingual Adapters for Cross-Lingual Transfer},\nauthor={Meizhen Liu and Xu Guo and He Jiakai and Jianye Chen and Fengyu Zhou and Siu Cheung Hui},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=hyBwGem8OS}\n}", "github": "", "project": "", "reviewers": "27bY;JJrR;R9P3", "site": "https://openreview.net/forum?id=hyBwGem8OS", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;3;4", "excitement": "3;3;2", "reproducibility": "3;3;4", "correctness": "3;3;3", "rating_avg": 3.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 2.6666666666666665, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0001-8859-5915;0000-0003-1034-9508;0000-0003-0216-2516;0000-0002-1925-8189;;", "linkedin": ";;;;;", "aff_unique_index": "0;1;0;0;1", "aff_unique_norm": "Shandong University;Nanyang Technological University", "aff_unique_dep": ";", "aff_unique_url": "http://www.sdu.edu.cn;https://www.ntu.edu.sg", "aff_unique_abbr": "SDU;NTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;1", "aff_country_unique": "China;Singapore" }, { "id": "i0RfSS9CUU", "title": "Active Learning Principles for In-Context Learning with Large Language 
Models", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "The remarkable advancements in large language models (LLMs) have significantly enhanced predictive performance in few-shot learning settings. By using only a small number of labeled examples, referred to as demonstrations, LLMs can effectively perform the task at hand through in-context learning. However, the process of selecting demonstrations for maximizing performance has received limited attention in prior work. This paper addresses the issue of identifying the most informative demonstrations for few-shot learning by approaching it as a pool-based Active Learning (AL) problem over a single iteration. We compare standard AL algorithms based on uncertainty, diversity, and similarity, and consistently observe that the latter outperforms all other methods, including random sampling. Our extensive experimentation involving a diverse range of GPT and OPT models across $24$ classification and multi-choice tasks, coupled with thorough analysis, unambiguously demonstrates the importance of using demonstrations that are semantically similar to the domain of the test examples. In fact, we show higher average classification performance using ``similar'' demonstrations with GPT-2 ($124$M) than random demonstrations with GPT-Neox ($20$B). Notably, while diversity sampling shows promise, uncertainty sampling, despite its success in conventional supervised learning AL scenarios, performs poorly in in-context learning.", "keywords": "active learning;in-context learning;few-shot learning;large language models", "primary_area": "", "supplementary_material": "", "author": "Katerina Margatina;Timo Schick;Nikolaos Aletras;Jane Dwivedi-Yu", "authorids": "~Katerina_Margatina1;~Timo_Schick1;~Nikolaos_Aletras1;~Jane_Dwivedi-Yu1", "gender": "F;;;F", "homepage": "https://katerinamargatina.github.io/;http://timoschick.com;;https://janedwivedi.github.io/", "dblp": "227/2313;203/9176;118/9116;215/3352", "google_scholar": "517t5gEAAAAJ;;https://scholar.google.co.uk/citations?user=uxRWFhoAAAAJ;ev8Ilx0AAAAJ", "or_profile": "~Katerina_Margatina1;~Timo_Schick1;~Nikolaos_Aletras1;~Jane_Yu1", "aff": "University of Sheffield;Meta Facebook;Amazon;Meta AI ", "aff_domain": "sheffield.ac.uk;fb.com;amazon.com;meta.com", "position": "PhD student;Researcher;Researcher;Researcher", "bibtex": "@inproceedings{\nmargatina2023active,\ntitle={Active Learning Principles for In-Context Learning with Large Language Models},\nauthor={Katerina Margatina and Timo Schick and Nikolaos Aletras and Jane Dwivedi-Yu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=i0RfSS9CUU}\n}", "github": "", "project": "", "reviewers": "BaZZ;87v7;RZBu", "site": "https://openreview.net/forum?id=i0RfSS9CUU", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;2", "excitement": "4;2;3", "reproducibility": "4;4;4", "correctness": "4;4;3", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.0, "reproducibility_avg": 4.0, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;", "linkedin": "katerina-margatina/;;;janeaisleyyu/", "aff_unique_index": "0;1;2;1", "aff_unique_norm": "University of Sheffield;Meta;Amazon", "aff_unique_dep": ";Meta Platforms, Inc.;Amazon.com, Inc.", "aff_unique_url": "https://www.sheffield.ac.uk;https://meta.com;https://www.amazon.com", "aff_unique_abbr": 
"Sheffield;Meta;Amazon", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1", "aff_country_unique": "United Kingdom;United States" }, { "id": "i0vMIpaEn4", "title": "Adaptive Policy with Wait-k Model for Simultaneous Translation", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Simultaneous machine translation (SiMT) requires a robust read/write policy in conjunction with a high-quality translation model. \nTraditional methods rely on either a fixed wait-k policy coupled with a standalone wait-k translation model, or an adaptive policy jointly trained with the translation model. In this study, we propose a more flexible approach by decoupling the adaptive policy model from the translation model. Our motivation stems from the observation that a standalone multi-path wait-k model performs competitively with adaptive policies utilized in state-of-the-art SiMT approaches. Specifically, we introduce DaP, a divergence-based adaptive policy, that makes read/write decisions for any translation model based on the potential divergence in translation distributions resulting from future information. DaP extends a frozen wait-k model with lightweight parameters, and is both memory and computation efficient. Experimental results across various benchmarks demonstrate that our approach offers an improved trade-off between translation accuracy and latency, outperforming strong baselines.", "keywords": "simultaneous machine translation;wait-k;adaptive policy;read/write supervision signals", "primary_area": "", "supplementary_material": "", "author": "Libo Zhao;Kai Fan;Wei Luo;Wu Jing;Shushu Wang;Ziqian Zeng;Zhongqiang Huang", "authorids": "~Libo_Zhao1;~Kai_Fan1;~Wei_Luo8;~Wu_Jing1;~Shushu_Wang1;~Ziqian_Zeng1;~Zhongqiang_Huang1", "gender": ";M;M;F;F;F;M", "homepage": ";https://scholar.google.com/citations?user=SQqkcdgAAAAJ&hl=zh;;;;https://ziqianzeng.github.io;", "dblp": ";20/3825-2.html;;;;155/0168;10/3565", "google_scholar": ";SQqkcdgAAAAJ;;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com.hk/citations?user=WYE-Bw4AAAAJ;fuOr3nAAAAAJ;", "or_profile": "~Libo_Zhao1;~Kai_Fan1;~Wei_Luo8;~Wu_Jing1;~Shushu_Wang1;~Ziqian_Zeng1;~Zhongqiang_Huang1", "aff": ";Alibaba Group;;Alibaba Group;Zhejiang University;South China University of Technology;Alibaba Group", "aff_domain": ";alibaba-inc.com;;alibaba.com;zju.edu.cn;scut.edu.cn;alibaba-inc.com", "position": ";Researcher;;Researcher;MS student;Associate Professor;Senior Staff Engineer", "bibtex": "@inproceedings{\nzhao2023adaptive,\ntitle={Adaptive Policy with Wait-k Model for Simultaneous Translation},\nauthor={Libo Zhao and Kai Fan and Wei Luo and Wu Jing and Shushu Wang and Ziqian Zeng and Zhongqiang Huang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=i0vMIpaEn4}\n}", "github": "", "project": "", "reviewers": "sUJ2;Pp2p;VTic", "site": "https://openreview.net/forum?id=i0vMIpaEn4", "pdf_size": 0, "rating": "5;5;5", "confidence": "5;4;4", "excitement": "3;4;4", "reproducibility": "4;4;4", "correctness": "4;4;4", "rating_avg": 5.0, "confidence_avg": 4.333333333333333, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.0, "correctness_avg": 4.0, "replies_avg": 10, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-8256-0807;;;;;", "linkedin": ";;;;;;", "aff_unique_index": "0;0;1;2;0", "aff_unique_norm": "Alibaba Group;Zhejiang 
University;South China University of Technology", "aff_unique_dep": ";;", "aff_unique_url": "https://www.alibaba.com;https://www.zju.edu.cn;https://www.scut.edu.cn", "aff_unique_abbr": "Alibaba;ZJU;SCUT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "i17SCD0YDI", "title": "KEBAP: Korean Error Explainable Benchmark Dataset for ASR and Post-processing", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Automatic Speech Recognition (ASR) systems are instrumental across various applications, with their performance being critically tied to user satisfaction. Conventional evaluation metrics for ASR systems produce a singular aggregate score, which is insufficient for understanding specific system vulnerabilities. Therefore, we aim to address the limitations of the previous ASR evaluation methods by introducing the Korean Error Explainable Benchmark Dataset for ASR and Post-processing (KEBAP). KEBAP enables comprehensive analysis of ASR systems at both speech- and text levels, thereby facilitating a more balanced assessment encompassing speech recognition accuracy and user readability. KEBAP provides 37 newly defined speech-level resources incorporating diverse noise environments and speaker characteristics categories, also presenting 13 distinct text-level error types. This paper demonstrates detailed statistical analyses of colloquial noise categories and textual error types. Furthermore, we conduct extensive validation and analysis on commercially deployed ASR systems, providing valuable insights into their performance. As a more fine-grained and real-world-centric evaluation method, KEBAP contributes to identifying and mitigating potential weaknesses in ASR systems.", "keywords": "Automatic Speech Recognition (ASR);Error Explainable Benchmark;Post-processing;Recognition Accuracy;User Readability", "primary_area": "", "supplementary_material": "", "author": "Seonmin Koo;Chanjun Park;Jinsung Kim;Jaehyung Seo;Sugyeong Eo;Hyeonseok Moon;Heuiseok Lim", "authorids": "~Seonmin_Koo1;~Chanjun_Park1;~Jinsung_Kim2;~Jaehyung_Seo1;~Sugyeong_Eo1;~Hyeonseok_Moon1;~Heuiseok_Lim1", "gender": "F;M;M;M;F;M;M", "homepage": "https://github.com/seonminkoo/;http://parkchanjun.github.io/;https://jin62304.github.io;https://j-seo.github.io/;;;http://nlp.korea.ac.kr", "dblp": "324/3476;268/1379;;298/7721;295/3502;295/3184.html;127/4881", "google_scholar": "vQNCQb8AAAAJ;085jNAMAAAAJ;au6e9uUAAAAJ;V8bFAUIAAAAJ;https://scholar.google.co.kr/citations?user=s4GjpoEAAAAJ;queGQ5UAAAAJ;HMTkz7oAAAAJ", "or_profile": "~Seonmin_Koo1;~Chanjun_Park1;~Jinsung_Kim2;~Jaehyung_Seo1;~Sugyeong_Eo1;~Hyeonseok_Moon1;~Heuiseok_Lim1", "aff": "Korea University;Upstage;Korea University;Korea University;Korea University;Korea University;Korea University", "aff_domain": "korea.ac.kr;upstage.ai;korea.ac.kr;korea.ac.kr;korea.ac.kr;korea.ac.kr;korea.ac.kr", "position": "PhD student;Principal Researcher;PhD student;PhD student;PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nkoo2023kebap,\ntitle={{KEBAP}: Korean Error Explainable Benchmark Dataset for {ASR} and Post-processing},\nauthor={Seonmin Koo and Chanjun Park and Jinsung Kim and Jaehyung Seo and Sugyeong Eo and Hyeonseok Moon and Heuiseok Lim},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=i17SCD0YDI}\n}", "github": "", "project": "", "reviewers": "yM4D;L3vD;chwF", "site": 
"https://openreview.net/forum?id=i17SCD0YDI", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;3;3", "excitement": "5;3;3", "reproducibility": "5;4;4", "correctness": "4;3;4", "rating_avg": 5.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.333333333333333, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0009-0007-8575-2306;0000-0002-7200-9632;0000-0002-1587-0389;0000-0002-4761-9818;0000-0002-8008-6160;0000-0002-0841-4262;", "linkedin": ";bcj1210/;jinsung-kim-703195178/;jaehyungseo-datascientist/?originalSubdomain=kr;%EC%88%98%EA%B2%BD-%EC%96%B4-21a23015b/;;", "aff_unique_index": "0;1;0;0;0;0;0", "aff_unique_norm": "Korea University;Upstage", "aff_unique_dep": ";", "aff_unique_url": "https://www.korea.ac.kr;", "aff_unique_abbr": "KU;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "South Korea;" }, { "id": "i1KSRMVlST", "title": "Cognitive Dissonance: Why Do Language Model Outputs Disagree with Internal Representations of Truthfulness?", "track": "main", "status": "Short Main", "tldr": "", "abstract": "Neural language models (LMs) can be used to evaluate the truth of factual statements in two ways: they can be either queried for statement probabilities, or probed for internal representations of truthfulness. Past work has found that these two procedures sometimes disagree, and that probes tend to be more accurate than LM outputs. This has led some researchers to conclude that LMs \"lie\" or otherwise encode non-cooperative communicative intents. Is this an accurate description of today's LMs, or can query--probe disagreement arise in other ways? We identify three different classes of disagreement, which we term confabulation, deception, and heterogeneity. In many cases, the superiority of probes is simply attributable to better calibration on uncertain answers rather than a greater fraction of correct, high-confidence answers. 
In some cases, queries and probes perform better on different subsets of inputs, and accuracy can further be improved by ensembling the two.", "keywords": "language models;truthfulness;question answering;interpretability", "primary_area": "", "supplementary_material": "", "author": "Kevin Liu;Stephen Casper;Dylan Hadfield-Menell;Jacob Andreas", "authorids": "~Kevin_Liu3;~Stephen_Casper1;~Dylan_Hadfield-Menell2;~Jacob_Andreas1", "gender": "M;M;M;M", "homepage": ";https://stephencasper.com/;http://people.csail.mit.edu/dhm/;http://web.mit.edu/jda/www", "dblp": ";255/5295.html;135/8332;97/8154", "google_scholar": ";N4aglP4AAAAJ;4mVPFQ8AAAAJ;dnZ8udEAAAAJ", "or_profile": "~Kevin_Liu3;~Stephen_Casper1;~Dylan_Hadfield-Menell2;~Jacob_Andreas1", "aff": "Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology;Microsoft", "aff_domain": "mit.edu;mit.edu;mit.edu;microsoft.com", "position": "Undergrad student;Graduate Student;Assistant Professor;Researcher", "bibtex": "@inproceedings{\nliu2023cognitive,\ntitle={Cognitive Dissonance: Why Do Language Model Outputs Disagree with Internal Representations of Truthfulness?},\nauthor={Kevin Liu and Stephen Casper and Dylan Hadfield-Menell and Jacob Andreas},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=i1KSRMVlST}\n}", "github": "", "project": "", "reviewers": "WxBn;M8PW;aR1Y", "site": "https://openreview.net/forum?id=i1KSRMVlST", "pdf_size": 0, "rating": "5;5;5", "confidence": "3;3;3", "excitement": "3;4;4", "reproducibility": "3;3;5", "correctness": "2;3;4", "rating_avg": 5.0, "confidence_avg": 3.0, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0003-0084-1937;0000-0002-6168-4763;", "linkedin": "kevin-liu888;;;", "aff_unique_index": "0;0;0;1", "aff_unique_norm": "Massachusetts Institute of Technology;Microsoft", "aff_unique_dep": ";Microsoft Corporation", "aff_unique_url": "https://web.mit.edu;https://www.microsoft.com", "aff_unique_abbr": "MIT;Microsoft", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "i65hZUPwuQ", "title": "Mirages. On Anthropomorphism in Dialogue Systems", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Automated dialogue or conversational systems are anthropomorphised by developers and personified by users. While a degree of anthropomorphism is inevitable, conscious and unconscious design choices can guide users to personify them to varying degrees. Encouraging users to relate to automated systems as if they were human can lead to transparency and trust issues, and high risk scenarios caused by over-reliance on their outputs. As a result, natural language processing researchers have investigated the factors that induce personification and develop resources to mitigate such effects. However, these efforts are fragmented, and many aspects of anthropomorphism have yet to be explored. In this paper, we discuss the linguistic factors that contribute to the anthropomorphism of dialogue systems and the harms that can arise thereof, including reinforcing gender stereotypes and conceptions of acceptable language. 
\nWe recommend that future efforts towards developing dialogue systems take particular care in their design, development, release, and description; and attend to the many linguistic cues that can elicit personification by users.", "keywords": "Dialogue systems;conversational AI;anthropomorphism;ethics", "primary_area": "", "supplementary_material": "", "author": "Gavin Abercrombie;Amanda Cercas Curry;Tanvi Dinkar;Verena Rieser;Zeerak Talat", "authorids": "~Gavin_Abercrombie1;~Amanda_Cercas_Curry1;~Tanvi_Dinkar1;~Verena_Rieser1;~Zeerak_Talat1", "gender": "M;F;;F;", "homepage": ";;;https://sites.google.com/site/verenateresarieser/home;", "dblp": "184/8685;185/0457;;75/5602;", "google_scholar": ";wJ3c5wMAAAAJ;;https://scholar.google.co.uk/citations?hl=en;", "or_profile": "~Gavin_Abercrombie1;~Amanda_Cercas_Curry1;~Tanvi_Dinkar1;~Verena_Rieser1;~Zeerak_Talat1", "aff": "Heriot-Watt University;Bocconi University;;Heriot-Watt University;", "aff_domain": "hw.ac.uk;unibocconi.it;;hw.ac.uk;", "position": "Postdoc;Postdoc;;Full Professor;", "bibtex": "@inproceedings{\nabercrombie2023mirages,\ntitle={Mirages. On Anthropomorphism in Dialogue Systems},\nauthor={Gavin Abercrombie and Amanda Cercas Curry and Tanvi Dinkar and Verena Rieser and Zeerak Talat},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=i65hZUPwuQ}\n}", "github": "", "project": "", "reviewers": "dUKy;n3SJ;2Rub", "site": "https://openreview.net/forum?id=i65hZUPwuQ", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;4;3", "excitement": "4;4;4", "reproducibility": "", "correctness": "4;4;4", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 4.0, "reproducibility_avg": 0, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-8964-5790;;;", "linkedin": ";;;verena-rieser-3590b86/;", "aff_unique_index": "0;1;0", "aff_unique_norm": "Heriot-Watt University;Bocconi University", "aff_unique_dep": ";", "aff_unique_url": "https://www.hw.ac.uk;https://www.bocconi.edu", "aff_unique_abbr": "HWU;Bocconi", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "United Kingdom;Italy" }, { "id": "i7ifZu49kW", "title": "Improving Neural Machine Translation by Multi-Knowledge Integration with Prompting", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Improving neural machine translation (NMT) systems with prompting has achieved significant progress in recent years. In this work, we focus on how to integrate multi-knowledge, multiple types of knowledge, into NMT models to enhance the performance with prompting. We propose a unified framework, which can integrate effectively multiple types of knowledge including sentences, terminologies/phrases and translation templates into NMT models. We utilize multiple types of knowledge as prefix-prompts of input for the encoder and decoder of NMT models to guide the translation process. The approach requires no changes to the model architecture and effectively adapts to domain-specific translation without retraining. 
The experiments on English-Chinese and English-German translation demonstrate that our approach significantly outperforms strong baselines, achieving high translation quality and terminology match accuracy.", "keywords": "Machine Translation;Multi-Knowledge Integration;Prompting", "primary_area": "", "supplementary_material": "", "author": "Ke Wang;Jun Xie;Yuqi Zhang;Yu Zhao", "authorids": "~Ke_Wang17;~Jun_Xie9;~Yuqi_Zhang6;~Yu_Zhao15", "gender": "M;F;M;Not Specified", "homepage": "https://scholar.google.com/citations?user=1xuDUvkAAAAJ;;;", "dblp": ";13/3918.html;;", "google_scholar": "1xuDUvkAAAAJ;https://scholar.google.de/citations?user=hsaj9ZsAAAAJ;;YjuM2GsAAAAJ", "or_profile": "~Ke_Wang17;~Yuqi_Zhang6;~Yu_Zhao15;~jun_xie5", "aff": "Alibaba Group;Alibaba Damo Academy;Alibaba Group;Alibaba DAMO Academy", "aff_domain": "alibaba-inc.com;alibaba-inc.com;alibaba-inc.com;alibaba-inc.com", "position": "Researcher;Researcher;Researcher;Principal Researcher", "bibtex": "@inproceedings{\nwang2023improving,\ntitle={Improving Neural Machine Translation by Multi-Knowledge Integration with Prompting},\nauthor={Ke Wang and Jun Xie and Yuqi Zhang and Yu Zhao},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=i7ifZu49kW}\n}", "github": "", "project": "", "reviewers": "wucw;GTCU;m2EQ", "site": "https://openreview.net/forum?id=i7ifZu49kW", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;5;3", "excitement": "2;2;3", "reproducibility": "4;5;4", "correctness": "3;2;3", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 2.3333333333333335, "reproducibility_avg": 4.333333333333333, "correctness_avg": 2.6666666666666665, "replies_avg": 10, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;", "linkedin": ";;https://www.linkedin.cn/incareer/in/ACoAABw1UPkBM2Vlf7nEWpIq_R8DJ-6dh1muEas;", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Alibaba Group", "aff_unique_dep": "", "aff_unique_url": "https://www.alibaba.com", "aff_unique_abbr": "Alibaba", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "iAeDYlEXrM", "title": "A Critical Analysis of Document Out-of-Distribution Detection", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Large-scale pre-training is widely used in recent document understanding tasks. During deployment, one may expect that models should trigger a conservative fallback policy when encountering out-of-distribution (OOD) samples, which highlights the importance of OOD detection. However, most existing OOD detection methods focus on single-modal inputs such as images or texts. While documents are multi-modal in nature, it is underexplored if and how multi-modal information in documents can be exploited for OOD detection. In this work, we first provide a systematic and in-depth analysis on OOD detection for document understanding models. We study the effects of model modality, pre-training, and fine-tuning across various types of OOD inputs. In particular, we find that spatial information is critical for document OOD detection. To better exploit spatial information, we propose a spatial-aware adapter, which serves as a parameter-efficient add-on module to adapt transformer-based language models to the document domain. 
Extensive experiments show that adding the spatial-aware adapter significantly improves the OOD detection performance compared to directly using the language model and achieves superior performance compared to competitive baselines.", "keywords": "Document Understanding;Pretraining;Out-of-Distribution;Document intelligence;Robustness", "primary_area": "", "supplementary_material": "", "author": "Jiuxiang Gu;Yifei Ming;Yi Zhou;Jason Kuen;Vlad I Morariu;Handong Zhao;Ruiyi Zhang;Nikolaos Barmpalios;Anqi Liu;Yixuan Li;Tong Sun;Ani Nenkova", "authorids": "~Jiuxiang_Gu2;~Yifei_Ming1;~Yi_Zhou20;~Jason_Kuen1;~Vlad_I_Morariu1;~Handong_Zhao3;~Ruiyi_Zhang3;~Nikolaos_Barmpalios1;~Anqi_Liu2;~Yixuan_Li1;~Tong_Sun1;~Ani_Nenkova1", "gender": "M;M;;M;M;;;M;F;F;F;", "homepage": "http://gujiuxiang.com;https://alvinmingsf.github.io/;;http://jasonkuen.com/;https://research.adobe.com/person/vlad-morariu/;;;;https://anqiliu-ai.github.io/;http://pages.cs.wisc.edu/~sharonli/;https://research.adobe.com/person/tong-sun/;", "dblp": "173/4935.html;277/4125;;165/1403;27/6671;;;;;144/6087-1;;", "google_scholar": "https://scholar.google.com.sg/citations?user=zPxKV9EAAAAJ;Dh_4cyQAAAAJ;;e6u7GlQAAAAJ;oyWpVa8AAAAJ;;;Yp4dul4AAAAJ;Q8yp6zQAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=en;", "or_profile": "~Jiuxiang_Gu2;~Yifei_Ming1;~Yi_Zhou20;~Jason_Kuen1;~Vlad_I_Morariu1;~Handong_Zhao3;~Ruiyi_Zhang3;~Nikolaos_Barmpalios1;~Anqi_Liu2;~Yixuan_Li1;~Tong_Sun1;~Ani_Nenkova1", "aff": "Adobe Systems;University of Wisconsin - Madison;;Adobe Research;Adobe;;;Adobe Systems;University of Illinois, Chicago;Cornell University;Adobe Systems;", "aff_domain": "adobe.com;wisc.edu;;adobe.com;adobe.com;;;adobe.com;uic.edu;cornell.edu;adobe.com;", "position": "Researcher;PhD student;;Researcher;Senior Research Scientist;;;Senior Machine Learning Scientist;PhD student;Graduate Student;Director, Document Intelligence Lab;", "bibtex": "@inproceedings{\ngu2023a,\ntitle={A Critical Analysis of Document Out-of-Distribution Detection},\nauthor={Jiuxiang Gu and Yifei Ming and Yi Zhou and Jason Kuen and Vlad I Morariu and Handong Zhao and Ruiyi Zhang and Nikolaos Barmpalios and Anqi Liu and Yixuan Li and Tong Sun and Ani Nenkova},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=iAeDYlEXrM}\n}", "github": "", "project": "", "reviewers": "os2G;Rxki;M9aT", "site": "https://openreview.net/forum?id=iAeDYlEXrM", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;4", "excitement": "4;3;4", "reproducibility": "4;4;4", "correctness": "4;3;3", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 12, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;;;;0000-0002-0468-5698;;;", "linkedin": ";;;;;;;;;liyixuan;tong-sun/?trk=hb_tab_pro_top;", "aff_unique_index": "0;1;0;0;0;2;3;0", "aff_unique_norm": "Adobe;University of Wisconsin-Madison;University of Illinois at Chicago;Cornell University", "aff_unique_dep": "Adobe Systems Incorporated;;;", "aff_unique_url": "https://www.adobe.com;https://www.wisc.edu;https://www.uic.edu;https://www.cornell.edu", "aff_unique_abbr": "Adobe;UW-Madison;UIC;Cornell", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Madison;Chicago", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "iBv0M8WrFi", 
"title": "A Cheaper and Better Diffusion Language Model with Soft-Masked Noise", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Diffusion models that are based on iterative denoising have been recently proposed and leveraged in various generation tasks like image generation. Whereas, as a way inherently built for continuous data, existing diffusion models still have some limitations in modeling discrete data, e.g., languages. For example, the generally used Gaussian noise can not handle the discrete corruption well, and the objectives in continuous spaces fail to be stable for textual data in the diffusion process especially when the dimension is high. To alleviate these issues, we introduce a novel diffusion model for language modeling, Masked-Diffuse LM, with lower training cost and better performances, inspired by linguistic features in languages. Specifically, we design a linguistic-informed forward process which adds corruptions to the text through strategically soft-masking to better noise the textual data. Also, we directly predict the categorical distribution with cross-entropy loss function in every diffusion step to connect the continuous space and discrete space in a more efficient and straightforward way. Through experiments on 5 controlled generation tasks, we demonstrate that our Masked-Diffuse LM can achieve better generation quality than the state-of-the-art diffusion models with better efficiency.", "keywords": "Diffusion Language Model;Soft-masked Noise", "primary_area": "", "supplementary_material": "", "author": "Jiaao Chen;Aston Zhang;Mu Li;Alex Smola;Diyi Yang", "authorids": "~Jiaao_Chen2;~Aston_Zhang2;~Mu_Li4;~Alex_Smola1;~Diyi_Yang2", "gender": "M;;;M;F", "homepage": "https://cs.stanford.edu/people/jiaaoc/;;https://github.com/mli;http://alex.smola.org;https://cs.stanford.edu/~diyiy/", "dblp": "230/3663;;;s/AlexanderJSmola;70/11145", "google_scholar": "Pi9IVvUAAAAJ;;;Tb0ZrYwAAAAJ;j9jhYqQAAAAJ", "or_profile": "~Jiaao_Chen2;~Aston_Zhang2;~Mu_Li4;~Alex_Smola1;~Diyi_Yang2", "aff": "Georgia Institute of Technology;;Amazon;Boson AI;Stanford University", "aff_domain": "gatech.edu;;amazon.com;boson.ai;stanford.edu", "position": "PhD student;;Researcher;CEO;Assistant Professor", "bibtex": "@inproceedings{\nchen2023a,\ntitle={A Cheaper and Better Diffusion Language Model with Soft-Masked Noise},\nauthor={Jiaao Chen and Aston Zhang and Mu Li and Alex Smola and Diyi Yang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=iBv0M8WrFi}\n}", "github": "", "project": "", "reviewers": "4Jws;YAfc;h85U;Q6XL", "site": "https://openreview.net/forum?id=iBv0M8WrFi", "pdf_size": 0, "rating": "4;4;4;4", "confidence": "4;3;5;4", "excitement": "4;2;4;3", "reproducibility": "4;2;2;3", "correctness": "4;3;4;3", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.25, "reproducibility_avg": 2.75, "correctness_avg": 3.5, "replies_avg": 14, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;", "linkedin": ";;;smola;", "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Georgia Institute of Technology;Amazon;Boson AI;Stanford University", "aff_unique_dep": ";Amazon.com, Inc.;;", "aff_unique_url": "https://www.gatech.edu;https://www.amazon.com;https://www.boson.ai;https://www.stanford.edu", "aff_unique_abbr": "Georgia Tech;Amazon;Boson AI;Stanford", "aff_campus_unique_index": "1", "aff_campus_unique": ";Stanford", "aff_country_unique_index": 
"0;0;1;0", "aff_country_unique": "United States;China" }, { "id": "iCLJHkE5s1", "title": "TRAMS: Training-free Memory Selection for Long-range Language Modeling", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "The Transformer architecture is crucial for numerous AI models, but it still faces challenges in long-range language modeling. Though several specific transformer architectures have been designed to tackle issues of long-range dependencies, existing methods like Transformer-XL are plagued by a high percentage of ineffective memories. In this study, we present a plug-and-play strategy, known as TRAining-free Memory Selection (TRAMS), that selects tokens participating in attention calculation based on one simple metric. This strategy allows us to keep tokens that are likely to have a high attention score with the current queries and ignore the other ones. We have tested our approach on the word-level benchmark (WikiText-103) and the character-level benchmark (enwik8),\nand the results indicate an improvement without having additional training or adding additional parameters.", "keywords": "Language Model; Inference Strategy; Long-context Modeling", "primary_area": "", "supplementary_material": "", "author": "Haofei Yu;Cunxiang Wang;Yue Zhang;Wei Bi", "authorids": "~Haofei_Yu1;~Cunxiang_Wang1;~Yue_Zhang7;~Wei_Bi1", "gender": "M;Not Specified;M;F", "homepage": "https://www.haofeiyu.me;https://wangcunxiang.github.io/;http://frcchang.github.io;https://scholar.google.com.hk/citations?hl=en&user=aSJcgQMAAAAJ&view_op=list_works&sortby=pubdate#d=gsc_md_iad&u=%2Fcitations%3Fview_op%3Dimport_lookup%26hl%3Den%26imq%3DWei%2BBi%26json%3D%26btnA%3D1", "dblp": "156/1412;213/1862.html;47/722-4;38/1163", "google_scholar": "EL-QbZ4AAAAJ;https://scholar.google.com.sg/citations?hl=en;;https://scholar.google.com.hk/citations?hl=en", "or_profile": "~Haofei_Yu1;~Cunxiang_Wang1;~Yue_Zhang7;~Wei_Bi1", "aff": "Apple;Westlake University;Westlake University;Hong Kong University of Science and Technology", "aff_domain": "apple.com;westlake.edu.cn;westlake.edu.cn;ust.hk", "position": "Intern;PhD student;Full Professor;PhD student", "bibtex": "@inproceedings{\nyu2023trams,\ntitle={{TRAMS}: Training-free Memory Selection for Long-range Language Modeling},\nauthor={Haofei Yu and Cunxiang Wang and Yue Zhang and Wei Bi},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=iCLJHkE5s1}\n}", "github": "", "project": "", "reviewers": "Rdm5;XG1k;ncPF", "site": "https://openreview.net/forum?id=iCLJHkE5s1", "pdf_size": 0, "rating": "3;3;3", "confidence": "2;3;4", "excitement": "3;3;3", "reproducibility": "4;3;4", "correctness": "4;3;4", "rating_avg": 3.0, "confidence_avg": 3.0, "excitement_avg": 3.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.6666666666666665, "replies_avg": 10, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0002-5214-2268;0000-0001-8457-0630", "linkedin": "%E6%98%8A%E9%A3%9E-%E4%BA%8E-a04247188/;;;", "aff_unique_index": "0;1;1;2", "aff_unique_norm": "Apple;Westlake University;Hong Kong University of Science and Technology", "aff_unique_dep": "Apple Inc.;;", "aff_unique_url": "https://www.apple.com;https://www.westlake.edu.cn;https://www.ust.hk", "aff_unique_abbr": "Apple;WU;HKUST", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;1;1;1", "aff_country_unique": "United 
States;China" }, { "id": "iCNoSVJl2y", "title": "CCIM: Cross-modal Cross-lingual Interactive Image Translation", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "Text image machine translation (TIMT) which translates source language text images into target language texts has attracted intensive attention in recent years. Although the end-to-end TIMT model directly generates target translation from encoded text image features with an efficient architecture, it lacks the recognized source language information, resulting in a decrease in translation performance. In this paper, we propose a novel Cross-modal Cross-lingual Interactive Model (CCIM) to incorporate source language information by synchronously generating source language and target language results through an interactive attention mechanism between two language decoders. Extensive experimental results have shown that the interactive decoder significantly outperforms end-to-end TIMT models and has a faster decoding speed and smaller model size than cascade models.", "keywords": "cross-modal cross-lingual interactive decoding;text image machine translation;text image recognition", "primary_area": "", "supplementary_material": "", "author": "Cong MA;Yaping Zhang;Mei Tu;Yang Zhao;Yu Zhou;Chengqing Zong", "authorids": "~Cong_MA3;~Yaping_Zhang1;~Mei_Tu2;~Yang_Zhao26;~Yu_Zhou8;~Chengqing_Zong1", "gender": ";;F;;F;M", "homepage": ";https://aprilyapingzhang.github.io;;;;http://www.nlpr.ia.ac.cn/cip/english/zong.htm", "dblp": ";133/5803;136/8671.html;;36/2728-1.html;38/6093", "google_scholar": ";https://scholar.google.com.hk/citations?user=bAN6Lj0AAAAJ;;;DDpBW7wAAAAJ;l8lvKOQAAAAJ", "or_profile": "~Cong_MA3;~Yaping_Zhang1;~Mei_Tu2;~Yang_Zhao26;~Yu_Zhou8;~Chengqing_Zong1", "aff": ";Institute of Automation, Chinese Academy of Sciences;Samsung;;Institute of Automation, Chinese Academy of Sciences;Institute of Automation, Chinese Academy of Sciences", "aff_domain": ";ia.ac.cn;samsung.com;;nlpr.ia.ac.cn;ia.ac.cn", "position": ";Assistant Professor;Researcher;;Full Professor;Researcher", "bibtex": "@inproceedings{\nma2023ccim,\ntitle={{CCIM}: Cross-modal Cross-lingual Interactive Image Translation},\nauthor={Cong MA and Yaping Zhang and Mei Tu and Yang Zhao and Yu Zhou and Chengqing Zong},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=iCNoSVJl2y}\n}", "github": "", "project": "", "reviewers": "gHG5;jT2T;DKy3", "site": "https://openreview.net/forum?id=iCNoSVJl2y", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;3", "excitement": "3;3;3", "reproducibility": "4;4;4", "correctness": "3;4;3", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.0, "reproducibility_avg": 4.0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0001-6892-905X;;;;", "linkedin": ";;;;;", "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Chinese Academy of Sciences;Samsung", "aff_unique_dep": "Institute of Automation;Samsung", "aff_unique_url": "http://www.ia.cas.cn;https://www.samsung.com", "aff_unique_abbr": "CAS;Samsung", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "China;South Korea" }, { "id": "iDBUssVu5Z", "title": "Text Fact Transfer", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Text style transfer is a prominent task that aims to control the 
style of text without inherently changing its factual content. To cover more text modification applications, such as adapting past news for current events and repurposing educational materials, we propose the task of text fact transfer, which seeks to transfer the factual content of a source text between topics without modifying its style. We find that existing language models struggle with text fact transfer, due to their inability to preserve the specificity and phrasing of the source text, and tendency to hallucinate errors. To address these issues, we design ModQGA, a framework that minimally modifies a source text with a novel combination of end-to-end question generation and specificity-aware question answering. Through experiments on four existing datasets adapted for text fact transfer, we show that ModQGA can accurately transfer factual content without sacrificing the style of the source text.", "keywords": "Text Generation;Factuality;Text Style Transfer;Information Extraction", "primary_area": "", "supplementary_material": "", "author": "Nishant Balepur;Jie Huang;Kevin Chang", "authorids": "~Nishant_Balepur1;~Jie_Huang3;~Kevin_Chang1", "gender": "M;;M", "homepage": "https://nbalepur.github.io/;https://jeffhj.github.io/;https://siebelschool.illinois.edu/about/people/faculty/kcchang", "dblp": "346/4871;29/6643-9;c/KCCChang", "google_scholar": "G8_fojUAAAAJ;GIoPkMoAAAAJ;https://scholar.google.com.tw/citations?user=sugWZ6MAAAAJ", "or_profile": "~Nishant_Balepur1;~Jie_Huang3;~Kevin_Chang1", "aff": "University of Illinois, Urbana Champaign;University of Illinois, Urbana Champaign;University of Illinois, Urbana Champaign", "aff_domain": "cs.illinois.edu;illinois.edu;illinois.edu", "position": "Undergrad student;PhD student;Full Professor", "bibtex": "@inproceedings{\nbalepur2023text,\ntitle={Text Fact Transfer},\nauthor={Nishant Balepur and Jie Huang and Kevin Chang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=iDBUssVu5Z}\n}", "github": "", "project": "", "reviewers": "KoEs;jgwJ;vEhb", "site": "https://openreview.net/forum?id=iDBUssVu5Z", "pdf_size": 0, "rating": "5;5;5", "confidence": "3;4;3", "excitement": "3;4;4", "reproducibility": "4;4;3", "correctness": "4;4;4", "rating_avg": 5.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0003-0997-6803", "linkedin": "nishant-balepur-a03818107/;jie-huang-4b0104151/;", "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Illinois Urbana-Champaign", "aff_unique_dep": "", "aff_unique_url": "https://illinois.edu", "aff_unique_abbr": "UIUC", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Urbana-Champaign", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "iDQBP0cvzX", "title": "Best of Both Worlds: Towards Improving Temporal Knowledge Base Question Answering via Targeted Fact Extraction", "track": "main", "status": "Short Main", "tldr": "", "abstract": "Temporal question answering (QA) is a special category of complex question answering task that requires reasoning over facts asserting time intervals of events. Previous works have predominantly relied on Knowledge Base Question Answering (KBQA) for temporal QA. 
One of the major challenges faced by these systems is their inability to retrieve all relevant facts due to factors such as incomplete KB and entity/relation linking errors. A failure to fetch even a single fact will block KBQA from computing the answer. Such cases of KB incompleteness are even more profound in the temporal context. To address this issue, we explore an interesting direction where a targeted temporal fact extraction technique is used to assist KBQA whenever it fails to retrieve temporal facts from the KB. We model the extraction problem as an open-domain question answering task using off-the-shelf language models. This way, we target to extract from textual resources those facts that failed to get retrieved from the KB. Experimental results on two temporal QA benchmarks show promising ~30% & ~10% relative improvements in answer accuracies without any additional training cost.", "keywords": "Temporal KBQA;Fact Extraction;Semantic Parsing;Question Answering", "primary_area": "", "supplementary_material": "", "author": "Nithish Kannen;Udit Sharma;Sumit Neelam;Dinesh Khandelwal;Shajith Ikbal;Hima Karanam;L Venkata Subramaniam", "authorids": "~Nithish_Kannen1;~Udit_Sharma1;~Sumit_Neelam1;~Dinesh_Khandelwal2;~Shajith_Ikbal1;~Hima_Karanam1;~L_Venkata_Subramaniam1", "gender": "M;M;M;M;M;M;M", "homepage": "https://nitkannen.github.io/;;;https://research.ibm.com/people/dinesh-khandelwal;https://sites.google.com/site/shajithikbal/;;https://researcher.watson.ibm.com/researcher/view.php?person=in-lvsubram", "dblp": ";96/892;149/5874.html;177/0164;66/4370;230/8590;s/LVenkataSubramaniam", "google_scholar": "nPQMsWMAAAAJ;dkEtWZwAAAAJ;K_mOIxsAAAAJ;Pi-SqXwAAAAJ;yYN1agcAAAAJ;8HycjgoAAAAJ;https://scholar.google.co.in/citations?user=oV1WKFMAAAAJ", "or_profile": "~Nithish_Kannen1;~Udit_Sharma1;~Sumit_Neelam1;~Dinesh_Khandelwal2;~Shajith_Ikbal1;~Hima_Karanam1;~L_Venkata_Subramaniam1", "aff": "Indian Institute of Technology, Kharagpur;International Business Machines;International Business Machines;International Business Machines;IBM Research AI, India;International Business Machines;International Business Machines", "aff_domain": "iitkgp.ac.in;ibm.com;ibm.com;ibm.com;ibm.com;ibm.com;ibm.com", "position": "MS student;Software Engineer;Researcher;Researcher;Senior Research Scientist;Researcher;IBM Research", "bibtex": "@inproceedings{\nkannen2023best,\ntitle={Best of Both Worlds: Towards Improving Temporal Knowledge Base Question Answering via Targeted Fact Extraction},\nauthor={Nithish Kannen and Udit Sharma and Sumit Neelam and Dinesh Khandelwal and Shajith Ikbal and Hima Karanam and L Venkata Subramaniam},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=iDQBP0cvzX}\n}", "github": "", "project": "", "reviewers": "LLSL;r8QC;T8H6", "site": "https://openreview.net/forum?id=iDQBP0cvzX", "pdf_size": 0, "rating": "4;4;4", "confidence": "1;4;4", "excitement": "4;3;4", "reproducibility": "4;3;4", "correctness": "4;3;4", "rating_avg": 4.0, "confidence_avg": 3.0, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;;", "linkedin": "nithish-kannen-7a7823177/;uditsharma7/;;dinesh-khandelwal-68689420/;shajithikbal/;;lvsubramaniam/", "aff_unique_index": "0;1;1;1;2;1;1", "aff_unique_norm": "Indian Institute of Technology;International 
Business Machines Corporation;IBM", "aff_unique_dep": ";;AI", "aff_unique_url": "https://www.iitkgp.ac.in;https://www.ibm.com;https://www.ibm.com/research", "aff_unique_abbr": "IIT Kharagpur;IBM;IBM", "aff_campus_unique_index": "0", "aff_campus_unique": "Kharagpur;", "aff_country_unique_index": "0;1;1;1;0;1;1", "aff_country_unique": "India;United States" }, { "id": "iDZQG9aUGH", "title": "Bayesian Multi-Task Transfer Learning for Soft Prompt Tuning", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Prompt tuning, in which prompts are optimized to adapt large-scale pre-trained language models to downstream tasks instead of fine-tuning the full model parameters, has been shown to be particularly effective when the prompts are trained in the multi-task transfer learning setting. These methods generally involve individually training prompts for each source task and then aggregating them to provide the initialization of the prompt for the target task. However, this approach critically \nignores the fact that some of the source tasks could be negatively or positively interfering with each other. We argue that when we extract knowledge from source tasks via training source prompts, we need to consider this correlation among source tasks for better transfer to target tasks. To this end, we propose a Bayesian approach where we work with the posterior distribution of prompts across source tasks. We obtain representative source prompts corresponding to the samples from \nthe posterior utilizing Stein Variational Gradient Descent, which are then aggregated to constitute the initial target prompt. We show extensive experimental results on the standard benchmark NLP tasks, where our Bayesian multi-task transfer learning approach outperforms the state-of-the-art methods in many settings. 
Furthermore, our approach requires no auxiliary models other than the prompt itself, achieving a high degree of parameter-efficiency.", "keywords": "prompt tuning;parameter-efficient fine-tuning;transfer learning;bayesian method", "primary_area": "", "supplementary_material": "", "author": "Haeju Lee;Minchan Jeong;Se-Young Yun;Kee-Eung Kim", "authorids": "~Haeju_Lee1;~Minchan_Jeong1;~Se-Young_Yun1;~Kee-Eung_Kim2", "gender": "M;M;M;M", "homepage": ";http://osi.kaist.ac.kr/;https://fbsqkd.github.io;http://ailab.kaist.ac.kr", "dblp": "308/5914;;23/8862;35/6703", "google_scholar": ";DuxK5bMAAAAJ;X_IAjb8AAAAJ;https://scholar.google.com/citations?hl=ko", "or_profile": "~Haeju_Lee1;~Minchan_Jeong1;~Se-Young_Yun1;~Kee-Eung_Kim2", "aff": "Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;KAIST;Korea Advanced Institute of Science & Technology", "aff_domain": "kaist.ac.kr;kaist.ac.kr;kaist.ac.kr;kaist.ac.kr", "position": "MS student;PhD student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nlee2023bayesian,\ntitle={Bayesian Multi-Task Transfer Learning for Soft Prompt Tuning},\nauthor={Haeju Lee and Minchan Jeong and Se-Young Yun and Kee-Eung Kim},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=iDZQG9aUGH}\n}", "github": "", "project": "", "reviewers": "f8vf;g8bk;AbeE;qA2o", "site": "https://openreview.net/forum?id=iDZQG9aUGH", "pdf_size": 0, "rating": "3;3;3;3", "confidence": "4;3;3;5", "excitement": "3;4;3;3", "reproducibility": "3;4;3;4", "correctness": "3;4;2;3", "rating_avg": 3.0, "confidence_avg": 3.75, "excitement_avg": 3.25, "reproducibility_avg": 3.5, "correctness_avg": 3.0, "replies_avg": 14, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;", "linkedin": "haeju-lee;minchan-jeong-5303b7268/;seyoung-yun-395130ab/;", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.kaist.ac.kr", "aff_unique_abbr": "KAIST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "South Korea" }, { "id": "iEACF99lQz", "title": "Merging Generated and Retrieved Knowledge for Open-Domain QA", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Open-domain question answering (QA) systems are often built with retrieval modules. However, retrieving passages from a given source is known to suffer from insufficient knowledge coverage. Alternatively, prompting large language models (LLMs) to generate contextual passages based on their parametric knowledge has been shown to improve QA performance. Yet, LLMs tend to \"hallucinate\" content that conflicts with the retrieved knowledge. Based on the intuition that answers supported by both sources are more likely to be correct, we propose COMBO, a Compatibility-Oriented knowledge Merging for Better Open-domain QA framework, to effectively leverage the two sources of information. Concretely, we match LLM-generated passages with retrieved counterparts into compatible pairs, based on discriminators trained with silver compatibility labels. Then a Fusion-in-Decoder-based reader model handles passage pairs to arrive at the final answer. Experiments show that COMBO outperforms competitive baselines on three out of four tested open-domain QA benchmarks. 
Further analysis reveals that our proposed framework demonstrates greater efficacy in scenarios with a higher degree of knowledge conflicts.", "keywords": "large language model;retrieval-augmented language model;open-domain question answering", "primary_area": "", "supplementary_material": "", "author": "Yunxiang Zhang;Muhammad Khalifa;Lajanugen Logeswaran;Moontae Lee;Honglak Lee;Lu Wang", "authorids": "~Yunxiang_Zhang2;~Muhammad_Khalifa2;~Lajanugen_Logeswaran1;~Moontae_Lee1;~Honglak_Lee2;~Lu_Wang9", "gender": "M;M;M;;F;M", "homepage": "https://yunx-z.github.io/;https://mukhal.github.io;https://sites.google.com/umich.edu/llajan/;https://moontae.people.uic.edu;https://web.eecs.umich.edu/~wangluxy/;http://web.eecs.umich.edu/~honglak", "dblp": "160/6176-2.html;246/4401;157/3603;132/1761;49/3800-8;58/2562", "google_scholar": "pbvWlJwAAAAJ;tnmUr30AAAAJ;dcv4kpIAAAAJ;BMvYy9cAAAAJ;uczqEdUAAAAJ;fmSHtE8AAAAJ", "or_profile": "~Yunxiang_Zhang2;~Muhammad_Khalifa2;~Lajanugen_Logeswaran1;~Moontae_Lee1;~Lu_Wang9;~Honglak_Lee1", "aff": "University of Michigan - Ann Arbor;University of Michigan - Ann Arbor;LG AI Research;University of Illinois, Chicago;University of Michigan;University of Michigan", "aff_domain": "umich.edu;umich.edu;lgresearch.ai;uic.edu;umich.edu;umich.edu", "position": "PhD student;PhD student;Researcher;Assistant Professor;Associate Professor;Associate Professor", "bibtex": "@inproceedings{\nzhang2023merging,\ntitle={Merging Generated and Retrieved Knowledge for Open-Domain {QA}},\nauthor={Yunxiang Zhang and Muhammad Khalifa and Lajanugen Logeswaran and Moontae Lee and Honglak Lee and Lu Wang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=iEACF99lQz}\n}", "github": "", "project": "", "reviewers": "rVZR;mizo;KaNa", "site": "https://openreview.net/forum?id=iEACF99lQz", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;4", "excitement": "4;3;3", "reproducibility": "5;4;4", "correctness": "4;4;5", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.333333333333333, "correctness_avg": 4.333333333333333, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0000-0001-5542-3463;;", "linkedin": "%E4%BA%91%E7%BF%94-%E5%BC%A0-a97859196/;muhammaad-khalifa-9a467b100/;;moontae-lee-975248123/;;", "aff_unique_index": "0;0;1;2;0;0", "aff_unique_norm": "University of Michigan;LG;University of Illinois at Chicago", "aff_unique_dep": ";LG AI Research;", "aff_unique_url": "https://www.umich.edu;https://www.lgaires.com;https://www.uic.edu", "aff_unique_abbr": "UM;LG AI;UIC", "aff_campus_unique_index": "0;0;2", "aff_campus_unique": "Ann Arbor;;Chicago", "aff_country_unique_index": "0;0;1;0;0;0", "aff_country_unique": "United States;South Korea" }, { "id": "iHb4MOMyOd", "title": "Bipartite Graph Pre-training for Unsupervised Extractive Summarization with Graph Convolutional Auto-Encoders", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Pre-trained sentence representations are crucial for identifying significant sentences in unsupervised document extractive summarization.\nHowever, the traditional two-step paradigm of pre-training and sentence-ranking, creates a gap due to differing optimization objectives. 
\nTo address this issue, we argue that utilizing pre-trained embeddings derived from a process specifically designed to optimize informative and distinctive sentence representations helps rank significant sentences. \nTo do so, we propose a novel graph pre-training auto-encoder to obtain sentence embeddings by explicitly modelling intra-sentential distinctive features and inter-sentential cohesive features through sentence-word bipartite graphs. \nThese fine-tuned sentence embeddings are then utilized in a graph-based ranking algorithm for unsupervised summarization.\nOur method is a plug-and-play pre-trained model that produces predominant performance for unsupervised summarization frameworks by providing summary-worthy sentence representations. \nIt surpasses heavy BERT- or RoBERTa-based sentence representations in downstream tasks.", "keywords": "Bipartite Graph;Graph Pre-training;Unsupervised Extractive Summarization;Graph Convolutional Auto-Encoders", "primary_area": "", "supplementary_material": "", "author": "Qianren Mao;Shaobo Zhao;Jiarui Li;Xiaolei Gu;Shizhu He;Bo Li;Jianxin Li", "authorids": "~Qianren_Mao4;~Shaobo_Zhao1;~Jiarui_Li3;~Xiaolei_Gu1;~Shizhu_He2;~Bo_Li46;~Jianxin_Li3", "gender": "M;;M;M;M;M;M", "homepage": ";;https://github.com/ljr19231244;https://lhzforever.github.io/;https://heshizhu.github.io/;;http://myjianxin.github.io", "dblp": "234/5350;;;;136/8650;50/3402-5.html;l/JianxinLi-2.html", "google_scholar": "https://scholar.google.com.hk/citations?user=PnDqlPkAAAAJ;;;;zBPIt3QAAAAJ;;EY2lqD0AAAAJ", "or_profile": "~Qianren_Mao4;~Shaobo_Zhao1;~Jiarui_Li3;~Xiaolei_Gu1;~Shizhu_He2;~Bo_Li46;~Jianxin_Li3", "aff": "Beihang University;Beihang University;Beihang University;Beihang University;Institute of Automation, Chinese Academy of Sciences;Beihang University;Beihang University ", "aff_domain": "buaa.edu.cn;buaa.edu.cn;buaa.edu.cn;buaa.edu.cn;ia.ac.cn;buaa.edu.cn;buaa.edu.cn", "position": "PhD student;Undergrad student;Undergrad student;Undergrad student;Associate Researcher;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nmao2023bipartite,\ntitle={Bipartite Graph Pre-training for Unsupervised Extractive Summarization with Graph Convolutional Auto-Encoders},\nauthor={Qianren Mao and Shaobo Zhao and Jiarui Li and Xiaolei Gu and Shizhu He and Bo Li and Jianxin Li},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=iHb4MOMyOd}\n}", "github": "", "project": "", "reviewers": "vmKw;VXBy;psgw", "site": "https://openreview.net/forum?id=iHb4MOMyOd", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;4;4", "excitement": "4;3;3", "reproducibility": "4;4;4", "correctness": "4;3;3", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0003-0780-0628;;;;;;0000-0001-5152-0055", "linkedin": "%E4%B9%BE%E4%BB%BB-%E6%AF%9B-574534326/;shaobo-zhao-31b52027b/;;;;;", "aff_unique_index": "0;0;0;0;1;0;0", "aff_unique_norm": "Beihang University;Chinese Academy of Sciences", "aff_unique_dep": ";Institute of Automation", "aff_unique_url": "http://www.buaa.edu.cn/;http://www.ia.cas.cn", "aff_unique_abbr": "BUAA;CAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "iIoHir5Hyg", "title": 
"Well Begun is Half Done: Generator-agnostic Knowledge Pre-Selection for Knowledge-Grounded Dialogue", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Accurate knowledge selection is critical in knowledge-grounded dialogue systems. \nTowards a closer look at it, we offer a novel perspective to organize existing literature, i.e., knowledge selection coupled with, after, and before generation. \nWe focus on the third under-explored category of study, which can not only select knowledge accurately in advance, but has the advantage to reduce the learning, adjustment, and interpretation burden of subsequent response generation models, especially LLMs.\nWe propose $\\tt{GATE}$, a generator-agnostic knowledge selection method, to prepare knowledge for subsequent response generation models by selecting context-related knowledge among different knowledge structures and variable knowledge requirements.\nExperimental results demonstrate the superiority of $\\tt{GATE}$, and indicate that knowledge selection before generation is a lightweight yet effective way to facilitate LLMs (e.g., ChatGPT) to generate more informative responses.", "keywords": "Knowledge-grounded Dialogue;Knowledge Selection;Generator-agnostic", "primary_area": "", "supplementary_material": "", "author": "Lang Qin;YAO ZHANG;Hongru Liang;Jun Wang;Zhenglu Yang", "authorids": "~Lang_Qin1;~YAO_ZHANG5;~Hongru_Liang2;~Jun_Wang25;~Zhenglu_Yang2", "gender": ";F;F;F;M", "homepage": "https://github.com/qkty-0104;;;;", "dblp": ";57/3892-10.html;218/0721;125/8189-23.html;43/5146", "google_scholar": ";https://scholar.google.com/citations?hl=en;NZLu27gAAAAJ;;https://scholar.google.com/citations?hl=ja", "or_profile": "~Lang_Qin1;~YAO_ZHANG5;~Hongru_Liang2;~Jun_Wang25;~Zhenglu_Yang2", "aff": "Nankai University;Nankai University;Sichuan University;Ludong University;Nankai University", "aff_domain": "nankai.edu.cn;nankai.edu.cn;scu.edu.cn;ldu.edu.cn;nankai.edu.cn", "position": "MS student;Lecturer;Postdoc;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nqin2023well,\ntitle={Well Begun is Half Done: Generator-agnostic Knowledge Pre-Selection for Knowledge-Grounded Dialogue},\nauthor={Lang Qin and YAO ZHANG and Hongru Liang and Jun Wang and Zhenglu Yang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=iIoHir5Hyg}\n}", "github": "", "project": "", "reviewers": "AqSZ;7Yzk;CwT6", "site": "https://openreview.net/forum?id=iIoHir5Hyg", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;4", "excitement": "4;3;3", "reproducibility": "4;4;4", "correctness": "4;3;3", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0009-0003-3071-0513;;0000-0001-8932-6661;0000-0001-9528-965X", "linkedin": ";;;;", "aff_unique_index": "0;0;1;2;0", "aff_unique_norm": "Nankai University;Sichuan University;Ludong University", "aff_unique_dep": ";;", "aff_unique_url": "http://www.nankai.edu.cn;https://www.scu.edu.cn;http://www.ldu.edu.cn", "aff_unique_abbr": "NKU;SCU;LDU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "iIpnncYQZb", "title": "Toward a Critical Toponymy Framework for Named Entity Recognition: A Case Study of Airbnb in New York City", "track": 
"main", "status": "Long Main", "tldr": "", "abstract": "Critical toponymy examines the dynamics of power, capital, and resistance through place names and the sites to which they refer. Studies here have traditionally focused on the semantic content of toponyms and the top-down institutional processes that produce them. However, they have generally ignored the ways in which toponyms are used by ordinary people in everyday discourse, as well as the other strategies of geospatial description that accompany and contextualize toponymic reference. Here, we develop computational methods to measure how cultural and economic capital shape the ways in which people refer to places, through a novel annotated dataset of 47,440 New York City Airbnb listings from the 2010s. Building on this dataset, we introduce a new named entity recognition (NER) model able to identify important discourse categories integral to the characterization of place. Our findings point toward new directions for critical toponymy and to a range of previously understudied linguistic signals relevant to research on neighborhood status, housing and tourism markets, and gentrification.", "keywords": "critical toponymy;named entity recognition;geographic information science;gentrification;new york city;airbnb;place", "primary_area": "", "supplementary_material": "", "author": "Mikael Brunila;Jack LaViolette;Sky CH-Wang;Priyanka Verma;Clara F\u00e9r\u00e9;Grant McKenzie", "authorids": "~Mikael_Brunila1;~Jack_LaViolette1;~Sky_CH-Wang1;~Priyanka_Verma1;~Clara_F\u00e9r\u00e91;~Grant_McKenzie2", "gender": ";M;M;F;;M", "homepage": "https://maybemkl.github.io/;;https://skywang.me;https://orcid.org/0000-0002-2199-4994;;https://grantmckenzie.com", "dblp": ";242/9480;301/9138;;;32/11518", "google_scholar": "hBL6tGwAAAAJ;_DUGDowAAAAJ;6lHNfVoAAAAJ;;;G5UUTgYAAAAJ", "or_profile": "~Mikael_Brunila1;~Jack_LaViolette1;~Sky_CH-Wang1;~Priyanka_Verma1;~Clara_F\u00e9r\u00e91;~Grant_McKenzie2", "aff": "McGill University, McGill University;Columbia University;Columbia University;McGill University;McGill University;McGill University", "aff_domain": "mail.mcgill.ca;columbia.edu;columbia.edu;mcgill.ca;mcgill.ca;mcgill.ca", "position": "PhD student;PhD student;PhD student;PhD student;MS student;Associate Professor", "bibtex": "@inproceedings{\nbrunila2023toward,\ntitle={Toward a Critical Toponymy Framework for Named Entity Recognition: A Case Study of Airbnb in New York City},\nauthor={Mikael Brunila and Jack LaViolette and Sky CH-Wang and Priyanka Verma and Clara F{\\'e}r{\\'e} and Grant McKenzie},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=iIpnncYQZb}\n}", "github": "", "project": "", "reviewers": "B1cM;AMJs;QGQw", "site": "https://openreview.net/forum?id=iIpnncYQZb", "pdf_size": 0, "rating": "5;5;5", "confidence": "3;3;4", "excitement": "4;4;4", "reproducibility": "5;4;4", "correctness": "4;4;4", "rating_avg": 5.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 4.0, "reproducibility_avg": 4.333333333333333, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-5936-8251;;;0000-0002-2199-4994;;0000-0003-3247-2777", "linkedin": "mikael-brunila-bb642a142/;;skychwang/;;clara-fere-ba4ba9170;", "aff_unique_index": "0;1;1;0;0;0", "aff_unique_norm": "McGill University;Columbia University", "aff_unique_dep": ";", "aff_unique_url": 
"https://www.mcgill.ca;https://www.columbia.edu", "aff_unique_abbr": "McGill;Columbia", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;0;0;0", "aff_country_unique": "Canada;United States" }, { "id": "iLTNcB3601", "title": "Improving End-to-End Speech Processing by Efficient Text Data Utilization with Latent Synthesis", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Training a high performance end-to-end speech (E2E) processing model requires an enormous amount of labeled speech data, especially in the era of data-centric artificial intelligence. However, labeled speech data are usually scarcer and more expensive for collection, compared to textual data.\nWe propose Latent Synthesis (LaSyn), an efficient textual data utilization framework for E2E speech processing models.\nWe train a latent synthesizer to convert textual data into an intermediate latent representation of a pre-trained speech model. These pseudo acoustic representations of textual data augment acoustic data for model training.\nWe evaluate LaSyn on low-resource automatic speech recognition (ASR) and spoken language understanding (SLU) tasks. \nFor ASR, LaSyn improves an E2E baseline trained on LibriSpeech train-clean-100, with relative word error rate reductions over 22.3\\% on different test sets. \nFor SLU, LaSyn improves our E2E baseline by absolute 4.1\\% for intent classification accuracy and 3.8\\% for slot filling SLU-F1 on SLURP, and absolute 4.49\\% and 2.25\\% for exact match (EM) and EM-Tree accuracies on STOP respectively.\nWith fewer parameters, the results of LaSyn are competitive to published state-of-the-art works. \nThe results demonstrate the quality of the augmented training data.", "keywords": "joint speech-text learning;spoken language understanding;speech recognition", "primary_area": "", "supplementary_material": "", "author": "Jianqiao Lu;Wenyong Huang;Nianzu Zheng;Xingshan Zeng;Yu Ting Yeung;Xiao Chen", "authorids": "~Jianqiao_Lu1;~Wenyong_Huang1;~Nianzu_Zheng1;~Xingshan_Zeng1;~Yu_Ting_Yeung1;~Xiao_Chen7", "gender": "M;M;M;M;;M", "homepage": "https://jianqiaolu.github.io/;;;;;", "dblp": "358/4791;;257/1336;220/2024;47/9236;05/3054-12", "google_scholar": "uIW6d6AAAAAJ;z8UoSOwAAAAJ;sdN6POIAAAAJ;Ca08I6AAAAAJ;kEJVbaYAAAAJ;", "or_profile": "~Jianqiao_Lu1;~Wenyong_Huang1;~Nianzu_Zheng1;~Xingshan_Zeng1;~Yu_Ting_Yeung1;~XIAO_CHEN5", "aff": "University of Hong Kong;Huawei Technologies Ltd.;Huawei Technologies Ltd.;Huawei Technologies Ltd.;Huawei Noah's Ark Lab;Huawei Technologies Ltd.", "aff_domain": "hku.hk;huawei.com;huawei.com;huawei.com;huawei.com;huawei.com", "position": "PhD student;Researcher;Researcher;Researcher;Researcher;Principal Researcher", "bibtex": "@inproceedings{\nlu2023improving,\ntitle={Improving End-to-End Speech Processing by Efficient Text Data Utilization with Latent Synthesis},\nauthor={Jianqiao Lu and Wenyong Huang and Nianzu Zheng and Xingshan Zeng and Yu Ting Yeung and Xiao Chen},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=iLTNcB3601}\n}", "github": "", "project": "", "reviewers": "tecw;Paaz;1c8Z", "site": "https://openreview.net/forum?id=iLTNcB3601", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;5;4", "excitement": "4;2;3", "reproducibility": "4;4;4", "correctness": "3;3;4", "rating_avg": 4.0, "confidence_avg": 4.333333333333333, "excitement_avg": 3.0, "reproducibility_avg": 4.0, "correctness_avg": 3.3333333333333335, 
"replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0001-6300-9684;;;", "linkedin": "jianqiao-lu-308620201/;;;;https://hk.linkedin.com/in/yeungyuting;xiao-chen-065185167/", "aff_unique_index": "0;1;1;1;1;1", "aff_unique_norm": "University of Hong Kong;Huawei", "aff_unique_dep": ";Huawei Technologies", "aff_unique_url": "https://www.hku.hk;https://www.huawei.com", "aff_unique_abbr": "HKU;Huawei", "aff_campus_unique_index": "0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "iMnwXQemEr", "title": "Discovering Universal Geometry in Embeddings with ICA", "track": "main", "status": "Long Main", "tldr": "", "abstract": "This study utilizes Independent Component Analysis (ICA) to unveil a consistent semantic structure within embeddings of words or images. Our approach extracts independent semantic components from the embeddings of a pre-trained model by leveraging anisotropic information that remains after the whitening process in Principal Component Analysis (PCA). We demonstrate that each embedding can be expressed as a composition of a few intrinsic interpretable axes and that these semantic axes remain consistent across different languages, algorithms, and modalities. The discovery of a universal semantic structure in the geometric patterns of embeddings enhances our understanding of the representations in embeddings.", "keywords": "Embeddings;Independent Component Analysis;Principal Component Analysis;Cross-lingual;Interpretability;Isotropy;Whitening", "primary_area": "", "supplementary_material": "", "author": "Hiroaki Yamagiwa;Momose Oyama;Hidetoshi Shimodaira", "authorids": "~Hiroaki_Yamagiwa1;~Momose_Oyama2;~Hidetoshi_Shimodaira1", "gender": "M;M;M", "homepage": "https://ymgw55.github.io/;https://momoseoyama.github.io/;http://stat.sys.i.kyoto-u.ac.jp/", "dblp": "333/0809;336/5078.html;19/3393", "google_scholar": "k5m5X-EAAAAJ;https://scholar.google.co.jp/citations?user=NWFbcG4AAAAJ;LvoIaIsAAAAJ", "or_profile": "~Hiroaki_Yamagiwa1;~Momose_Oyama2;~Hidetoshi_Shimodaira1", "aff": "Kyoto University, Kyoto University;Kyoto University;RIKEN", "aff_domain": "i.kyoto-u.ac.jp;i.kyoto-u.ac.jp;riken.jp", "position": "PhD student;MS student;Researcher", "bibtex": "@inproceedings{\nyamagiwa2023discovering,\ntitle={Discovering Universal Geometry in Embeddings with {ICA}},\nauthor={Hiroaki Yamagiwa and Momose Oyama and Hidetoshi Shimodaira},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=iMnwXQemEr}\n}", "github": "", "project": "", "reviewers": "QYwU;fVPb;PoMJ", "site": "https://openreview.net/forum?id=iMnwXQemEr", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;4;3", "excitement": "4;4;4", "reproducibility": "3;4;4", "correctness": "4;4;4", "rating_avg": 5.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 4.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 4.0, "replies_avg": 10, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0002-3371-7724", "linkedin": ";;shimo/", "aff_unique_index": "0;0;1", "aff_unique_norm": "Kyoto University;RIKEN", "aff_unique_dep": ";", "aff_unique_url": "https://www.kyoto-u.ac.jp;https://www.riken.jp", "aff_unique_abbr": "Kyoto U;RIKEN", "aff_campus_unique_index": "0", "aff_campus_unique": "Kyoto;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Japan" }, 
{ "id": "iO5YOddOyG", "title": "Is ChatGPT a Good Multi-Party Conversation Solver?", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Large Language Models (LLMs) have emerged as influential instruments within the realm of natural language processing; nevertheless, their capacity to handle multi-party conversations (MPCs) \u2013 a scenario marked by the presence of multiple interlocutors involved in intricate information exchanges \u2013 remains uncharted. In this paper, we delve into the potential of generative LLMs such as ChatGPT and GPT-4 within the context of MPCs. An empirical analysis is conducted to assess the zero-shot learning capabilities of ChatGPT and GPT-4 by subjecting them to evaluation across three MPC datasets that encompass five representative tasks. The findings reveal that ChatGPT\u2019s performance on a number of evaluated MPC tasks leaves much to be desired, whilst GPT-4\u2019s results portend a promising future. Additionally, we endeavor to bolster performance through the incorporation of MPC structures, encompassing both speaker and addressee architecture. This study provides an exhaustive evaluation and analysis of applying generative LLMs to MPCs, casting a light upon the conception and creation of increasingly effective and robust MPC agents. Concurrently, this work underscores the challenges implicit in the utilization of LLMs for MPCs, such as deciphering graphical information flows and generating stylistically consistent responses.", "keywords": "large language models;multi-party conversations;zero-shot;in-context learning", "primary_area": "", "supplementary_material": "", "author": "Chao-Hong Tan;Jia-Chen Gu;Zhen-Hua Ling", "authorids": "~Chao-Hong_Tan1;~Jia-Chen_Gu1;~Zhen-Hua_Ling1", "gender": ";M;M", "homepage": ";https://jasonforjoy.github.io/;http://staff.ustc.edu.cn/~zhling/", "dblp": "282/0435;93/3604.html;70/5210", "google_scholar": "FkWdcrcAAAAJ;https://scholar.google.com/citations?hl=en;f8jRR3EAAAAJ", "or_profile": "~Chao-Hong_Tan1;~Jia-Chen_Gu1;~Zhen-Hua_Ling1", "aff": "University of Science and Technology of China;University of Science and Technology of China;University of Science and Technology of China", "aff_domain": "ustc.edu.cn;ustc.edu.cn;ustc.edu.cn", "position": "PhD student;Postdoc;Professor", "bibtex": "@inproceedings{\ntan2023is,\ntitle={Is Chat{GPT} a Good Multi-Party Conversation Solver?},\nauthor={Chao-Hong Tan and Jia-Chen Gu and Zhen-Hua Ling},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=iO5YOddOyG}\n}", "github": "", "project": "", "reviewers": "2d2w;CWvj;pXrH", "site": "https://openreview.net/forum?id=iO5YOddOyG", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;4", "excitement": "3;2;2", "reproducibility": "3;4;4", "correctness": "3;2;4", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 2.3333333333333335, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": ";;", "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Science and Technology of China", "aff_unique_dep": "", "aff_unique_url": "http://www.ustc.edu.cn", "aff_unique_abbr": "USTC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "iRISsJCzTA", "title": "Controllable Chest X-Ray Report Generation from 
Longitudinal Representations", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Radiology reports are detailed text descriptions of the content of medical scans. Each report describes the presence/absence and location of relevant clinical findings, commonly including comparison with prior exams of the same patient to describe how they evolved. Radiology reporting is a time-consuming process, and scan results are often subject to delays. One strategy to speed up reporting is to integrate automated reporting systems, however clinical deployment requires high accuracy and interpretability. Previous approaches to automated radiology reporting generally do not provide the prior study as input, precluding comparison which is required for clinical accuracy in some types of scans, and offer only unreliable methods of interpretability. Therefore, leveraging an existing visual input format of anatomical tokens, we introduce two novel aspects: (1) longitudinal representation learning \u2012 we input the prior scan as an additional input, proposing a method to align, concatenate and fuse the current and prior visual information into a joint longitudinal representation which can be provided to the multimodal report generation model; (2) sentence-anatomy dropout \u2012 a training strategy for controllability in which the report generator model is trained to predict only sentences from the original report which correspond to the subset of anatomical regions given as input.\nWe show through in-depth experiments on the MIMIC-CXR dataset how the proposed approach achieves state-of-the-art results while enabling anatomy-wise controllable report generation.", "keywords": "Controllable Report Generation;Longitudinal Chest X-Rays;Multimodal Transformer", "primary_area": "", "supplementary_material": "", "author": "Francesco Dalla Serra;Chaoyang Wang;Fani Deligianni;Jeff Dalton;Alison Q O'Neil", "authorids": "~Francesco_Dalla_Serra1;~Chaoyang_Wang4;~Fani_Deligianni4;~Jeff_Dalton3;~Alison_Q_O'Neil1", "gender": ";;F;M;", "homepage": "https://www.gla.ac.uk/pgrs/francescodallaserra/;;https://www.gla.ac.uk/schools/computing/staff/fanideligianni/;https://www.gla.ac.uk/schools/computing/staff/jeffdalton/;", "dblp": ";;;05/2762-1;", "google_scholar": ";;https://scholar.google.co.uk/citations?user=Uw6VosgAAAAJ;mgwLi-EAAAAJ;", "or_profile": "~Francesco_Dalla_Serra1;~Chaoyang_Wang4;~Fani_Deligianni4;~Jeff_Dalton3;~Alison_Q_O'Neil1", "aff": "University of Glasgow;;University of Glasgow;University of Glasgow;", "aff_domain": "gla.ac.uk;;glasgow.ac.uk;glasgow.ac.uk;", "position": "PhD student;;Lecturer;Associate Professor;", "bibtex": "@inproceedings{\nserra2023controllable,\ntitle={Controllable Chest X-Ray Report Generation from Longitudinal Representations},\nauthor={Francesco Dalla Serra and Chaoyang Wang and Fani Deligianni and Jeff Dalton and Alison Q O'Neil},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=iRISsJCzTA}\n}", "github": "", "project": "", "reviewers": "1BQa;pwef;kJZG", "site": "https://openreview.net/forum?id=iRISsJCzTA", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;4", "excitement": "3;4;5", "reproducibility": "5;4;4", "correctness": "3;3;5", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 4.0, "reproducibility_avg": 4.333333333333333, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, 
"orcid": ";;0000-0003-1306-5017;0000-0003-2422-8651;", "linkedin": ";;;;", "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Glasgow", "aff_unique_dep": "", "aff_unique_url": "https://www.gla.ac.uk", "aff_unique_abbr": "Glasgow", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United Kingdom" }, { "id": "iRIj0OvFG1", "title": "Intuitive Multilingual Audio-Visual Speech Recognition with a Single-Trained Model", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "We present a novel approach to multilingual audio-visual speech recognition tasks by introducing a single model on a multilingual dataset. Motivated by a human cognitive system where humans can intuitively distinguish different languages without any conscious effort or guidance, we propose a model that can capture which language is given as an input speech by distinguishing the inherent similarities and differences between languages. To do so, we design a prompt fine-tuning technique into the largely pre-trained audio-visual representation model so that the network can recognize the language class as well as the speech with the corresponding language. Our work contributes to developing robust and efficient multilingual audio-visual speech recognition systems, reducing the need for language-specific models.", "keywords": "audio-visual speech recognition;speech recognition;multimodal;multilingual", "primary_area": "", "supplementary_material": "", "author": "Joanna Hong;Se Jin Park;Yong Man Ro", "authorids": "~Joanna_Hong1;~Se_Jin_Park2;~Yong_Man_Ro3", "gender": "F;F;M", "homepage": "https://joannahong.github.io/;https://sites.google.com/view/sejinpark/sejinpark;https://www.ivllab.kaist.ac.kr/people/professor", "dblp": "255/6341;40/297;02/1221", "google_scholar": "https://scholar.google.com/citations?hl=en;X-SyELwAAAAJ;https://scholar.google.co.kr/citations?user=IPzfF7cAAAAJ", "or_profile": "~Joanna_Hong1;~Se_Jin_Park2;~Yong_Man_Ro1", "aff": "Korea Advanced Institute of Science & Technology;KAIST;Korea Advanced Institute of Science & Technology", "aff_domain": "kaist.ac.kr;ee.kaist.ac.kr;kaist.ac.kr", "position": "PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nhong2023intuitive,\ntitle={Intuitive Multilingual Audio-Visual Speech Recognition with a Single-Trained Model},\nauthor={Joanna Hong and Se Jin Park and Yong Man Ro},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=iRIj0OvFG1}\n}", "github": "", "project": "", "reviewers": "oy3M;EVgV;djfB", "site": "https://openreview.net/forum?id=iRIj0OvFG1", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;2", "excitement": "4;2;2", "reproducibility": "3;3;3", "correctness": "4;2;2", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 2.6666666666666665, "reproducibility_avg": 3.0, "correctness_avg": 2.6666666666666665, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0003-4182-1000;0000-0001-8467-3576;0000-0001-5306-6853", "linkedin": "joanna-hong-330a751a2/;se-jin-park-497546266;", "aff_unique_index": "0;0;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.kaist.ac.kr", "aff_unique_abbr": "KAIST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": 
"South Korea" }, { "id": "iVINvItqhb", "title": "GradSim: Gradient-Based Language Grouping for Effective Multilingual Training", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Most languages of the world pose low-resource challenges to natural language processing models. With multilingual training, knowledge can be shared among languages. However, not all languages positively influence each other and it is an open research question how to select the most suitable set of languages for multilingual training and avoid negative interference among languages whose characteristics or data distributions are not compatible. In this paper, we propose GradSim, a language grouping method based on gradient similarity. Our experiments on three diverse multilingual benchmark datasets show that it leads to the largest performance gains compared to other similarity measures and it is better correlated with cross-lingual model performance. As a result, we set the new state of the art on AfriSenti, a benchmark dataset for sentiment analysis on low-resource African languages. In our extensive analysis, we further reveal that besides linguistic features, the topics of the datasets play an important role for language grouping and that lower layers of transformer models encode language-specific features while higher layers capture task-specific information.", "keywords": "Effective multilingual learning;language grouping;gradient-based similarity", "primary_area": "", "supplementary_material": "", "author": "Mingyang Wang;Heike Adel;Lukas Lange;Jannik Str\u00f6tgen;Hinrich Schuetze", "authorids": "~Mingyang_Wang1;~Heike_Adel1;~Lukas_Lange1;~Jannik_Str\u00f6tgen1;~Hinrich_Schuetze3", "gender": ";F;M;;M", "homepage": "https://mingyang-wang26.github.io/;https://sites.google.com/view/heikeadel;;https://sites.google.com/view/jannikstroetgen;https://www.cis.uni-muenchen.de/schuetze/", "dblp": ";132/6980;219/5288;28/8510;s/HinrichSchutze", "google_scholar": "https://scholar.google.com/citations?hl=en;https://scholar.google.de/citations?user=Fejbq9kAAAAJ;https://scholar.google.co.in/citations?user=yBM4CMcAAAAJ;https://scholar.google.de/citations?user=aQjqBSsAAAAJ;", "or_profile": "~Mingyang_Wang1;~Heike_Adel1;~Lukas_Lange1;~Jannik_Str\u00f6tgen1;~Hinrich_Schuetze3", "aff": "LMU Munich;Robert Bosch GmbH, Bosch;Robert Bosch GmbH, Bosch;Robert Bosch GmbH, Bosch;Center for Information and Language Processing", "aff_domain": "campus.lmu.de;de.bosch.com;de.bosch.com;de.bosch.com;lmu.de", "position": "PhD student;Research scientist;Researcher;Researcher;Full Professor", "bibtex": "@inproceedings{\nwang2023gradsim,\ntitle={GradSim: Gradient-Based Language Grouping for Effective Multilingual Training},\nauthor={Mingyang Wang and Heike Adel and Lukas Lange and Jannik Str{\\\"o}tgen and Hinrich Schuetze},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=iVINvItqhb}\n}", "github": "", "project": "", "reviewers": "5L1B;VWWR;J9J7", "site": "https://openreview.net/forum?id=iVINvItqhb", "pdf_size": 0, "rating": "4;4;4", "confidence": "5;4;3", "excitement": "3;4;3", "reproducibility": "4;4;4", "correctness": "3;4;3", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;", "linkedin": "mingyang-wang-873a7a221/;;;;", 
"aff_unique_index": "0;1;1;1;2", "aff_unique_norm": "Ludwig Maximilian University of Munich;Robert Bosch GmbH;Center for Information and Language Processing", "aff_unique_dep": ";;", "aff_unique_url": "https://www.lmu.de;https://www.bosch.com;", "aff_unique_abbr": "LMU;Bosch;", "aff_campus_unique_index": "0", "aff_campus_unique": "Munich;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Germany;" }, { "id": "iWVpissNEP", "title": "Towards Building More Robust NER datasets: An Empirical Study on NER Dataset Bias from a Dataset Difficulty View", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Recently, many studies have illustrated the robustness problem of Named Entity Recognition (NER) systems: the NER models often rely on superficial entity patterns for predictions, without considering evidence from the context. Consequently, even state-of-the-art NER models generalize poorly to out-of-domain scenarios when out-of-distribution (OOD) entity patterns are introduced. Previous research attributes the robustness problem to the existence of NER dataset bias, where simpler and regular entity patterns induce shortcut learning. In this work, we bring new insights into this problem by comprehensively investigating the NER dataset bias from a dataset difficulty view. We quantify the entity-context difficulty distribution in existing datasets and explain their relationship with model robustness. Based on our findings, we explore three potential ways to de-bias the NER datasets by altering entity-context distribution, and we validate the feasibility with intensive experiments. Finally, we show that the de-biased datasets can transfer to different models and even benefit existing model-based robustness-improving methods, indicating that building more robust datasets is fundamental for building more robust NER systems.", "keywords": "Robustness;OOD Generalization;Dataset Bias;NER", "primary_area": "", "supplementary_material": "", "author": "Ruotian Ma;Xiaolei Wang;Xin Zhou;Qi Zhang;Xuanjing Huang", "authorids": "~Ruotian_Ma1;~Xiaolei_Wang4;~Xin_Zhou6;~Qi_Zhang8;~Xuanjing_Huang1", "gender": "F;F;;M;F", "homepage": ";https://github.com/bigmomo-1;;http://qizhang.info;https://xuanjing-huang.github.io/", "dblp": "246/3164;;05/3403-12;52/323-1;05/6735-1", "google_scholar": "lD66qJYAAAAJ;;8AWfEb0AAAAJ;XfqR3yYAAAAJ;RGsMgZA4H78C", "or_profile": "~Ruotian_Ma1;~Xiaolei_Wang4;~Xin_Zhou6;~Qi_Zhang8;~Xuanjing_Huang1", "aff": "Fudan University;Fudan University;Fudan University;Fudan University;Fudan University", "aff_domain": "fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn", "position": "PhD student;MS student;PhD student;Full Professor;Full Professor", "bibtex": "@inproceedings{\nma2023towards,\ntitle={Towards Building More Robust {NER} datasets: An Empirical Study on {NER} Dataset Bias from a Dataset Difficulty View},\nauthor={Ruotian Ma and Xiaolei Wang and Xin Zhou and Qi Zhang and Xuanjing Huang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=iWVpissNEP}\n}", "github": "", "project": "", "reviewers": "zR24;keJx;kCPH", "site": "https://openreview.net/forum?id=iWVpissNEP", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;4", "excitement": "4;4;3", "reproducibility": "3;5;4", "correctness": "3;4;4", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.0, "correctness_avg": 3.6666666666666665, "replies_avg": 10, 
"authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;0000-0001-9197-9426", "linkedin": ";;;;", "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Fudan University", "aff_unique_dep": "", "aff_unique_url": "https://www.fudan.edu.cn", "aff_unique_abbr": "Fudan", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "iammae3CbG", "title": "Prototype-based HyperAdapter for Sample-Efficient Multi-task Tuning", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Parameter-efficient fine-tuning (PEFT) has shown its effectiveness in adapting the pre-trained language models to downstream tasks while only updating a small number of parameters. Despite the success, most existing methods independently adapt to each task without considering knowledge transfer between tasks and are limited to low-data regimes. To overcome this issue, we propose Prototype-based HyperAdapter (PHA), a novel framework built on the adapter-tuning and hypernetwork. It introduces an instance-dense retriever and a prototypical hypernetwork to generate the conditional modules in a sample-efficient manner. This leads to comparable performance improvements against existing PEFT methods on multi-task learning and few-shot transfer learning. More importantly, when the available data size gets smaller, our method outperforms other strong baselines by a large margin. Based on our extensive empirical experiments across various datasets, we demonstrate that PHA strikes a better trade-off between trainable parameters, accuracy on stream tasks, and sample efficiency. Our code is publicly available at https://github.com/Bumble666/PHA", "keywords": "Multi-task learning;Fine-tuning;Sample-efficiency;Prototype learning", "primary_area": "", "supplementary_material": "", "author": "Hao Zhao;Jie Fu;Zhaofeng He", "authorids": "~Hao_Zhao5;~Jie_Fu2;~Zhaofeng_He1", "gender": ";;M", "homepage": ";;https://teacher.bupt.edu.cn/zhaofenghe/zh_CN/index.htm", "dblp": ";;13/3992", "google_scholar": ";;https://scholar.google.com.hk/citations?hl=zh-CN", "or_profile": "~Hao_Zhao5;~Jie_Fu2;~Zhaofeng_He1", "aff": ";;Beijing University of Post and Telecommunication", "aff_domain": ";;bupt.edu.cn", "position": ";;Full Professor", "bibtex": "@inproceedings{\nzhao2023prototypebased,\ntitle={Prototype-based HyperAdapter for Sample-Efficient Multi-task Tuning},\nauthor={Hao Zhao and Jie Fu and Zhaofeng He},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=iammae3CbG}\n}", "github": "", "project": "", "reviewers": "9tvA;Lu8U;rff6;JcAH", "site": "https://openreview.net/forum?id=iammae3CbG", "pdf_size": 0, "rating": "5;5;5;5", "confidence": "4;3;4;4", "excitement": "4;4;3;4", "reproducibility": "4;4;4;4", "correctness": "4;4;4;4", "rating_avg": 5.0, "confidence_avg": 3.75, "excitement_avg": 3.75, "reproducibility_avg": 4.0, "correctness_avg": 4.0, "replies_avg": 14, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0002-3433-8435", "linkedin": ";;", "aff_unique_index": "0", "aff_unique_norm": "Beijing University of Posts and Telecommunications", "aff_unique_dep": "", "aff_unique_url": "http://www.bupt.edu.cn/", "aff_unique_abbr": "BUPT", "aff_campus_unique_index": "0", "aff_campus_unique": "Beijing", "aff_country_unique_index": "0", "aff_country_unique": "China" }, { "id": "iaxdEnxgju", "title": 
"FaLA: Fast Linear Adaptation for Replacing Backbone Models on Edge Devices", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "In this work, we study the language model backbone replacement problem for personalized downstream tasks in a non-stationary on-device scenario. In real world, company may periodically update the knowledge and architectures of backbones to keep the competitive in the market, meanwhile, to accommodate the users' own preference, models are personalized to fit users' own distribution locally. Traditional full model tuning or transfer learning for such replacements often incur considerable local device training costs and necessitate extensive backpropagation within deep transformer layers. Addressing this issue, we propose a novel, lightweight tuning method for personalized NLP classification tasks post-backbone replacement. Our approach leverages a personalized matrix calculated from documents corresponding to users' old and new backbones. This matrix facilitates top-layer parameter tuning, drastically reducing backpropagation computation. To further mitigate training costs associated with matrix linear optimization, we employ correlation clustering to curate a few examples from personalized cluster sets for individuals. Our method achieves over 1000 times computation reduction in Flops for backpropagation and brings the user-specific initialization for personal matrix yielding significant performance boost compared with popular transfer learning methods.", "keywords": "Foundation Models;Personalization;Efficient Parameter Tuning;On device", "primary_area": "", "supplementary_material": "", "author": "Shuo Huang;Lizhen Qu;Xingliang YUAN;Chunyang Chen", "authorids": "~Shuo_Huang3;~Lizhen_Qu2;~Xingliang_YUAN2;~Chunyang_Chen1", "gender": "M;M;;", "homepage": ";https://research.monash.edu/en/persons/lizhen-qu;http://xyuancs.github.io;https://chunyang-chen.github.io/", "dblp": "85/5969-4;58/3601;21/8884;180/7246.html", "google_scholar": "https://scholar.google.com.au/citations?user=GIwxbSAAAAAJ;https://scholar.google.com.au/citations?user=cHXZgHUAAAAJ;https://scholar.google.com.hk/citations?user=81yWaCoAAAAJ;3tyGlPsAAAAJ", "or_profile": "~Shuo_Huang3;~Lizhen_Qu2;~Xingliang_YUAN2;~Chunyang_Chen1", "aff": "Monash University;Monash University;Monash University;Monash University", "aff_domain": "monash.edu;monash.edu.au;monash.edu;monash.edu", "position": "PhD student;Lecturer;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nhuang2023fala,\ntitle={Fa{LA}: Fast Linear Adaptation for Replacing Backbone Models on Edge Devices},\nauthor={Shuo Huang and Lizhen Qu and Xingliang YUAN and Chunyang Chen},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=iaxdEnxgju}\n}", "github": "", "project": "", "reviewers": "y97i;GxQw;MvrZ", "site": "https://openreview.net/forum?id=iaxdEnxgju", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;2", "excitement": "2;4;4", "reproducibility": "4;3;4", "correctness": "3;3;4", "rating_avg": 3.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 10, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0009-0008-3736-3207;0000-0002-7764-431X;0000-0002-3701-4946;", "linkedin": ";lizhen-qu-50017717/;;", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Monash 
University", "aff_unique_dep": "", "aff_unique_url": "https://www.monash.edu", "aff_unique_abbr": "Monash", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Australia" }, { "id": "ifuvyCdLro", "title": "Medical Text Simplification: Optimizing for Readability with Unlikelihood Training and Reranked Beam Search Decoding", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "Text simplification has emerged as an increasingly useful application of AI for bridging the communication gap in specialized fields such as medicine, where the lexicon is often dominated by technical jargon and complex constructs. Despite notable progress, methods in medical simplification sometimes result in the generated text having lower quality and diversity. In this work, we explore ways to further improve the readability of text simplification in the medical domain. We propose (1) a new unlikelihood loss that encourages generation of simpler terms and (2) a reranked beam search decoding method that optimizes for simplicity, which achieve better performance on readability metrics on three datasets. This study's findings offer promising avenues for improving text simplification in the medical field.", "keywords": "Medical Text;Simplification;Healthcare;Beam Search Decoding;Unlikelihood Learning", "primary_area": "", "supplementary_material": "", "author": "Lorenzo Jaime Yu Flores;Heyuan Huang;Kejian Shi;Sophie Chheang;Arman Cohan", "authorids": "~Lorenzo_Jaime_Yu_Flores1;~Heyuan_Huang1;~Kejian_Shi2;~Sophie_Chheang1;~Arman_Cohan1", "gender": "M;F;;F;M", "homepage": "https://ljyflores.github.io/;https://heyuan919.github.io/;;https://medicine.yale.edu/profile/sophie-chheang/;http://www.armancohan.com", "dblp": "310/1671;;;;160/1727", "google_scholar": "LEo5IBIAAAAJ;https://scholar.google.com/citations?hl=en;;;https://scholar.google.com/citations?hl=en", "or_profile": "~Lorenzo_Jaime_Yu_Flores1;~Heyuan_Huang1;~Kejian_Shi2;~Sophie_Chheang1;~Arman_Cohan1", "aff": ";Yale University;;Yale University;Allen Institute for Artificial Intelligence", "aff_domain": ";yale.edu;;yale.edu;allenai.org", "position": ";MS student;;Assistant Professor;Research Scientist", "bibtex": "@inproceedings{\nflores2023medical,\ntitle={Medical Text Simplification: Optimizing for Readability with Unlikelihood Training and Reranked Beam Search Decoding},\nauthor={Lorenzo Jaime Yu Flores and Heyuan Huang and Kejian Shi and Sophie Chheang and Arman Cohan},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=ifuvyCdLro}\n}", "github": "", "project": "", "reviewers": "8a31;jGDu;wDdq", "site": "https://openreview.net/forum?id=ifuvyCdLro", "pdf_size": 0, "rating": "3;3;3", "confidence": "2;3;4", "excitement": "3;4;2", "reproducibility": "2;4;2", "correctness": "4;4;2", "rating_avg": 3.0, "confidence_avg": 3.0, "excitement_avg": 3.0, "reproducibility_avg": 2.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 10, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0001-9727-665X;;;;", "linkedin": ";;;;", "aff_unique_index": "0;0;1", "aff_unique_norm": "Yale University;Allen Institute for Artificial Intelligence", "aff_unique_dep": ";", "aff_unique_url": "https://www.yale.edu;https://allenai.org", "aff_unique_abbr": "Yale;AI2", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", 
"aff_country_unique": "United States" }, { "id": "ii9ZoryPH2", "title": "DecoMT: Decomposed Prompting for Machine Translation Between Related Languages using Large Language Models", "track": "main", "status": "Long Main", "tldr": "", "abstract": "This study investigates machine translation between related languages i.e., languages within the same family that share linguistic characteristics such as word order and lexical similarity. Machine translation through few-shot prompting leverages a small set of translation pair examples to generate translations for test sentences. This procedure requires the model to learn how to generate translations while simultaneously ensuring that token ordering is maintained to produce a fluent and accurate translation. We propose that for related languages, the task of machine translation can be simplified by leveraging the monotonic alignment characteristic of such languages. We introduce DecoMT, a novel approach of few-shot prompting that decomposes the translation process into a sequence of word chunk translations. Through automatic and human evaluation conducted on multiple related language pairs across various language families, we demonstrate that our proposed approach of decomposed prompting surpasses multiple established few-shot baseline approaches. \nFor example, DecoMT outperforms the strong few-shot prompting BLOOM model with an average improvement of 8 chrF++ scores across the examined languages.", "keywords": "NLP;LLM;few-shot prompting;Machine Translation", "primary_area": "", "supplementary_material": "", "author": "Ratish Puduppully;Anoop Kunchukuttan;Raj Dabre;AiTi Aw;Nancy F. Chen", "authorids": "~Ratish_Puduppully1;~Anoop_Kunchukuttan1;~Raj_Dabre1;~AiTi_Aw1;~Nancy_F._Chen1", "gender": "M;;M;;", "homepage": "https://ratishsp.github.io/;http://anoopk.in/;;;http://alum.mit.edu/www/nancychen", "dblp": "165/0748;126/8631;127/0168;;84/8761", "google_scholar": "https://scholar.google.co.uk/citations?user=FrB_UMIAAAAJ;jnoUuGcAAAAJ;https://scholar.google.co.jp/citations?user=x91u618AAAAJ;;https://scholar.google.com.sg/citations?user=K3Z9UiAAAAAJ", "or_profile": "~Ratish_Puduppully1;~Anoop_Kunchukuttan1;~Raj_Dabre1;~AiTi_Aw1;~Nancy_F._Chen1", "aff": "A*STAR;Microsoft;National Institute of Information and Communications Technology (NICT), National Institute of Advanced Industrial Science and Technology;;I2R, A*STAR", "aff_domain": "a-star.edu.sg;microsoft.com;nict.go.jp;;i2r.a-star.edu.sg", "position": "Researcher;Senior Applied Researcher;Postdoc;;Principal Researcher", "bibtex": "@inproceedings{\npuduppully2023decomt,\ntitle={Deco{MT}: Decomposed Prompting for Machine Translation Between Related Languages using Large Language Models},\nauthor={Ratish Puduppully and Anoop Kunchukuttan and Raj Dabre and AiTi Aw and Nancy F. 
Chen},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=ii9ZoryPH2}\n}", "github": "", "project": "", "reviewers": "C2ZS;TKfW;F6Qp", "site": "https://openreview.net/forum?id=ii9ZoryPH2", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;4", "excitement": "3;3;2", "reproducibility": "4;4;3", "correctness": "3;4;2", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 2.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;0000-0003-0872-5877", "linkedin": "ratishsp/;anoopkunchukuttan/;;;nancy-chen-4644865/?originalSubdomain=sg", "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Agency for Science, Technology and Research;Microsoft;National Institute of Information and Communications Technology;A*STAR", "aff_unique_dep": ";Microsoft Corporation;;Institute for Infocomm Research", "aff_unique_url": "https://www.a-star.edu.sg;https://www.microsoft.com;https://www.nict.go.jp/;https://www.a-star.edu.sg", "aff_unique_abbr": "A*STAR;Microsoft;NICT;A*STAR", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2;0", "aff_country_unique": "Singapore;United States;Japan" }, { "id": "iiWP7khhwP", "title": "Long-Range Language Modeling with Selective Cache", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "The computational cost of transformer-based language models grows quadratically with the sequence length. In this paper, we introduce the selective cache, which stores the selected key-value pairs from the previous context. By selecting important key-value pairs the model makes better use of the cache so that in limited cache size, a longer context history can be stored. We design three kinds of selection methods. The first is based on human language processing. The key-value pairs are selected if they correspond to tokens that are fixated longer, as recorded in eye-tracking-while-reading experiments. We also incorporate the cognitively-inspired selection process into the language model as a trainable process, resulting in two additional methods with improved performance. The selection task is converted into a pruning task so they can be trained with differentiable masks. We demonstrate that the proposed selective cache improves the language modeling performance across different datasets. 
With the same number of stored key-value pairs (cache size), our selective cache outperforms XL cache and compressive cache by considerable margins.", "keywords": "Language Modeling;Long Dependency;Long-term Memory", "primary_area": "", "supplementary_material": "", "author": "Xinting Huang;Nora Hollenstein", "authorids": "~Xinting_Huang2;~Nora_Hollenstein1", "gender": "M;F", "homepage": ";https://norahollenstein.github.io/", "dblp": "240/7147;154/4482", "google_scholar": "BpCALOYAAAAJ;https://scholar.google.ch/citations?user=vxvmkskAAAAJ", "or_profile": "~Xinting_Huang2;~Nora_Hollenstein1", "aff": "Copenhagen University;University of Copenhagen", "aff_domain": "ku.dk;ku.dk", "position": "MS student;Assistant Professor", "bibtex": "@inproceedings{\nhuang2023longrange,\ntitle={Long-Range Language Modeling with Selective Cache},\nauthor={Xinting Huang and Nora Hollenstein},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=iiWP7khhwP}\n}", "github": "", "project": "", "reviewers": "WTsa;ziVP;voB6", "site": "https://openreview.net/forum?id=iiWP7khhwP", "pdf_size": 0, "rating": "3;3;3", "confidence": "5;3;2", "excitement": "4;3;3", "reproducibility": "4;4;3", "correctness": "3;3;3", "rating_avg": 3.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0001-7936-4170", "linkedin": ";norahollenstein/", "aff_unique_index": "0;0", "aff_unique_norm": "University of Copenhagen", "aff_unique_dep": "", "aff_unique_url": "https://www.ku.dk", "aff_unique_abbr": "UCPH", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Denmark" }, { "id": "iipuAqcPGL", "title": "Can Large Language Models Capture Dissenting Human Voices?", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Large language models (LLMs) have shown impressive achievements in solving a broad range of tasks. Augmented by instruction fine-tuning, LLMs have also been shown to generalize in zero-shot settings as well. However, whether LLMs closely align with the human disagreement distribution has not been well-studied, especially within the scope of natural language inference (NLI). In this paper, we evaluate the performance and alignment of LLM distribution with humans using two different techniques to estimate the multinomial distribution: Monte Carlo Estimation (MCE) and Log Probability Estimation (LPE). As a result, we show LLMs exhibit limited ability in solving NLI tasks and simultaneously fail to capture human disagreement distribution. 
The inference and human alignment performances plunge even further on data samples with high human disagreement levels, raising concerns about their natural language understanding (NLU) ability and their representativeness to a larger human population.", "keywords": "Large Language Model;Natural Language Inference;Human Disagreement", "primary_area": "", "supplementary_material": "", "author": "Noah Lee;Na Min An;James Thorne", "authorids": "~Noah_Lee5;~Na_Min_An1;~James_Thorne1", "gender": ";F;", "homepage": "https://nlee-208.github.io/;https://namin-an.github.io/;https://jamesthorne.com", "dblp": ";348/4956;204/1380", "google_scholar": "8o7yxLAAAAAJ;71R1rCgAAAAJ;hao9RrgAAAAJ", "or_profile": "~Noah_Lee5;~Na_Min_An1;~James_Thorne1", "aff": "KAIST;KAIST;KAIST", "aff_domain": "kaist.ac.kr;ee.kaist.ac.kr;kaist.ac.kr", "position": "MS student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nlee2023can,\ntitle={Can Large Language Models Capture Dissenting Human Voices?},\nauthor={Noah Lee and Na Min An and James Thorne},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=iipuAqcPGL}\n}", "github": "", "project": "", "reviewers": "UZ9d;DgQC;qn54", "site": "https://openreview.net/forum?id=iipuAqcPGL", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;4;3", "excitement": "4;3;4", "reproducibility": "2;4;4", "correctness": "3;3;3", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0009-0005-1566-0655;", "linkedin": "nlee228/;namin-an-0202/;", "aff_unique_index": "0;0;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.kaist.ac.kr", "aff_unique_abbr": "KAIST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "South Korea" }, { "id": "ilCMZV0Qdl", "title": "Exploiting Emotion-Semantic Correlations for Empathetic Response Generation", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Empathetic response generation aims to generate empathetic responses by understanding the speaker's emotional feelings from the language of dialogue. Recent methods capture emotional words in the language of communicators and construct them as static vectors to perceive nuanced emotions. However, linguistic research has shown that emotional words in language are dynamic and have correlations with other grammar semantic roles, i.e., words with semantic meanings, in grammar. Previous methods overlook these two characteristics, which easily lead to misunderstandings of emotions and neglect of key semantics.\n\nTo address this issue, we propose a dynamical Emotion-Semantic Correlation Model (ESCM) for empathetic dialogue generation tasks. ESCM constructs dynamic emotion-semantic vectors through the interaction of context and emotions. We introduce dependency trees to reflect the correlations between emotions and semantics. Based on dynamic emotion-semantic vectors and dependency trees, we propose a dynamic correlation graph convolutional network to guide the model in learning context meanings in dialogue and generating empathetic responses. 
Experimental results on the EMPATHETIC-DIALOGUES dataset show that ESCM understands semantics and emotions more accurately and expresses fluent and informative empathetic responses. Our analysis results also indicate that the correlations between emotions and semantics are frequently used in dialogues, which is of great significance for empathetic perception and expression.", "keywords": "Emotion;Semantic;Correlation;Empathetic;Dialogue;Generation", "primary_area": "", "supplementary_material": "", "author": "Zhou Yang;Zhaochun Ren;Wang Yufeng;Xiaofei Zhu;Zhihao Chen;Tiecheng Cai;Wu Yunbing;Yisong Su;Sibo Ju;Xiangwen Liao", "authorids": "~Zhou_Yang6;~Zhaochun_Ren1;~Wang_Yufeng3;~Xiaofei_Zhu2;~Zhihao_Chen5;~Tiecheng_Cai1;~Wu_Yunbing1;~Yisong_Su1;~Sibo_Ju1;~Xiangwen_Liao1", "gender": "M;M;M;M;M;M;M;M;;M", "homepage": "https://github.com/zhouzhouyang520/zhouzhouyang520.git;https://renzhaochun.github.io/;https://yjsy.fzu.edu.cn/;https://www.atailab.cn;;;https://ccds.fzu.edu.cn/info/1203/5019.htm;https://github.com/Thewillman;;", "dblp": ";58/10440;;23/8495.html;;;;;;50/6801", "google_scholar": "BkLn20wAAAAJ;fPcIPt0AAAAJ;;HggZ_tYAAAAJ;;https://scholar.google.com.hk/citations?user=O3Lv5PcAAAAJ;;;;", "or_profile": "~Zhou_Yang6;~Zhaochun_Ren1;~Wang_Yufeng3;~Xiaofei_Zhu2;~Zhihao_Chen5;~Tiecheng_Cai1;~Wu_Yunbing1;~Yisong_Su1;~Sibo_Ju1;~Xiangwen_Liao1", "aff": "Fuzhou University;Shandong University;Fuzhou University;Chongqing University of Technology;Fuzhou University;Fuzhou University;;Fuzhou University;;Fuzhou University", "aff_domain": "fzu.edu.cn;sdu.edu.cn;fzu.edu.cn;cqut.edu.cn;fzu.edu.cn;fzu.edu.cn;;fzu.edu.cn;;fzu.edu.cn", "position": "PhD student;Full Professor;MS student;Full Professor;PhD student;PhD student;;MS student;;Full Professor", "bibtex": "@inproceedings{\nyang2023exploiting,\ntitle={Exploiting Emotion-Semantic Correlations for Empathetic Response Generation},\nauthor={Zhou Yang and Zhaochun Ren and Wang Yufeng and Xiaofei Zhu and Zhihao Chen and Tiecheng Cai and Wu Yunbing and Yisong Su and Sibo Ju and Xiangwen Liao},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=ilCMZV0Qdl}\n}", "github": "", "project": "", "reviewers": "D6ne;jjZg;BTHY", "site": "https://openreview.net/forum?id=ilCMZV0Qdl", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;3;3", "excitement": "3;3;3", "reproducibility": "4;3;4", "correctness": "4;4;4", "rating_avg": 4.0, "confidence_avg": 3.0, "excitement_avg": 3.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 10, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0009-0005-3741-0649;0000-0002-9076-6565;;0000-0001-8239-7176;0000-0002-5858-1034;;;;;", "linkedin": ";zhaochun-ren-460491296/?locale=nl_NL;;;;;;;;", "aff_unique_index": "0;1;0;2;0;0;0;0", "aff_unique_norm": "Fuzhou University;Shandong University;Chongqing University of Technology", "aff_unique_dep": ";;", "aff_unique_url": "https://www.fznu.edu.cn;http://www.sdu.edu.cn;http://www.cqut.edu.cn", "aff_unique_abbr": "FZU;SDU;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "in5xvBrMHv", "title": "Complex Event Schema Induction with Knowledge-Enriched Diffusion Model", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "The concept of a complex event schema pertains to the graph structure that represents real-world knowledge 
of events and their multi-dimensional relationships. However, previous studies on event schema induction have been hindered by challenges such as error propagation and data quality issues. To tackle these challenges, we propose a knowledge-enriched discrete diffusion model. Specifically, we distill the abundant event scenario knowledge of Large Language Models (LLMs) through an object-oriented Python style prompt. We incorporate this knowledge into the training data, enhancing its quality. Subsequently, we employ a discrete diffusion process to generate all nodes and links simultaneously in a non-auto-regressive manner to tackle the problem of error propagation. Additionally, we devise an entity relationship prediction module to complete entity relationships between event arguments. Experimental results demonstrate that our approach achieves outstanding performance across a range of evaluation metrics.", "keywords": "Complex Event Schema Induction;Diffusion Model;Large Language Model", "primary_area": "", "supplementary_material": "", "author": "Yupu Hao;Pengfei Cao;Yubo Chen;Kang Liu;Jiexin Xu;Huaijun Li;Xiaojian Jiang;Jun Zhao", "authorids": "~Yupu_Hao1;~Pengfei_Cao1;~Yubo_Chen1;~Kang_Liu1;~Jiexin_Xu1;~Huaijun_Li1;~Xiaojian_Jiang1;~Jun_Zhao4", "gender": "M;;M;M;F;M;M;M", "homepage": ";https://cpf-nlpr.github.io/;http://www.nlpr.ia.ac.cn/cip/yubochen/index.html;http://www.nlpr.ia.ac.cn/cip/~liukang/index.html;;;;http://nlpr-web.ia.ac.cn/cip/english/~junzhao/index.html", "dblp": ";182/7941;https://dblp.uni-trier.de/pid/90/7879.html;42/4903.html;270/0739;;72/7071;https://dblp.uni-trier.de/pid/47/2026-1.html", "google_scholar": "G8j_yVkAAAAJ;lP5_LJIAAAAJ;https://scholar.google.com.hk/citations?user=9z7GPxIAAAAJ;DtZCfl0AAAAJ;;;https://scholar.google.com.hk/citations?user=s_ih2cYAAAAJ;https://scholar.google.com.hk/citations?user=HljRttwAAAAJ", "or_profile": "~Yupu_Hao1;~Pengfei_Cao1;~Yubo_Chen1;~Kang_Liu1;~Jiexin_Xu1;~Huaijun_Li1;~Xiaojian_Jiang1;~Jun_Zhao4", "aff": "Beijing Institute of Technology;Institute of Automation, Chinese Academy of Sciences;Institute of automation, Chinese academy of science;Institute of Automation, Chinese Academy of Sciences;;;;Institute of automation, Chinese academy of science", "aff_domain": "bit.edu.cn;ia.ac.cn;nlpr.ia.ac.cn;ia.ac.cn;;;;nlpr.ia.ac.cn", "position": "Undergrad student;PhD student;Associate Professor;Professor;;;;Full Professor", "bibtex": "@inproceedings{\nhao2023complex,\ntitle={Complex Event Schema Induction with Knowledge-Enriched Diffusion Model},\nauthor={Yupu Hao and Pengfei Cao and Yubo Chen and Kang Liu and Jiexin Xu and Huaijun Li and Xiaojian Jiang and Jun Zhao},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=in5xvBrMHv}\n}", "github": "", "project": "", "reviewers": "VM43;WC1S;XvjL", "site": "https://openreview.net/forum?id=in5xvBrMHv", "pdf_size": 0, "rating": "4;4;4", "confidence": "2;5;4", "excitement": "3;3;3", "reproducibility": "3;4;3", "correctness": "2;4;3", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.0, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 8, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;;;", "linkedin": "%E7%85%9C%E6%9C%B4-%E9%83%9D-18a25327b/;;;;;%E6%80%80%E4%BF%8A-%E6%9D%8E-67240a27b/;;", "aff_unique_index": "0;1;1;1;1", "aff_unique_norm": "Beijing Institute of Technology;Chinese Academy of Sciences", 
"aff_unique_dep": ";Institute of Automation", "aff_unique_url": "http://www.bit.edu.cn/;http://www.ia.cas.cn", "aff_unique_abbr": "BIT;CAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "inN4TdboJX", "title": "Noisy Exemplars Make Large Language Models More Robust: A Domain-Agnostic Behavioral Analysis", "track": "main", "status": "Short Main", "tldr": "", "abstract": "Recent advances in prompt engineering enable large language models (LLMs) to solve multi-hop logical reasoning problems with impressive accuracy. However, there is little existing work investigating the robustness of LLMs with few-shot prompting techniques. Therefore, we introduce a systematic approach to test the robustness of LLMs in multi-hop reasoning tasks via domain-agnostic perturbations. We include perturbations at multiple levels of abstractions (e.g. lexical perturbations such as typos, and semantic perturbations such as the inclusion of intermediate reasoning steps in the questions) to conduct behavioral analysis on the LLMs. Throughout our experiments, we find that models are more sensitive to certain perturbations such as replacing words with their synonyms. We also demonstrate that increasing the proportion of perturbed exemplars in the prompts improves the robustness of few-shot prompting methods.", "keywords": "Large Language Models;Robustness;Perturbation Analysis;Few-shot Prompting;Chain-of-thought", "primary_area": "", "supplementary_material": "", "author": "Hongyi Zheng;Abulhair Saparov", "authorids": "~Hongyi_Zheng2;~Abulhair_Saparov1", "gender": "M;M", "homepage": ";http://asaparov.org", "dblp": ";117/6287", "google_scholar": ";TVNS71sAAAAJ", "or_profile": "~Hongyi_Zheng2;~Abulhair_Saparov1", "aff": "New York University;", "aff_domain": "nyu.edu;", "position": "Undergrad student;", "bibtex": "@inproceedings{\nzheng2023noisy,\ntitle={Noisy Exemplars Make Large Language Models More Robust: A Domain-Agnostic Behavioral Analysis},\nauthor={Hongyi Zheng and Abulhair Saparov},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=inN4TdboJX}\n}", "github": "", "project": "", "reviewers": "dKHZ;Scw6;5nja", "site": "https://openreview.net/forum?id=inN4TdboJX", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;3;4", "excitement": "3;3;3", "reproducibility": "5;3;5", "correctness": "3;3;3", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.0, "reproducibility_avg": 4.333333333333333, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";", "linkedin": "hongyizheng/;", "aff_unique_index": "0", "aff_unique_norm": "New York University", "aff_unique_dep": "", "aff_unique_url": "https://www.nyu.edu", "aff_unique_abbr": "NYU", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "id": "islVqaCzfa", "title": "InstructCoder: Empowering Language Models to Edit Code", "track": "main", "status": "Reject", "tldr": "", "abstract": "Code editing encompasses a variety of pragmatic tasks that developers deal with daily. Despite its relevance and practical usefulness, automatic code editing remains an underexplored area in the evolution of deep learning models, partly due to data scarcity. 
In this work, we explore the use of large language models (LLMs) to edit code based on user instructions, covering a broad range of implicit tasks such as comment insertion, code optimization, and code refactoring. To facilitate this, we introduce CodeInstruct, the first dataset designed to adapt LLMs for general-purpose code editing, containing high-diversity code-editing tasks. It consists of over 114,000 instruction-input-output triplets and covers multiple distinct code editing scenarios. The dataset is systematically expanded through an iterative process that commences with code editing data sourced from GitHub commits as seed tasks. Seed and generated tasks are used subsequently to prompt ChatGPT for more task data. Our experiments demonstrate that open-source LLMs fine-tuned on CodeInstruct can edit code correctly based on users' instructions most of the time, exhibiting unprecedented code-editing performance. Such results suggest that proficient instruction-finetuning can lead to significant amelioration in code-editing abilities.", "keywords": "Code Edit;Instruction Finetuning", "primary_area": "", "supplementary_material": "", "author": "Qisheng Hu;Kaixin Li;James Xu Zhao;Yuxi Xie;Tiedong Liu;Hui Chen;Michael Qizhe Xie;Junxian He", "authorids": "~Qisheng_Hu1;~Kaixin_Li1;~James_Xu_Zhao1;~Yuxi_Xie1;~Tiedong_Liu1;~Hui_Chen4;~Michael_Qizhe_Xie1;~Junxian_He1", "gender": "M;M;;F;M;;;M", "homepage": ";https://likaixin2000.github.io/;;https://yuxixie.github.io/;https://github.com/liutiedong;;;https://jxhe.github.io", "dblp": "359/6493;;;;;;;188/6127.html", "google_scholar": "2gyrZwQAAAAJ;crl_1igAAAAJ;;LNLECx0AAAAJ;;;;BIFGeoUAAAAJ", "or_profile": "~Qisheng_Hu1;~Kaixin_Li1;~James_Xu_Zhao1;~Yuxi_Xie1;~Tiedong_Liu1;~Hui_Chen4;~Michael_Qizhe_Xie1;~Junxian_He1", "aff": "National University of Singapore;National University of Singapore;;National University of Singapore;National University of Singapore;;;Hong Kong University of Science and Technology", "aff_domain": "nus.edu;u.nus.edu;;u.nus.edu;nus.edu;;;ust.hk", "position": "MS student;PhD student;;PhD student;PhD student;;;Assistant Professor", "bibtex": "@misc{\nhu2023instructcoder,\ntitle={InstructCoder: Empowering Language Models to Edit Code},\nauthor={Qisheng Hu and Kaixin Li and James Xu Zhao and Yuxi Xie and Tiedong Liu and Hui Chen and Michael Qizhe Xie and Junxian He},\nyear={2023},\nurl={https://openreview.net/forum?id=islVqaCzfa}\n}", "github": "", "project": "", "reviewers": "YC8o;H5fp;XsAu", "site": "https://openreview.net/forum?id=islVqaCzfa", "pdf_size": 0, "rating": "", "confidence": "5;4;4", "excitement": "4;4;4", "reproducibility": "4;5;4", "correctness": "3;3;3", "rating_avg": 0, "confidence_avg": 4.333333333333333, "excitement_avg": 4.0, "reproducibility_avg": 4.333333333333333, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 8, "corr_rating_confidence": 0, "corr_rating_correctness": 0, "orcid": "0009-0005-1589-809X;;;;;;;", "linkedin": "qishenghu;;;yuxi-xie-494265181;;;;", "aff_unique_index": "0;0;0;0;1", "aff_unique_norm": "National University of Singapore;Hong Kong University of Science and Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.nus.edu.sg;https://www.ust.hk", "aff_unique_abbr": "NUS;HKUST", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;1", "aff_country_unique": "Singapore;China" }, { "id": "ivSJdhcuTi", "title": "Out-of-Distribution Generalization in Natural Language Processing: Past, Present, and Future", "track": 
"main", "status": "Long Main", "tldr": "", "abstract": "Machine learning (ML) systems in natural language processing (NLP) face significant challenges in generalizing to out-of-distribution (OOD) data, where the test distribution differs from the training data distribution. This poses important questions about the robustness of NLP models and their high accuracy, which may be artificially inflated due to their underlying sensitivity to systematic biases. Despite these challenges, there is a lack of comprehensive surveys on the generalization challenge from an OOD perspective in natural language understanding. Therefore, this paper aims to fill this gap by presenting the first comprehensive review of recent progress, methods, and evaluations on this topic. We further discuss the challenges involved and potential future research directions. By providing convenient access to existing work, we hope this survey will encourage future research in this area.", "keywords": "Out-of-Distribution Generalization; OOD Robustness", "primary_area": "", "supplementary_material": "", "author": "Linyi Yang;Yaoxian Song;Xuan Ren;Chenyang Lyu;Yidong Wang;Jingming Zhuo;Lingqiao Liu;Jindong Wang;Jennifer Foster;Yue Zhang", "authorids": "~Linyi_Yang1;~Yaoxian_Song2;~Xuan_Ren1;~Chenyang_Lyu1;~Yidong_Wang1;~Jingming_Zhuo1;~Lingqiao_Liu3;~Jindong_Wang1;~Jennifer_Foster2;~Yue_Zhang7", "gender": ";M;M;M;M;M;M;F;M;M", "homepage": "https://yanglinyi.github.io/;;;https://lyuchenyang.github.io;https://qianlanwyd.github.io/;https://jingmingzhuo.github.io/;https://sites.google.com/site/lingqiaoliu83/;https://www.computing.dcu.ie/~jfoster;http://frcchang.github.io;https://jd92.wang/", "dblp": "218/8007;241/9412.html;;248/1663;59/6759.html;362/7844;45/7776;14/3001;47/722-4;19/2969-1", "google_scholar": "go3sFxcAAAAJ;Qp7qOyUAAAAJ;VR_Y3T0AAAAJ;;;C7RtP4oAAAAJ;Y2xu62UAAAAJ;SC2xBNwAAAAJ;;hBZ_tKsAAAAJ", "or_profile": "~Linyi_Yang1;~Yaoxian_Song2;~Xuan_Ren1;~Chenyang_Lyu1;~Yidong_Wang1;~Jingming_Zhuo1;~Lingqiao_Liu3;~Jennifer_Foster2;~Yue_Zhang7;~Jindong_Wang4", "aff": "Westlake University;Westlake University;University of Adelaide;Dublin City University;Peking University;Jilin University;The University of Adelaide;Dublin City University;Westlake University;Microsoft Research", "aff_domain": "westlake.edu.cn;westlake.edu.cn;adelaide.edu.au;dcu.ie;pku.edu.cn;mails.jlu.edu.cn;adelaide.edu.au;dcu.ie;westlake.edu.cn;microsoft.com", "position": "Researcher;PhD student;PhD student;PhD student;PhD student;Undergrad student;Assistant Professor;Lecturer;Full Professor;Researcher", "bibtex": "@inproceedings{\nyang2023outofdistribution,\ntitle={Out-of-Distribution Generalization in Natural Language Processing: Past, Present, and Future},\nauthor={Linyi Yang and Yaoxian Song and Xuan Ren and Chenyang Lyu and Yidong Wang and Jingming Zhuo and Lingqiao Liu and Jindong Wang and Jennifer Foster and Yue Zhang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=ivSJdhcuTi}\n}", "github": "", "project": "", "reviewers": "Z5rz;t4je;DYuH", "site": "https://openreview.net/forum?id=ivSJdhcuTi", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;3;3", "excitement": "3;4;4", "reproducibility": "2;0;0", "correctness": "3;4;4", "rating_avg": 4.0, "confidence_avg": 3.0, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 0.6666666666666666, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 10, "corr_rating_confidence": 0.0, 
"corr_rating_correctness": 0.0, "orcid": ";;0000-0002-1536-3344;;;;;;0000-0002-5214-2268;0000-0002-4833-0880", "linkedin": ";yxsong-aaron;;;;;;;;jindong-wang/", "aff_unique_index": "0;0;1;2;3;4;1;2;0;5", "aff_unique_norm": "Westlake University;University of Adelaide;Dublin City University;Peking University;Jilin University;Microsoft", "aff_unique_dep": ";;;;;Microsoft Research", "aff_unique_url": "https://www.westlake.edu.cn;https://www.adelaide.edu.au;https://www.dcu.ie;http://www.pku.edu.cn;http://www.jlu.edu.cn;https://www.microsoft.com/en-us/research", "aff_unique_abbr": "WU;Adelaide;DCU;Peking U;JLU;MSR", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;2;0;0;1;2;0;3", "aff_country_unique": "China;Australia;Ireland;United States" }, { "id": "iw4zUlc5OF", "title": "On the Zero-Shot Generalization of Machine-Generated Text Detectors", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "The rampant proliferation of large language models, fluent enough to generate text indistinguishable from human-written language, gives unprecedented importance to the detection of machine-generated text. This work is motivated by an important research question: How will the detectors of machine-generated text perform on outputs of a new generator, that the detectors were not trained on? We begin by collecting generation data from a wide range of LLMs, and train neural detectors on data from each generator and test its performance on held-out generators. While none of the detectors can generalize to all generators, we observe a consistent and interesting pattern that the detectors trained on data from a medium-size LLM can zero-shot generalize to the larger version. As a concrete application, we demonstrate that robust detectors can be built on an ensemble of training data from medium-sized models.", "keywords": "detection;NLG;zero-shot generalization", "primary_area": "", "supplementary_material": "", "author": "Xiao Pu;Jingyu Zhang;Xiaochuang Han;Yulia Tsvetkov;Tianxing He", "authorids": "~Xiao_Pu2;~Jingyu_Zhang2;~Xiaochuang_Han1;~Yulia_Tsvetkov1;~Tianxing_He1", "gender": "F;;M;F;M", "homepage": ";https://jackz.io/;https://xhan77.github.io/;https://homes.cs.washington.edu/~yuliats/;https://cloudygoose.github.io/", "dblp": "91/4650-3;92/3672.html;216/6755;75/8157;149/0111", "google_scholar": "rRazhgkAAAAJ;9EC0sDMAAAAJ;GamSVF0AAAAJ;SEDPkrsAAAAJ;egmfjjwAAAAJ", "or_profile": "~Xiao_Pu2;~Jingyu_Zhang2;~Xiaochuang_Han1;~Yulia_Tsvetkov1;~Tianxing_He1", "aff": ";Johns Hopkins University;Department of Computer Science, University of Washington;Department of Computer Science, University of Washington;University of Washington", "aff_domain": ";cs.jhu.edu;cs.washington.edu;cs.washington.edu;cs.washington.edu", "position": ";Undergrad student;PhD student;Assistant Professor;Postdoc", "bibtex": "@inproceedings{\npu2023on,\ntitle={On the Zero-Shot Generalization of Machine-Generated Text Detectors},\nauthor={Xiao Pu and Jingyu Zhang and Xiaochuang Han and Yulia Tsvetkov and Tianxing He},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=iw4zUlc5OF}\n}", "github": "", "project": "", "reviewers": "GhLs;wjLU;FNgs", "site": "https://openreview.net/forum?id=iw4zUlc5OF", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;3;3", "excitement": "2;4;4", "reproducibility": "4;3;4", "correctness": "2;4;3", "rating_avg": 3.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 
3.3333333333333335, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.0, "replies_avg": 10, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0000-0002-4634-7128;", "linkedin": ";;;;", "aff_unique_index": "0;1;1;1", "aff_unique_norm": "Johns Hopkins University;University of Washington", "aff_unique_dep": ";Department of Computer Science", "aff_unique_url": "https://www.jhu.edu;https://www.washington.edu", "aff_unique_abbr": "JHU;UW", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Seattle", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "ix6h7Bkq62", "title": "Standardizing Distress Analysis: Emotion-Driven Distress Identification and Cause Extraction (DICE) in Multimodal Online Posts", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Due to its growing impact on public opinion, hate speech on social media has garnered increased attention. While automated methods for identifying hate speech have been presented in the past, they have mostly been limited to analyzing textual content. The interpretability of such models has received very little attention, despite the social and legal consequences of erroneous predictions. In this work, we present a novel problem of \\textit{Distress Identification and Cause Extraction (DICE)} from multimodal online posts. We develop a multi-task deep framework for the simultaneous detection of distress content and identify connected causal phrases from the text using emotional information. The emotional information is incorporated into the training process using a zero-shot strategy, and a novel mechanism is devised to fuse the features from the multimodal inputs. Furthermore, we introduce the first-of-its-kind \\textit{Distress and Cause annotated Multimodal (DCaM)} dataset of 20,764 social media posts. We thoroughly evaluate our proposed method by comparing it to several existing benchmarks. Empirical assessment and comprehensive qualitative analysis demonstrate that our proposed method works well on distress detection and cause extraction tasks, improving F1 and ROS scores by 1.95\\% and 3\\%, respectively, relative to the best-performing baseline. 
The code and the dataset can be accessed from the following link: \\url{https://www.iitp.ac.in/~ai-nlp-ml/resources.html\\#DICE}.", "keywords": "Hate speech;Social media;Multimodal online posts;Distress content;Causal phrases;Emotional information;Zero-shot strategy", "primary_area": "", "supplementary_material": "", "author": "gopendra Vikram singh;Soumitra Ghosh;Atul Verma;Chetna Painkra;Asif Ekbal", "authorids": "~gopendra_Vikram_singh1;~Soumitra_Ghosh2;~Atul_Verma1;~Chetna_Painkra1;~Asif_Ekbal1", "gender": "M;M;;;M", "homepage": ";https://sites.google.com/view/soumitra-ghosh;;;https://ekbalasif.github.io", "dblp": "258/4363.html;https://dblp.uni-trier.de/pid/21/5014.html;;;11/3590", "google_scholar": ";Ki55FosAAAAJ;;;https://scholar.google.co.in/citations?user=IAL_F04AAAAJ", "or_profile": "~gopendra_Vikram_singh1;~Soumitra_Ghosh2;~Atul_Verma1;~Chetna_Painkra1;~Asif_Ekbal1", "aff": "Indian Institute of Technology, Patna, Dhirubhai Ambani Institute Of Information and Communication Technology;Indian Institute of Technology Patna, India;;;Indian Institute of Technology, Patna", "aff_domain": "iitp.ac.in;iitp.ac.in;;;iitp.ac.in", "position": "PhD student;PhD student;;;Associate Professor", "bibtex": "@inproceedings{\nsingh2023standardizing,\ntitle={Standardizing Distress Analysis: Emotion-Driven Distress Identification and Cause Extraction ({DICE}) in Multimodal Online Posts},\nauthor={gopendra Vikram singh and Soumitra Ghosh and Atul Verma and Chetna Painkra and Asif Ekbal},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=ix6h7Bkq62}\n}", "github": "", "project": "", "reviewers": "RS7U;rcDd;7DRN", "site": "https://openreview.net/forum?id=ix6h7Bkq62", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;3", "excitement": "3;2;3", "reproducibility": "1;2;4", "correctness": "2;2;5", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 2.6666666666666665, "reproducibility_avg": 2.3333333333333335, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0003-1910-4320;;;0000-0003-3612-8834", "linkedin": ";soumitra-ghosh-410ba37a/;;;asif-ekbal-3b8a4517/?originalSubdomain=in", "aff_unique_index": "0;1;1", "aff_unique_norm": "Indian Institute of Technology, Patna;Indian Institute of Technology Patna", "aff_unique_dep": ";", "aff_unique_url": "https://www.iitp.ac.in;https://www.iitp.ac.in", "aff_unique_abbr": "IIT Patna;IIT Patna", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Patna", "aff_country_unique_index": "0;0;0", "aff_country_unique": "India" }, { "id": "iytcEQ5I5v", "title": "SDOH-NLI: a Dataset for Inferring Social Determinants of Health from Clinical Notes", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "Social and behavioral determinants of health (SDOH) play a significant role in shaping health outcomes, and extracting these determinants from clinical notes is a first step to help healthcare providers systematically identify opportunities to provide appropriate care and address disparities. Progress on using NLP methods for this task has been hindered by the lack of high-quality publicly available labeled data, largely due to the privacy and regulatory constraints on the use of real patients' information. This paper introduces a new dataset, SDOH-NLI, that is based on publicly available notes and which we release publicly. 
We formulate SDOH extraction as a natural language inference task, and provide binary textual entailment labels obtained from human raters for a cross product of a set of social history snippets as premises and SDOH factors as hypotheses. Our dataset differs from standard NLI benchmarks in that our premises and hypotheses are obtained independently. We evaluate both \"off-the-shelf\" entailment models as well as models fine-tuned on our data, and highlight the ways in which our dataset appears more challenging than commonly used NLI datasets.", "keywords": "social determinants of health;natural language inference;nli;clinical notes;dataset", "primary_area": "", "supplementary_material": "", "author": "Adam D Lelkes;Eric Loreaux;Tal Schuster;Ming-Jun Chen;Alvin Rajkomar", "authorids": "~Adam_D_Lelkes1;~Eric_Loreaux1;~Tal_Schuster1;~Ming-Jun_Chen1;~Alvin_Rajkomar1", "gender": "M;M;Not Specified;;M", "homepage": "https://research.google/people/AdamLelkes/;https://www.linkedin.com/in/ericloreaux/;https://people.csail.mit.edu/tals/;;", "dblp": "147/5184;;190/7491;;", "google_scholar": "PAAAaI4AAAAJ;;oo8QRmIAAAAJ;;IC0aUB4AAAAJ", "or_profile": "~Adam_D_Lelkes1;~Eric_Loreaux1;~Tal_Schuster1;~Ming-Jun_Chen1;~Alvin_Rajkomar1", "aff": "Google;Google;Google;;University of California, San Francisco", "aff_domain": "google.com;google.com;google.com;;ucsf.edu", "position": "Researcher;Research Engineer;Researcher;;Assistant Professor", "bibtex": "@inproceedings{\nlelkes2023sdohnli,\ntitle={{SDOH}-{NLI}: a Dataset for Inferring Social Determinants of Health from Clinical Notes},\nauthor={Adam D Lelkes and Eric Loreaux and Tal Schuster and Ming-Jun Chen and Alvin Rajkomar},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=iytcEQ5I5v}\n}", "github": "", "project": "", "reviewers": "Yo1y;sNnf;YiJH", "site": "https://openreview.net/forum?id=iytcEQ5I5v", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;5;4", "excitement": "3;3;3", "reproducibility": "4;5;3", "correctness": "3;3;3", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 3.0, "reproducibility_avg": 4.0, "correctness_avg": 3.0, "replies_avg": 10, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;", "linkedin": "adamlelkes;;;;", "aff_unique_index": "0;0;0;1", "aff_unique_norm": "Google;University of California, San Francisco", "aff_unique_dep": "Google;", "aff_unique_url": "https://www.google.com;https://www.ucsf.edu", "aff_unique_abbr": "Google;UCSF", "aff_campus_unique_index": "0;0;0;1", "aff_campus_unique": "Mountain View;San Francisco", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "j2bP0STpw7", "title": "The Vault: A Comprehensive Multilingual Dataset for Advancing Code Understanding and Generation", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "We present The Vault, an open-source dataset of high quality code-text pairs in multiple programming languages for training large language models to understand and generate code. We propose methods for thoroughly extracting samples that use both rules and deep learning to ensure that they contain high-quality pairs of code and text, resulting in a dataset of 43 million high-quality code-text pairs. 
We thoroughly evaluated this dataset and discovered that when used to train common code language models (such as CodeT5, CodeBERT, and CodeGen), it outperforms the same models trained on other datasets such as CodeSearchNet. These evaluations included common coding tasks such as code generation, code summarization, and code search. The Vault can be used by researchers and practitioners to train a wide range of big language models that understand code. Alternatively, researchers can use our data cleaning methods and scripts to improve their own datasets. We anticipate that using The Vault to train large language models will improve their ability to understand and generate code, propelling AI research and software development forward. We are releasing our source code and a framework to make it easier for others to replicate our results.", "keywords": "deep-learning;ai4code;dataset;benchmark;code-understanding;code-generation", "primary_area": "", "supplementary_material": "", "author": "Dung Manh Nguyen;Le Hai Nam;Anh T. V. Dau;Anh Minh Nguyen;Khanh Nghiem;Jin L.C. Guo;Nghi D. Q. Bui", "authorids": "~Dung_Manh_Nguyen1;~Le_Hai_Nam1;~Anh_T._V._Dau1;~Anh_Minh_Nguyen1;~Khanh_Nghiem1;~Jin_L.C._Guo1;~Nghi_D._Q._Bui1", "gender": "M;M;M;M;;M;F", "homepage": "http://manhdung20112000.github.io/aboutme;https://www.facebook.com/profile.php?id=100006608718814;https://github.com/minhna1112;;http://jguo-web.com;https://bdqnghi.github.io/;", "dblp": "317/0155.html;;123/8183;;;207/7870;", "google_scholar": ";https://scholar.google.com.vn/citations?user=1R5vE1UAAAAJ;xetJ05YAAAAJ;;;QwybxYsAAAAJ;https://scholar.google.com.vn/citations?user=vHa2yksAAAAJ", "or_profile": "~Dung_Manh_Nguyen1;~Le_Hai_Nam1;~Anh_Minh_Nguyen1;~Khanh_Nghiem1;~Jin_L.C._Guo1;~Nghi_D._Q._Bui1;~Anh_D._T._Van1", "aff": ";Hanoi University of Science and Technology;FPT;;, McGill University;SalesForce.com;FPT AI Center", "aff_domain": ";hust.edu.vn;fpt.com;;cs.mcgill.ca;salesforce.com;fpt.com", "position": ";Researcher;Researcher;;Assistant Professor;Researcher;Researcher", "bibtex": "@inproceedings{\nnguyen2023the,\ntitle={The Vault: A Comprehensive Multilingual Dataset for Advancing Code Understanding and Generation},\nauthor={Dung Manh Nguyen and Le Hai Nam and Anh T. V. Dau and Anh Minh Nguyen and Khanh Nghiem and Jin L.C. Guo and Nghi D. Q.
Bui},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=j2bP0STpw7}\n}", "github": "", "project": "", "reviewers": "EqfE;rHmi;H3Qp", "site": "https://openreview.net/forum?id=j2bP0STpw7", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;5", "excitement": "3;2;3", "reproducibility": "4;3;5", "correctness": "3;3;3", "rating_avg": 3.0, "confidence_avg": 4.333333333333333, "excitement_avg": 2.6666666666666665, "reproducibility_avg": 4.0, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0009-0005-5091-7401;;;;", "linkedin": "nmd2000/;;anh-minh-nguyen-44016717b/;khanhnghiem;;;vananh0905/", "aff_unique_index": "0;1;2;3;1", "aff_unique_norm": "Hanoi University of Science and Technology;FPT Corporation;McGill University;Salesforce", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.hust.edu.vn;https://www.fpt.com.vn;https://www.mcgill.ca;https://www.salesforce.com", "aff_unique_abbr": "HUST;FPT;McGill;Salesforce", "aff_campus_unique_index": "0", "aff_campus_unique": "Hanoi;", "aff_country_unique_index": "0;0;1;2;0", "aff_country_unique": "Vietnam;Canada;United States" }, { "id": "j48JCRagwR", "title": "Improving Contrastive Learning of Sentence Embeddings with Focal InfoNCE", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "The recent success of SimCSE has greatly advanced state-of-the-art sentence representations. However, the original formulation of SimCSE does not fully exploit the potential of hard negative samples in contrastive learning. This study introduces an unsupervised contrastive learning framework that combines SimCSE with hard negative mining, aiming to enhance the quality of sentence embeddings. The proposed focal-InfoNCE function introduces self-paced modulation terms in the contrastive objective, downweighting the loss associated with easy negatives and encouraging the model to focus on hard negatives.
Experimentation on various STS benchmarks shows that our method improves sentence embeddings in terms of Spearman's correlation and representation alignment and uniformity.", "keywords": "Contrastive Learning;Sentence Textual Similarity;Sentence Embdding;Negative Sample Reweighing", "primary_area": "", "supplementary_material": "", "author": "Pengyue Hou;Xingyu Li", "authorids": "~Pengyue_Hou1;~Xingyu_Li3", "gender": ";", "homepage": ";https://apps.ualberta.ca/directory/person/xingyu", "dblp": ";", "google_scholar": "2GCGEaMAAAAJ;V8OICzYAAAAJ", "or_profile": "~Pengyue_Hou1;~Xingyu_Li3", "aff": "University of Alberta;University of Alberta", "aff_domain": "ualberta.ca;ualberta.ca", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\nhou2023improving,\ntitle={Improving Contrastive Learning of Sentence Embeddings with Focal Info{NCE}},\nauthor={Pengyue Hou and Xingyu Li},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=j48JCRagwR}\n}", "github": "", "project": "", "reviewers": "Crvb;15wZ;qweB", "site": "https://openreview.net/forum?id=j48JCRagwR", "pdf_size": 0, "rating": "4;4;4", "confidence": "5;2;2", "excitement": "4;4;3", "reproducibility": "3;4;3", "correctness": "4;4;3", "rating_avg": 4.0, "confidence_avg": 3.0, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";", "linkedin": ";", "aff_unique_index": "0;0", "aff_unique_norm": "University of Alberta", "aff_unique_dep": "", "aff_unique_url": "https://www.ualberta.ca", "aff_unique_abbr": "UAlberta", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Canada" }, { "id": "j61Sx05QRj", "title": "NeuSTIP: A Neuro-Symbolic Model for Link and Time Prediction in Temporal Knowledge Graphs", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Neuro-symbolic (NS) models for knowledge graph completion (KGC) combine the benefits of symbolic models (interpretable inference) with those of distributed representations (parameter sharing, high accuracy). While several NS models exist for KGs with static facts, there is limited work on temporal KGC (TKGC) for KGs where a fact is associated with a time interval. In response, we propose a novel NS model for TKGC called NeuSTIP, which performs link prediction and time interval prediction in a TKG. NeuSTIP learns temporal rules with Allen predicates, which ensure temporal consistency between neighboring predicates in the rule body. We further design a unique scoring function that evaluates the confidence of the candidate answers while performing link and time interval predictions by utilizing the learned rules. 
Our empirical evaluation on two time interval based TKGC datasets shows that our model shows competitive performance on link prediction and establishes a new state of the art on time prediction.", "keywords": "Information Extraction;Neuro-Symbolic Knowledge Graph Completion;Temporal Knowledge Graph Completion", "primary_area": "", "supplementary_material": "", "author": "Ishaan Singh;Navdeep Kaur;Garima Gaur;Mausam .", "authorids": "~Ishaan_Singh1;~Navdeep_Kaur1;~Garima_Gaur1;~Mausam_.1", "gender": "M;F;F;M", "homepage": ";;;http://www.cse.iitd.ac.in/~mausam", "dblp": ";;207/9943;30/6391.html", "google_scholar": ";duv4D7wAAAAJ;https://scholar.google.co.in/citations?user=thOvLMkAAAAJ;https://scholar.google.co.in/citations?hl=en", "or_profile": "~Ishaan_Singh1;~Navdeep_Kaur1;~Garima_Gaur1;~Mausam_Mausam2", "aff": "Indian Institute of Technology, Delhi;Indian Institute of Technology, Delhi;Indian Institute of Technology, Delhi;Indian Institute of Technology Delhi", "aff_domain": "iitd.ac.in;iitd.ac.in;iitd.ac.in;iitd.ac.in", "position": "Undergrad student;Researcher;Postdoc;Full Professor", "bibtex": "@inproceedings{\nsingh2023neustip,\ntitle={Neu{STIP}: A Neuro-Symbolic Model for Link and Time Prediction in Temporal Knowledge Graphs},\nauthor={Ishaan Singh and Navdeep Kaur and Garima Gaur and Mausam .},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=j61Sx05QRj}\n}", "github": "", "project": "", "reviewers": "juCQ;N5Ch;Yomj", "site": "https://openreview.net/forum?id=j61Sx05QRj", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;3;3", "excitement": "4;3;3", "reproducibility": "4;4;4", "correctness": "4;4;3", "rating_avg": 4.0, "confidence_avg": 3.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.0, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-1683-9287;;0000-0003-4088-4296", "linkedin": "ishaan-singh-9bb8231aa;navdeep-kaur-293119124/;;", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Indian Institute of Technology Delhi", "aff_unique_dep": "", "aff_unique_url": "https://www.iitdelhi.ac.in", "aff_unique_abbr": "IIT Delhi", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Delhi", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "India" }, { "id": "j6g3qwoQKU", "title": "POE: Process of Elimination for Multiple Choice Reasoning", "track": "main", "status": "Short Main", "tldr": "", "abstract": "Language models (LMs) are capable of conducting in-context learning for multiple choice reasoning tasks, but the options in these tasks are treated equally. As humans often first eliminate wrong options before picking the final correct answer, we argue a similar two-step strategy can make LMs better at these tasks. To this end, we present the Process of Elimination (POE), a two-step scoring method. In the first step, POE scores each option, and eliminates seemingly wrong options. In the second step, POE masks these wrong options, and makes the final prediction from the remaining options. Zero-shot experiments on 8 reasoning tasks illustrate the effectiveness of POE, and a following analysis finds our method to be especially performant on logical reasoning tasks. 
We further analyze the effect of masks, and show that POE applies to few-shot settings and large language models (LLMs) like ChatGPT.", "keywords": "language models;prompting;scoring;multiple choice reasoning", "primary_area": "", "supplementary_material": "", "author": "Chenkai Ma;Xinya Du", "authorids": "~Chenkai_Ma1;~Xinya_Du1", "gender": "M;M", "homepage": "https://chenkai-ma.github.io/;https://xinyadu.github.io", "dblp": ";200/8114", "google_scholar": "uf7NNvUAAAAJ;R-lKQqkAAAAJ", "or_profile": "~Chenkai_Ma1;~Xinya_Du1", "aff": "University of Electronic Science and Technology of China;University of Texas at Dallas", "aff_domain": "uestc.edu.cn;utdallas.edu", "position": "MS student;Assistant Professor", "bibtex": "@inproceedings{\nma2023poe,\ntitle={{POE}: Process of Elimination for Multiple Choice Reasoning},\nauthor={Chenkai Ma and Xinya Du},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=j6g3qwoQKU}\n}", "github": "", "project": "", "reviewers": "dmiM;Q4aG;utpz", "site": "https://openreview.net/forum?id=j6g3qwoQKU", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;3", "excitement": "3;3;3", "reproducibility": "5;5;4", "correctness": "4;3;4", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.0, "reproducibility_avg": 4.666666666666667, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";", "linkedin": "chenkai-ma-17955a352/;", "aff_unique_index": "0;1", "aff_unique_norm": "University of Electronic Science and Technology of China;University of Texas at Dallas", "aff_unique_dep": ";", "aff_unique_url": "https://www.uestc.edu.cn;https://www.utdallas.edu", "aff_unique_abbr": "UESTC;UT Dallas", "aff_campus_unique_index": "1", "aff_campus_unique": ";Dallas", "aff_country_unique_index": "0;1", "aff_country_unique": "China;United States" }, { "id": "j9E9xLlTmB", "title": "Analyzing Cognitive Plausibility of Subword Tokenization", "track": "main", "status": "Short Main", "tldr": "", "abstract": "Subword tokenization has become the de-facto standard for tokenization although comparative evaluations of their quality across languages are scarce. Existing evaluation studies focus on the effect of a tokenization algorithm on the performance in downstream tasks, or on engineering criteria such as the compression rate. We present a new evaluation paradigm that focuses on the cognitive plausibility of subword tokenization. We analyze the correlation of the tokenizer output with the reading time and accuracy of human responses on a lexical decision task. We compare three tokenization algorithms across several languages and vocabulary sizes. 
Our results indicate that the Unigram algorithm yields less cognitively plausible tokenization behavior and a worse coverage of derivational morphemes, in contrast with prior work.", "keywords": "subword tokenization;subword segmentation;cognitive signals;cognitive plausibility;lexical decision;vocabulary size;morphological segmentation", "primary_area": "", "supplementary_material": "", "author": "Lisa Beinborn;Yuval Pinter", "authorids": "~Lisa_Beinborn1;~Yuval_Pinter1", "gender": "F;M", "homepage": "https://beinborn.eu/;http://www.yuvalpinter.com", "dblp": "https://dblp.uni-trier.de/pid/154/8216;153/5384", "google_scholar": "https://scholar.google.de/citations?user=Mh5y8L0AAAAJ;aYAcXccAAAAJ", "or_profile": "~Lisa_Beinborn1;~Yuval_Pinter1", "aff": "Vrije Universiteit Amsterdam;Amazon Science", "aff_domain": "vu.nl;amazon.com", "position": "Assistant Professor;Visiting Academic", "bibtex": "@inproceedings{\nbeinborn2023analyzing,\ntitle={Analyzing Cognitive Plausibility of Subword Tokenization},\nauthor={Lisa Beinborn and Yuval Pinter},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=j9E9xLlTmB}\n}", "github": "", "project": "", "reviewers": "6Wn8;1agx;ye9p", "site": "https://openreview.net/forum?id=j9E9xLlTmB", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;4;4", "excitement": "4;4;3", "reproducibility": "4;3;4", "correctness": "4;3;3", "rating_avg": 5.0, "confidence_avg": 4.0, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0003-3174-1621", "linkedin": ";yuvalpinter", "aff_unique_index": "0;1", "aff_unique_norm": "Vrije Universiteit Amsterdam;Amazon", "aff_unique_dep": ";Amazon Science", "aff_unique_url": "https://www.vu.nl;https://www.amazon.science", "aff_unique_abbr": "VU Amsterdam;Amazon Science", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "Netherlands;United States" }, { "id": "j9e3WVc49w", "title": "Knowledge Distillation \u2248 Label Smoothing: Fact or Fallacy?", "track": "main", "status": "Short Main", "tldr": "", "abstract": "Originally proposed as a method for knowledge transfer from one model to another, some recent studies have suggested that knowledge distillation (KD) is in fact a form of regularization. Perhaps the strongest argument of all for this new perspective comes from its apparent similarities with label smoothing (LS). Here we re-examine this stated equivalence between the two methods by comparing the predictive confidences of the models they train. 
Experiments on four text classification tasks involving models of different sizes show that: (a) In most settings, KD and LS drive model confidence in completely opposite directions, and (b) In KD, the student inherits not only its knowledge but also its confidence from the teacher, reinforcing the classical knowledge transfer view.", "keywords": "knowledge distillation;label smoothing;regularization;interpretation of knowledge distillation", "primary_area": "", "supplementary_material": "", "author": "Md Arafat Sultan", "authorids": "~Md_Arafat_Sultan1", "gender": "M", "homepage": "https://ma-sultan.github.io/", "dblp": "77/11514", "google_scholar": "lDB1ul4AAAAJ", "or_profile": "~Md_Arafat_Sultan1", "aff": "International Business Machines", "aff_domain": "ibm.com", "position": "Researcher", "bibtex": "@inproceedings{\nsultan2023knowledge,\ntitle={Knowledge Distillation \\ensuremath{\\approx} Label Smoothing: Fact or Fallacy?},\nauthor={Md Arafat Sultan},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=j9e3WVc49w}\n}", "github": "", "project": "", "reviewers": "FFtC;qYKw;39Vd", "site": "https://openreview.net/forum?id=j9e3WVc49w", "pdf_size": 0, "rating": "5;5;5", "confidence": "3;4;5", "excitement": "4;3;3", "reproducibility": "4;4;4", "correctness": "4;4;2", "rating_avg": 5.0, "confidence_avg": 4.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.0, "correctness_avg": 3.3333333333333335, "replies_avg": 10, "authors#_avg": 1, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "", "linkedin": "", "aff_unique_index": "0", "aff_unique_norm": "International Business Machines Corporation", "aff_unique_dep": "", "aff_unique_url": "https://www.ibm.com", "aff_unique_abbr": "IBM", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "id": "jAf0gd0ez4", "title": "Interactive Text Generation", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Users interact with text, image, code, or other editors on a daily basis. However, machine learning models are rarely trained in the settings that reflect the interactivity between users and their editor. This is understandable as training AI models with real users is not only slow and costly, but what these models learn may be specific to user interface design choices. Unfortunately, this means most of the research on text, code, and image generation has focused on non-interactive settings, whereby the model is expected to get everything right without accounting for any input from a user who may be willing to help.\n\nWe introduce a new Interactive Text Generation task that allows training generation models interactively without the costs of involving real users, by using user simulators that provide edits that guide the model towards a given target text. 
We train our interactive models using Imitation Learning, and our experiments against competitive non-interactive generation models show that models trained interactively are superior to their non-interactive counterparts, even when all models are given the same budget of user inputs or edits.", "keywords": "Interactivity;Text Generation;RL;IL", "primary_area": "", "supplementary_material": "", "author": "Felix Faltings;Michel Galley;Kiant\u00e9 Brantley;Baolin Peng;Weixin Cai;Yizhe Zhang;Jianfeng Gao;Bill Dolan", "authorids": "~Felix_Faltings1;~Michel_Galley1;~Kiant\u00e9_Brantley2;~Baolin_Peng2;~Weixin_Cai1;~Yizhe_Zhang2;~Jianfeng_Gao1;~Bill_Dolan1", "gender": "M;M;;M;M;M;M;", "homepage": ";http://research.microsoft.com/~mgalley;;;http://wilsoncai1992.github.io;https://dreasysnail.github.io;https://www.microsoft.com/en-us/research/people/jfgao/;https://www.microsoft.com/en-us/research/people/billdol", "dblp": "277/1699;05/3289;;144/2759;229/3739.html;132/4966-2.html;92/5339;13/486", "google_scholar": "r3FyXZoAAAAJ;rs1M7CAAAAAJ;;u1CNjgwAAAAJ;OwL-21MAAAAJ;WDVMfggAAAAJ;https://scholar.google.com/citations?hl=en;KbD1YlQAAAAJ", "or_profile": "~Felix_Faltings1;~Michel_Galley1;~Kiant\u00e9_Brantley2;~Baolin_Peng2;~Weixin_Cai1;~Yizhe_Zhang2;~Jianfeng_Gao1;~Bill_Dolan1", "aff": "Massachusetts Institute of Technology;Microsoft;;Tencent AI Lab;Microsoft;Apple;Microsoft Research;", "aff_domain": "mit.edu;microsoft.com;;tencent.com;microsoft.com;apple.com;microsoft.com;", "position": "PhD student;Researcher;;Researcher;Researcher;Researcher;Principal Researcher;", "bibtex": "@inproceedings{\nfaltings2023interactive,\ntitle={Interactive Text Generation},\nauthor={Felix Faltings and Michel Galley and Kiant{\\'e} Brantley and Baolin Peng and Weixin Cai and Yizhe Zhang and Jianfeng Gao and Bill Dolan},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=jAf0gd0ez4}\n}", "github": "", "project": "", "reviewers": "sfhR;HjwP;tSro", "site": "https://openreview.net/forum?id=jAf0gd0ez4", "pdf_size": 0, "rating": "5;5;5", "confidence": "3;2;4", "excitement": "4;3;4", "reproducibility": "4;3;3", "correctness": "4;4;4", "rating_avg": 5.0, "confidence_avg": 3.0, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 8, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-3310-1831;;;0000-0003-2680-3066;;;", "linkedin": "felix-faltings-73b886127;michelgalley;;;;;;bill-dolan-292a373", "aff_unique_index": "0;1;2;1;3;1", "aff_unique_norm": "Massachusetts Institute of Technology;Microsoft;Tencent;Apple", "aff_unique_dep": ";Microsoft Corporation;Tencent AI Lab;Apple Inc.", "aff_unique_url": "https://web.mit.edu;https://www.microsoft.com;https://ai.tencent.com;https://www.apple.com", "aff_unique_abbr": "MIT;Microsoft;Tencent AI Lab;Apple", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0;0", "aff_country_unique": "United States;China" }, { "id": "jImeNRfAy2", "title": "Self-Detoxifying Language Models via Toxification Reversal", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Language model detoxification aims to minimize the risk of generating offensive or harmful content in pretrained language models (PLMs) for safer deployment. Existing methods can be roughly categorized as finetuning-based and decoding-based. 
However, the former is often resource-intensive, while the latter relies on additional components and potentially compromises the generation fluency. In this paper, we propose a more lightweight approach that enables the PLM itself to achieve ``self-detoxification''. Our method is built upon the observation that prepending a negative steering prompt can effectively induce PLMs to generate toxic content. At the same time, we are inspired by the recent research in the interpretability field, which formulates the evolving contextualized representations within the PLM as an information stream facilitated by the attention layers. Drawing on this idea, we devise a method to identify the toxification direction from the normal generation process to the one prompted with the negative prefix, and then steer the generation to the reversed direction by manipulating the information movement within the attention layers. Experimental results show that our approach, without any fine-tuning or extra components, can achieve comparable performance with state-of-the-art methods.", "keywords": "Language Model Detoxification;Language Model Safety;Natural Languge Generation", "primary_area": "", "supplementary_material": "", "author": "Chak Tou Leong;Yi Cheng;Jiashuo WANG;Jian Wang;Wenjie Li", "authorids": "~Chak_Tou_Leong1;~Yi_Cheng3;~Jiashuo_WANG1;~Jian_Wang18;~Wenjie_Li1", "gender": "M;F;F;M;F", "homepage": ";;http://www4.comp.polyu.edu.hk/~csjwang/;https://iwangjian.github.io/;https://web.comp.polyu.edu.hk/cswjli/", "dblp": "358/9146;;204/7570;39/449-54.html;33/3999-2.html", "google_scholar": "https://scholar.google.com/citations?view_op=list_works;4FWRdrAAAAAJ;uklMlHkAAAAJ;HUtas_0gHGIC;Rx5swD4AAAAJ", "or_profile": "~Chak_Tou_Leong1;~Yi_Cheng3;~Jiashuo_WANG1;~Jian_Wang18;~Wenjie_Li1", "aff": "Hong Kong Polytechnic University;The Hong Kong Polytechnic University;The Hong Kong Polytechnic University, Hong Kong Polytechnic University;The Hong Kong Polytechnic University;The Hong Kong Polytechnic University, The Hong Kong Polytechnic University", "aff_domain": "polyu.edu.hk;polyu.edu.hk;comp.polyu.edu.hk;polyu.edu.hk;comp.polyu.edu.hk", "position": "PhD student;PhD student;PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nleong2023selfdetoxifying,\ntitle={Self-Detoxifying Language Models via Toxification Reversal},\nauthor={Chak Tou Leong and Yi Cheng and Jiashuo WANG and Jian Wang and Wenjie Li},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=jImeNRfAy2}\n}", "github": "", "project": "", "reviewers": "tv99;rKEz;fNXE", "site": "https://openreview.net/forum?id=jImeNRfAy2", "pdf_size": 0, "rating": "4;4;4", "confidence": "2;4;4", "excitement": "4;4;4", "reproducibility": "5;4;3", "correctness": "4;4;4", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 4.0, "reproducibility_avg": 4.0, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0002-8254-8138;0000-0002-8992-8336;0000-0002-7360-8864", "linkedin": ";;;;", "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Hong Kong Polytechnic University", "aff_unique_dep": "", "aff_unique_url": "https://www.polyu.edu.hk", "aff_unique_abbr": "PolyU", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "jLEnVo0RW3", "title": "Retrieving Multimodal 
Information for Augmented Generation: A Survey", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "As Large Language Models (LLMs) become popular, there emerged an important trend of using multimodality to augment the LLMs' generation ability, which enables LLMs to better interact with the world. However, there lacks a unified perception of at which stage and how to incorporate different modalities. In this survey, we review methods that assist and augment generative models by retrieving multimodal knowledge, whose formats range from images, codes, tables, graphs, to audio. Such methods offer a promising solution to important concerns such as factuality, reasoning, interpretability, and robustness. By providing an in-depth review, this survey is expected to provide scholars with a deeper understanding of the methods' applications and encourage them to adapt existing techniques to the fast-growing field of LLMs.", "keywords": "Retrieval-augmented language models;Multimodality", "primary_area": "", "supplementary_material": "", "author": "Ruochen Zhao;Hailin Chen;Weishi Wang;Fangkai Jiao;Do Xuan Long;Chengwei Qin;Bosheng Ding;Xiaobao Guo;Minzhi Li;Xingxuan Li;Shafiq Joty", "authorids": "~Ruochen_Zhao1;~Hailin_Chen1;~Weishi_Wang2;~Fangkai_Jiao1;~Do_Xuan_Long1;~Chengwei_Qin1;~Bosheng_Ding1;~Xiaobao_Guo1;~Minzhi_Li1;~Xingxuan_Li1;~Shafiq_Joty1", "gender": "F;;M;M;M;M;M;F;F;M;M", "homepage": ";;;https://sparkjiao.github.io/;https://dxlong2000.github.io/;;;;https://www.linkedin.com/in/minzhi-li-b16930183/;https://xingxuanli.github.io/;https://raihanjoty.github.io/", "dblp": "253/2147;36/8249;;264/9981;317/0657.html;195/2732;277/9378;246/5846;;222/9407;62/2078", "google_scholar": ";oE4KrU0AAAAJ;P8TGNcoAAAAJ;_u8lwyIAAAAJ;uZyF8wwAAAAJ;;Bp8u4lgAAAAJ;WBkcnkEAAAAJ;;IqVxTDAAAAAJ;hR249csAAAAJ", "or_profile": "~Ruochen_Zhao1;~Hailin_Chen1;~Weishi_Wang2;~Fangkai_Jiao1;~Do_Xuan_Long1;~Chengwei_Qin1;~Bosheng_Ding1;~Xiaobao_Guo1;~Minzhi_Li1;~Xingxuan_Li1;~Shafiq_Joty1", "aff": "Nanyang Technological University;National Technological University;Nanyang Technological University;A*STAR;Nanyang Technological University ;Nanyang Technological University;Alibaba Group;Nanyang Technological University;I2R, A*STAR;Alibaba Group;SalesForce.com", "aff_domain": "ntu.edu.sg;ntu.edu;ntu.edu.sg;astar.edu.sg;e.ntu.edu.sg;ntu.edu.sg;alibaba-inc.com;ntu.edu.sg;i2r.a-star.edu.sg;alibaba-inc.com;salesforce.com", "position": "PhD student;PhD student;PhD student;PhD student;Undergrad student;PhD student;Researcher;PhD student;PhD student;PhD student;Principal Researcher", "bibtex": "@inproceedings{\nzhao2023retrieving,\ntitle={Retrieving Multimodal Information for Augmented Generation: A Survey},\nauthor={Ruochen Zhao and Hailin Chen and Weishi Wang and Fangkai Jiao and Do Xuan Long and Chengwei Qin and Bosheng Ding and Xiaobao Guo and Minzhi Li and Xingxuan Li and Shafiq Joty},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=jLEnVo0RW3}\n}", "github": "", "project": "", "reviewers": "yUiZ;ojn8;4Ruw", "site": "https://openreview.net/forum?id=jLEnVo0RW3", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;3;4", "excitement": "4;3;4", "reproducibility": "", "correctness": "4;4;3", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 0, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 11, "corr_rating_confidence": 0.0, 
"corr_rating_correctness": 0.0, "orcid": ";;;0000-0002-0670-6990;;;;0000-0002-3427-8540;;;", "linkedin": "esther-ruochen-zhao-855357150/;chenhailin/;;;;chengwei-qin-3401a1107/;ding-bosheng/;;;;", "aff_unique_index": "0;1;0;2;0;0;3;0;4;3;5", "aff_unique_norm": "Nanyang Technological University;National Technological University;Agency for Science, Technology and Research;Alibaba Group;A*STAR;Salesforce", "aff_unique_dep": ";;;;Institute for Infocomm Research;", "aff_unique_url": "https://www.ntu.edu.sg;https://www.ntu.edu;https://www.a-star.edu.sg;https://www.alibaba.com;https://www.a-star.edu.sg;https://www.salesforce.com", "aff_unique_abbr": "NTU;NTU;A*STAR;Alibaba;A*STAR;Salesforce", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;0;0;2;0;0;2;1", "aff_country_unique": "Singapore;United States;China" }, { "id": "jLmSsybvkR", "title": "Dior-CVAE: Pre-trained Language Models and Diffusion Priors for Variational Dialog Generation", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Current variational dialog models have employed pre-trained language models (PLMs) to parameterize the likelihood and posterior distributions. However, the Gaussian assumption made on the prior distribution is incompatible with these distributions, thus restricting the diversity of generated responses. These models also suffer from posterior collapse, i.e., the decoder tends to ignore latent variables and directly access information captured in the encoder through the cross-attention mechanism. In this work, we propose Dior-CVAE, a hierarchical conditional variational autoencoder (CVAE) with diffusion priors to address these challenges. We employ a diffusion model to increase the complexity of the prior distribution and its compatibility with the distributions produced by a PLM. Also, we propose memory dropout to the cross-attention mechanism, which actively encourages the use of latent variables for response generation. Overall, experiments across two commonly used open-domain dialog datasets show that our method can generate more diverse responses without large-scale dialog pre-training. 
Code is available at https://github.com/UKPLab/dior-cvae.", "keywords": "Dialogue Generation;Diffusion Model;Variational Auto-encoder", "primary_area": "", "supplementary_material": "", "author": "Tianyu Yang;Thy Thy Tran;Iryna Gurevych", "authorids": "~Tianyu_Yang3;~Thy_Thy_Tran1;~Iryna_Gurevych1", "gender": "M;F;", "homepage": ";;", "dblp": "120/8076-4;261/5373;", "google_scholar": "h73P9F0AAAAJ;TMoIBtoAAAAJ;", "or_profile": "~Tianyu_Yang3;~Thy_Thy_Tran1;~Iryna_Gurevych1", "aff": "Technische Universit\u00e4t Darmstadt;TU Darmstadt;", "aff_domain": "tu-darmstadt.de;tu-darmstadt.de;", "position": "Intern;Postdoc;", "bibtex": "@inproceedings{\nyang2023diorcvae,\ntitle={Dior-{CVAE}: Pre-trained Language Models and Diffusion Priors for Variational Dialog Generation},\nauthor={Tianyu Yang and Thy Thy Tran and Iryna Gurevych},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=jLmSsybvkR}\n}", "github": "", "project": "", "reviewers": "YFb3;UuVZ;xnUx", "site": "https://openreview.net/forum?id=jLmSsybvkR", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;4;4", "excitement": "3;3;2", "reproducibility": "3;4;2", "correctness": "3;3;2", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 2.6666666666666665, "reproducibility_avg": 3.0, "correctness_avg": 2.6666666666666665, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0001-5645-7059;0000-0002-0627-9706;", "linkedin": ";;", "aff_unique_index": "0;0", "aff_unique_norm": "Technische Universit\u00e4t Darmstadt", "aff_unique_dep": "", "aff_unique_url": "https://www.tu-darmstadt.de", "aff_unique_abbr": "TUD", "aff_campus_unique_index": "1", "aff_campus_unique": ";Darmstadt", "aff_country_unique_index": "0;0", "aff_country_unique": "Germany" }, { "id": "jMwvnqKTBG", "title": "Mind the Gap: Automated Corpus Creation for Enthymeme Detection and Reconstruction in Learner Arguments", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Writing strong arguments can be challenging for learners. It requires selecting and arranging multiple argumentative discourse units (ADUs) in a logical and coherent way, as well as deciding which ADUs to leave implicit, so-called enthymemes. However, when important ADUs are missing, readers might not be able to follow the reasoning or understand the argument's main point. This paper introduces two new tasks for learner arguments: to identify gaps in arguments (enthymeme detection) and to fill such gaps (enthymeme reconstruction). Approaches to both tasks may help learners improve their argument quality. We study how corpora for these tasks can be created automatically by deleting ADUs from an argumentative text that are central to the argument and its quality, while maintaining the text's naturalness. Based on the ICLEv3 corpus of argumentative learner essays, we create 40,089 argument instances for enthymeme detection and reconstruction. Through manual studies, we provide evidence that the proposed corpus creation process leads to the desired quality reduction, and results in arguments that are similarly natural to those written by learners. 
Finally, first baseline approaches to enthymeme detection and reconstruction demonstrate the corpus' usefulness.", "keywords": "dataset;argumentation;argument mining;enthymeme", "primary_area": "", "supplementary_material": "", "author": "Maja Stahl;Nick D\u00fcsterhus;Mei-Hua Chen;Henning Wachsmuth", "authorids": "~Maja_Stahl1;~Nick_D\u00fcsterhus1;~Mei-Hua_Chen1;~Henning_Wachsmuth1", "gender": "M;F;;F", "homepage": "https://scholar.google.com/citations?user=rwv7LRAAAAAJ&hl=de&oi=ao;;https://www.ai.uni-hannover.de/en/institute/research-groups/nlp;https://www.ai.uni-hannover.de/de/", "dblp": ";127/0783;73/9281;331/4457", "google_scholar": ";;kPps-H8AAAAJ;https://scholar.google.de/citations?user=Yf5mImsAAAAJ", "or_profile": "~Nick_D\u00fcsterhus1;~Mei-Hua_Chen1;~Henning_Wachsmuth1;~Maja_Brinkmann1", "aff": "Universit\u00e4t Paderborn;Department of Foreign Languages and Literature, Tunghai University;Leibniz Universit\u00e4t Hannover;Leibniz Universit\u00e4t Hannover", "aff_domain": "uni-paderborn.de;thu.edu.tw;uni-hannover.de;uni-hannover.de", "position": "MS student;Associate Professor;Full Professor;PhD student", "bibtex": "@inproceedings{\nstahl2023mind,\ntitle={Mind the Gap: Automated Corpus Creation for Enthymeme Detection and Reconstruction in Learner Arguments},\nauthor={Maja Stahl and Nick D{\\\"u}sterhus and Mei-Hua Chen and Henning Wachsmuth},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=jMwvnqKTBG}\n}", "github": "", "project": "", "reviewers": "P3fJ;432F;7rF4", "site": "https://openreview.net/forum?id=jMwvnqKTBG", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;2;4", "excitement": "3;4;4", "reproducibility": "4;4;4", "correctness": "3;4;3", "rating_avg": 3.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0003-3990-0404;0000-0003-2792-621X;0000-0002-5982-726X", "linkedin": ";meihua-chen-36335535/;;", "aff_unique_index": "0;1;2;2", "aff_unique_norm": "University of Paderborn;Tunghai University;Leibniz Universit\u00e4t Hannover", "aff_unique_dep": ";Department of Foreign Languages and Literature;", "aff_unique_url": "https://www.uni-paderborn.de;https://www.thu.edu.tw;https://www.leibniz.uni-hannover.de/", "aff_unique_abbr": "UPB;THU;LUH", "aff_campus_unique_index": "1", "aff_campus_unique": ";Taiwan", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "Germany;China" }, { "id": "jPrl18r4RA", "title": "Meta-Learning Online Adaptation of Language Models", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Large language models encode impressively broad world knowledge in their parameters. However, the knowledge in static language models falls out of date, limiting the model\u2019s effective \u201cshelf life.\u201d While online fine-tuning can reduce this degradation, we find that naively fine-tuning on a stream of documents leads to a low level of information uptake. We hypothesize that online fine-tuning does not sufficiently attend to important information. That is, the gradient signal from important tokens representing factual information is drowned out by the gradient from inherently noisy tokens, suggesting that a dynamic, context-aware learning rate may be beneficial. We therefore propose learning which tokens to upweight. 
We meta-train a small, autoregressive model to reweight the language modeling loss for each token during online fine-tuning, with the objective of maximizing the out-of-date base question-answering model\u2019s ability to answer questions about a document after a single weighted gradient step. We call this approach Context-aware Meta-learned Loss Scaling (CaMeLS). Across three different distributions of documents, our experiments find that CaMeLS provides substantially improved information uptake on streams of thousands of documents compared with standard fine-tuning and baseline heuristics for reweighting token losses.", "keywords": "meta-learning;question-answering;online learning;knowledge;adaptation", "primary_area": "", "supplementary_material": "", "author": "Nathan Zixia Hu;Eric Mitchell;Christopher D Manning;Chelsea Finn", "authorids": "~Nathan_Zixia_Hu1;~Eric_Mitchell1;~Christopher_D_Manning1;~Chelsea_Finn1", "gender": "M;M;M;F", "homepage": ";https://ericmitchell.ai;https://nlp.stanford.edu/~manning/;https://ai.stanford.edu/~cbfinn/", "dblp": ";238/0419;m/ChristopherDManning;131/1783", "google_scholar": ";q77J4fgAAAAJ;1zmDOdwAAAAJ;vfPE6hgAAAAJ", "or_profile": "~Nathan_Zixia_Hu1;~Eric_Mitchell1;~Christopher_D_Manning1;~Chelsea_Finn1", "aff": "Stanford University;Stanford University;Computer Science Department, Stanford University;Google", "aff_domain": "stanford.edu;stanford.edu;cs.stanford.edu;google.com", "position": "Undergrad student;PhD student;Full Professor;Research Scientist", "bibtex": "@inproceedings{\nhu2023metalearning,\ntitle={Meta-Learning Online Adaptation of Language Models},\nauthor={Nathan Zixia Hu and Eric Mitchell and Christopher D Manning and Chelsea Finn},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=jPrl18r4RA}\n}", "github": "", "project": "", "reviewers": "2YUV;jgxp;kJwz;dDnG", "site": "https://openreview.net/forum?id=jPrl18r4RA", "pdf_size": 0, "rating": "5;5;5;5", "confidence": "3;4;3;3", "excitement": "3;3;4;4", "reproducibility": "3;4;2;3", "correctness": "3;3;3;4", "rating_avg": 5.0, "confidence_avg": 3.25, "excitement_avg": 3.5, "reproducibility_avg": 3.0, "correctness_avg": 3.25, "replies_avg": 14, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-7487-1744;0000-0001-6155-649X;", "linkedin": "nathan-hu-6598111a9/;;christopher-manning-011575/;", "aff_unique_index": "0;0;0;1", "aff_unique_norm": "Stanford University;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.stanford.edu;https://www.google.com", "aff_unique_abbr": "Stanford;Google", "aff_campus_unique_index": "0;0;0;1", "aff_campus_unique": "Stanford;Mountain View", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "jQcShOpcfM", "title": "DPP-TTS: Diversifying prosodic features of speech via determinantal point processes", "track": "main", "status": "Long Main", "tldr": "", "abstract": "With the rapid advancement in deep generative models, recent neural Text-To-Speech(TTS) models have succeeded in synthesizing human-like speech. There have been some efforts to generate speech with various prosody beyond monotonous prosody patterns.\nHowever, previous works have several limitations. First, typical TTS models depend on the scaled sampling temperature for boosting the diversity of prosody. 
Speech samples generated at high sampling temperatures often lack perceptual prosodic diversity, which can adversely affect the naturalness of the speech. Second, the diversity among samples is neglected since the sampling procedure often focuses on a single speech sample rather than multiple ones. In this paper, we propose DPP-TTS: a text-to-speech model based on Determinantal Point Processes (DPPs) with a prosody diversifying module. Our TTS model is capable of generating speech samples that simultaneously consider perceptual diversity in each sample and among multiple samples. We demonstrate that DPP-TTS generates speech samples with more diversified prosody than baselines in the side-by-side comparison test considering the naturalness of speech at the same time.", "keywords": "Speech prosody; prosodic segmentation; text-to-speech", "primary_area": "", "supplementary_material": "", "author": "Seongho Joo;Hyukhun Koh;Kyomin Jung", "authorids": "~Seongho_Joo1;~Hyukhun_Koh1;~Kyomin_Jung1", "gender": ";Not Specified;M", "homepage": "https://sites.google.com/view/jsh1006/%ED%99%88;https://hyukhunkoh-ai.github.io/;http://milab.snu.ac.kr/kjung/index.html", "dblp": "359/4619.html;344/0846;48/3867", "google_scholar": ";;https://scholar.google.co.kr/citations?user=u3uMl4MAAAAJ", "or_profile": "~Seongho_Joo1;~Hyukhun_Koh1;~Kyomin_Jung1", "aff": "Seoul National University;;Seoul National University", "aff_domain": "snu.ac.kr;;snu.ac.kr", "position": "PhD student;;Full Professor", "bibtex": "@inproceedings{\njoo2023dpptts,\ntitle={{DPP}-{TTS}: Diversifying prosodic features of speech via determinantal point processes},\nauthor={Seongho Joo and Hyukhun Koh and Kyomin Jung},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=jQcShOpcfM}\n}", "github": "", "project": "", "reviewers": "a3rZ;QLVm;gArD", "site": "https://openreview.net/forum?id=jQcShOpcfM", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;3;3", "excitement": "4;3;4", "reproducibility": "3;4;4", "correctness": "3;3;3", "rating_avg": 5.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": ";https://www.linkedin.com/hyukhun-koh-593283283;", "aff_unique_index": "0;0", "aff_unique_norm": "Seoul National University", "aff_unique_dep": "", "aff_unique_url": "https://www.snu.ac.kr", "aff_unique_abbr": "SNU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "South Korea" }, { "id": "jQozdfjJSZ", "title": "MingOfficial: A Ming Official Career Dataset and a Historical Context-Aware Representation Learning Framework", "track": "main", "status": "Long Main", "tldr": "", "abstract": "In Chinese studies, understanding the nuanced traits of historical figures, often not explicitly evident in biographical data, has been a key interest. \nHowever, identifying these traits can be challenging due to the need for domain expertise, specialist knowledge, and context-specific insights, making the process time-consuming and difficult to scale. 
\nOur focus on studying officials from China's Ming Dynasty is no exception.\nTo tackle this challenge, we propose MingOfficial, a large-scale multi-modal dataset consisting of both structured (career records, annotated personnel types) and text (historical texts) data for $9,376$ officials.\nWe further couple the dataset with a graph neural network (GNN) to combine both modalities in order to allow investigation of social structures and provide features to boost down-stream tasks.\nExperiments show that our proposed MingOfficial could enable exploratory analysis of official identities, and also significantly boost performance in tasks such as identifying nuanced identities (e.g.\\ civil officials holding military power) from $24.6\\%$ to $98.2\\%$ F$_1$ score in hold-out test set. By making MingOfficial publicly available (see main text for the URL) as both a dataset and an interactive tool, we aim to stimulate further research into the role of social context and representation learning in identifying individual characteristics, and hope to provide inspiration for computational approaches in other fields beyond Chinese studies.", "keywords": "graph representation learning;graph neural network;Ming Dynasty", "primary_area": "", "supplementary_material": "", "author": "You-Jun Chen;Hsin-Yi Hsieh;Yu Tung Lin;Yingtao Tian;Bert Chan;Yu-Sin Liu;Yi-Hsuan Lin;Richard Tzong-Han Tsai", "authorids": "~You-Jun_Chen1;~Hsin-Yi_Hsieh1;~Yu_Tung_Lin1;~Yingtao_Tian1;~Bert_Chan1;~Yu-Sin_Liu1;~Yi-Hsuan_Lin1;~Richard_Tzong-Han_Tsai1", "gender": "F;F;F;;F;;M;", "homepage": ";https://www.linkedin.com/in/hsinmosyi;;https://alantian.net/;;https://github.com/juliaouo;;https://chakazul.github.io/", "dblp": ";;;180/5335;;;t/TzongHanTsai;232/3924", "google_scholar": ";;;17Fe5K0AAAAJ;;;;", "or_profile": "~You-Jun_Chen1;~Hsin-Yi_Hsieh1;~Yu_Tung_Lin1;~Yingtao_Tian1;~Yu-Sin_Liu1;~Yi-Hsuan_Lin1;~Richard_Tzong-Han_Tsai1;~Bert_Wang-Chak_Chan1", "aff": "New York University;National Central University;National Central University;Google;National Central University;National Central University;National Central University;Google", "aff_domain": "nyu.edu;ncu.edu.tw;ncu.edu.tw;google.com;ncu.edu.tw;ncu.edu.tw;ncu.edu.tw;google.com", "position": "MS student;MS student;MS student;Research Scientist;Undergrad student;Undergrad student;Full Professor;Researcher", "bibtex": "@inproceedings{\nchen2023mingofficial,\ntitle={MingOfficial: A Ming Official Career Dataset and a Historical Context-Aware Representation Learning Framework},\nauthor={You-Jun Chen and Hsin-Yi Hsieh and Yu Tung Lin and Yingtao Tian and Bert Chan and Yu-Sin Liu and Yi-Hsuan Lin and Richard Tzong-Han Tsai},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=jQozdfjJSZ}\n}", "github": "", "project": "", "reviewers": "zST9;qxZu;ZgKy", "site": "https://openreview.net/forum?id=jQozdfjJSZ", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;3", "excitement": "3;4;4", "reproducibility": "5;4;4", "correctness": "4;4;3", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.333333333333333, "correctness_avg": 3.6666666666666665, "replies_avg": 10, "authors#_avg": 8, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;0009-0009-6190-105X;;;0009-0006-5398-9053", "linkedin": "naomiyjchen/;;judy-lin-3b9771247/;;yusinliu/;;;", "aff_unique_index": "0;1;1;2;1;1;1;2", "aff_unique_norm": "New York 
University;National Central University;Google", "aff_unique_dep": ";;Google", "aff_unique_url": "https://www.nyu.edu;https://www.ncu.edu.tw;https://www.google.com", "aff_unique_abbr": "NYU;NCU;Google", "aff_campus_unique_index": "1;1;2;1;1;1;2", "aff_campus_unique": ";Taiwan;Mountain View", "aff_country_unique_index": "0;1;1;0;1;1;1;0", "aff_country_unique": "United States;China" }, { "id": "jSu7hAIZM0", "title": "Preserving Privacy Through Dememorization: An Unlearning Technique For Mitigating Memorization Risks In Language Models", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Large Language models (LLMs) are trained on vast amounts of data, including sensitive information that poses a risk to personal privacy if exposed. LLMs have shown the ability to memorize and reproduce portions of their training data when prompted by adversaries. Prior research has focused on addressing this memorization issue and preventing verbatim replication through techniques like knowledge unlearning and data pre-processing. However, these methods have limitations regarding the number of protected samples, limited privacy types, and potentially lower-quality generative models. To tackle this challenge more effectively, we propose \u201cDeMem,\u201d a novel unlearning approach that utilizes an efficient reinforcement learning feedback loop via proximal policy optimization. By fine-tuning the language model with a negative similarity score as a reward signal, we incentivize the LLMs to learn a paraphrasing policy to unlearn the pre-training data. Our experiments demonstrate that DeMem surpasses strong baselines and state-of-the-art methods in terms of its ability to generalize and strike a balance between maintaining privacy and LLM performance.", "keywords": "Large langauge models;privacy;memorization", "primary_area": "", "supplementary_material": "", "author": "Aly M. Kassem;Omar Mahmoud;Sherif Saad", "authorids": "~Aly_M._Kassem1;~Omar_Mahmoud1;~Sherif_Saad1", "gender": "M;M;M", "homepage": "https://www.uwindsor.ca/science/computerscience/85202/dr-sherif-saad-ahmed;;", "dblp": "40/9034;336/9109;243/5284", "google_scholar": "https://scholar.google.ca/citations?user=GcpOMcQAAAAJ;KszgLh0AAAAJ;7P9VJEMAAAAJ", "or_profile": "~Sherif_Saad1;~Ali_M._Kassem1;~Omar_Mohamed_Ahmed2", "aff": "University of Windsor;University of Windsor;Deakin University", "aff_domain": "uwindsor.ca;uwindsor.ca;deakin.edu.au", "position": "Associate Professor;MS student;PhD student", "bibtex": "@inproceedings{\nkassem2023preserving,\ntitle={Preserving Privacy Through Dememorization: An Unlearning Technique For Mitigating Memorization Risks In Language Models},\nauthor={Aly M. 
Kassem and Omar Mahmoud and Sherif Saad},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=jSu7hAIZM0}\n}", "github": "", "project": "", "reviewers": "yMwJ;N84C;mR2L", "site": "https://openreview.net/forum?id=jSu7hAIZM0", "pdf_size": 0, "rating": "3;3;3", "confidence": "2;3;2", "excitement": "3;2;3", "reproducibility": "3;1;3", "correctness": "2;2;3", "rating_avg": 3.0, "confidence_avg": 2.3333333333333335, "excitement_avg": 2.6666666666666665, "reproducibility_avg": 2.3333333333333335, "correctness_avg": 2.3333333333333335, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": "ebinsaad/;aly-kassem/;omarmohamed88/", "aff_unique_index": "0;0;1", "aff_unique_norm": "University of Windsor;Deakin University", "aff_unique_dep": ";", "aff_unique_url": "https://www.uwindsor.ca;https://www.deakin.edu.au", "aff_unique_abbr": "UWindsor;Deakin", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1", "aff_country_unique": "Canada;Australia" }, { "id": "jTiJPDv82w", "title": "ToxicChat: Unveiling Hidden Challenges of Toxicity Detection in Real-World User-AI Conversation", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "Despite remarkable advances that large language models have achieved in chatbots nowadays, maintaining a non-toxic user-AI interactive environment has become increasingly critical nowadays. However, previous efforts in toxicity detection have been mostly based on benchmarks derived from social media contents, leaving the unique challenges inherent to real-world user-AI interactions insufficiently explored. In this work, we introduce ToxicChat, a novel benchmark constructed based on real user queries from an open-source chatbot. This benchmark contains the rich, nuanced phenomena that can be tricky for current toxicity detection models to identify, revealing a significant domain difference when compared to social media contents. Our systematic evaluation of models trained on existing toxicity datasets has shown their shortcomings when applied to this unique domain of ToxicChat. Our work illuminates the potentially overlooked challenges of toxicity detection in real-world user-AI conversations. 
In the future, ToxicChat can be a valuable resource to drive further advancements toward building a safe and healthy environment for user-AI interactions.", "keywords": "Toxicity;Real-World User-AI Interaction;Domain Adaptation;LLM-based Chatbots", "primary_area": "", "supplementary_material": "", "author": "Zi Lin;Zihan Wang;Yongqi Tong;Yangkun Wang;Yuxin Guo;Yujia Wang;Jingbo Shang", "authorids": "~Zi_Lin1;~Zihan_Wang1;~Yongqi_Tong2;~Yangkun_Wang1;~Yuxin_Guo3;~Yujia_Wang4;~Jingbo_Shang2", "gender": "F;M;M;;F;F;M", "homepage": "https://zi-lin.com/;https://zihanwangki.github.io/;;;https://www.linkedin.com/in/yuxin-guo-63a019204/;;https://shangjingbo1226.github.io/", "dblp": "81/2999;152/5077-1;;;;;151/3145.html", "google_scholar": "kgZYttUAAAAJ;6UWtYZQAAAAJ;;;;;0SkFI4MAAAAJ", "or_profile": "~Zi_Lin1;~Zihan_Wang1;~Yongqi_Tong2;~Yangkun_Wang1;~Yuxin_Guo3;~Yujia_Wang4;~Jingbo_Shang2", "aff": "University of California, San Diego;University of California, San Diego;University of California, San Diego;;University of California, San Diego;University of California, San Diego;University of California, San Diego", "aff_domain": "ucsd.edu;ucsd.edu;ucsd.edu;;ucsd.edu;ucsd.edu;ucsd.edu", "position": "Graduate student;PhD student;MS student;;Undergrad student;Undergrad student;Assistant Professor", "bibtex": "@inproceedings{\nlin2023toxicchat,\ntitle={ToxicChat: Unveiling Hidden Challenges of Toxicity Detection in Real-World User-{AI} Conversation},\nauthor={Zi Lin and Zihan Wang and Yongqi Tong and Yangkun Wang and Yuxin Guo and Yujia Wang and Jingbo Shang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=jTiJPDv82w}\n}", "github": "", "project": "", "reviewers": "v1Tj;vR6f;nmU3", "site": "https://openreview.net/forum?id=jTiJPDv82w", "pdf_size": 0, "rating": "3;3;3", "confidence": "5;4;4", "excitement": "2;3;3", "reproducibility": "3;1;3", "correctness": "2;2;3", "rating_avg": 3.0, "confidence_avg": 4.333333333333333, "excitement_avg": 2.6666666666666665, "reproducibility_avg": 2.3333333333333335, "correctness_avg": 2.3333333333333335, "replies_avg": 11, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;;", "linkedin": "zi-lin/;;yongqi-tong-35118517b/;;;yujia-wang-joy/;", "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "University of California, San Diego", "aff_unique_dep": "", "aff_unique_url": "https://www.ucsd.edu", "aff_unique_abbr": "UCSD", "aff_campus_unique_index": "0;0;0;0;0;0", "aff_campus_unique": "San Diego", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "jUgBvYwc50", "title": "ZARA: Improving Few-Shot Self-Rationalization for Small Language Models", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Language models (LMs) that jointly generate end-task answers as well as free-text rationales are known as self-rationalization models.\nRecent works demonstrate great performance gain for self-rationalization by few-shot prompting LMs with rationale-augmented exemplars.\nHowever, the ability to benefit from explanations only emerges with large-scale LMs, which have poor accessibility.\nIn this work, we explore the less-studied setting of leveraging explanations for small LMs to improve few-shot self-rationalization.\nWe first revisit the relationship between rationales and answers.\nInspired by the implicit mental process of how human beings assess explanations, we present a novel approach, 
Zero-shot Augmentation of Rationale-Answer pairs (ZARA), to automatically construct pseudo-parallel data for self-training by reducing the problem of plausibility judgement to natural language inference.\nExperimental results show ZARA achieves SOTA performance on the FEB benchmark, for both the task accuracy and the explanation metric.\nIn addition, we conduct human and quantitative evaluation validating ZARA's ability to automatically identify plausible and accurate rationale-answer pairs.", "keywords": "free-text explanation;rationale;self-rationalization", "primary_area": "", "supplementary_material": "", "author": "Wei-Lin Chen;An-Zi Yen;Cheng-Kuang Wu;Hen-Hsen Huang;Hsin-Hsi Chen", "authorids": "~Wei-Lin_Chen1;~An-Zi_Yen1;~Cheng-Kuang_Wu1;~Hen-Hsen_Huang1;~Hsin-Hsi_Chen2", "gender": ";F;M;M;M", "homepage": "https://wlchen0206.github.io/;https://azyen0522.github.io/;https://brian-ckwu.github.io/;https://homepage.iis.sinica.edu.tw/pages/hhhuang/;http://nlg.csie.ntu.edu.tw/advisor.php", "dblp": "72/7187;204/3583;88/415;23/10489;84/3130.html", "google_scholar": "https://scholar.google.com.tw/citations?user=Hrbne1wAAAAJ;https://scholar.google.com/citations?hl=zh-TW;hc_e7rsAAAAJ;https://scholar.google.com/citations?hl=en;CRth4q4AAAAJ", "or_profile": "~Wei-Lin_Chen1;~An-Zi_Yen1;~Cheng-Kuang_Wu1;~Hen-Hsen_Huang1;~Hsin-Hsi_Chen2", "aff": "National Taiwan University;Department of Computer Science, National Yang Ming Chiao Tung University;National Taiwan University;Institute of Information Science, Academia Sinica;National Taiwan University", "aff_domain": "ntu.edu.tw;nycu.edu.tw;csie.ntu.edu.tw;iis.sinica.edu.tw;ntu.edu.tw", "position": "MS student;Assistant Professor;MS student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nchen2023zara,\ntitle={{ZARA}: Improving Few-Shot Self-Rationalization for Small Language Models},\nauthor={Wei-Lin Chen and An-Zi Yen and Cheng-Kuang Wu and Hen-Hsen Huang and Hsin-Hsi Chen},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=jUgBvYwc50}\n}", "github": "", "project": "", "reviewers": "x5eQ;JA6T;wxRY", "site": "https://openreview.net/forum?id=jUgBvYwc50", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;4", "excitement": "3;3;4", "reproducibility": "3;3;4", "correctness": "4;2;4", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0002-0740-0846;0000-0001-9169-3081;0000-0001-9757-9423", "linkedin": ";;cheng-kuang-wu-062214219/;;", "aff_unique_index": "0;1;0;2;0", "aff_unique_norm": "National Taiwan University;National Yang Ming Chiao Tung University;Academia Sinica", "aff_unique_dep": ";Department of Computer Science;Institute of Information Science", "aff_unique_url": "https://www.ntu.edu.tw;https://www.nctu.edu.tw;https://www.sinica.edu.tw", "aff_unique_abbr": "NTU;NYCU;AS", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Taiwan", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "jUkDEaE0fK", "title": "LDM$^2$: A Large Decision Model Imitating Human Cognition with Dynamic Memory Enhancement", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "With the rapid development of large language models (LLMs), it is highly demanded that LLMs can be adopted to make 
decisions to enable the artificial general intelligence. \nMost approaches leverage manually crafted examples to prompt the LLMs to imitate the decision process of human. However, designing optimal prompts is difficult and the patterned prompts can hardly be generalized to more complex environments. \nIn this paper, we propose a novel model named Large Decision Model with Memory (LDM$^2$), which leverages a dynamic memory mechanism to construct dynamic prompts, guiding the LLMs in making proper decisions according to the faced state.\nLDM$^2$ consists of two stages: memory formation and memory refinement.\nIn the former stage, human behaviors are decomposed into state-action tuples utilizing the powerful summarizing ability of LLMs. Then, these tuples are stored in the memory, whose indices are generated by the LLMs, to facilitate the retrieval of the most relevant subset of memorized tuples based on the current state.\nIn the latter stage, our LDM$^2$ employs tree exploration to discover more suitable decision processes and enrich the memory by adding valuable state-action tuples.\nThe dynamic circle of exploration and memory enhancement provides LDM$^2$ a better understanding of the global environment.\nExtensive experiments conducted in two interactive environments have shown that our LDM$^2$ outperforms the baselines in terms of both score and success rate, which demonstrates its effectiveness.", "keywords": "Large Language Model;Decision Making;Memory Enhanced", "primary_area": "", "supplementary_material": "", "author": "Xingjin Wang;Linjing Li;Daniel Dajun Zeng", "authorids": "~Xingjin_Wang1;~Linjing_Li1;~Daniel_Dajun_Zeng1", "gender": "M;M;M", "homepage": "https://people.ucas.edu.cn/~ljli;;", "dblp": "41/9180;z/DanielDajunZeng;", "google_scholar": "7QO2H6wAAAAJ;d-tAMlYAAAAJ;", "or_profile": "~Linjing_Li1;~Daniel_Dajun_Zeng1;~Wang_Xingjin1", "aff": "Institute of Automation, Chinese Academy of Sciences;Institute of Automation, Chinese Academy of Sciences;Institute of Automation, Chinese Academy of Sciences", "aff_domain": "ia.ac.cn;ia.ac.cn;ia.ac.cn", "position": "Full Professor;Full Professor;PhD student", "bibtex": "@inproceedings{\nwang2023ldm,\ntitle={{LDM}\\${\\textasciicircum}2\\$: A Large Decision Model Imitating Human Cognition with Dynamic Memory Enhancement},\nauthor={Xingjin Wang and Linjing Li and Daniel Dajun Zeng},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=jUkDEaE0fK}\n}", "github": "", "project": "", "reviewers": "c7Lp;gqCr;X6GX", "site": "https://openreview.net/forum?id=jUkDEaE0fK", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;3;4", "excitement": "3;2;3", "reproducibility": "5;3;3", "correctness": "3;2;3", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 2.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 2.6666666666666665, "replies_avg": 10, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-8737-099X;0000-0002-9046-222X;0000-0002-2354-9632", "linkedin": ";;", "aff_unique_index": "0;0;0", "aff_unique_norm": "Chinese Academy of Sciences", "aff_unique_dep": "Institute of Automation", "aff_unique_url": "http://www.ia.cas.cn", "aff_unique_abbr": "CAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "jUrRIcedTN", "title": "Modeling Highlighting of Metaphors in Multitask 
Contrastive Learning Paradigms", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Metaphorical language, such as ``spending time together'', projects meaning from a source domain (here, $\\textit{money}$) to a target domain ($\\textit{time}$). Thereby, it highlights certain aspects of the target domain, such as the $\\textit{effort}$ behind the time investment. Highlighting aspects with metaphors (while hiding others) bridges the two domains and is the core of metaphorical meaning construction. For metaphor interpretation, linguistic theories stress that identifying the highlighted aspects is important for a better understanding of metaphors. However, metaphor research in NLP has not yet dealt with the phenomenon of highlighting. In this paper, we introduce the task of identifying the main aspect highlighted in a metaphorical sentence. Given the inherent interaction of source domains and highlighted aspects, we propose two multitask approaches - a joint learning approach and a continual learning approach - based on a finetuned contrastive learning model to jointly predict highlighted aspects and source domains. We further investigate whether (predicted) information about a source domain leads to better performance in predicting the highlighted aspects, and vice versa. Our experiments on an existing corpus suggest that, with the corresponding information, the performance to predict the other improves in terms of model accuracy in predicting highlighted aspects and source domains notably compared to the single-task baselines.", "keywords": "Metaphor;Highlighted Aspect;Source Domain;Multitask Learning;Contrastive Learning", "primary_area": "", "supplementary_material": "", "author": "Meghdut Sengupta;Milad Alshomary;Ingrid Scharlau;Henning Wachsmuth", "authorids": "~Meghdut_Sengupta1;~Milad_Alshomary1;~Ingrid_Scharlau1;~Henning_Wachsmuth1", "gender": "M;M;F;", "homepage": ";;;https://www.ai.uni-hannover.de/en/institute/research-groups/nlp", "dblp": "362/8515;160/8727;;73/9281", "google_scholar": "lWKkt3cAAAAJ;mD9n_KgAAAAJ;;kPps-H8AAAAJ", "or_profile": "~Meghdut_Sengupta1;~Milad_Alshomary1;~Ingrid_Scharlau1;~Henning_Wachsmuth1", "aff": "Universit\u00e4t Hannover;;Paderborn University;Leibniz Universit\u00e4t Hannover", "aff_domain": "uni-hannover.de;;upb.de;uni-hannover.de", "position": "PhD student;;Full Professor;Full Professor", "bibtex": "@inproceedings{\nsengupta2023modeling,\ntitle={Modeling Highlighting of Metaphors in Multitask Contrastive Learning Paradigms},\nauthor={Meghdut Sengupta and Milad Alshomary and Ingrid Scharlau and Henning Wachsmuth},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=jUrRIcedTN}\n}", "github": "", "project": "", "reviewers": "u9Nz;hYfy;onsm", "site": "https://openreview.net/forum?id=jUrRIcedTN", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;3;3", "excitement": "4;2;2", "reproducibility": "5;4;3", "correctness": "4;4;3", "rating_avg": 3.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 2.6666666666666665, "reproducibility_avg": 4.0, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0003-2364-9489;0000-0003-2792-621X", "linkedin": "meghdut-sengupta-22b59a84/;;;", "aff_unique_index": "0;1;2", "aff_unique_norm": "University of Hanover;Paderborn University;Leibniz Universit\u00e4t Hannover", "aff_unique_dep": ";;", "aff_unique_url": 
"https://www.uni-hannover.de;https://www.upb.de/;https://www.leibniz.uni-hannover.de/", "aff_unique_abbr": "Uni Hanover;UPB;LUH", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Germany" }, { "id": "jVa7tFQw9N", "title": "Automatic Evaluation of Attribution by Large Language Models", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "A recent focus of large language model (LLM) development, as exemplified by generative search engines, is to incorporate external references to generate and support its claims. However, evaluating the attribution, i.e., verifying whether the generated statement is fully supported by the cited reference, remains an open problem. Although human evaluation is common practice, it is costly and time-consuming. In this paper, we investigate automatic evaluation of attribution given by LLMs. We begin by defining different types of attribution errors, and then explore two approaches for automatic evaluation: prompting LLMs and fine-tuning smaller LMs. The fine-tuning data is repurposed from related tasks such as question answering, fact-checking, natural language inference, and summarization. We manually curate a set of test examples covering 12 domains from a generative search engine, New Bing. Our results on this curated test set and simulated examples from existing benchmarks highlight both promising signals and challenges. We hope our problem formulation, testbeds, and findings will help lay the foundation for future studies on this important problem.", "keywords": "Large Language Models; Attribution Evaluation; Attribution of LLMs; Evaluation of LLMs;", "primary_area": "", "supplementary_material": "", "author": "Xiang Yue;Boshi Wang;Ziru Chen;Kai Zhang;Yu Su;Huan Sun", "authorids": "~Xiang_Yue1;~Boshi_Wang2;~Ziru_Chen1;~Kai_Zhang10;~Yu_Su2;~Huan_Sun1", "gender": ";M;M;M;M;F", "homepage": ";https://boshi-wang.github.io/;https://ronch99.github.io/;https://drogozhang.github.io;http://ysu1989.github.io;https://u.osu.edu/ihudas/people/", "dblp": ";216/7905;200/8335;55/957-33;38/1070-1;33/2952-1.html", "google_scholar": ";https://scholar.google.com/citations?hl=en;1-pt7zMAAAAJ;sDnAIsgAAAAJ;rIh5OqoAAAAJ;wIFkulcAAAAJ", "or_profile": "~Xiang_Yue1;~Boshi_Wang2;~Ziru_Chen1;~Kai_Zhang10;~Yu_Su2;~Huan_Sun1", "aff": ";Ohio State University;Ohio State University, Columbus;Google DeepMind;Microsoft;The Ohio State University, Columbus", "aff_domain": ";osu.edu;osu.edu;google.com;microsoft.com;osu.edu", "position": ";PhD student;PhD student;Student Researcher;Senior Researcher;Associate Professor", "bibtex": "@inproceedings{\nyue2023automatic,\ntitle={Automatic Evaluation of Attribution by Large Language Models},\nauthor={Xiang Yue and Boshi Wang and Ziru Chen and Kai Zhang and Yu Su and Huan Sun},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=jVa7tFQw9N}\n}", "github": "", "project": "", "reviewers": "DLKU;82hu;XrPK;BA69", "site": "https://openreview.net/forum?id=jVa7tFQw9N", "pdf_size": 0, "rating": "4;4;4;4", "confidence": "2;4;3;3", "excitement": "3;4;3;3", "reproducibility": "4;4;4;3", "correctness": "3;3;3;3", "rating_avg": 4.0, "confidence_avg": 3.0, "excitement_avg": 3.25, "reproducibility_avg": 3.75, "correctness_avg": 3.0, "replies_avg": 14, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;", "linkedin": 
";;;kai-zhang-43774b196/;;huan-sun-81527924/?originalSubdomain=cn", "aff_unique_index": "0;0;1;2;0", "aff_unique_norm": "Ohio State University;Google;Microsoft", "aff_unique_dep": ";Google DeepMind;Microsoft Corporation", "aff_unique_url": "https://www.osu.edu;https://deepmind.com;https://www.microsoft.com", "aff_unique_abbr": "OSU;DeepMind;Microsoft", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Columbus", "aff_country_unique_index": "0;0;1;0;0", "aff_country_unique": "United States;United Kingdom" }, { "id": "jWL2GhQw5D", "title": "More than Votes? Voting and Language based Partisanship in the US Supreme Court", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "Understanding the prevalence and dynamics of justice partisanship and ideology in the US Supreme Court is critical in studying jurisdiction. Most research quantifies partisanship based on voting behavior, and oral arguments in the courtroom --- the last essential procedure before the final case outcome --- have not been well studied for this purpose. To address this gap, we present a framework for analyzing the language of justices in the courtroom for partisan signals, and study how partisanship in speech aligns with voting patterns. \nOur results show that the affiliated party of justices can be predicted reliably from their oral contributions. We further show a strong correlation between language partisanship and voting ideology.", "keywords": "Fairness and Bias; Sociolinguistic; Cultural Analysis; Partisanship Analysis", "primary_area": "", "supplementary_material": "", "author": "Biaoyan Fang;Trevor Cohn;Timothy Baldwin;Lea Frermann", "authorids": "~Biaoyan_Fang1;~Trevor_Cohn1;~Timothy_Baldwin1;~Lea_Frermann2", "gender": ";M;;Not Specified", "homepage": "https://biaoyanf.github.io/;https://people.eng.unimelb.edu.au/tcohn/;https://eltimster.github.io/www/;http://www.frermann.de", "dblp": "262/6265;66/4613;65/4863;117/4041", "google_scholar": "3t8vpdsAAAAJ;https://scholar.google.com.au/citations?user=FCom398AAAAJ;wjBD1dkAAAAJ;https://scholar.google.co.uk/citations?user=y3l6y4IAAAAJ", "or_profile": "~Biaoyan_Fang1;~Trevor_Cohn1;~Timothy_Baldwin1;~Lea_Frermann2", "aff": "University of Melbourne;The University of Melbourne;The University of Melbourne;University of Melbourne", "aff_domain": "unimelb.edu;unimelb.edu.au;unimelb.edu.au;unimelb.edu", "position": "Postdoc;Professor;Full Professor;Lecturer", "bibtex": "@inproceedings{\nfang2023more,\ntitle={More than Votes? 
Voting and Language based Partisanship in the {US} Supreme Court},\nauthor={Biaoyan Fang and Trevor Cohn and Timothy Baldwin and Lea Frermann},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=jWL2GhQw5D}\n}", "github": "", "project": "", "reviewers": "NGhd;2Gwe;HTaT;9L3i", "site": "https://openreview.net/forum?id=jWL2GhQw5D", "pdf_size": 0, "rating": "3;3;3;3", "confidence": "3;4;4;4", "excitement": "3;3;3;3", "reproducibility": "4;4;5;4", "correctness": "4;4;3;4", "rating_avg": 3.0, "confidence_avg": 3.75, "excitement_avg": 3.0, "reproducibility_avg": 4.25, "correctness_avg": 3.75, "replies_avg": 14, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0003-4525-6950;", "linkedin": ";;;", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Melbourne", "aff_unique_dep": "", "aff_unique_url": "https://www.unimelb.edu.au", "aff_unique_abbr": "UniMelb", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Australia" }, { "id": "jWqkEB3wJP", "title": "RobustEmbed: Robust Sentence Embeddings Using Self-Supervised Contrastive Pre-Training", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Pre-trained language models (PLMs) have demonstrated their exceptional performance across a wide range of natural language processing tasks. The utilization of PLM-based sentence embeddings enables the generation of contextual representations that capture rich semantic information. However, despite their success with unseen samples, current PLM-based representations suffer from poor robustness in adversarial scenarios. In this paper, we propose RobustEmbed, a self-supervised sentence embedding framework that enhances both generalization and robustness in various text representation tasks and against diverse adversarial attacks. By generating high-risk adversarial perturbations to promote higher invariance in the embedding space and leveraging the perturbation within a novel contrastive objective approach, RobustEmbed effectively learns high-quality sentence embeddings. Our extensive experiments validate the superiority of RobustEmbed over previous state-of-the-art self-supervised representations in adversarial settings, while also showcasing relative improvements in seven semantic textual similarity (STS) tasks and six transfer tasks. 
Specifically, our framework achieves a significant reduction in attack success rate from 75.51% to 39.62% for the BERTAttack attack technique, along with enhancements of 1.20% and 0.40% in STS tasks and transfer tasks, respectively.", "keywords": "Trustworthy Machine Learning;Text Representation;Adversarial Attacks;Self-Supervised Contrastive Learning", "primary_area": "", "supplementary_material": "", "author": "Javad Rafiei Asl;Eduardo Blanco;Daniel Takabi", "authorids": "~Javad_Rafiei_Asl1;~Eduardo_Blanco1;~Daniel_Takabi1", "gender": "M;M;", "homepage": "https://www.linkedin.com/in/javad-rafiei-asl-034b22a2/;https://eduardoblanco.github.io/;", "dblp": "204/0907.html;32/369-2;", "google_scholar": "rXfZo-8AAAAJ;AqGa3-MAAAAJ;", "or_profile": "~Javad_Rafiei_Asl1;~Eduardo_Blanco1;~Daniel_Takabi1", "aff": "Georgia State University;University of Arizona;", "aff_domain": "gsu.edu;arizona.edu;", "position": "PhD student;Associate Professor;", "bibtex": "@inproceedings{\nasl2023robustembed,\ntitle={RobustEmbed: Robust Sentence Embeddings Using Self-Supervised Contrastive Pre-Training},\nauthor={Javad Rafiei Asl and Eduardo Blanco and Daniel Takabi},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=jWqkEB3wJP}\n}", "github": "", "project": "", "reviewers": "hADL;ZN7y;a5gT", "site": "https://openreview.net/forum?id=jWqkEB3wJP", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;5;4", "excitement": "3;2;3", "reproducibility": "4;4;4", "correctness": "4;3;4", "rating_avg": 4.0, "confidence_avg": 4.333333333333333, "excitement_avg": 2.6666666666666665, "reproducibility_avg": 4.0, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-6154-1068;;", "linkedin": "javad-rafiei-asl-034b22a2/;;", "aff_unique_index": "0;1", "aff_unique_norm": "Georgia State University;University of Arizona", "aff_unique_dep": ";", "aff_unique_url": "https://www.gsu.edu;https://www.arizona.edu", "aff_unique_abbr": "GSU;UA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "jZXjHnzPyk", "title": "TrojanSQL: SQL Injection against Natural Language Interface to Database", "track": "main", "status": "Long Main", "tldr": "", "abstract": "The technology of text-to-SQL has significantly enhanced the efficiency of accessing and manipulating databases.\nHowever, limited research has been conducted to study its vulnerabilities emerging from malicious user interaction. 
\nBy proposing TrojanSQL, a backdoor-based SQL injection framework for text-to-SQL systems,\nwe show how state-of-the-art text-to-SQL parsers can be easily misled to produce harmful SQL statements\nthat can invalidate user queries or compromise sensitive information about the database.\nThe study explores two specific injection attacks, namely $\\textit{boolean-based injection}$ and $\\textit{union-based injection}$,\nwhich use different types of triggers to achieve distinct goals in compromising the parser.\nExperimental results demonstrate that both medium-sized models based on fine-tuning and\nLLM-based parsers using prompting techniques are vulnerable to this type of attack,\nwith attack success rates as high as 99\\% and 89\\%, respectively.\nWe hope that this study will raise more concerns about the potential security risks of building natural language interfaces to databases.", "keywords": "text-to-SQL;NLIDB;security;SQL Injection;NL2Code", "primary_area": "", "supplementary_material": "", "author": "Jinchuan Zhang;Yan Zhou;Binyuan Hui;Yaxin Liu;Ziming Li;Songlin Hu", "authorids": "~Jinchuan_Zhang1;~Yan_Zhou8;~Binyuan_Hui1;~Yaxin_Liu3;~Ziming_Li3;~Songlin_Hu2", "gender": "M;F;F;;M;M", "homepage": ";;https://huybery.github.io/;;;http://people.ucas.ac.cn/~0000967?language=en", "dblp": ";;246/4699;;;67/4108-1.html", "google_scholar": "IoYMnlkAAAAJ;;RBb3ItMAAAAJ;0x7BYVwAAAAJ;;", "or_profile": "~Jinchuan_Zhang1;~Yan_Zhou8;~Binyuan_Hui1;~Yaxin_Liu3;~Ziming_Li3;~Songiln_Hu1", "aff": "University of Chinese Academy of Sciences;Institute of Information Engineering\uff0cChinese Academy of Sciences;Alibaba Group;Institute of Information Engineering, Chinese Academy of Sciences;University of Chinese Academy of Sciences;Institute of Information Engineering, Chinese Academy of Sciences", "aff_domain": "ucas.ac.cn;iie.ac.cn;alibaba-inc.com;iie.ac.cn;ucas.edu.cn;iie.ac.cn", "position": "PhD student;Assistant Professor;Researcher;PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nzhang2023trojansql,\ntitle={Trojan{SQL}: {SQL} Injection against Natural Language Interface to Database},\nauthor={Jinchuan Zhang and Yan Zhou and Binyuan Hui and Yaxin Liu and Ziming Li and Songlin Hu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=jZXjHnzPyk}\n}", "github": "", "project": "", "reviewers": "4wMe;ggYZ;28we", "site": "https://openreview.net/forum?id=jZXjHnzPyk", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;4;3", "excitement": "4;3;4", "reproducibility": "3;5;3", "correctness": "3;3;4", "rating_avg": 5.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0003-4960-0392;;;0000-0003-4111-5798;;", "linkedin": ";;;;;", "aff_unique_index": "0;1;2;1;0;1", "aff_unique_norm": "University of Chinese Academy of Sciences;Chinese Academy of Sciences;Alibaba Group", "aff_unique_dep": ";Institute of Information Engineering;", "aff_unique_url": "http://www.ucas.ac.cn;http://www.cas.cn;https://www.alibaba.com", "aff_unique_abbr": "UCAS;CAS;Alibaba", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "jbicunmyXh", "title": "LATENTLOGIC: Learning Logic Rules in Latent Space over Knowledge Graphs", "track": 
"main", "status": "Short Findings", "tldr": "", "abstract": "Learning logic rules for knowledge graph reasoning is essential as such rules provide interpretable explanations for reasoning and can be generalized to different domains. \nHowever, existing methods often face challenges such as searching in a vast search space (e.g., enumeration of relational paths or multiplication of high-dimensional matrices) and inefficient optimization (e.g., techniques\nbased on reinforcement learning or EM algorithm).\nTo address these limitations, this paper proposes a novel framework called LatentLogic to efficiently mine logic rules by controllable generation in the latent space.\nSpecifically, to map the discrete relational paths into the latent space, we leverage a pre-trained VAE and employ a discriminator to establish an energy-based distribution. \nAdditionally, we incorporate a sampler based on ordinary differential equations, enabling the efficient generation of logic rules in our approach.\nExtensive experiments on benchmark datasets demonstrate the effectiveness and efficiency of our proposed method.", "keywords": "Knowledge Graph;Reasoning;Logic Rule", "primary_area": "", "supplementary_material": "", "author": "Junnan Liu;Qianren Mao;Chenghua Lin;Yangqiu Song;Jianxin Li", "authorids": "~Junnan_Liu1;~Qianren_Mao4;~Chenghua_Lin1;~Yangqiu_Song1;~Jianxin_Li3", "gender": "M;M;;M;M", "homepage": "https://github.com/spankeran;;;https://www.cse.ust.hk/~yqsong/;http://myjianxin.github.io", "dblp": "206/8503;234/5350;;86/2159;l/JianxinLi-2.html", "google_scholar": "https://scholar.google.com/citations?hl=en;https://scholar.google.com.hk/citations?user=PnDqlPkAAAAJ;;MdQZ-q8AAAAJ;EY2lqD0AAAAJ", "or_profile": "~Junnan_Liu1;~Qianren_Mao4;~Chenghua_Lin1;~Yangqiu_Song1;~Jianxin_Li3", "aff": "Beihang University;Beihang University;;Hong Kong University of Science and Technology;Beihang University ", "aff_domain": "buaa.edu.cn;buaa.edu.cn;;ust.hk;buaa.edu.cn", "position": "MS student;PhD student;;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nliu2023latentlogic,\ntitle={{LATENTLOGIC}: Learning Logic Rules in Latent Space over Knowledge Graphs},\nauthor={Junnan Liu and Qianren Mao and Chenghua Lin and Yangqiu Song and Jianxin Li},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=jbicunmyXh}\n}", "github": "", "project": "", "reviewers": "GJYL;rqqL;dB1w;Tpzv", "site": "https://openreview.net/forum?id=jbicunmyXh", "pdf_size": 0, "rating": "2;2;2;2", "confidence": "4;3;4;4", "excitement": "3;3;4;3", "reproducibility": "4;3;4;3", "correctness": "3;4;3;3", "rating_avg": 2.0, "confidence_avg": 3.75, "excitement_avg": 3.25, "reproducibility_avg": 3.5, "correctness_avg": 3.25, "replies_avg": 14, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0003-0780-0628;;0000-0002-7818-6090;0000-0001-5152-0055", "linkedin": ";%E4%B9%BE%E4%BB%BB-%E6%AF%9B-574534326/;;yqsong/;", "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Beihang University;Hong Kong University of Science and Technology", "aff_unique_dep": ";", "aff_unique_url": "http://www.buaa.edu.cn/;https://www.ust.hk", "aff_unique_abbr": "BUAA;HKUST", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "jcqBLHFcYA", "title": "MindGames: Targeting Theory of Mind in Large Language Models with Dynamic Epistemic Modal 
Logic", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "Theory of Mind (ToM) is a critical component of intelligence but its assessment remains the subject of heated debates. Prior research applied human ToM assessments to natural language processing models using either human-created standardized tests or rule-based templates. However, these methods primarily focus on simplistic reasoning and require further validation. Here, we leverage dynamic epistemic logic to isolate a particular component of ToM and to generate controlled problems. We also introduce new verbalization techniques to express these problems in English natural language. Our findings indicate that some language model scaling (from 70M to 6B and 350M to 174B) does not consistently yield results better than random chance. While GPT-4 demonstrates superior epistemic reasoning capabilities, there is still room for improvement. Our code and datasets are publicly available.", "keywords": "logic;epistemic logic;theory of mind;language models", "primary_area": "", "supplementary_material": "", "author": "Damien Sileo;Antoine Lernould", "authorids": "~Damien_Sileo2;~Antoine_Lernould1", "gender": "M;Not Specified", "homepage": ";https://sileod.github.io/", "dblp": ";206/6447", "google_scholar": "mZm25vsAAAAJ;SIJPeoYAAAAJ", "or_profile": "~Antoine_Lernould1;~Damien_Sileo1", "aff": "Universit\u00e9 de Lille;INRIA", "aff_domain": "univ-lille.fr;inria.fr", "position": "MS student;Researcher", "bibtex": "@inproceedings{\nsileo2023mindgames,\ntitle={MindGames: Targeting Theory of Mind in Large Language Models with Dynamic Epistemic Modal Logic},\nauthor={Damien Sileo and Antoine Lernould},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=jcqBLHFcYA}\n}", "github": "", "project": "", "reviewers": "6Ahj;fwC4;GnXA", "site": "https://openreview.net/forum?id=jcqBLHFcYA", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;5;3", "excitement": "3;3;4", "reproducibility": "5;3;4", "correctness": "4;3;3", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-3274-291X", "linkedin": ";", "aff_unique_index": "0;1", "aff_unique_norm": "Universit\u00e9 de Lille;INRIA", "aff_unique_dep": ";", "aff_unique_url": "https://www.univ-lille.fr;https://www.inria.fr", "aff_unique_abbr": "UdeL;INRIA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "France" }, { "id": "jcx5YIN3Sd", "title": "That was the last straw, we need more: Are Translation Systems Sensitive to Disambiguating Context?", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "The translation of ambiguous text presents a challenge for translation systems, as it requires using the surrounding context to disambiguate the intended meaning as much as possible. While prior work has studied ambiguities that result from different grammatical features of the source and target language, we study semantic ambiguities that exist in the source (English in this work) itself. 
In particular, we focus on idioms that are open to both literal and figurative interpretations (e.g., goose egg), and collect TIDE, a dataset of 512 pairs of English sentences containing idioms with disambiguating context such that one is literal (it laid a goose egg) and another is figurative (they scored a goose egg, as in a score of zero). In experiments, we compare MT-specific models and language models for (i) their preference when given an ambiguous subsentence, (ii) their sensitivity to disambiguating context, and (iii) the performance disparity between figurative and literal source sentences. We find that current MT models consistently translate English idioms literally, even when the context suggests a figurative interpretation. On the other hand, LMs are far more context-aware, although there remain disparities across target languages. Our findings underline the potential of LMs as a strong backbone for context-aware translation.", "keywords": "model evaluation;model analysis;dataset creation;machine translation;LM", "primary_area": "", "supplementary_material": "", "author": "Jaechan Lee;Alisa Liu;Orevaoghene Ahia;Hila Gonen;Noah A. Smith", "authorids": "~Jaechan_Lee1;~Alisa_Liu1;~Orevaoghene_Ahia1;~Hila_Gonen1;~Noah_A._Smith2", "gender": "M;F;;;", "homepage": ";https://alisawuffles.github.io/;;https://gonenhila.github.io/;", "dblp": ";;;167/5312;", "google_scholar": ";3-lTFAwAAAAJ;;URThmtMAAAAJ;", "or_profile": "~Jaechan_Lee1;~Alisa_Liu1;~Orevaoghene_Ahia1;~Hila_Gonen1;~Noah_A._Smith2", "aff": "Department of Computer Science;Google;;Meta Facebook;", "aff_domain": "cs.washington.edu;google.com;;facebook.com;", "position": "Undergrad student;Intern;;Postdoc;", "bibtex": "@inproceedings{\nlee2023that,\ntitle={That was the last straw, we need more: Are Translation Systems Sensitive to Disambiguating Context?},\nauthor={Jaechan Lee and Alisa Liu and Orevaoghene Ahia and Hila Gonen and Noah A. Smith},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=jcx5YIN3Sd}\n}", "github": "", "project": "", "reviewers": "ZKwx;PHaX;eu4q", "site": "https://openreview.net/forum?id=jcx5YIN3Sd", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;4", "excitement": "3;4;3", "reproducibility": "3;3;3", "correctness": "4;3;3", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;", "linkedin": "jaechan-lee-8186b415a/;;;;", "aff_unique_index": "0;1;2", "aff_unique_norm": "Unknown Institution;Google;Meta", "aff_unique_dep": "Department of Computer Science;Google;Meta Platforms, Inc.", "aff_unique_url": ";https://www.google.com;https://meta.com", "aff_unique_abbr": ";Google;Meta", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "1;1", "aff_country_unique": ";United States" }, { "id": "jfaJdk29k4", "title": "Prompting Large Language Models with Chain-of-Thought for Few-Shot Knowledge Base Question Generation", "track": "main", "status": "Long Main", "tldr": "", "abstract": "The task of Question Generation over Knowledge Bases (KBQG) aims to convert a logical form into a natural language question. For\nthe sake of expensive cost of large-scale question annotation, the methods of KBQG under low-resource scenarios urgently need to be developed. 
However, current methods heavily rely on annotated data for fine-tuning, which is not well-suited for few-shot question generation. The emergence of Large Language Models (LLMs) has shown their impressive generalization ability in few-shot tasks. Inspired by Chain-of-Thought (CoT) prompting, which is an in-context learning strategy for reasoning, we formulate KBQG task as a reasoning problem, where the generation of a complete question is splitted into a series of sub-question generation. Our proposed prompting method KQG-CoT first retrieves supportive logical forms from the unlabeled data pool taking account of the characteristics of the logical form. Then, we write a prompt to explicit the reasoning chain of generating complicated questions based on the selected demonstrations. To further ensure prompt quality, we extend KQG-CoT into KQG-CoT+ via sorting the logical forms by their complexity. We conduct extensive experiments over three public KBQG datasets. The results demonstrate that our prompting method consistently outperforms other prompting baselines on the evaluated datasets. Remarkably, our KQG-CoT+ method could surpass existing few-shot SoTA results of the PathQuestions dataset by 18.25, 10.72, and 10.18 absolute points on BLEU-4, METEOR, and ROUGE-L, respectively.", "keywords": "Large Language Models;Chain-of-Thought;Few-Shot;Knowledge Base Question Generation", "primary_area": "", "supplementary_material": "", "author": "Yuanyuan Liang;Jianing Wang;Hanlun Zhu;Lei Wang;Weining Qian;Yunshi Lan", "authorids": "~Yuanyuan_Liang1;~Jianing_Wang4;~Hanlun_Zhu1;~Lei_Wang28;~Weining_Qian1;~Yunshi_Lan1", "gender": "M;M;M;;F;M", "homepage": ";https://github.com/timberflow;https://demoleiwang.github.io/HomePage/;;https://lanyunshi.github.io;https://github.com/wjn1996", "dblp": ";359/0727;;55/3364;185/6830.html;", "google_scholar": "egseNs8AAAAJ;-yKPeckAAAAJ;VidA02oAAAAJ;;Q0F92XIAAAAJ;ccaimI8AAAAJ", "or_profile": "~Yuanyuan_Liang1;~Hanlun_Zhu1;~Lei_Wang28;~Weining_Qian1;~Yunshi_Lan1;~Jia-ning_Wang1", "aff": "East China Normal University;East China Normal University;Singapore Management University;East China Normal University;East China Normal University;East China Normal University", "aff_domain": "stu.ecnu.edu.cn;stu.ecnu.edu.cn;smu.edu.sg;ecnu.edu.cn;ecnu.edu.cn;ecnu.edu.cn", "position": "PhD student;Undergrad student;PhD student;Full Professor;Associate Professor;PhD student", "bibtex": "@inproceedings{\nliang2023prompting,\ntitle={Prompting Large Language Models with Chain-of-Thought for Few-Shot Knowledge Base Question Generation},\nauthor={Yuanyuan Liang and Jianing Wang and Hanlun Zhu and Lei Wang and Weining Qian and Yunshi Lan},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=jfaJdk29k4}\n}", "github": "", "project": "", "reviewers": "jeur;BZaK;PKjA", "site": "https://openreview.net/forum?id=jfaJdk29k4", "pdf_size": 0, "rating": "4;4;4", "confidence": "2;4;4", "excitement": "3;3;4", "reproducibility": "3;3;4", "correctness": "4;4;4", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;0000-0002-0192-8498;0000-0001-6006-053X", "linkedin": ";;;;;", "aff_unique_index": "0;0;1;0;0;0", "aff_unique_norm": "East China Normal University;Singapore Management University", 
"aff_unique_dep": ";", "aff_unique_url": "http://www.ecnu.edu.cn;https://www.smu.edu.sg", "aff_unique_abbr": "ECNU;SMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0;0", "aff_country_unique": "China;Singapore" }, { "id": "jg2WCVrjhS", "title": "Uncertainty Guided Global Memory Improves Multi-Hop Question Answering", "track": "main", "status": "Short Main", "tldr": "", "abstract": "Transformers have become the gold standard for many natural language processing tasks and, in particular, for multi-hop question answering (MHQA). This task includes processing a long document and reasoning over the multiple parts of it. The landscape of MHQA approaches can be classified into two primary categories. The first group focuses on extracting supporting evidence, thereby constraining the QA model's context to predicted facts. Conversely, the second group relies on the attention mechanism of the long input encoding model to facilitate multi-hop reasoning. However, attention-based token representations lack explicit global contextual information to connect reasoning steps. To address these issues, we propose GEMFormer, a two-stage method that first collects relevant information over the entire document to the memory and then combines it with local context to solve the task. Our experimental results show that fine-tuning a pre-trained model with memory-augmented input, including the most certain global elements, improves the model's performance on three MHQA datasets compared to the baseline. We also found that the global explicit memory contains information from supporting facts required for the correct answer.", "keywords": "Transformer;Memory;Multi-hop Question Answering", "primary_area": "", "supplementary_material": "", "author": "Alsu Sagirova;Mikhail Burtsev", "authorids": "~Alsu_Sagirova1;~Mikhail_Burtsev1", "gender": ";M", "homepage": ";", "dblp": ";95/11265", "google_scholar": ";t_PLQakAAAAJ", "or_profile": "~Alsu_Sagirova1;~Mikhail_Burtsev1", "aff": ";London Institute for Mathematical Sciences", "aff_domain": ";lims.ac.uk", "position": ";Researcher", "bibtex": "@inproceedings{\nsagirova2023uncertainty,\ntitle={Uncertainty Guided Global Memory Improves Multi-Hop Question Answering},\nauthor={Alsu Sagirova and Mikhail Burtsev},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=jg2WCVrjhS}\n}", "github": "", "project": "", "reviewers": "E7Ta;Wfje;LDqA;yjSH", "site": "https://openreview.net/forum?id=jg2WCVrjhS", "pdf_size": 0, "rating": "4;4;4;4", "confidence": "1;4;4;3", "excitement": "3;2;3;4", "reproducibility": "3;3;5;4", "correctness": "3;2;3;4", "rating_avg": 4.0, "confidence_avg": 3.0, "excitement_avg": 3.0, "reproducibility_avg": 3.75, "correctness_avg": 3.0, "replies_avg": 14, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";", "linkedin": ";", "aff_unique_index": "0", "aff_unique_norm": "London Institute for Mathematical Sciences", "aff_unique_dep": "Mathematical Sciences", "aff_unique_url": "https://www.lims.ac.uk", "aff_unique_abbr": "LIMS", "aff_country_unique_index": "0", "aff_country_unique": "United Kingdom" }, { "id": "jhdVt7rC8k", "title": "Large Language Models are Temporal and Causal Reasoners for Video Question Answering", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Large Language Models (LLMs) have shown remarkable performances on a wide range of natural language understanding 
and generation tasks.\nWe observe that the LLMs provide effective priors in exploiting $\\textit{linguistic shortcuts}$ for temporal and causal reasoning in Video Question Answering (VideoQA).\nHowever, such priors often cause suboptimal results on VideoQA by leading the model to over-rely on questions, $\\textit{i.e.}$, $\\textit{linguistic bias}$, while ignoring visual content.\nThis is also known as 'ungrounded guesses' or 'hallucinations'.\nTo address this problem while leveraging LLMs' prior on VideoQA, we propose a novel framework, Flipped-VQA, encouraging the model to predict all the combinations of $\\langle$V, Q, A$\\rangle$ triplet by flipping the source pair and the target label to understand their complex relationships, $\\textit{i.e.}$, predict A, Q, and V given a VQ, VA, and QA pairs, respectively.\nIn this paper, we develop LLaMA-VQA by applying Flipped-VQA to LLaMA, and it outperforms both LLMs-based and non-LLMs-based models on five challenging VideoQA benchmarks.\nFurthermore, our Flipped-VQA is a general framework that is applicable to various LLMs (OPT and GPT-J) and consistently improves their performances.\nWe empirically demonstrate that Flipped-VQA not only enhances the exploitation of linguistic shortcuts but also mitigates the linguistic bias, which causes incorrect answers over-relying on the question.\nCode is available at https://github.com/mlvlab/Flipped-VQA.", "keywords": "large language models;temporal and causal reasoning", "primary_area": "", "supplementary_material": "", "author": "Dohwan Ko;Ji Soo Lee;Woo-Young Kang;Byungseok Roh;Hyunwoo J. Kim", "authorids": "~Dohwan_Ko1;~Ji_Soo_Lee1;~Woo-Young_Kang1;~Byungseok_Roh1;~Hyunwoo_J._Kim3", "gender": "M;;M;;M", "homepage": "https://ikodoh.github.io;https://github.com/simplewhite9;https://wykang.github.io/;;https://hyunwoojkim.com/publications", "dblp": "305/6692;59/2866;213/8452;258/1192;150/4259", "google_scholar": "JoYPLHUAAAAJ;2rgTuvkAAAAJ;;H4VWYHwAAAAJ;https://scholar.google.co.kr/citations?user=LfBoJt8AAAAJ", "or_profile": "~Dohwan_Ko1;~Ji_Soo_Lee1;~Woo-Young_Kang1;~Byungseok_Roh1;~Hyunwoo_Kim1", "aff": "Korea University;Korea University;Kakaobrain;Kakao Brain;Korea University", "aff_domain": "korea.ac.kr;korea.ac.kr;kakaobrain.com;kakaobrain.com;korea.ac.kr", "position": "PhD student;Undergrad student;Researcher;Research Scientist;Assistant Professor", "bibtex": "@inproceedings{\nko2023large,\ntitle={Large Language Models are Temporal and Causal Reasoners for Video Question Answering},\nauthor={Dohwan Ko and Ji Soo Lee and Woo-Young Kang and Byungseok Roh and Hyunwoo J. 
Kim},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=jhdVt7rC8k}\n}", "github": "", "project": "", "reviewers": "W2ex;peaG;iJms", "site": "https://openreview.net/forum?id=jhdVt7rC8k", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;4;3", "excitement": "4;4;4", "reproducibility": "4;3;3", "correctness": "4;5;4", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 4.0, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 4.333333333333333, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;0000-0002-2181-9264", "linkedin": "dohwan-ko-4b232a14b/;;;;", "aff_unique_index": "0;0;1;1;0", "aff_unique_norm": "Korea University;Kakao Brain", "aff_unique_dep": ";", "aff_unique_url": "https://www.korea.ac.kr;https://brain.kakao.com", "aff_unique_abbr": "KU;Kakao Brain", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "South Korea" }, { "id": "jjSOGqLT2X", "title": "Video-Helpful Multimodal Machine Translation", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Existing multimodal machine translation (MMT) datasets consist of images and video captions or instructional video subtitles, which rarely contain linguistic ambiguity, making visual information ineffective in generating appropriate translations. Recent work has constructed an ambiguous subtitles dataset to alleviate this problem but is still limited to the problem that videos do not necessarily contribute to disambiguation.\n\nWe introduce EVA (Extensive training set and Video-helpful evaluation set for Ambiguous subtitles translation), an MMT dataset containing 852k Japanese-English parallel subtitle pairs, 520k Chinese-English parallel subtitle pairs, and corresponding video clips collected from movies and TV episodes. In addition to the extensive training set, EVA contains a video-helpful evaluation set in which subtitles are ambiguous, and videos are guaranteed helpful for disambiguation.\n\nFurthermore, we propose SAFA, an MMT model based on the Selective Attention model with two novel methods: Frame attention loss and Ambiguity augmentation, aiming to use videos in EVA for disambiguation fully. 
Experiments on EVA show that visual information and the proposed methods can boost translation performance, and our model performs significantly better than existing MMT models.", "keywords": "[Multimodal machine translation;Video]", "primary_area": "", "supplementary_material": "", "author": "Yihang Li;Shuichiro Shimizu;Chenhui Chu;Sadao Kurohashi;Wei Li", "authorids": "~Yihang_Li1;~Shuichiro_Shimizu1;~Chenhui_Chu1;~Sadao_Kurohashi1;~Wei_Li50", "gender": "M;M;M;M;F", "homepage": ";https://cromz22.github.io/;http://researchmap.jp/chu/?lang=en;https://nlp.ist.i.kyoto-u.ac.jp/member/kuro/index.html;", "dblp": ";312/5015;126/8755;42/2149;", "google_scholar": "https://scholar.google.com.hk/citations?user=-QjslucAAAAJ;_YhqXyUAAAAJ;https://scholar.google.co.jp/citations?user=6ef0qbgAAAAJ;https://scholar.google.co.jp/citations?user=gpKS5P0AAAAJ;bEL0CR4AAAAJ", "or_profile": "~Yihang_Li1;~Shuichiro_Shimizu1;~Chenhui_Chu1;~Sadao_Kurohashi1;~Wei_Li50", "aff": "Kyoto University, Kyoto University;Kyoto University;Kyoto University;Kyoto University;", "aff_domain": "i.kyoto-u.ac.jp;i.kyoto-u.ac.jp;kyoto-u.ac.jp;kyoto-u.ac.jp;", "position": "MS student;PhD student;Associate Professor;Full Professor;", "bibtex": "@inproceedings{\nli2023videohelpful,\ntitle={Video-Helpful Multimodal Machine Translation},\nauthor={Yihang Li and Shuichiro Shimizu and Chenhui Chu and Sadao Kurohashi and Wei Li},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=jjSOGqLT2X}\n}", "github": "", "project": "", "reviewers": "7em6;Dww6;3gan", "site": "https://openreview.net/forum?id=jjSOGqLT2X", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;3;4", "excitement": "3;4;4", "reproducibility": "3;3;4", "correctness": "3;3;3", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0009-0003-4929-8995;0000-0001-9848-6384;0000-0001-5398-8399;", "linkedin": ";shuichiro-shimizu-68666b232/;;;", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Kyoto University", "aff_unique_dep": "", "aff_unique_url": "https://www.kyoto-u.ac.jp", "aff_unique_abbr": "Kyoto U", "aff_campus_unique_index": "0", "aff_campus_unique": "Kyoto;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Japan" }, { "id": "jkI9KGEFQz", "title": "Support or Refute: Analyzing the Stance of Evidence to Detect Out-of-Context Mis- and Disinformation", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Mis- and disinformation online have become a major societal problem as major sources of online harms of different kinds. One common form of mis- and disinformation is out-of-context (OOC) information, where different pieces of information are falsely associated, e.g., a real image combined with a false textual caption or a misleading textual description. Although some past studies have attempted to defend against OOC mis- and disinformation through external evidence, they tend to disregard the role of different pieces of evidence with different stances. Motivated by the intuition that the stance of evidence represents a bias towards different detection results, we propose a stance extraction network (SEN) that can extract the stances of different pieces of multi-modal evidence in a unified framework. 
Moreover, we introduce a support-refutation score calculated based on the co-occurrence relations of named entities into the textual SEN. Extensive experiments on a public large-scale dataset demonstrated that our proposed method outperformed the state-of-the-art baselines, with the best model achieving a performance gain of 3.2% in accuracy.", "keywords": "out-of-context;stance analysis;misinformation;Internet evidence", "primary_area": "", "supplementary_material": "", "author": "Xin Yuan;Jie Guo;Weidong Qiu;Zheng Huang;Shujun Li", "authorids": "~Xin_Yuan7;~Jie_Guo7;~Weidong_Qiu1;~Zheng_Huang1;~Shujun_Li2", "gender": "M;F;M;M;M", "homepage": ";https://infosec.sjtu.edu.cn/DirectoryDetail.aspx?id=143;https://infosec.sjtu.edu.cn/DirectoryDetail.aspx?id=34;https://infosec.sjtu.edu.cn/DirectoryDetail.aspx?id=17;http://www.hooklee.com/", "dblp": "78/713-11;;;;09/6954.html", "google_scholar": "https://scholar.google.com/citations?hl=en;;;;https://scholar.google.co.uk/citations?user=2DNYP3EAAAAJ", "or_profile": "~Xin_Yuan7;~Jie_Guo7;~Weidong_Qiu1;~Zheng_Huang1;~Shujun_Li1", "aff": "Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University;University of Kent", "aff_domain": "sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;kent.ac.uk", "position": "MS student;Associate Professor;Full Professor;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nyuan2023support,\ntitle={Support or Refute: Analyzing the Stance of Evidence to Detect Out-of-Context Mis- and Disinformation},\nauthor={Xin Yuan and Jie Guo and Weidong Qiu and Zheng Huang and Shujun Li},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=jkI9KGEFQz}\n}", "github": "", "project": "", "reviewers": "RUdv;V426;2aq7", "site": "https://openreview.net/forum?id=jkI9KGEFQz", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;2;3", "excitement": "4;4;4", "reproducibility": "4;4;3", "correctness": "4;4;4", "rating_avg": 5.0, "confidence_avg": 3.0, "excitement_avg": 4.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 4.0, "replies_avg": 10, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0009-0000-2042-0043;;;0009-0009-1753-5574;0000-0001-5628-7328", "linkedin": ";;;;hooklee/", "aff_unique_index": "0;0;0;0;1", "aff_unique_norm": "Shanghai Jiao Tong University;University of Kent", "aff_unique_dep": ";", "aff_unique_url": "https://www.sjtu.edu.cn;https://www.kent.ac.uk", "aff_unique_abbr": "SJTU;UKC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;1", "aff_country_unique": "China;United Kingdom" }, { "id": "jmopGajkFY", "title": "MEGA: Multilingual Evaluation of Generative AI", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Generative AI models have shown impressive performance on many Natural Language Processing tasks such as language understanding, reasoning, and language generation. An important question being asked by the AI community today is about the capabilities and limits of these models, and it is clear that evaluating generative AI is very challenging. Most studies on generative LLMs have been restricted to English and it is unclear how capable these models are at understanding and generating text in other languages. 
We present the first comprehensive benchmarking of generative LLMs - MEGA, which evaluates models on standard NLP benchmarks, covering 16 NLP datasets across 70 typologically diverse languages. We compare the performance of generative LLMs including Chat-GPT and GPT-4 to State of the Art (SOTA) non-autoregressive models on these tasks to determine how well generative models perform compared to the previous generation of LLMs. We present a thorough analysis of the performance of models across languages and tasks and discuss challenges in improving the performance of generative LLMs on low-resource languages. We create a framework for evaluating generative LLMs in the multilingual setting and provide directions for future progress in the field.", "keywords": "Large Language Models;Multilinguality;Evaluation;Low Resource Languages;Benchmarking", "primary_area": "", "supplementary_material": "", "author": "Kabir Ahuja;Harshita Diddee;Rishav Hada;Millicent Ochieng;Krithika Ramesh;Prachi Jain;Akshay Nambi;Tanuja Ganu;Sameer Segal;Mohamed Ahmed;Kalika Bali;Sunayana Sitaram", "authorids": "~Kabir_Ahuja1;~Harshita_Diddee1;~Rishav_Hada1;~Millicent_Ochieng1;~Krithika_Ramesh1;~Prachi_Jain3;~Akshay_Nambi1;~Tanuja_Ganu1;~Sameer_Segal1;~Mohamed_Ahmed1;~Kalika_Bali1;~Sunayana_Sitaram1", "gender": "M;F;M;F;F;;;;M;;F;F", "homepage": "https://kabirahuja2431.github.io/;https://harshitadd.netlify.app/;https://sites.google.com/view/rishavhada;https://millicentochieng.github.io/src/pages/index.html;https://kr-ramesh.github.io/;;;https://www.microsoft.com/en-us/research/people/taganu/;https://www.microsoft.com/en-us/research/people/sameersegal/;;https://www.microsoft.com/en-us/research/people/kalikab/;https://www.microsoft.com/en-us/research/people/susitara/", "dblp": "https://dblp.uni-trier.de/pid/265/5632;280/8888;;319/6802;255/2136.html;;;31/11538;;49/4653-1;19/5717;27/7642", "google_scholar": "xQ4sUrYAAAAJ;https://scholar.google.com/citations?mauthors=Harshita+Diddee;ctKGG_YAAAAJ;cjJQnDIAAAAJ;N5Wj_44AAAAJ;;;https://scholar.google.co.in/citations?user=uU9COWkAAAAJ;;_any3jgAAAAJ;HSIGxEgAAAAJ;PUxwYrkAAAAJ", "or_profile": "~Kabir_Ahuja1;~Harshita_Diddee1;~Rishav_Hada1;~Millicent_Ochieng1;~Krithika_Ramesh1;~Prachi_Jain3;~Akshay_Nambi1;~Tanuja_Ganu1;~Sameer_Segal1;~Mohamed_Ahmed1;~Kalika_Bali1;~Sunayana_Sitaram1", "aff": "Microsoft;Microsoft;Microsoft Research India;Microsoft;Johns Hopkins University;;;Microsoft;;Microsoft Research;Microsoft Research Labs;Microsoft", "aff_domain": "microsoft.com;microsoft.com;microsoft.com;microsoft.com;jh.edu;;;microsoft.com;;research.microsoft.com;microsoft.com;microsoft.com", "position": "Research Fellow;Researcher;Researcher;Researcher;PhD student;;;Researcher;;Principal Researcher;Principal Researcher;Researcher", "bibtex": "@inproceedings{\nahuja2023mega,\ntitle={{MEGA}: Multilingual Evaluation of Generative {AI}},\nauthor={Kabir Ahuja and Harshita Diddee and Rishav Hada and Millicent Ochieng and Krithika Ramesh and Prachi Jain and Akshay Nambi and Tanuja Ganu and Sameer Segal and Mohamed Ahmed and Kalika Bali and Sunayana Sitaram},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=jmopGajkFY}\n}", "github": "", "project": "", "reviewers": "ppmZ;svC2;eRUE;1g3u", "site": "https://openreview.net/forum?id=jmopGajkFY", "pdf_size": 0, "rating": "4;4;4;4", "confidence": "4;3;3;3", "excitement": "4;3;3;4", "reproducibility": "4;4;3;3", "correctness": "4;4;3;3", "rating_avg": 4.0, 
"confidence_avg": 3.25, "excitement_avg": 3.5, "reproducibility_avg": 3.5, "correctness_avg": 3.5, "replies_avg": 14, "authors#_avg": 12, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-0852-7371;;0000-0003-4769-7039;;;;;0009-0002-5076-4650;;0000-0001-9275-742X;", "linkedin": "kabirahuja2431/;harshita-diddee/;;millicent-ochieng-b8061a14b/;krithika-ramesh-06a22a16b/;;;;sameersegal/;;kalika-bali-b72bab9/;", "aff_unique_index": "0;0;0;0;1;0;0;0;0", "aff_unique_norm": "Microsoft;Johns Hopkins University", "aff_unique_dep": "Microsoft Corporation;", "aff_unique_url": "https://www.microsoft.com;https://www.jhu.edu", "aff_unique_abbr": "Microsoft;JHU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0;0;0;0;0", "aff_country_unique": "United States;India" }, { "id": "jp80nsryCF", "title": "Transductive Learning for Textual Few-Shot Classification in API-based Embedding Models", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Proprietary and closed APIs are becoming increasingly common to process natural language, and are impacting the practical applications of natural language processing, including few-shot classification. Few-shot classification involves training a model to perform a new classification task with a handful of labeled data. This paper presents three contributions. First, we introduce a scenario where the embedding of a pre-trained model is served through a gated API with compute-cost and data-privacy constraints. Second, we propose a transductive inference, a learning paradigm that has been overlooked by the NLP community. Transductive inference, unlike traditional inductive learning, leverages the statistics of unlabelled data. We also introduce a new parameter-free transductive regularizer based on the Fisher-Rao loss, which can be used on top of the gated API embeddings. This method fully utilizes unlabelled data, does not share any label with the third-party API provider and could serve as a baseline for future research. Third, we propose an improved experimental setting and compile a benchmark of eight datasets involving multiclass classification in four different languages, with up to 151 classes. 
We evaluate our methods using eight backbone models, along with an episodic evaluation over 1,000 episodes, which demonstrate the superiority of transductive inference over the standard inductive setting.", "keywords": "NLP;few shot;text classifiers", "primary_area": "", "supplementary_material": "", "author": "Pierre Colombo;Victor Pellegrain;Malik Boudiaf;Myriam Tami;Victor Storchan;Ismail Ben Ayed;Pablo Piantanida", "authorids": "~Pierre_Colombo2;~Victor_Pellegrain1;~Malik_Boudiaf1;~Myriam_Tami1;~Victor_Storchan1;~Ismail_Ben_Ayed1;~Pablo_Piantanida2", "gender": "M;M;;;;M;M", "homepage": "https://pierrecolombo.github.io/;;;https://myriamtami.github.io/;https://www.linkedin.com/in/storchan/;https://profs.etsmtl.ca/ibenayed/;https://www.pablo-piantanida.org", "dblp": ";;;228/8539;;68/4478;44/1416", "google_scholar": "yPoMt8gAAAAJ;jdarCHMAAAAJ;;kavk5oUAAAAJ;;https://scholar.google.ca/citations?user=29vyUccAAAAJ;https://scholar.google.fr/citations?user=QyBEFv0AAAAJ", "or_profile": "~Pierre_Colombo2;~Victor_Pellegrain1;~Malik_Boudiaf1;~Myriam_Tami1;~Victor_Storchan1;~Ismail_Ben_Ayed1;~Pablo_Piantanida2", "aff": "CentraleSupelec;CentraleSupelec;\u00c9cole de technologie sup\u00e9rieure;CentraleSupelec;;\u00c9cole de technologie sup\u00e9rieure, Universit\u00e9 du Qu\u00e9bec;Mila - Quebec AI Institute ", "aff_domain": "centralesupelec.fr;centralesupelec.fr;etsmtl.ca;centralesupelec.fr;;etsmtl.ca;mila.quebec", "position": "Assistant Professor;PhD student;PhD student;Associate Professor;;Full Professor;Full Professor", "bibtex": "@inproceedings{\ncolombo2023transductive,\ntitle={Transductive Learning for Textual Few-Shot Classification in {API}-based Embedding Models},\nauthor={Pierre Colombo and Victor Pellegrain and Malik Boudiaf and Myriam Tami and Victor Storchan and Ismail Ben Ayed and Pablo Piantanida},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=jp80nsryCF}\n}", "github": "", "project": "", "reviewers": "CwfZ;d1BR;RYzB", "site": "https://openreview.net/forum?id=jp80nsryCF", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;4;4", "excitement": "3;4;4", "reproducibility": "3;4;4", "correctness": "3;4;3", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 10, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-3665-7683;;;;;", "linkedin": ";victor-pellegrain-515557aa/;malik-boudiaf/;;;;pablo-piantanida-60a51bb5/?locale=en_US", "aff_unique_index": "0;0;1;0;2;3", "aff_unique_norm": "CentraleSup\u00e9lec;\u00c9cole de technologie sup\u00e9rieure;Universit\u00e9 du Qu\u00e9bec;Quebec AI Institute", "aff_unique_dep": ";;;AI Institute", "aff_unique_url": "https://www.centralesupelec.fr;https://www.etsmtl.ca;https://www.etsmtl.ca;https://mila.quebec", "aff_unique_abbr": "CS;ETS;ETS;Mila", "aff_campus_unique_index": "1", "aff_campus_unique": ";\u00c9cole de technologie sup\u00e9rieure", "aff_country_unique_index": "0;0;1;0;1;1", "aff_country_unique": "France;Canada" }, { "id": "jph8GlHueb", "title": "MUX-PLMs: Data Multiplexing for High-throughput Language Models", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "The widespread adoption of large language models such as ChatGPT and Bard has led to unprecedented demand for these technologies. 
The burgeoning cost of inference for ever-increasing model sizes coupled with hardware shortages has limited affordable access and poses a pressing need for efficiency approaches geared towards high throughput and performance. Multi-input multi-output (MIMO) algorithms such as data multiplexing, offer a promising solution with a many-fold increase in throughput by performing inference for multiple inputs at the cost of a single input. Yet these approaches are not currently performant enough to be deployed in modern systems. We change that by developing MUX-PLMs, a class of high throughput pre-trained language models (PLMs) trained with data multiplexing, that can be fine-tuned for any downstream task to yield high-throughput high-performance. Our novel multiplexing and demultiplexing modules proficiently entangle and disentangle inputs, and enable high-performance high throughput MUX-PLMs that are competitive with vanilla PLMs while achieving 2x/5x inference speedup with only a 1-4 % drop on a broad suite of tasks.", "keywords": "Efficient Inference;Multi-input Multi-output architectures;Data Multiplexing", "primary_area": "", "supplementary_material": "", "author": "Vishvak Murahari;Ameet Deshpande;Carlos E Jimenez;Izhak Shafran;Mingqiu Wang;Yuan Cao;Karthik R Narasimhan", "authorids": "~Vishvak_Murahari1;~Ameet_Deshpande1;~Carlos_E_Jimenez1;~Izhak_Shafran1;~Mingqiu_Wang2;~Yuan_Cao2;~Karthik_R_Narasimhan1", "gender": "M;M;M;;M;M;M", "homepage": "https://vishvakmurahari.com/;https://www.carlosejimenez.com;;;;http://www.karthiknarasimhan.com;https://ameet-1997.github.io", "dblp": "249/5621;153/0588;66/3591;;52/4472-7.html;147/0322;220/4337", "google_scholar": "Y_NYX7MAAAAJ;Ue4wghAAAAAJ;;egUY_UQAAAAJ;Q82vvqcAAAAJ;euc0GX4AAAAJ;332L1coAAAAJ", "or_profile": "~Vishvak_Murahari1;~Carlos_E_Jimenez1;~Izhak_Shafran1;~Mingqiu_Wang2;~Yuan_Cao2;~Karthik_R_Narasimhan1;~Ameet_S_Deshpande1", "aff": "Princeton University;Princeton University;Google;Google;Google DeepMind;Princeton University;Princeton University", "aff_domain": "princeton.edu;princeton.edu;google.com;google.com;google.com;princeton.edu;princeton.edu", "position": "PhD student;PhD student;Research Scientist;Researcher;Research scientist;Assistant Professor;PhD student", "bibtex": "@inproceedings{\nmurahari2023muxplms,\ntitle={{MUX}-{PLM}s: Data Multiplexing for High-throughput Language Models},\nauthor={Vishvak Murahari and Ameet Deshpande and Carlos E Jimenez and Izhak Shafran and Mingqiu Wang and Yuan Cao and Karthik R Narasimhan},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=jph8GlHueb}\n}", "github": "", "project": "", "reviewers": "29YX;R2qS;dyvA", "site": "https://openreview.net/forum?id=jph8GlHueb", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;4;4", "excitement": "4;3;3", "reproducibility": "4;4;3", "correctness": "3;3;3", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.0, "replies_avg": 12, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0001-9370-3909;;;0000-0002-1267-8930;;", "linkedin": ";;;mingqiu-wang-ucirvine;;;", "aff_unique_index": "0;0;1;1;1;0;0", "aff_unique_norm": "Princeton University;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.princeton.edu;https://www.google.com", "aff_unique_abbr": "Princeton;Google", "aff_campus_unique_index": "1;1", 
"aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0;0;1;0;0", "aff_country_unique": "United States;United Kingdom" }, { "id": "jqOIacThP3", "title": "Verb Conjugation in Transformers Is Determined by Linear Encodings of Subject Number", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "Deep architectures such as Transformers are sometimes criticized for having uninterpretable \"black-box\" representations. We use causal intervention analysis to show that, in fact, some linguistic features are represented in a linear, interpretable format. Specifically, we show that BERT's ability to conjugate verbs relies on a linear encoding of subject number that can be manipulated with predictable effects on conjugation accuracy. This encoding is found in the subject position at the first layer and the verb position at the last layer, but distributed across positions at middle layers, particularly when there are multiple cues to subject number.", "keywords": "interpretability;analysis;representations;hidden vectors;syntax;subject-verb agreement;transformers;pre-trained models;language models;bert;causal analysis;causality;causal intervention;inlp", "primary_area": "", "supplementary_material": "", "author": "Sophie Hao;Tal Linzen", "authorids": "~Sophie_Hao1;~Tal_Linzen1", "gender": "M;F", "homepage": "http://tallinzen.net;https://www.notaphonologist.com", "dblp": "169/3438;205/9024", "google_scholar": "5mJDXjoAAAAJ;Qi-umS0AAAAJ", "or_profile": "~Tal_Linzen1;~Yiding_Hao1", "aff": "New York University;New York University", "aff_domain": "nyu.edu;nyu.edu", "position": "Assistant Professor;Postdoc", "bibtex": "@inproceedings{\nhao2023verb,\ntitle={Verb Conjugation in Transformers Is Determined by Linear Encodings of Subject Number},\nauthor={Sophie Hao and Tal Linzen},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=jqOIacThP3}\n}", "github": "", "project": "", "reviewers": "vo72;SDLf;cZfP", "site": "https://openreview.net/forum?id=jqOIacThP3", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;3;3", "excitement": "3;3;3", "reproducibility": "4;3;4", "correctness": "3;4;4", "rating_avg": 3.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";", "linkedin": ";", "aff_unique_index": "0;0", "aff_unique_norm": "New York University", "aff_unique_dep": "", "aff_unique_url": "https://www.nyu.edu", "aff_unique_abbr": "NYU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "jqOymNqzuB", "title": "Ideology Takes Multiple Looks: A High-Quality Dataset for Multifaceted Ideology Detection", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Ideology detection (ID) is important for gaining insights about peoples\u2019 opinions and stances on our world and society, which can find many applications in politics, economics and social sciences. It is not uncommon that a piece of text can contain descriptions of various issues. It is also widely accepted that a person can take different ideological stances in different facets. 
However, existing datasets for the ID task only label a text as ideologically left- or right-leaning as a whole, regardless whether the text containing one or more different issues. Moreover, most prior work annotates texts from data resources with known ideological bias through distant supervision approaches, which may result in many false labels. With some theoretical help from social sciences, this work first designs an ideological schema containing five domains and twelve facets for a new multifaceted ideology detection (MID) task to provide a more complete and delicate description of ideology. We construct a MITweet dataset for the MID task, which contains 12,594 English Twitter posts, each annotated with a Relevance and an Ideology label for all twelve facets. We also design and test a few of strong baselines for the MID task under in-topic and cross-topic settings, which can serve as benchmarks for further research.", "keywords": "Ideology detection;multifaceted ideology schema;dataset;political spectrum", "primary_area": "", "supplementary_material": "", "author": "Songtao Liu;Ziling Luo;Minghua Xu;LiXiao Wei;Ziyao Wei;Han Yu;Wei Xiang;Bang Wang", "authorids": "~Songtao_Liu3;~Ziling_Luo1;~Minghua_Xu1;~LiXiao_Wei1;~Ziyao_Wei1;~Han_Yu12;~Wei_Xiang2;~Bang_Wang1", "gender": ";F;F;F;F;F;M;M", "homepage": "http://minslab.info;http://minslab.info;;http://minslab.info/;;http://minslab.info/;;http://ei.hust.edu.cn/teacher/wangbang/index.htm", "dblp": ";;33/2798;;;;37/1682-5;18/38723", "google_scholar": ";;;;3Bc7Vw8AAAAJ;;YrcnOxYAAAAJ;GPycMSIAAAAJ", "or_profile": "~Songtao_Liu3;~Ziling_Luo1;~Minghua_Xu1;~LiXiao_Wei1;~Ziyao_Wei1;~Han_Yu12;~Wei_Xiang2;~Bang_Wang1", "aff": "Huazhong University of Science and Technology;Huazhong University of Science and Technology;Huazhong University of Science and Technology;Huazhong University of Science and Technology;Huazhong University of Science and Technology;Huazhong University of Science and Technology;Huazhong University of Science and Technology;Huazhong University of Science and Technology", "aff_domain": "hust.edu.cn;hust.edu.cn;hust.edu.cn;hust.edu.cn;hust.edu.cn;hust.edu.cn;hust.edu.cn;hust.edu.cn", "position": "MS student;MS student;Full Professor;MS student;PhD student;MS student;PhD student;Full Professor", "bibtex": "@inproceedings{\nliu2023ideology,\ntitle={Ideology Takes Multiple Looks: A High-Quality Dataset for Multifaceted Ideology Detection},\nauthor={Songtao Liu and Ziling Luo and Minghua Xu and LiXiao Wei and Ziyao Wei and Han Yu and Wei Xiang and Bang Wang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=jqOymNqzuB}\n}", "github": "", "project": "", "reviewers": "upds;bU4Y;4GHc;6B2A", "site": "https://openreview.net/forum?id=jqOymNqzuB", "pdf_size": 0, "rating": "5;5;5;5", "confidence": "5;4;4;3", "excitement": "4;4;4;4", "reproducibility": "5;4;3;4", "correctness": "4;3;3;4", "rating_avg": 5.0, "confidence_avg": 4.0, "excitement_avg": 4.0, "reproducibility_avg": 4.0, "correctness_avg": 3.5, "replies_avg": 12, "authors#_avg": 8, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;;0000-0002-4675-3900;0000-0002-0312-4805", "linkedin": ";;;;;;;", "aff_unique_index": "0;0;0;0;0;0;0;0", "aff_unique_norm": "Huazhong University of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "http://www.hust.edu.cn", "aff_unique_abbr": "HUST", "aff_campus_unique_index": "", "aff_campus_unique": "", 
"aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "jqbhtSDPz7", "title": "The Art of SOCRATIC QUESTIONING: Recursive Thinking with Large Language Models", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Chain-of-Thought (CoT) prompting enables large language models to solve complex reasoning problems by generating intermediate steps. However, confined by its inherent single-pass and sequential generation process, CoT heavily relies on the initial decisions, causing errors in early steps to accumulate and impact the final answers. In contrast, humans adopt recursive thinking when tackling complex reasoning problems, i.e. iteratively breaking the original problem into approachable sub-problems and aggregating their answers to resolve the original one. Inspired by the human cognitive process, we propose SOCRATIC QUESTIONING, a divide-and-conquer style algorithm that mimics the recursive thinking process. Specifically, SOCRATIC QUESTIONING leverages large language models to raise and answer sub-questions until collecting enough information to tackle the original question. \nUnlike CoT, SOCRATIC QUESTIONING explicitly navigates the thinking space, stimulates effective recursive thinking, and is more robust towards errors in the thinking process.\nExtensive experiments on several complex reasoning tasks, including MMLU, MATH, LogiQA, and visual question-answering demonstrate significant performance improvements over the state-of-the-art prompting methods, such as CoT, and Tree-of-Thought. The qualitative analysis clearly shows that the intermediate reasoning steps elicited by SOCRATIC QUESTIONING are similar to humans' recursively thinking process of complex reasoning problems.", "keywords": "Chain of Thought;prompting;large language model;reasoning;multimodal;question answering", "primary_area": "", "supplementary_material": "", "author": "Jingyuan Qi;Zhiyang Xu;Ying Shen;Minqian Liu;Di Jin;Qifan Wang;Lifu Huang", "authorids": "~Jingyuan_Qi1;~Zhiyang_Xu1;~Ying_Shen4;~Minqian_Liu2;~Di_Jin1;~Qifan_Wang2;~Lifu_Huang1", "gender": "M;M;F;M;M;M;M", "homepage": "https://github.com/jingyq1;;https://yingshen-ys.github.io/;https://mqianliu.github.io/;https://jind11.github.io/;https://wqfcr.github.io/;https://wilburone.github.io/", "dblp": "303/4002.html;267/2280;01/8558-6;193/2086;;33/8610;127/0072", "google_scholar": "3r6whm4AAAAJ;11zbVUAAAAAJ;NytpXgwAAAAJ;xCR8nrwAAAAJ;x5QTK9YAAAAJ;LrSyLosAAAAJ;76IEGtYAAAAJ", "or_profile": "~Jingyuan_Qi1;~Zhiyang_Xu1;~Ying_Shen4;~Minqian_Liu2;~Di_Jin1;~Qifan_Wang2;~Lifu_Huang1", "aff": "Virginia Polytechnic Institute and State University;Virginia Polytechnic Institute and State University;Apple;Amazon;Amazon;Meta AI;Virginia Tech", "aff_domain": "vt.edu;vt.edu;apple.com;amazon.com;amazon.com;fb.com;vt.edu", "position": "PhD student;PhD student;Intern;Intern;Researcher;Principal Researcher;Assistant Professor", "bibtex": "@inproceedings{\nqi2023the,\ntitle={The Art of {SOCRATIC} {QUESTIONING}: Recursive Thinking with Large Language Models},\nauthor={Jingyuan Qi and Zhiyang Xu and Ying Shen and Minqian Liu and Di Jin and Qifan Wang and Lifu Huang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=jqbhtSDPz7}\n}", "github": "", "project": "", "reviewers": "riYe;VEuk;VWnD", "site": "https://openreview.net/forum?id=jqbhtSDPz7", "pdf_size": 0, "rating": "5;5;5", "confidence": "3;3;3", "excitement": "4;4;4", "reproducibility": "3;3;3", 
"correctness": "3;3;4", "rating_avg": 5.0, "confidence_avg": 3.0, "excitement_avg": 4.0, "reproducibility_avg": 3.0, "correctness_avg": 3.3333333333333335, "replies_avg": 10, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0009-0005-9847-4507;;;0000-0002-7570-5756;", "linkedin": "jingyuan-qi-014bb91a5/;;ying-shen-ys/;;;;", "aff_unique_index": "0;0;1;2;2;3;0", "aff_unique_norm": "Virginia Tech;Apple;Amazon;Meta", "aff_unique_dep": ";Apple Inc.;Amazon.com, Inc.;Meta AI", "aff_unique_url": "https://www.vt.edu;https://www.apple.com;https://www.amazon.com;https://meta.com", "aff_unique_abbr": "VT;Apple;Amazon;Meta", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "jsmV1WxXyb", "title": "Statistically Profiling Biases in Natural Language Reasoning Datasets and Models", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Recent studies have shown that many natural language understanding\nand reasoning datasets contain statistical cues that can be exploited by NLP models,\nresulting in an overestimation of their capabilities. Existing methods, such\nas \u201chypothesis-only\u201d tests and CheckList, are limited in identifying these cues\nand evaluating model weaknesses. We introduce ICQ (I-See-Cue), a lightweight,\ngeneral statistical profiling framework that automatically identifies potential biases\nin multiple-choice NLU datasets without requiring additional test cases. ICQ\nassesses the extent to which models exploit these biases through black-box testing,\naddressing the limitations of current methods. In this work, we conduct a\ncomprehensive evaluation of statistical biases in 10 popular NLU datasets and 4\nmodels, confirming prior findings, revealing new insights, and offering an online\ndemonstration system to encourage users to assess their own datasets and models.\nFurthermore, we present a case study on investigating ChatGPT\u2019s bias, providing\nvaluable recommendations for practical applications.", "keywords": "nature language processing;model robustness;bias analysis", "primary_area": "", "supplementary_material": "", "author": "Shanshan Huang;Kenny Q. Zhu", "authorids": "~Shanshan_Huang2;~Kenny_Q._Zhu1", "gender": "M;F", "homepage": "http://www.cs.sjtu.edu.cn/~kzhu/;", "dblp": "z/KennyQiliZhu;92/8363-2.html", "google_scholar": "https://scholar.google.com.tw/citations?user=ZIRJ6lIAAAAJ;", "or_profile": "~Kenny_Q._Zhu1;~Flora_Huang1", "aff": "Shanghai Jiaotong University;Shanghai Jiaotong University", "aff_domain": "cs.sjtu.edu.cn;sjtu.edu.cn", "position": "Full Professor;PhD student", "bibtex": "@inproceedings{\nhuang2023statistically,\ntitle={Statistically Profiling Biases in Natural Language Reasoning Datasets and Models},\nauthor={Shanshan Huang and Kenny Q. 
Zhu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=jsmV1WxXyb}\n}", "github": "", "project": "", "reviewers": "eyRB;o4hf;cXYc", "site": "https://openreview.net/forum?id=jsmV1WxXyb", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;3", "excitement": "3;3;4", "reproducibility": "1;3;5", "correctness": "3;3;4", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";", "linkedin": ";", "aff_unique_index": "0;0", "aff_unique_norm": "Shanghai Jiao Tong University", "aff_unique_dep": "", "aff_unique_url": "https://www.sjtu.edu.cn", "aff_unique_abbr": "SJTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "id": "juUEOaH7bK", "title": "ULF: Unsupervised Labeling Function Correction using Cross-Validation for Weak Supervision", "track": "main", "status": "Short Main", "tldr": "", "abstract": "A cost-effective alternative to manual data labeling is weak supervision (WS), where data samples are automatically annotated using a predefined set of labeling functions (LFs), rule-based mechanisms that generate artificial labels for the associated classes. In this work, we investigate noise reduction techniques for WS based on the principle of k-fold cross-validation. We introduce a new algorithm ULF for Unsupervised Labeling Function correction, which denoises WS data by leveraging models trained on all but some LFs to identify and correct biases specific to the held-out LFs. Specifically, ULF refines the allocation of LFs to classes by re-estimating this assignment on highly reliable cross-validated samples. 
Evaluation on multiple datasets confirms ULF\u2019s effectiveness in enhancing WS learning without the need for manual labeling.", "keywords": "weak supervision;cross-validation;denoising methods", "primary_area": "", "supplementary_material": "", "author": "Anastasiia Sedova;Benjamin Roth", "authorids": "~Anastasiia_Sedova1;~Benjamin_Roth2", "gender": "F;", "homepage": "https://anasedova.github.io;https://www.benjaminroth.net", "dblp": "287/2141;63/8171-1", "google_scholar": "x_qC4nQAAAAJ;", "or_profile": "~Anastasiia_Sedova1;~Benjamin_Roth2", "aff": "University of Vienna;Universit\u00e4t Vienna", "aff_domain": "univie.ac.at;univie.ac.at", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\nsedova2023ulf,\ntitle={{ULF}: Unsupervised Labeling Function Correction using Cross-Validation for Weak Supervision},\nauthor={Anastasiia Sedova and Benjamin Roth},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=juUEOaH7bK}\n}", "github": "", "project": "", "reviewers": "PGPZ;jCVR;D61f", "site": "https://openreview.net/forum?id=juUEOaH7bK", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;3;2", "excitement": "4;4;2", "reproducibility": "4;3;5", "correctness": "3;3;3", "rating_avg": 4.0, "confidence_avg": 2.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.0, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";", "linkedin": "anastasiia-sedova-82340118b/;", "aff_unique_index": "0;0", "aff_unique_norm": "University of Vienna", "aff_unique_dep": "", "aff_unique_url": "https://univie.ac.at", "aff_unique_abbr": "UV", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Austria" }, { "id": "jvNVmkGxiU", "title": "Human Learning by Model Feedback: The Dynamics of Iterative Prompting with Midjourney", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Generating images with a Text-to-Image model often requires multiple trials, where human users iteratively update their prompt based on feedback, namely the output image. Taking inspiration from cognitive work on reference games and dialogue alignment, this paper analyzes the dynamics of the user prompts along such iterations. We compile a dataset of iterative interactions of human users with Midjourney. Our analysis then reveals that prompts predictably converge toward specific traits along these iterations.\nWe further study whether this convergence is due to human users, realizing they missed important details, or due to adaptation to the model's ``preferences'', producing better images for a specific language style. We show initial evidence that both possibilities are at play.\nThe possibility that users adapt to the model's preference raises concerns about reusing user data for further training. 
The prompts may be biased towards the preferences of a specific model, rather than align with human intentions and natural manner of expression.", "keywords": "Interaction;Text-to-Image;alignment;Cognitive Science", "primary_area": "", "supplementary_material": "", "author": "Shachar Don-Yehiya;Leshem Choshen;Omri Abend", "authorids": "~Shachar_Don-Yehiya1;~Leshem_Choshen1;~Omri_Abend1", "gender": ";Not Specified;M", "homepage": ";https://ktilana.wixsite.com/leshem-choshen;http://www.cs.huji.ac.il/~oabend/", "dblp": ";218/5237;30/8159", "google_scholar": ";https://scholar.google.com/citations?hl=en;https://scholar.google.com.tw/citations?user=BD_hRzYAAAAJ", "or_profile": "~Shachar_Don-Yehiya1;~Leshem_Choshen1;~Omri_Abend1", "aff": ";International Business Machines;Hebrew University of Jerusalem", "aff_domain": ";ibm.com;huji.ac.il", "position": ";Researcher;Associate Professor", "bibtex": "@inproceedings{\ndon-yehiya2023human,\ntitle={Human Learning by Model Feedback: The Dynamics of Iterative Prompting with Midjourney},\nauthor={Shachar Don-Yehiya and Leshem Choshen and Omri Abend},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=jvNVmkGxiU}\n}", "github": "", "project": "", "reviewers": "jjW3;4xF9;qxK5", "site": "https://openreview.net/forum?id=jvNVmkGxiU", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;3;4", "excitement": "4;4;4", "reproducibility": "2;5;3", "correctness": "4;4;4", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 4.0, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-0085-6496;", "linkedin": ";leshemchoshen/;", "aff_unique_index": "0;1", "aff_unique_norm": "International Business Machines Corporation;Hebrew University of Jerusalem", "aff_unique_dep": ";", "aff_unique_url": "https://www.ibm.com;https://www.huji.ac.il", "aff_unique_abbr": "IBM;HUJI", "aff_campus_unique_index": "1", "aff_campus_unique": ";Jerusalem", "aff_country_unique_index": "0;1", "aff_country_unique": "United States;Israel" }, { "id": "jvTV8vSa3X", "title": "Text-guided 3D Human Generation from 2D Collections", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "3D human modeling has been widely used for engaging interaction in gaming, film, and animation. The customization of these characters is crucial for creativity and scalability, which highlights the importance of controllability. In this work, we introduce Text-guided 3D Human Generation (T3H), where a model is to generate a 3D human, guided by the fashion description. There are two goals: 1) the 3D human should render articulately, and 2) its outfit is controlled by the given text. To address this T3H task, we propose Compositional Cross-modal Human (CCH). CCH adopts cross-modal attention to fuse compositional human rendering with the extracted fashion semantics. Each human body part perceives relevant textual guidance as its visual patterns. We incorporate the human prior and semantic discrimination to enhance 3D geometry transformation and fine-grained consistency, enabling it to learn from 2D collections for data efficiency. We conduct evaluations on DeepFashion and SHHQ with diverse fashion attributes covering the shape, fabric, and color of upper and lower clothing. 
Extensive experiments demonstrate that CCH achieves superior results for T3H with high efficiency.", "keywords": "Text-guided Visual Generation;3D Human Generation", "primary_area": "", "supplementary_material": "", "author": "Tsu-Jui Fu;Wenhan Xiong;Yixin Nie;Jingyu Liu;Barlas Oguz;William Yang Wang", "authorids": "~Tsu-Jui_Fu2;~Wenhan_Xiong1;~Yixin_Nie2;~Jingyu_Liu6;~Barlas_Oguz1;~William_Yang_Wang2", "gender": "M;M;M;M;;M", "homepage": "https://tsujuifu.github.io;https://xwhan.github.io;https://easonnie.github.io;https://jingyu6.github.io;;https://www.cs.ucsb.edu/~william/", "dblp": "218/5366.html;203/8542;205/2725;;https://dblp.org/pers/hd/o/Oguz:Barlas;08/9282", "google_scholar": "https://scholar.google.com.tw/citations?user=7QRDcC0AAAAJ;;g5QpITUAAAAJ;jidrykQAAAAJ;iPmTQZMAAAAJ;gf8Ms_8AAAAJ", "or_profile": "~Tsu-Jui_Fu2;~Wenhan_Xiong1;~Yixin_Nie2;~Jingyu_Liu6;~Barlas_Oguz1;~William_Wang1", "aff": "UC Santa Barbara;Meta Facebook;Meta Platforms, Inc.;Meta Facebook;Meta;UC Santa Barbara", "aff_domain": "ucsb.edu;fb.com;meta.com;facebook.com;meta.com;ucsb.edu", "position": "PhD student;Researcher;Researcher;Intern;Research Scientist;Full Professor", "bibtex": "@inproceedings{\nfu2023textguided,\ntitle={Text-guided 3D Human Generation from 2D Collections},\nauthor={Tsu-Jui Fu and Wenhan Xiong and Yixin Nie and Jingyu Liu and Barlas Oguz and William Yang Wang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=jvTV8vSa3X}\n}", "github": "", "project": "", "reviewers": "o3tu;3mSQ;oudz", "site": "https://openreview.net/forum?id=jvTV8vSa3X", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;4;3", "excitement": "4;4;1", "reproducibility": "3;4;3", "correctness": "5;4;1", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.0, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.3333333333333335, "replies_avg": 10, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;", "linkedin": "tsujuifu1996;;;jingyu6/;barlas-o%C4%9Fuz-25465050;", "aff_unique_index": "0;1;1;1;1;0", "aff_unique_norm": "University of California, Santa Barbara;Meta", "aff_unique_dep": ";Meta Platforms, Inc.", "aff_unique_url": "https://www.ucsb.edu;https://meta.com", "aff_unique_abbr": "UCSB;Meta", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Santa Barbara;", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "jw1iZfW5zN", "title": "A Framework for Bidirectional Decoding: Case Study in Morphological Inflection", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Transformer-based encoder-decoder models that generate outputs in a left-to-right fashion have become standard for sequence-to-sequence tasks. In this paper, we propose a framework for decoding that produces sequences from the \"outside-in\": at each step, the model chooses to generate a token on the left, on the right, or join the left and right sequences. We argue that this is more principled than prior bidirectional decoders. Our proposal supports a variety of model architectures and includes several training methods, such as a dynamic programming algorithm that marginalizes out the latent ordering variable. Our model sets state-of-the-art (SOTA) on the 2022 and 2023 shared tasks, beating the next best systems by over 4.7 and 2.7 points in average accuracy respectively. 
The model performs particularly well on long sequences, can implicitly learn the split point of words composed of stem and affix, and performs better relative to the baseline on datasets that have fewer unique lemmas.", "keywords": "morphology;decoding;inflection;transformers", "primary_area": "", "supplementary_material": "", "author": "Marc Canby;Julia Hockenmaier", "authorids": "~Marc_Canby1;~Julia_Hockenmaier1", "gender": "M;F", "homepage": ";https://cs.illinois.edu/directory/profile/juliahmr", "dblp": "230/2916;64/2448", "google_scholar": ";https://scholar.google.com.tw/citations?user=iIiVrrQAAAAJ", "or_profile": "~Marc_Canby1;~Julia_Hockenmaier1", "aff": "University of Illinois Urbana-Champaign;University of Illinois, Urbana Champaign", "aff_domain": "cs.illinois.edu;illinois.edu", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\ncanby2023a,\ntitle={A Framework for Bidirectional Decoding: Case Study in Morphological Inflection},\nauthor={Marc Canby and Julia Hockenmaier},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=jw1iZfW5zN}\n}", "github": "", "project": "", "reviewers": "qVTT;4BJv;HEuK", "site": "https://openreview.net/forum?id=jw1iZfW5zN", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;3;3", "excitement": "4;3;3", "reproducibility": "3;3;4", "correctness": "3;3;4", "rating_avg": 5.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";", "linkedin": "marc-canby-491142113/;", "aff_unique_index": "0;0", "aff_unique_norm": "University of Illinois Urbana-Champaign", "aff_unique_dep": "", "aff_unique_url": "https://illinois.edu", "aff_unique_abbr": "UIUC", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Urbana-Champaign", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "jxgz7FEqWq", "title": "Sparse Low-rank Adaptation of Pre-trained Language Models", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Fine-tuning pre-trained large language models in a parameter-efficient manner is widely studied for its effectiveness and efficiency. \nThe popular method of low-rank adaptation (LoRA) offers a notable approach, hypothesizing that the adaptation process is intrinsically low-dimensional. Although LoRA has demonstrated commendable performance, it is implemented with a fixed and unalterable intrinsic rank that might not always be the ideal choice. Recognizing the need for more flexible adaptation, we extend the methodology of LoRA to an innovative approach we call sparse low-rank adaptation (SoRA) that enables dynamic adjustments to the intrinsic rank during the adaptation process. We achieve this through the incorporation of a gate unit optimized with proximal gradient method in the training stage, controlling the cardinality of rank under the sparsity of the gate. In the subsequent inference stage, we eliminate the parameter blocks corresponding to the zeroed-out ranks, to reduce each SoRA module back to a concise yet rank-optimal LoRA. Our approach strengthens the representation power of LoRA by initializing it with a higher rank, while efficiently taming a temporarily increased number of parameters via updating in a sparse way. 
We further introduce a sparsifying scheduler for SoRA, aiming to examine the impact of the number of non-zero parameters on the model's memorization and generalization. Our experimental results demonstrate that SoRA can outperform other baselines even with 70\\% retained parameters and 70\\% training time.", "keywords": "Parameter-efficient;Sparse Adaptation", "primary_area": "", "supplementary_material": "", "author": "Ning Ding;Xingtai Lv;Qiaosen Wang;Yulin Chen;Bowen Zhou;Zhiyuan Liu;Maosong Sun", "authorids": "~Ning_Ding5;~Xingtai_Lv1;~Qiaosen_Wang1;~Yulin_Chen1;~Bowen_Zhou4;~Zhiyuan_Liu1;~Maosong_Sun1", "gender": "M;;M;F;;M;M", "homepage": "https://www.stingning.cn/;https://github.com/telxt/telxt.github.io;;;;http://nlp.csai.tsinghua.edu.cn/~lzy;https://www.cs.tsinghua.edu.cn/csen/info/1312/4394.htm", "dblp": ";351/0835;;;;53/3245-1;95/3291-1", "google_scholar": "uZXQuYAAAAAJ;Q3a25IEAAAAJ;https://scholar.google.com/citations?view_op=new_profile;tAiXl18AAAAJ;https://scholar.google.com/citations?hl=zh-CN;dT0v5u0AAAAJ;https://scholar.google.com.tw/citations?user=zIgT0HMAAAAJ", "or_profile": "~Ning_Ding5;~Xingtai_Lv1;~Qiaosen_Wang1;~Yulin_Chen1;~Bowen_Zhou4;~Zhiyuan_Liu1;~Maosong_Sun1", "aff": "Tsinghua University;Tsinghua University;University of Chicago;Tsinghua University;JD.com;Tsinghua University;Tsinghua University", "aff_domain": "tsinghua.edu.cn;mails.tsinghua.edu.cn;uchicago.edu;tsinghua.edu.cn;jd.com;tsinghua.edu.cn;tsinghua.edu.cn", "position": "PhD student;Undergrad student;PhD student;MS student;Vice President;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nding2023sparse,\ntitle={Sparse Low-rank Adaptation of Pre-trained Language Models},\nauthor={Ning Ding and Xingtai Lv and Qiaosen Wang and Yulin Chen and Bowen Zhou and Zhiyuan Liu and Maosong Sun},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=jxgz7FEqWq}\n}", "github": "", "project": "", "reviewers": "mFGF;rN3k;kCU2", "site": "https://openreview.net/forum?id=jxgz7FEqWq", "pdf_size": 0, "rating": "5;5;5", "confidence": "5;4;4", "excitement": "4;4;4", "reproducibility": "4;5;4", "correctness": "4;4;4", "rating_avg": 5.0, "confidence_avg": 4.333333333333333, "excitement_avg": 4.0, "reproducibility_avg": 4.333333333333333, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;0000-0002-7709-2543;", "linkedin": ";;;;;;", "aff_unique_index": "0;0;1;0;2;0;0", "aff_unique_norm": "Tsinghua University;University of Chicago;JD.com", "aff_unique_dep": ";;", "aff_unique_url": "https://www.tsinghua.edu.cn;https://www.uchicago.edu;https://www.jd.com", "aff_unique_abbr": "THU;UChicago;JD", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0;0;0", "aff_country_unique": "China;United States" }, { "id": "k2VHhq2LH9", "title": "Reasoning about Ambiguous Definite Descriptions", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "Natural language reasoning plays an increasingly important role in improving language models' ability to solve complex language understanding tasks. An interesting use case for reasoning is the resolution of context-dependent ambiguity. But no resources exist to evaluate how well Large Language Models can use explicit reasoning to resolve ambiguity in language. 
We propose to use ambiguous definite descriptions for this purpose and create and publish the first benchmark dataset consisting of such phrases. Our method includes all information required to resolve the ambiguity in the prompt, which means a model does not require anything but reasoning to do well.\nWe find this to be a challenging task for recent LLMs. Code and data available at: https://github.com/sfschouten/exploiting-ambiguity", "keywords": "Natural language reasoning;definite descriptions;ambiguity;large language models", "primary_area": "", "supplementary_material": "", "author": "Stefan Frederik Schouten;Peter Bloem;Ilia Markov;Piek Vossen", "authorids": "~Stefan_Frederik_Schouten1;~Peter_Bloem1;~Ilia_Markov2;~Piek_Vossen2", "gender": "M;M;M;M", "homepage": "https://sfschouten.github.io;http://peterbloem.nl;https://ilia-markov.github.io/;https://vossen.info/", "dblp": "292/4095;151/0108;146/9620;", "google_scholar": "Hhxbv4kAAAAJ;https://scholar.google.nlcitations/?user=zVntAfQAAAAJ;;JvllTMIAAAAJ", "or_profile": "~Stefan_Frederik_Schouten1;~Peter_Bloem1;~Ilia_Markov2;~Piek_Vossen2", "aff": "Vrije Universiteit Amsterdam;Vrije Universiteit Amsterdam;Vrije Universiteit Amsterdam;Vrije Universiteit Amsterdam", "aff_domain": "vu.nl;vu.nl;vu.nl;vu.nl", "position": "PhD student;Assistant Professor;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nschouten2023reasoning,\ntitle={Reasoning about Ambiguous Definite Descriptions},\nauthor={Stefan Frederik Schouten and Peter Bloem and Ilia Markov and Piek Vossen},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=k2VHhq2LH9}\n}", "github": "", "project": "", "reviewers": "xFyH;VEf9;sY4f", "site": "https://openreview.net/forum?id=k2VHhq2LH9", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;3;3", "excitement": "2;4;4", "reproducibility": "4;5;5", "correctness": "3;4;4", "rating_avg": 3.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.666666666666667, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0001-9839-9985;0000-0002-0189-5817;0000-0001-9533-748X;0000-0002-6238-5941", "linkedin": ";;ilia-markov-9b685680/;", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Vrije Universiteit Amsterdam", "aff_unique_dep": "", "aff_unique_url": "https://www.vu.nl", "aff_unique_abbr": "VU Amsterdam", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Netherlands" }, { "id": "k3i6PKlKY8", "title": "mRedditSum: A Multimodal Abstractive Summarization Dataset of Reddit Threads with Images", "track": "main", "status": "Long Main", "tldr": "", "abstract": "The growing number of multimodal online discussions necessitates automatic summarization to save time and reduce content overload. \nHowever, existing summarization datasets are not suitable for this purpose, as they either do not cover discussions, multiple modalities, or both. To this end, we present mRedditSum, the first multimodal discussion summarization dataset. It consists of 3,033 discussion threads where a post solicits advice regarding an issue described with an image and text, and respective comments express diverse opinions. 
We annotate each thread with a human-written summary that captures both the essential information from the text, as well as the details available only in the image. Experiments show that popular summarization models---GPT-3.5, BART, and T5---consistently improve in performance when visual information is incorporated. We also introduce a novel method, cluster-based multi-stage summarization, that outperforms existing baselines and serves as a competitive baseline for future work.", "keywords": "Multimodal Abstractive Summarization Dataset", "primary_area": "", "supplementary_material": "", "author": "Keighley Overbay;Jaewoo Ahn;Fatemeh Pesaran zadeh;Joonsuk Park;Gunhee Kim", "authorids": "~Keighley_Overbay1;~Jaewoo_Ahn1;~Fatemeh_Pesaran_zadeh1;~Joonsuk_Park1;~Gunhee_Kim1", "gender": "F;M;F;M;M", "homepage": "https://vision.snu.ac.kr/people/keighleyoverbay.html;https://ahnjaewoo.github.io/;https://www.linkedin.com/in/fatemeh-pesaranzadeh-4107ab115/;http://www.joonsuk.org;http://vision.snu.ac.kr/gunhee/", "dblp": ";17/6374;;50/9717;45/115", "google_scholar": ";tQiOa1cAAAAJ;;3SPMM3oAAAAJ;https://scholar.google.co.kr/citations?user=CiSdOV0AAAAJ", "or_profile": "~Keighley_Overbay1;~Jaewoo_Ahn1;~Fatemeh_Pesaran_zadeh1;~Joonsuk_Park1;~Gunhee_Kim1", "aff": "Seoul National University;Seoul National University;Seoul National University, Seoul National University;University of Richmond;Seoul National University", "aff_domain": "snu.ac.kr;snu.ac.kr;cse.snu.ac.kr;richmond.edu;snu.ac.kr", "position": "MS student;PhD student;MS student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\noverbay2023mredditsum,\ntitle={mRedditSum: A Multimodal Abstractive Summarization Dataset of Reddit Threads with Images},\nauthor={Keighley Overbay and Jaewoo Ahn and Fatemeh Pesaran zadeh and Joonsuk Park and Gunhee Kim},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=k3i6PKlKY8}\n}", "github": "", "project": "", "reviewers": "BJQs;2UFg;anng", "site": "https://openreview.net/forum?id=k3i6PKlKY8", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;4;3", "excitement": "3;3;3", "reproducibility": "4;3;3", "correctness": "4;4;4", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.0, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0000-0002-1182-4836;0000-0002-9543-7453", "linkedin": ";jaewoo-ahn/;;;", "aff_unique_index": "0;0;0;1;0", "aff_unique_norm": "Seoul National University;University of Richmond", "aff_unique_dep": ";", "aff_unique_url": "https://www.snu.ac.kr;https://www.richmond.edu", "aff_unique_abbr": "SNU;UR", "aff_campus_unique_index": "1", "aff_campus_unique": ";Seoul", "aff_country_unique_index": "0;0;0;1;0", "aff_country_unique": "South Korea;United States" }, { "id": "k4QqDDoRyI", "title": "ATFormer: A Learned Performance Model with Transfer Learning Across Devices for Deep Learning Tensor Programs", "track": "main", "status": "Long Main", "tldr": "", "abstract": "The training and inference efficiency of ever-larger deep neural networks highly rely on the performance of tensor operators on specific hardware platforms. Therefore, a compilation-based optimization flow with automatic tensor generation and parameter tuning is necessary for efficient model deployment. 
While compilation-based methods with performance models can provide dynamic and suitable code optimization, they suffer from a large design space exploration with rough measurement accuracy and poor transferability among different hardware platforms. This paper presents ATFormer, a simple yet efficient design with attention-inspired modules to accurately predict the performance of optimized operators by capturing global and long-range dependencies within a complete scheduling space. Compared with state-of-the-arts, ATFormer can predict the optimal implementation of tensor operators to reduce inference time with minimal effort on modern DNN benchmarks. Furthermore, ATFormer with pre-trained parameters can quickly adapt to different workloads and hardware via transfer learning.", "keywords": "Tensor Program; Performance Model; Efficient Transfer Learning; NLP Application; Model Deployment", "primary_area": "", "supplementary_material": "", "author": "Yang Bai;Wenqian Zhao;Shuo Yin;Zixiao Wang;Bei Yu", "authorids": "~Yang_Bai6;~Wenqian_Zhao2;~Shuo_Yin2;~Zixiao_Wang1;~Bei_Yu2", "gender": "M;M;M;M;M", "homepage": "https://ybai62868.github.io/;;https://sawydust1228.github.io/;https://shiningsord.github.io/;http://www.cse.cuhk.edu.hk/~byu/index.html", "dblp": ";;;;28/4556-1.html", "google_scholar": ";;;https://scholar.google.com/citations?view_op=list_works;tGneTm4AAAAJ", "or_profile": "~Yang_Bai6;~Wenqian_Zhao2;~Shuo_Yin2;~Zixiao_Wang1;~Bei_Yu2", "aff": "The Chinese University of Hong Kong;Department of Computer Science and Engineering, The Chinese University of Hong Kong;Department of Computer Science and Engineering, The Chinese University of Hong Kong;Department of Computer Science and Engineering, The Chinese University of Hong Kong;Department of Computer Science and Engineering, The Chinese University of Hong Kong", "aff_domain": "cuhk.edu.hk;cse.cuhk.edu.hk;cse.cuhk.edu.hk;cse.cuhk.edu.hk;cse.cuhk.edu.hk", "position": "PhD student;PhD student;PhD student;PhD student;Associate Professor", "bibtex": "@inproceedings{\nbai2023atformer,\ntitle={{ATF}ormer: A Learned Performance Model with Transfer Learning Across Devices for Deep Learning Tensor Programs},\nauthor={Yang Bai and Wenqian Zhao and Shuo Yin and Zixiao Wang and Bei Yu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=k4QqDDoRyI}\n}", "github": "", "project": "", "reviewers": "fHck;y1Wr;astq", "site": "https://openreview.net/forum?id=k4QqDDoRyI", "pdf_size": 0, "rating": "5;5;5", "confidence": "2;2;3", "excitement": "4;4;4", "reproducibility": "4;3;3", "correctness": "3;4;4", "rating_avg": 5.0, "confidence_avg": 2.3333333333333335, "excitement_avg": 4.0, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0003-4927-0194;0009-0000-8179-0996;0000-0001-6406-4810", "linkedin": ";https://www.linkedin.com/public-profile/settings?trk=d_flagship3_profile_self_view_public_profile&lipi=urn%3Ali%3Apage%3Ad_flagship3_profile_self_edit_top_card%3BAR9ogijPQiGqjcriNqmP4Q%3D%3D;;;yubei/", "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Chinese University of Hong Kong", "aff_unique_dep": "", "aff_unique_url": "https://www.cuhk.edu.hk", "aff_unique_abbr": "CUHK", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, 
{ "id": "k8rxolXsPE", "title": "SuperDialseg: A Large-scale Dataset for Supervised Dialogue Segmentation", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Dialogue segmentation is a crucial task for dialogue systems allowing a better understanding of conversational texts. Despite recent progress in unsupervised dialogue segmentation methods, their performances are limited by the lack of explicit supervised signals for training. Furthermore, the precise definition of segmentation points in conversations still remains as a challenging problem, increasing the difficulty of collecting manual annotations. In this paper, we provide a feasible definition of dialogue segmentation points with the help of document-grounded dialogues and release a large-scale supervised dataset called SuperDialseg, containing 9,478 dialogues based on two prevalent document-grounded dialogue corpora, and also inherit their useful dialogue-related annotations. Moreover, we provide a benchmark including 18 models across five categories for the dialogue segmentation task with several proper evaluation metrics. Empirical studies show that supervised learning is extremely effective in in-domain datasets and models trained on SuperDialseg can achieve good generalization ability on out-of-domain data. Additionally, we also conducted human verification on the test set and the Kappa score confirmed the quality of our automatically constructed dataset. We believe our work is an important step forward in the field of dialogue segmentation.", "keywords": "Dialogue Segmentation;Dataset;Benchmark", "primary_area": "", "supplementary_material": "", "author": "Junfeng Jiang;ChengZhang Dong;Sadao Kurohashi;Akiko Aizawa", "authorids": "~Junfeng_Jiang2;~ChengZhang_Dong1;~Sadao_Kurohashi1;~Akiko_Aizawa1", "gender": "M;M;M;F", "homepage": "https://coldog2333.github.io;;https://nlp.ist.i.kyoto-u.ac.jp/member/kuro/index.html;https://www.nii.ac.jp/en/faculty/digital_content/aizawa_akiko/", "dblp": ";;42/2149;63/1600", "google_scholar": "gvKNfGEAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.co.jp/citations?user=gpKS5P0AAAAJ;JQy5hPoAAAAJ", "or_profile": "~Junfeng_Jiang2;~ChengZhang_Dong1;~Sadao_Kurohashi1;~Akiko_Aizawa1", "aff": "The University of Tokyo;Kyoto University, Kyoto University;Kyoto University;National Institute of Informatics", "aff_domain": "u-tokyo.ac.jp;i.kyoto-u.ac.jp;kyoto-u.ac.jp;nii.ac.jp", "position": "PhD student;MS student;Full Professor;Full Professor", "bibtex": "@inproceedings{\njiang2023superdialseg,\ntitle={SuperDialseg: A Large-scale Dataset for Supervised Dialogue Segmentation},\nauthor={Junfeng Jiang and ChengZhang Dong and Sadao Kurohashi and Akiko Aizawa},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=k8rxolXsPE}\n}", "github": "", "project": "", "reviewers": "mUQu;EsRS;pN2F", "site": "https://openreview.net/forum?id=k8rxolXsPE", "pdf_size": 0, "rating": "5;5;5", "confidence": "3;3;4", "excitement": "3;4;4", "reproducibility": "3;4;4", "correctness": "3;4;4", "rating_avg": 5.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-3680-2465;;0000-0001-5398-8399;", "linkedin": "%E4%BF%8A%E9%94%8B-%E6%B1%9F-72aa64152/?locale=en_US;;;", "aff_unique_index": 
"0;1;1;2", "aff_unique_norm": "University of Tokyo;Kyoto University;National Institute of Informatics", "aff_unique_dep": ";;", "aff_unique_url": "https://www.u-tokyo.ac.jp;https://www.kyoto-u.ac.jp;https://www.nii.ac.jp/", "aff_unique_abbr": "UTokyo;Kyoto U;NII", "aff_campus_unique_index": "1", "aff_campus_unique": ";Kyoto", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Japan" }, { "id": "k95cAni5Hk", "title": "Toxicity, Morality, and Speech Act Guided Stance Detection", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "In this work, we focus on the task of determining the public attitude toward various social issues discussed on social media platforms. Platforms such as Twitter, however, are often used to spread misinformation, fake news through polarizing views. Existing literature suggests that higher levels of toxicity prevalent in Twitter conversations often spread negativity and delay addressing issues. Further, the embedded moral values and speech acts specifying the intention of the tweet correlate with public opinions expressed on various topics. However, previous works, which mainly focus on stance detection, either ignore the speech act, toxic, and moral features of these tweets that can collectively help capture public opinion or lack an efficient architecture that can detect the attitudes across targets. Therefore, in our work, we focus on the main task of stance detection by exploiting the toxicity, morality, and speech act as auxiliary tasks. We propose a multitasking model TWISTED that initially extracts the valence, arousal, and dominance aspects hidden in the tweets and injects the emotional sense into the embedded text followed by an efficient attention framework to correctly detect the tweet's stance by using the shared features of toxicity, morality, and speech acts present in the tweet. Extensive experiments conducted on 4 benchmark stance detection datasets (SemEval-2016, P-Stance, COVID19-Stance, and ClimateChange) comprising different domains demonstrate the effectiveness and generalizability of our approach.", "keywords": "stance detection;toxicity;morality. 
speech act classification;Twitter", "primary_area": "", "supplementary_material": "", "author": "Apoorva Upadhyaya;Marco Fisichella;Wolfgang Nejdl", "authorids": "~Apoorva_Upadhyaya1;~Marco_Fisichella1;~Wolfgang_Nejdl1", "gender": "F;M;M", "homepage": ";https://l3s.de/~mfisichella/;https://kbs.uni-hannover.de/~nejdl/", "dblp": "https://dblp.org/search?q=apoorva+upadhyaya;40/8342;n/WolfgangNejdl", "google_scholar": "kRwvOfUAAAAJ;-mVY3u0AAAAJ;LC62bdYAAAAJ", "or_profile": "~Apoorva_Upadhyaya1;~Marco_Fisichella1;~Wolfgang_Nejdl1", "aff": "L3S Research Center, Leibniz University Hannover;Universit\u00e4t Hannover;Universit\u00e4t Hannover", "aff_domain": "l3s.de;uni-hannover.de;uni-hannover.de", "position": "Researcher;Principal Researcher;Full Professor", "bibtex": "@inproceedings{\nupadhyaya2023toxicity,\ntitle={Toxicity, Morality, and Speech Act Guided Stance Detection},\nauthor={Apoorva Upadhyaya and Marco Fisichella and Wolfgang Nejdl},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=k95cAni5Hk}\n}", "github": "", "project": "", "reviewers": "ygJA;9nz9;o8Ty", "site": "https://openreview.net/forum?id=k95cAni5Hk", "pdf_size": 0, "rating": "2;2;2", "confidence": "4;3;5", "excitement": "2;4;3", "reproducibility": "3;5;3", "correctness": "2;4;3", "rating_avg": 2.0, "confidence_avg": 4.0, "excitement_avg": 3.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.0, "replies_avg": 10, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-6894-1101;0000-0003-3374-2193", "linkedin": ";marco-fisichella;wolfgangnejdl/", "aff_unique_index": "0;1;1", "aff_unique_norm": "Leibniz University Hannover;University of Hanover", "aff_unique_dep": "L3S Research Center;", "aff_unique_url": "https://www.uni-hannover.de;https://www.uni-hannover.de", "aff_unique_abbr": "LUH;Uni Hanover", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Germany" }, { "id": "kCzhhVMo4r", "title": "Length-Adaptive Distillation: Customizing Small Language Model for Dynamic Token Pruning", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Pre-trained language models greatly improve the performance of various tasks but at a cost of high computation overhead. To facilitate practical applications, there are mainly two lines of research to accelerate model inference: model compression and dynamic computation (e.g., dynamic token pruning). Existing works either adopt these methods individually or simply apply dynamic computation approaches upon a compressed small language model. We argue that they are sub-optimal since the two approaches are separately designed so the compressed model may not be tailored for dynamic computation. To tackle this problem and make compressed small language models faster, we propose Length-Adaptive Distillation, a two-stage knowledge distillation framework that aims to produce a customized small language model for dynamic token pruning. In the general distillation stage, we enforce the student to mimic and reconstruct the teacher's output based on the dynamically pruned representations. Then in the task-specific distillation stage, the student is further accustomed to token pruning while absorbing the task-specific knowledge. 
Experimental results on GLUE benchmark demonstrate that our method can make the small language model more customized for dynamic token pruning and achieve better speed-performance trade-off.", "keywords": "knowledge distillation;language model compression;token pruning", "primary_area": "", "supplementary_material": "", "author": "Chang Liu;Chongyang Tao;Jianxin Liang;Jiazhan Feng;Tao Shen;Quzhe Huang;Dongyan Zhao", "authorids": "~Chang_Liu18;~Chongyang_Tao1;~Jianxin_Liang1;~Jiazhan_Feng1;~Tao_Shen1;~Quzhe_Huang1;~Dongyan_Zhao1", "gender": "M;M;;M;M;;M", "homepage": "https://github.com/LiuChang97;;;https://sites.google.com/view/jzfeng/home/;;https://andrewzhe.github.io/;https://www.wict.pku.edu.cn/zhaodongyan/en/", "dblp": ";;;242/9191;95/4097-1;278/1884;63/1870", "google_scholar": ";x_cOKuwAAAAJ;;uYHmew8AAAAJ;https://scholar.google.com.au/citations?user=SegyX9AAAAAJ;https://scholar.google.com/citations?hl=en;lhR8-68AAAAJ", "or_profile": "~Chang_Liu18;~Chongyang_Tao1;~Jianxin_Liang1;~Jiazhan_Feng1;~Tao_Shen1;~Quzhe_Huang1;~Dongyan_Zhao2", "aff": "Peking University;Microsoft;Peking University;Peking University;University of Technology Sydney;Peking University;Peking University", "aff_domain": "pku.edu.cn;microsoft.com;pku.edu.cn;pku.edu.cn;uts.edu.au;pku.edu.cn;pku.edu.cn", "position": "PhD student;Researcher;PhD student;PhD student;Postdoc;PhD student;Full Professor", "bibtex": "@inproceedings{\nliu2023lengthadaptive,\ntitle={Length-Adaptive Distillation: Customizing Small Language Model for Dynamic Token Pruning},\nauthor={Chang Liu and Chongyang Tao and Jianxin Liang and Jiazhan Feng and Tao Shen and Quzhe Huang and Dongyan Zhao},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=kCzhhVMo4r}\n}", "github": "", "project": "", "reviewers": "YrX6;uJ9f;42mY", "site": "https://openreview.net/forum?id=kCzhhVMo4r", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;4;3", "excitement": "3;3;3", "reproducibility": "2;3;4", "correctness": "3;3;3", "rating_avg": 3.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.0, "reproducibility_avg": 3.0, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0000-0002-5832-6199;;;", "linkedin": ";;%E5%BB%BA%E6%96%B0-%E6%A2%81-590aba19b/;;;;", "aff_unique_index": "0;1;0;0;2;0;0", "aff_unique_norm": "Peking University;Microsoft;University of Technology Sydney", "aff_unique_dep": ";Microsoft Corporation;", "aff_unique_url": "http://www.pku.edu.cn;https://www.microsoft.com;https://www.uts.edu.au", "aff_unique_abbr": "Peking U;Microsoft;UTS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;2;0;0", "aff_country_unique": "China;United States;Australia" }, { "id": "kEcDQzX3cI", "title": "Vicinal Risk Minimization for Few-Shot Cross-lingual Transfer in Abusive Language Detection", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Cross-lingual transfer learning from high-resource to medium and low-resource languages has shown encouraging results. However, the scarcity of resources in target languages remains a challenge. In this work, we resort to data augmentation and continual pre-training for domain adaptation to improve cross-lingual abusive language detection. 
For data augmentation, we analyze two existing techniques based on vicinal risk minimization and propose MIXAG, a novel data augmentation method which interpolates pairs of instances based on the angle of their representations. Our experiments involve seven languages typologically distinct from English and three different domains. The results reveal that the data augmentation strategies can enhance few-shot cross-lingual abusive language detection. Specifically, we observe that consistently in all target languages, MIXAG improves significantly in multidomain and multilingual environments. Finally, we show through an error analysis how the domain adaptation can favour the class of abusive texts (reducing false negatives), but at the same time, declines the precision of the abusive language detection model.", "keywords": "Vicinal Risk Minimization;Few-Shot Cross-lingual Transfer;Abusive Language Detection", "primary_area": "", "supplementary_material": "", "author": "Gretel Liz De la Pe\u00f1a Sarrac\u00e9n;Paolo Rosso;Robert Litschko;Goran Glava\u0161;Simone Paolo Ponzetto", "authorids": "~Gretel_Liz_De_la_Pe\u00f1a_Sarrac\u00e9n1;~Paolo_Rosso1;~Robert_Litschko1;~Goran_Glava\u01611;~Simone_Paolo_Ponzetto1", "gender": "F;M;;M;M", "homepage": ";http://personales.upv.es/prosso/;https://rlitschk.github.io/;https://sites.google.com/view/goranglavas;http://dws.informatik.uni-mannheim.de/ponzetto", "dblp": "206/1104.html;05/3463;220/3207;50/11059;04/2532", "google_scholar": "https://scholar.google.com/citations?hl=en;https://scholar.google.es/citations?user=HFKXPH8AAAAJ;https://scholar.google.de/citations?user=LFKL_o8AAAAJ;Ym0myOwAAAAJ;VmIFG0EAAAAJ", "or_profile": "~Gretel_Liz_De_la_Pe\u00f1a_Sarrac\u00e9n1;~Paolo_Rosso1;~Robert_Litschko1;~Goran_Glava\u01611;~Simone_Paolo_Ponzetto1", "aff": "Universidad Polit\u00e9cnica de Valencia;Universitat Polit\u00e8cnica de Val\u00e8ncia;Ludwig-Maximilians-Universit\u00e4t M\u00fcnchen;Julius-Maximilians-Universit\u00e4t W\u00fcrzburg;Universit\u00e4t Mannheim", "aff_domain": "upv.es;upv.es;lmu.de;uni-wuerzburg.de;uni-mannheim.de", "position": "Researcher;Full Professor;Postdoc;Full Professor;Full Professor", "bibtex": "@inproceedings{\nsarrac{\\'e}n2023vicinal,\ntitle={Vicinal Risk Minimization for Few-Shot Cross-lingual Transfer in Abusive Language Detection},\nauthor={Gretel Liz De la Pe{\\~n}a Sarrac{\\'e}n and Paolo Rosso and Robert Litschko and Goran Glava{\\v{s}} and Simone Paolo Ponzetto},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=kEcDQzX3cI}\n}", "github": "", "project": "", "reviewers": "mJE9;zEh7;bbqD", "site": "https://openreview.net/forum?id=kEcDQzX3cI", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;4;4", "excitement": "4;3;3", "reproducibility": "3;2;3", "correctness": "3;3;3", "rating_avg": 5.0, "confidence_avg": 4.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 2.6666666666666665, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0003-4448-2323;0000-0002-8922-1242;0000-0003-4637-983X;;0000-0001-7484-2049", "linkedin": "gretel-liz-de-la-pe%C3%B1a-a92687a4/;paolo-rosso-753b1016/?originalSubdomain=es;robertlitschko/;goran-glava\u0161-8484b420;", "aff_unique_index": "0;1;2;3;4", "aff_unique_norm": "Universidad Polit\u00e9cnica de Valencia;Universitat Polit\u00e8cnica de Val\u00e8ncia;Ludwig-Maximilians-Universit\u00e4t 
M\u00fcnchen;Julius-Maximilians-Universit\u00e4t W\u00fcrzburg;University of Mannheim", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.upv.es;https://www.upv.es;https://www.lmu.de;https://www.uni-wuerzburg.de;https://www.uni-mannheim.de", "aff_unique_abbr": "UPV;UPV;LMU;JMU;UM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;1;1", "aff_country_unique": "Spain;Germany" }, { "id": "kEflZNzau4", "title": "Language Model is Suitable for Correction of Handwritten Mathematical Expressions Recognition", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Handwritten mathematical expression recognition (HMER) is a multidisciplinary task that generates LaTeX sequences from images. Existing approaches, employing tree decoders within attention-based encoder-decoder architectures, aim to capture the hierarchical tree structure, but are limited by CFGs and pre-generated triplet data, hindering expandability and neglecting visual ambiguity challenges. This article investigates the distinctive language characteristics of LaTeX mathematical expressions, revealing two key observations: 1) the presence of explicit structural symbols, and 2) the treatment of symbols, particularly letters, as minimal units with context-dependent semantics, representing variables or constants. Rooted in these properties, we propose that language models have the potential to synchronously and complementarily provide both structural and semantic information, making them suitable for correction of HMER. To validate our proposition, we propose an architecture called Recognize and Language Fusion Network (RLFN), which integrates recognition and language features to output corrected sequences while jointly optimizing with a string decoder recognition model. 
Experiments show that RLFN outperforms existing state-of-the-art methods on the CROHME 2014/2016/2019 datasets.", "keywords": "LaTeX mathematical expression;handwritten mathematical expression recognition;language model;LaTeX language property", "primary_area": "", "supplementary_material": "", "author": "Zui Chen;Jiaqi Han;Chaofan Yang;Yi Zhou", "authorids": "~Zui_Chen2;~Jiaqi_Han4;~Chaofan_Yang1;~Yi_Zhou23", "gender": "M;M;M;", "homepage": ";https://github.com/HerzPilatus;https://github.com/Catycf;https://eeis.ustc.edu.cn/_t780/2022/0607/c2648a557568/page.htm", "dblp": ";;;", "google_scholar": "7GvOY0gAAAAJ;;;", "or_profile": "~Zui_Chen2;~Jiaqi_Han4;~Chaofan_Yang1;~Yi_Zhou23", "aff": "ShanghaiTech University;ShanghaiTech University;ShanghaiTech University;University of Science and Technology of China", "aff_domain": "shanghaitech.edu.cn;shanghaitech.edu.cn;shanghaitech.edu.cn;ustc.edu.cn", "position": "MS student;MS student;Undergrad student;Full Professor", "bibtex": "@inproceedings{\nchen2023language,\ntitle={Language Model is Suitable for Correction of Handwritten Mathematical Expressions Recognition},\nauthor={Zui Chen and Jiaqi Han and Chaofan Yang and Yi Zhou},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=kEflZNzau4}\n}", "github": "", "project": "", "reviewers": "xgEb;Gsfw;W2zF;jmUa", "site": "https://openreview.net/forum?id=kEflZNzau4", "pdf_size": 0, "rating": "4;4;4;4", "confidence": "3;3;3;3", "excitement": "4;4;4;3", "reproducibility": "4;5;3;4", "correctness": "3;4;4;3", "rating_avg": 4.0, "confidence_avg": 3.0, "excitement_avg": 3.75, "reproducibility_avg": 4.0, "correctness_avg": 3.5, "replies_avg": 14, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;", "linkedin": ";;;", "aff_unique_index": "0;0;0;1", "aff_unique_norm": "ShanghaiTech University;University of Science and Technology of China", "aff_unique_dep": ";", "aff_unique_url": "https://www.shanghaitech.edu.cn;http://www.ustc.edu.cn", "aff_unique_abbr": "ShanghaiTech;USTC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "kEhBOEsXXx", "title": "HPE: Answering Complex Questions over Text by Hybrid Question Parsing and Execution", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "The dominant paradigm of textual question answering systems is based on end-to-end neural networks, which excels at answering natural language questions but falls short on complex ones. This stands in contrast to the broad adaptation of semantic parsing approaches over structured data sources (e.g., relational database, knowledge graphs), that convert natural language questions to logical forms and execute them with query engines. Towards combining the strengths of neural and symbolic methods, we propose a framework of question parsing and execution on textual QA. It comprises two central pillars: (1) We parse the question of varying complexity into an intermediate representation, named H-expression, which is composed of simple questions as the primitives and symbolic operations representing the relationships among them; (2) To execute the resulting H-expressions, we design a hybrid executor, which integrates the deterministic rules to translate the symbolic operations with a drop-in neural reader network to answer each decomposed simple question. 
Hence, the proposed framework can be viewed as a top-down question parsing followed by a bottom-up answer backtracking. The resulting H-expressions closely guide the execution process, offering higher precision besides better interpretability while still preserving the advantages of the neural readers for resolving its primitive elements. Our extensive experiments on MuSiQue, 2WikiQA, HotpotQA, and NQ show that the proposed parsing and hybrid execution framework outperforms existing approaches in supervised, few-shot, and zero-shot settings, while also effectively exposing its underlying reasoning process.", "keywords": "Multi-hop Question answering; Neuro-Symbolic method; Tree Structure Reasoning; Semantic Parsing", "primary_area": "", "supplementary_material": "", "author": "Ye Liu;Semih Yavuz;Rui Meng;Dragomir Radev;Caiming Xiong;Shafiq Joty;Yingbo Zhou", "authorids": "~Ye_Liu4;~Semih_Yavuz1;~Rui_Meng1;~Dragomir_Radev2;~Caiming_Xiong1;~Shafiq_Joty1;~Yingbo_Zhou1", "gender": "F;M;M;M;;;M", "homepage": ";http://memray.me;http://cmxiong.com/;https://raihanjoty.github.io/;;http://www.cs.yale.edu/~radev;", "dblp": "96/2615-6;;80/7282;62/2078;72/8614;r/DragomirRRadev;", "google_scholar": "QMKD6YMAAAAJ;s6h8L_UAAAAJ;vaSdahkAAAAJ;hR249csAAAAJ;H_6RQ7oAAAAJ;vIqWvgwAAAAJ;krh3p8AAAAAJ", "or_profile": "~Ye_Liu4;~Rui_Meng1;~Caiming_Xiong1;~Shafiq_Joty1;~Yingbo_Zhou1;~Dragomir_Radkov_Radev1;~Semih_Yavuz2", "aff": "SalesForce.com;Salesforce Research;Salesforce Research;SalesForce.com;Salesforce Research;Yale University;SalesForce.com", "aff_domain": "salesforce.com;salesforce.com;salesforce.com;salesforce.com;salesforce.com;yale.edu;salesforce.com", "position": "Researcher;Researcher;Research Scientist;Principal Researcher;Research Scientist;Full Professor;Research Scientist", "bibtex": "@inproceedings{\nliu2023hpe,\ntitle={{HPE}: Answering Complex Questions over Text by Hybrid Question Parsing and Execution},\nauthor={Ye Liu and Semih Yavuz and Rui Meng and Dragomir Radev and Caiming Xiong and Shafiq Joty and Yingbo Zhou},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=kEhBOEsXXx}\n}", "github": "", "project": "", "reviewers": "YSXk;NMo5;CHKJ;5EBV", "site": "https://openreview.net/forum?id=kEhBOEsXXx", "pdf_size": 0, "rating": "3;3;3;3", "confidence": "4;4;3;4", "excitement": "3;3;3;3", "reproducibility": "4;4;2;5", "correctness": "4;4;4;3", "rating_avg": 3.0, "confidence_avg": 3.75, "excitement_avg": 3.0, "reproducibility_avg": 3.75, "correctness_avg": 3.75, "replies_avg": 14, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0001-5583-4924;;;;0000-0002-0213-7487;", "linkedin": ";memray/;caiming-xiong-150a1417;;yingbozhou/;dragomir-radev/;semih-yavuz-4303518b", "aff_unique_index": "0;0;0;0;0;1;0", "aff_unique_norm": "Salesforce;Yale University", "aff_unique_dep": ";", "aff_unique_url": "https://www.salesforce.com;https://www.yale.edu", "aff_unique_abbr": "Salesforce;Yale", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "kEzI6OYXV4", "title": "Are All Steps Equally Important? 
Benchmarking Essentiality Detection in Event Processes", "track": "main", "status": "Short Main", "tldr": "", "abstract": "Natural language often describes events in different granularities, such that more coarse-grained (goal) events can often be decomposed into fine-grained sequences of (step) events. A critical but overlooked challenge in understanding an event process lies in the fact that the step events are not equally important to the central goal. In this paper, we seek to fill this gap by studying how well current models can understand the essentiality of different step events towards a goal event. As discussed by cognitive studies, such an ability enables the machine to mimic human\u2019s commonsense reasoning about preconditions and necessary efforts of daily-life tasks. Our work contributes with a high-quality corpus of (goal, step) pairs from a community guideline website WikiHow, where the steps are manually annotated with their essentiality w.r.t. the goal. The high IAA indicates that humans have a consistent understanding of the events. Despite evaluating various statistical and massive pre-trained NLU models, we observe that existing SOTA models all perform drastically behind humans, indicating the need for future investigation of this crucial yet challenging task.", "keywords": "Event Granularities;Essentiality;Event Processes;Goals;Steps", "primary_area": "", "supplementary_material": "", "author": "Haoyu Wang;Hongming Zhang;Yueguan Wang;Yuqian Deng;Muhao Chen;Dan Roth", "authorids": "~Haoyu_Wang2;~Hongming_Zhang2;~Yueguan_Wang2;~Yuqian_Deng1;~Muhao_Chen1;~Dan_Roth3", "gender": "M;M;M;;M;M", "homepage": "https://why2011btv.github.io;http://www.cse.ust.hk/~hzhangal/;https://github.com/etsurin;;https://muhaochen.github.io/;https://www.cis.upenn.edu/~danroth/", "dblp": "50/8499;;;;173/2608;r/DanRoth", "google_scholar": "U0rJDP0AAAAJ;i5ETuuQAAAAJ;;;k79yEZkAAAAJ;E-bpPWgAAAAJ", "or_profile": "~Haoyu_Wang2;~Hongming_Zhang2;~Yueguan_Wang2;~Yuqian_Deng1;~Muhao_Chen1;~Dan_Roth3", "aff": "University of Pennsylvania;Tencent AI Lab Seattle;Graduate School of Information Science and Technology, The University of Tokyo;Amazon;University of Southern California;Amazon", "aff_domain": "seas.upenn.edu;tencent.com;g.ecc.u-tokyo.ac.jp;amazon.com;usc.edu;amazon.com", "position": "PhD student;Researcher;MS student;Enginner;Assistant Research Professor;VP and Distinguished Scientist", "bibtex": "@inproceedings{\nwang2023are,\ntitle={Are All Steps Equally Important? 
Benchmarking Essentiality Detection in Event Processes},\nauthor={Haoyu Wang and Hongming Zhang and Yueguan Wang and Yuqian Deng and Muhao Chen and Dan Roth},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=kEzI6OYXV4}\n}", "github": "", "project": "", "reviewers": "piJ4;eKZY;PfMd", "site": "https://openreview.net/forum?id=kEzI6OYXV4", "pdf_size": 0, "rating": "4;4;4", "confidence": "5;4;3", "excitement": "4;4;3", "reproducibility": "5;4;4", "correctness": "4;4;3", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.333333333333333, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;0000-0003-0118-3147;", "linkedin": ";;;yuqiand;;dan-roth-8667361/", "aff_unique_index": "0;1;2;3;4;3", "aff_unique_norm": "University of Pennsylvania;Tencent;University of Tokyo;Amazon;University of Southern California", "aff_unique_dep": ";Tencent AI Lab;Graduate School of Information Science and Technology;Amazon.com, Inc.;", "aff_unique_url": "https://www.upenn.edu;https://ai.tencent.com;https://www.u-tokyo.ac.jp;https://www.amazon.com;https://www.usc.edu", "aff_unique_abbr": "UPenn;Tencent AI Lab;UTokyo;Amazon;USC", "aff_campus_unique_index": "1;2;3", "aff_campus_unique": ";Seattle;Tokyo;Los Angeles", "aff_country_unique_index": "0;0;1;0;0;0", "aff_country_unique": "United States;Japan" }, { "id": "kFQrpCFanH", "title": "Dynosaur: A Dynamic Growth Paradigm for Instruction-Tuning Data Curation", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Instruction tuning has emerged to enhance the capabilities of large language models (LLMs) to comprehend instructions and generate appropriate responses. Existing methods either manually annotate or employ LLMs (e.g., GPT-series) to generate data for instruction tuning. However, they often overlook associating instructions with existing annotated datasets. In this paper, we propose Dynosaur, a dynamic growth paradigm for the automatic curation of instruction-tuning data. Based on the metadata of existing datasets, we use LLMs to automatically construct instruction-tuning data by identifying relevant data fields and generating appropriate instructions. \n\nBy leveraging the existing annotated datasets, Dynosaur offers several advantages: 1) it reduces the API cost for generating instructions (e.g., it costs less than \\$12 USD by calling GPT-3.5-turbo for generating 800K instruction tuning samples); 2) it provides high-quality data for instruction tuning (e.g., it performs better than Alpaca and Flan on Super-NI and Longform with comparable data sizes); and 3) it supports the continuous improvement of models by generating instruction-tuning data when a new annotated dataset becomes available. We further investigate a continual learning scheme for learning with the ever-growing instruction-tuning dataset, and demonstrate that replaying tasks with diverse instruction embeddings not only helps mitigate forgetting issues but generalizes to unseen tasks better. 
\n\nCode and data are available at https://github.com/WadeYin9712/Dynosaur.", "keywords": "instruction tuning;dynamic;generalizable", "primary_area": "", "supplementary_material": "", "author": "Da Yin;Xiao Liu;Fan Yin;Ming Zhong;Hritik Bansal;Jiawei Han;Kai-Wei Chang", "authorids": "~Da_Yin2;~Xiao_Liu19;~Fan_Yin1;~Ming_Zhong2;~Hritik_Bansal2;~Jiawei_Han1;~Kai-Wei_Chang1", "gender": "M;F;M;M;M;M;M", "homepage": "https://wadeyin9712.github.io/;https://xxxiaol.github.io/;;https://maszhongming.github.io/;https://sites.google.com/view/hbansal;http://hanj.cs.illinois.edu/;http://kwchang.net", "dblp": "131/0141;82/1364-32;;;239/5922;h/JiaweiHan.html;18/2428", "google_scholar": "n32w34kAAAAJ;c3bdW2IAAAAJ;klShdV0AAAAJ;mnifqeUAAAAJ;gAKTYtoAAAAJ;https://scholar.google.com.tw/citations?user=Kv9AbjMAAAAJ;fqDBtzYAAAAJ", "or_profile": "~Da_Yin2;~Xiao_Liu19;~Fan_Yin1;~Ming_Zhong2;~Hritik_Bansal2;~Jiawei_Han1;~Kai-Wei_Chang1", "aff": "University of California, Los Angeles;University of California, Los Angeles;University of California, Los Angeles;University of Illinois Urbana Champaign;University of California, Los Angeles;University of Illinois at Urbana-Champaign (UIUC);Amazon", "aff_domain": "cs.ucla.edu;ucla.edu;cs.ucla.edu;illinois.edu;ucla.edu;illinois.edu;amazon.com", "position": "PhD student;Researcher;PhD student;PhD student;PhD student;Full Professor;Researcher", "bibtex": "@inproceedings{\nyin2023dynosaur,\ntitle={Dynosaur: A Dynamic Growth Paradigm for Instruction-Tuning Data Curation},\nauthor={Da Yin and Xiao Liu and Fan Yin and Ming Zhong and Hritik Bansal and Jiawei Han and Kai-Wei Chang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=kFQrpCFanH}\n}", "github": "", "project": "", "reviewers": "eo7N;Dkdk;FuNK", "site": "https://openreview.net/forum?id=kFQrpCFanH", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;3;4", "excitement": "3;4;4", "reproducibility": "4;4;3", "correctness": "3;4;4", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;0000-0002-3629-2696;0000-0001-5365-0072", "linkedin": ";;fan-y-60b666180/;;hritik-bansal/;;kai-wei-chang-41239040", "aff_unique_index": "0;0;0;1;0;1;2", "aff_unique_norm": "University of California, Los Angeles;University of Illinois Urbana-Champaign;Amazon", "aff_unique_dep": ";;Amazon.com, Inc.", "aff_unique_url": "https://www.ucla.edu;https://illinois.edu;https://www.amazon.com", "aff_unique_abbr": "UCLA;UIUC;Amazon", "aff_campus_unique_index": "0;0;0;1;0;1", "aff_campus_unique": "Los Angeles;Urbana-Champaign;", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "kIRIjRPgfR", "title": "MacLaSa: Multi-Aspect Controllable Text Generation via Efficient Sampling from Compact Latent Space", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Multi-aspect controllable text generation aims to generate fluent sentences that possess multiple desired attributes simultaneously.\nTraditional methods either require expensive iteration / searching within the discrete text space during the decoding stage, or train separate controllers for each aspect, resulting in a degradation of text quality due to the discrepancy between different aspects. 
To address these limitations, we introduce a novel approach for $\\textbf{M}$ulti-$\\textbf{a}$spect $\\textbf{c}$ontrol, namely MacLaSa, that estimates compact $\\textbf{La}$tent space for multiple aspects, and performs efficient $\\textbf{Sa}$mpling with a fast sampler. To eliminate the domain discrepancies between different aspects, we first utilize a variational autoencoder (VAE) network to map text sequences from various data sources into close latent representations. The estimated latent space enables the formulation of joint energy-based models and the plugging in of arbitrary attribute discriminators to achieve multi-aspect control. Afterwards, we draw latent samples with a fast sampler based on ordinary differential equations and feed sampled examples to the VAE decoder to produce target text sequences. Experimental results demonstrate that MacLaSa outperforms strong baselines on both attribute relevance and textual quality while maintaining a high inference speed.", "keywords": "Natural Language Generation;Controllable Text Generation;Pretrained Language Models", "primary_area": "", "supplementary_material": "", "author": "Hanxing Ding;Liang Pang;Zihao Wei;Huawei Shen;Xueqi Cheng;Tat-Seng Chua", "authorids": "~Hanxing_Ding1;~Liang_Pang1;~Zihao_Wei2;~Huawei_Shen1;~Xueqi_Cheng1;~Tat-Seng_Chua2", "gender": "M;M;M;M;M;M", "homepage": ";https://pl8787.github.io/;;https://www.ict.ac.cn/sourcedb/cn/jssrck/201402/t20140221_4037648.html;https://people.ucas.ac.cn/~cxq?language=en;http://www.comp.nus.edu.sg/~chuats/", "dblp": "260/2132;37/11078;235/4940.html;;44/912;", "google_scholar": ";1dgQHBkAAAAJ;cYUdH_4AAAAJ;;hY8aLqAAAAAJ;https://scholar.google.com.tw/citations?user=Z9DWCBEAAAAJ", "or_profile": "~Hanxing_Ding1;~Liang_Pang1;~Zihao_Wei2;~Huawei_Shen1;~Xueqi_Cheng1;~Tat-seng_Chua1", "aff": "Institute of Computing Technology of the Chinese Academy of Sciences;Institute of Computing Technology, Chinese Academy of Sciences;Institute of Computing Technology, Chinese Academy of Sciences;Institute of Computing Technology, Chinese Academy of Sciences;Institute of Computing Technology, Chinese Academy;National University of Singapore", "aff_domain": "ict.ac.cn;ict.ac.cn;ict.ac.cn;ict.ac.cn;ict.ac.cn;nus.edu.sg", "position": "PhD student;Associate Professor;PhD student;Full Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nding2023maclasa,\ntitle={MacLaSa: Multi-Aspect Controllable Text Generation via Efficient Sampling from Compact Latent Space},\nauthor={Hanxing Ding and Liang Pang and Zihao Wei and Huawei Shen and Xueqi Cheng and Tat-Seng Chua},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=kIRIjRPgfR}\n}", "github": "", "project": "", "reviewers": "YvKi;V3ko;2HjF", "site": "https://openreview.net/forum?id=kIRIjRPgfR", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;3", "excitement": "3;3;3", "reproducibility": "4;4;3", "correctness": "3;4;3", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0003-1161-8546;;0000-0002-1081-8119;;0000-0001-6097-7807", "linkedin": ";;;;;", "aff_unique_index": "0;0;0;0;0;1", "aff_unique_norm": "Chinese Academy of Sciences;National University of Singapore", "aff_unique_dep": "Institute of Computing Technology;", 
"aff_unique_url": "http://www.ict.ac.cn;https://www.nus.edu.sg", "aff_unique_abbr": "CAS;NUS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;1", "aff_country_unique": "China;Singapore" }, { "id": "kKKzd8SaMy", "title": "The Interpreter Understands Your Meaning: End-to-end Spoken Language Understanding Aided by Speech Translation", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "End-to-end spoken language understanding (SLU) remains elusive even with current large pretrained language models on text and speech, especially in multilingual cases. Machine translation has been established as a powerful pretraining objective on text as it enables the model to capture high-level semantics of the input utterance and associations between different languages, which is desired for speech models that work on lower-level acoustic frames. Motivated particularly by the task of cross-lingual SLU, we demonstrate that the task of speech translation (ST) is a good means of pretraining speech models for end-to-end SLU on both intra- and cross-lingual scenarios. \n\nBy introducing ST, our models reach higher performance over baselines on monolingual and multilingual intent classification as well as spoken question answering using SLURP, MINDS-14, and NMSQA benchmarks. To verify the effectiveness of our methods, we also create new benchmark datasets from both synthetic and real sources, for speech summarization and low-resource/zero-shot transfer from English to French or Spanish. We further show the value of preserving knowledge for the ST pretraining task for better downstream performance, possibly using Bayesian transfer regularizers.", "keywords": "spoken language understanding;multilinguality;bayesian transfer learning", "primary_area": "", "supplementary_material": "", "author": "Mutian He;Philip N. Garner", "authorids": "~Mutian_He1;~Philip_N._Garner1", "gender": ";M", "homepage": "http://mutiann.github.io/;https://www.idiap.ch/~pgarner", "dblp": "https://dblp.uni-trier.de/pid/222/7849-1;42/7533", "google_scholar": "lqALniEAAAAJ;https://scholar.google.ch/citations?user=c9nAX2AAAAAJ", "or_profile": "~Mutian_He1;~Philip_N._Garner1", "aff": "Idiap Research Institute;Idiap Research Institute", "aff_domain": "idiap.ch;idiap.ch", "position": "PhD student;Senior Researcher", "bibtex": "@inproceedings{\nhe2023the,\ntitle={The Interpreter Understands Your Meaning: End-to-end Spoken Language Understanding Aided by Speech Translation},\nauthor={Mutian He and Philip N. 
Garner},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=kKKzd8SaMy}\n}", "github": "", "project": "", "reviewers": "1hZ9;bU2d;LTf6", "site": "https://openreview.net/forum?id=kKKzd8SaMy", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;3;4", "excitement": "3;3;4", "reproducibility": "3;4;4", "correctness": "3;3;3", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";", "linkedin": ";", "aff_unique_index": "0;0", "aff_unique_norm": "Idiap Research Institute", "aff_unique_dep": "", "aff_unique_url": "https://www.idiap.ch", "aff_unique_abbr": "Idiap", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Switzerland" }, { "id": "kKX9X0tMRH", "title": "DisCo: Distilled Student Models Co-training for Semi-supervised Text Mining", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Many text mining models are constructed by fine-tuning a large deep pre-trained language model (PLM) in downstream tasks.\nHowever, a significant challenge that arises nowadays is how to maintain performance when we use a lightweight model with limited labeled samples. \nWe present DisCo, a semi-supervised learning (SSL) framework for fine-tuning a cohort of small student models generated from a large PLM using knowledge distillation.\nOur key insight is to share complementary knowledge among distilled student cohorts to promote their SSL effectiveness. \nDisCo employs a novel co-training technique to optimize a cohort of multiple small student models by promoting knowledge sharing among students under diversified views: model views produced by different distillation strategies and data views produced by various input augmentations. 
\nWe evaluate DisCo on both semi-supervised text classification and extractive summarization tasks.\nExperimental results show that DisCo can produce student models that are $7.6\\times$ smaller and $4.8 \\times$ faster in inference than the baseline PLMs while maintaining comparable performance.\nWe also show that DisCo-generated student models outperform the similar-sized models elaborately tuned in distinct tasks.", "keywords": "co-training;semi-supervised learning;knowledge distillation", "primary_area": "", "supplementary_material": "", "author": "Weifeng Jiang;Qianren Mao;Chenghua Lin;Jianxin Li;Ting Deng;Weiyi Yang;Zheng Wang", "authorids": "~Weifeng_Jiang2;~Qianren_Mao4;~Chenghua_Lin1;~Jianxin_Li3;~Ting_Deng3;~Weiyi_Yang1;~Zheng_Wang21", "gender": ";M;;M;;F;M", "homepage": ";;;http://myjianxin.github.io;;;https://zwang4.github.io/", "dblp": "211/3336;234/5350;;l/JianxinLi-2.html;;;w/ZhengWang1", "google_scholar": ";https://scholar.google.com.hk/citations?user=PnDqlPkAAAAJ;;EY2lqD0AAAAJ;;;qJ7ZKG8AAAAJ", "or_profile": "~Weifeng_Jiang2;~Qianren_Mao4;~Chenghua_Lin1;~Jianxin_Li3;~Ting_Deng3;~Weiyi_Yang1;~Zheng_Wang21", "aff": "Nanyang Technological University;Beihang University;;Beihang University ;;Beihang University ;University of Leeds", "aff_domain": "ntu.edu.sg;buaa.edu.cn;;buaa.edu.cn;;buaa.edu.cn;leeds.ac.uk", "position": "MS student;PhD student;;Full Professor;;PhD student;Full Professor", "bibtex": "@inproceedings{\njiang2023disco,\ntitle={DisCo: Distilled Student Models Co-training for Semi-supervised Text Mining},\nauthor={Weifeng Jiang and Qianren Mao and Chenghua Lin and Jianxin Li and Ting Deng and Weiyi Yang and Zheng Wang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=kKX9X0tMRH}\n}", "github": "", "project": "", "reviewers": "VCv8;vrX3;QmSR", "site": "https://openreview.net/forum?id=kKX9X0tMRH", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;4;4", "excitement": "4;3;4", "reproducibility": "1;3;5", "correctness": "3;3;4", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0009-0008-2314-0299;0000-0003-0780-0628;;0000-0001-5152-0055;;0009-0009-9166-1739;0000-0001-6157-0662", "linkedin": ";%E4%B9%BE%E4%BB%BB-%E6%AF%9B-574534326/;;;;;", "aff_unique_index": "0;1;1;1;2", "aff_unique_norm": "Nanyang Technological University;Beihang University;University of Leeds", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ntu.edu.sg;http://www.buaa.edu.cn/;https://www.leeds.ac.uk", "aff_unique_abbr": "NTU;BUAA;Leeds", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1;2", "aff_country_unique": "Singapore;China;United Kingdom" }, { "id": "kNCHv0NZ69", "title": "A Tale of Pronouns: Interpretability Informs Gender Bias Mitigation for Fairer Instruction-Tuned Machine Translation", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Recent instruction fine-tuned models can solve multiple NLP tasks when prompted to do so, with machine translation (MT) being a prominent use case. However, current research often focuses on standard performance benchmarks, leaving compelling fairness and ethical considerations behind. 
In MT, this might lead to misgendered translations, resulting, among other harms, in the perpetuation of stereotypes and prejudices. \nIn this work, we address this gap by investigating whether and to what extent such models exhibit gender bias in machine translation and how we can mitigate it.\nConcretely, we compute established gender bias metrics on the WinoMT corpus from English to German and Spanish. We discover that IFT models default to male-inflected translations, even disregarding female occupational stereotypes. \nNext, using interpretability methods, we unveil that models systematically overlook the pronoun indicating the gender of a target occupation in misgendered translations.\nFinally, based on this finding, we propose an easy-to-implement and effective bias mitigation solution based on few-shot learning that leads to significantly fairer translations.", "keywords": "machine translation;gender bias;ethics;interpretability;instruction fine-tuned language models", "primary_area": "", "supplementary_material": "", "author": "Giuseppe Attanasio;Flor Miriam Plaza del Arco;Debora Nozza;Anne Lauscher", "authorids": "~Giuseppe_Attanasio1;~Flor_Miriam_Plaza_del_Arco1;~Debora_Nozza1;~Anne_Lauscher1", "gender": "M;F;F;", "homepage": "https://gattanasio.cc;https://fmplaza.github.io/;https://www.deboranozza.com/;", "dblp": "198/3907;185/4247.html;157/9859.html;209/6857", "google_scholar": "https://scholar.google.it/citations?user=IuhnRJQAAAAJ;4GqDwGEAAAAJ;AKi-UWQAAAAJ;https://scholar.google.it/citations?user=IbJS3UEAAAAJ", "or_profile": "~Giuseppe_Attanasio1;~Flor_Miriam_Plaza_del_Arco1;~Debora_Nozza1;~Anne_Lauscher1", "aff": "Bocconi University;University of Ja\u00e9n;Bocconi University;Universit\u00e4t Hamburg", "aff_domain": "unibocconi.it;ujaen.es;unibocconi.it;uni-hamburg.de", "position": "Postdoc;PhD student;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nattanasio2023a,\ntitle={A Tale of Pronouns: Interpretability Informs Gender Bias Mitigation for Fairer Instruction-Tuned Machine Translation},\nauthor={Giuseppe Attanasio and Flor Miriam Plaza del Arco and Debora Nozza and Anne Lauscher},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=kNCHv0NZ69}\n}", "github": "", "project": "", "reviewers": "HKEt;eP6E;6GEX", "site": "https://openreview.net/forum?id=kNCHv0NZ69", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;4", "excitement": "4;4;4", "reproducibility": "5;4;4", "correctness": "4;3;3", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 4.0, "reproducibility_avg": 4.333333333333333, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-3020-5512;0000-0002-7998-2267;", "linkedin": ";flor-miriam-plaza-del-arco-395770b9/;deboranozza;", "aff_unique_index": "0;1;0;2", "aff_unique_norm": "Bocconi University;University of Ja\u00e9n;University of Hamburg", "aff_unique_dep": ";;", "aff_unique_url": "https://www.bocconi.edu;https://www.ujaen.es;https://www.uni-hamburg.de", "aff_unique_abbr": "Bocconi;UJA;UHH", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;2", "aff_country_unique": "Italy;Spain;Germany" }, { "id": "kNUglj7Kq1", "title": "Unifying Cross-Lingual Transfer across Scenarios of Resource Scarcity", "track": "main", "status": "Long Main", "tldr": "", "abstract": "The scarcity of data in many of the world's 
languages necessitates the transfer of knowledge from other, resource-rich languages. However, the level of scarcity varies significantly across multiple dimensions, including: i) the amount of task-specific data available in the source and target languages; ii) the amount of monolingual and parallel data available for both languages; and iii) the extent to which they are supported by pretrained multilingual and translation models. Prior work has largely treated these dimensions and the various techniques for dealing with them separately; in this paper, we offer a more integrated view by exploring how to deploy the arsenal of cross-lingual transfer tools across a range of scenarios, especially the most challenging, low-resource ones. To this end, we run experiments on the AmericasNLI and NusaX benchmarks over 20 languages, simulating a range of few-shot settings.\nThe best configuration in our experiments employed parameter-efficient language and task adaptation of massively multilingual Transformers, trained simultaneously on source language data and both machine-translated and natural data for multiple target languages. In addition, we show that pre-trained translation models can be easily adapted to unseen languages, thus extending the range of our hybrid technique and translation-based transfer more broadly. Beyond new insights into the mechanisms of cross-lingual transfer, we hope our work will provide practitioners with a toolbox to integrate multiple techniques for different real-world scenarios. Our code is available at https://github.com/parovicm/unified-xlt.", "keywords": "cross-lingual transfer;low-resource scenarios;parameter-efficient fine-tuning", "primary_area": "", "supplementary_material": "", "author": "Alan Ansell;Marinela Parovi\u0107;Ivan Vuli\u0107;Anna Korhonen;Edoardo Ponti", "authorids": "~Alan_Ansell1;~Marinela_Parovi\u01071;~Ivan_Vuli\u01071;~Anna_Korhonen1;~Edoardo_Ponti1", "gender": ";;M;;", "homepage": ";;https://sites.google.com/site/ivanvulic/;https://sites.google.com/site/annakorhonen/;https://ducdauge.github.io/", "dblp": ";;77/9768;14/6532;178/8829", "google_scholar": ";;ZX8js60AAAAJ;https://scholar.google.co.uk/citations?user=SCoVoOYAAAAJ;https://scholar.google.ca/citations?user=tklL2q0AAAAJ", "or_profile": "~Alan_Ansell1;~Marinela_Parovi\u01071;~Ivan_Vuli\u01071;~Anna_Korhonen1;~Edoardo_Ponti1", "aff": ";;PolyAI Limited;University of Cambridge;University of Edinburgh", "aff_domain": ";;poly-ai.com;cam.ac.uk;ed.ac.uk", "position": ";;Senior Scientist;Professor;Assistant Professor", "bibtex": "@inproceedings{\nansell2023unifying,\ntitle={Unifying Cross-Lingual Transfer across Scenarios of Resource Scarcity},\nauthor={Alan Ansell and Marinela Parovi{\\'c} and Ivan Vuli{\\'c} and Anna Korhonen and Edoardo Ponti},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=kNUglj7Kq1}\n}", "github": "", "project": "", "reviewers": "8j5G;jn1b;TbEt", "site": "https://openreview.net/forum?id=kNUglj7Kq1", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;3;4", "excitement": "4;3;4", "reproducibility": "4;4;4", "correctness": "4;4;4", "rating_avg": 5.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.0, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;0000-0002-6308-1050", "linkedin": 
";;ivan-vuli%C4%87-286b4a81/;anna-korhonen-534a9b5/;edoardo-maria-ponti/", "aff_unique_index": "0;1;2", "aff_unique_norm": "PolyAI Limited;University of Cambridge;University of Edinburgh", "aff_unique_dep": ";;", "aff_unique_url": "https://www.poly.ai;https://www.cam.ac.uk;https://www.ed.ac.uk", "aff_unique_abbr": "PolyAI;Cambridge;Edinburgh", "aff_campus_unique_index": "1", "aff_campus_unique": ";Cambridge", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United Kingdom" }, { "id": "kOhxudaIEj", "title": "CS2W: A Chinese Spoken-to-Written Style Conversion Dataset with Multiple Conversion Types", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Spoken texts (either manual or automatic transcriptions from automatic speech recognition (ASR)) often contain disfluencies and grammatical errors, which pose tremendous challenges to downstream tasks. Converting spoken into written language is hence desirable. Unfortunately, the availability of datasets for this is limited. To address this issue, we present CS2W, a Chinese Spoken-to-Written style conversion dataset comprising 7,237 spoken sentences extracted from transcribed conversational texts. Four types of conversion problems are covered in CS2W: disfluencies, grammatical errors, ASR transcription errors, and colloquial words. Our annotation convention, data, and code are publicly available at https://github.com/guozishan/CS2W.", "keywords": "Spoken-to-Written Style Conversion;Disfluency Detection;Grammatical Error Correction;ASR", "primary_area": "", "supplementary_material": "", "author": "Zishan Guo;Linhao Yu;Minghui Xu;Renren Jin;Deyi Xiong", "authorids": "~Zishan_Guo1;~Linhao_Yu1;~Minghui_Xu2;~Renren_Jin1;~Deyi_Xiong2", "gender": "F;M;M;M;M", "homepage": ";https://hasuer.github.io/;;;https://dyxiong.github.io", "dblp": ";;;329/4176;55/6548", "google_scholar": ";;;qW3oQDUAAAAJ;QPLO3myO5PkC", "or_profile": "~Zishan_Guo1;~Linhao_Yu1;~Minghui_Xu2;~Renren_Jin1;~Deyi_Xiong2", "aff": "Tianjin University;Tianjin University;Tianjin University;Tianjin University;Tianjin University", "aff_domain": "tju.edu.cn;tju.edu.cn;tju.edu.cn;tju.edu.cn;tju.edu.cn", "position": "MS student;Undergrad student;MS student;MS student;Full Professor", "bibtex": "@inproceedings{\nguo2023csw,\ntitle={{CS}2W: A Chinese Spoken-to-Written Style Conversion Dataset with Multiple Conversion Types},\nauthor={Zishan Guo and Linhao Yu and Minghui Xu and Renren Jin and Deyi Xiong},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=kOhxudaIEj}\n}", "github": "", "project": "", "reviewers": "i9a8;3uWx;7Dmy", "site": "https://openreview.net/forum?id=kOhxudaIEj", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;4;3", "excitement": "5;4;4", "reproducibility": "5;4;4", "correctness": "5;4;4", "rating_avg": 5.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 4.333333333333333, "reproducibility_avg": 4.333333333333333, "correctness_avg": 4.333333333333333, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0009-0003-2193-294X;;;;0000-0002-2353-5038", "linkedin": ";;minghui-xu-9431371b2/;renren-jin-222a861b3/;", "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Tianjin University", "aff_unique_dep": "", "aff_unique_url": "http://www.tju.edu.cn", "aff_unique_abbr": "TJU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { 
"id": "kQSlGF9lH6", "title": "Investigating Efficiently Extending Transformers for Long Input Summarization", "track": "main", "status": "Long Main", "tldr": "", "abstract": "While large pretrained Transformer models have proven highly capable at tackling natural language tasks, handling long sequence inputs still poses a significant challenge. One such task is long input summarization, where inputs are longer than the maximum input context of most models. Through an extensive set of experiments, we investigate what model architectural changes and pretraining paradigms most efficiently adapt a pretrained Transformer for long input summarization. We find that a staggered, block-local Transformer with global encoder tokens strikes a good balance of performance and efficiency, and that an additional pretraining phase on long sequences meaningfully improves downstream summarization performance. Based on our findings, we introduce PEGASUS-X, an extension of the PEGASUS model with additional long input pretraining to handle inputs of up to 16K tokens, which achieves strong performance on long input summarization tasks comparable with much larger models.", "keywords": "summarization;nlp;architectures;long context", "primary_area": "", "supplementary_material": "", "author": "Jason Phang;Yao Zhao;Peter J Liu", "authorids": "~Jason_Phang1;~Yao_Zhao5;~Peter_J_Liu1", "gender": "M;;", "homepage": "https://jasonphang.com/;;http://www.peterjliu.com", "dblp": "227/3174;;190/7667", "google_scholar": "hxbdOuoAAAAJ;p7L3HrMAAAAJ;", "or_profile": "~Jason_Phang1;~Yao_Zhao5;~Peter_J_Liu1", "aff": "Microsoft Research;Google;Google Brain", "aff_domain": "research.microsoft.com;google.com;google.com", "position": "Intern;Researcher;Research Scientist", "bibtex": "@inproceedings{\nphang2023investigating,\ntitle={Investigating Efficiently Extending Transformers for Long Input Summarization},\nauthor={Jason Phang and Yao Zhao and Peter J Liu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=kQSlGF9lH6}\n}", "github": "", "project": "", "reviewers": "5wpq;8ipF;fXYW", "site": "https://openreview.net/forum?id=kQSlGF9lH6", "pdf_size": 0, "rating": "5;5;5", "confidence": "5;3;3", "excitement": "4;3;4", "reproducibility": "5;4;3", "correctness": "5;4;4", "rating_avg": 5.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.0, "correctness_avg": 4.333333333333333, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": "jason-phang/;;p3t3rliu", "aff_unique_index": "0;1;1", "aff_unique_norm": "Microsoft;Google", "aff_unique_dep": "Microsoft Research;Google", "aff_unique_url": "https://www.microsoft.com/en-us/research;https://www.google.com", "aff_unique_abbr": "MSR;Google", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "kUNzgI1HxN", "title": "Frugal Prompting for Dialog Models", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "The use of large language models (LLMs) in natural language processing (NLP) tasks is rapidly increasing, leading to changes in how researchers approach problems in the field. To fully utilize these models' abilities, a better understanding of their behavior for different input protocols is required. 
With LLMs, users can directly interact with the models through a text-based interface to define and solve various tasks. Hence, understanding the conversational abilities of these LLMs, which may not have been specifically trained for dialog modeling, is also important. This study examines different approaches for building dialog systems using LLMs by considering various aspects of the prompt. As part of prompt tuning, we experiment with various ways of providing instructions, exemplars, current query and additional context.\nThe research also analyzes the representations of dialog history that have the optimal usable-information density. Based on the findings, the paper suggests more compact ways of providing dialog history information while ensuring good performance and reducing model's inference-API costs. The research contributes to a better understanding of how LLMs can be effectively used for building interactive systems.", "keywords": "dialog;dialog generation;natural language generation;dialogue generation;dialogue;NLP", "primary_area": "", "supplementary_material": "", "author": "Bishal Santra;Sakya Basak;Abhinandan De;Manish Gupta;Pawan Goyal", "authorids": "~Bishal_Santra1;~Sakya_Basak1;~Abhinandan_De1;~Manish_Gupta4;~Pawan_Goyal1", "gender": "A5B10;M;M;M;M", "homepage": "https://bsantraigi.github.io/;;;http://cse.iitkgp.ac.in/~pawang/;https://sites.google.com/view/manishg/", "dblp": "191/6050;;;77/2307-2;g/ManishGupta1.html", "google_scholar": "U2szfuYAAAAJ;j6KmcmsAAAAJ;;https://scholar.google.com.tw/citations?user=F14FHsIAAAAJ;https://scholar.google.co.in/citations?user=eX9PSu0AAAAJ", "or_profile": "~Bishal_Santra1;~Sakya_Basak1;~Abhinandan_De1;~Pawan_Goyal1;~Manish_Gupta1", "aff": "Indian Institute of Technology Kharagpur, India;Microsoft;Indian Institute of Technology Kharagpur;IIT Kharagpur;Microsoft", "aff_domain": "iitkgp.ac.in;microsoft.com;iitkgp.ernet.in;cse.iitkgp.ac.in;microsoft.com", "position": "PhD student;Researcher;Undergrad student;Associate Professor;Principal Researcher", "bibtex": "@inproceedings{\nsantra2023frugal,\ntitle={Frugal Prompting for Dialog Models},\nauthor={Bishal Santra and Sakya Basak and Abhinandan De and Manish Gupta and Pawan Goyal},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=kUNzgI1HxN}\n}", "github": "", "project": "", "reviewers": "MCUv;6RFn;RHz2", "site": "https://openreview.net/forum?id=kUNzgI1HxN", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;2;4", "excitement": "3;3;4", "reproducibility": "4;4;4", "correctness": "4;3;4", "rating_avg": 3.0, "confidence_avg": 3.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.0, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-0380-689X;;;;0000-0002-2843-3110", "linkedin": ";;abhinandan0136/;;manishsgupta/", "aff_unique_index": "0;1;0;0;1", "aff_unique_norm": "Indian Institute of Technology Kharagpur;Microsoft", "aff_unique_dep": ";Microsoft Corporation", "aff_unique_url": "https://www.iitkgp.ac.in;https://www.microsoft.com", "aff_unique_abbr": "IIT Kharagpur;Microsoft", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Kharagpur;", "aff_country_unique_index": "0;1;0;0;1", "aff_country_unique": "India;United States" }, { "id": "kXHDXPubz9", "title": "A Rose by Any Other Name would not Smell as Sweet: Social Bias in Names Mistranslation", "track": "main", "status": "Long 
Main", "tldr": "", "abstract": "We ask the question: Are there widespread disparities in machine translations of names across race/ethnicity, and gender? We hypothesize that the translation quality of names and surrounding context will be lower for names associated with US racial and ethnic minorities due to these systems\u2019 tendencies to standardize language to predominant language patterns. We develop a dataset of names that are strongly demographically aligned and propose a translation evaluation procedure based on round-trip translation. We analyze the effect of name demographics on translation quality using generalized linear mixed effects models and find that the ability of translation systems to correctly translate female-associated names is significantly lower than male-associated names. This effect is particularly pronounced for female-associated names that are also associated with racial (Black) and ethnic (Hispanic) minorities. This disparity in translation quality between social groups for something as personal as someone's name has significant implications for people's professional, personal, and cultural identities, self-worth and ease of communication. Our findings suggest that more MT research is needed to improve the translation of names and to provide high-quality service for users regardless of gender, race, and ethnicity.", "keywords": "fairness and bias;NLP-related harms;name translation;evaluation", "primary_area": "", "supplementary_material": "", "author": "Sandra Camille Sandoval;Jieyu Zhao;Marine Carpuat;Hal Daum\u00e9 III", "authorids": "~Sandra_Camille_Sandoval1;~Jieyu_Zhao1;~Marine_Carpuat1;~Hal_Daum\u00e9_III1", "gender": "F;F;F;M", "homepage": ";http://jyzhao.net/;http://www.cs.umd.edu/~marine/;http://hal3.name", "dblp": ";59/2379-1;71/1827;77/2856.html", "google_scholar": ";9VaGBCQAAAAJ;iPAX6jcAAAAJ;PbEw81gAAAAJ", "or_profile": "~Sandra_Camille_Sandoval1;~Jieyu_Zhao1;~Marine_Carpuat1;~Hal_Daum\u00e9_III1", "aff": "University of Maryland, College Park;University of Maryland, College Park;University of Maryland, College Park;Microsoft", "aff_domain": "umd.edu;umd.edu;umd.edu;microsoft.com", "position": "PhD student;Postdoc;Associate Professor;Senior Principle Researcher", "bibtex": "@inproceedings{\nsandoval2023a,\ntitle={A Rose by Any Other Name would not Smell as Sweet: Social Bias in Names Mistranslation},\nauthor={Sandra Camille Sandoval and Jieyu Zhao and Marine Carpuat and Hal Daum{\\'e} III},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=kXHDXPubz9}\n}", "github": "", "project": "", "reviewers": "ndtx;EGZQ;QCvK", "site": "https://openreview.net/forum?id=kXHDXPubz9", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;3;4", "excitement": "4;3;3", "reproducibility": "5;3;5", "correctness": "4;2;4", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.333333333333333, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;", "linkedin": "sandra-sandoval-86b2482/;;;", "aff_unique_index": "0;0;0;1", "aff_unique_norm": "University of Maryland;Microsoft", "aff_unique_dep": ";Microsoft Corporation", "aff_unique_url": "https://www/umd.edu;https://www.microsoft.com", "aff_unique_abbr": "UMD;Microsoft", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "College Park;", "aff_country_unique_index": 
"0;0;0;0", "aff_country_unique": "United States" }, { "id": "kY7lpT8z1E", "title": "Contrastive Learning of Sentence Embeddings from Scratch", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Contrastive learning has been the dominant approach to train state-of-the-art sentence embeddings. Previous studies have typically learned sentence embeddings either through the use of human-annotated natural language inference (NLI) data or via large-scale unlabeled sentences in an unsupervised manner. However, even in the case of unlabeled data, their acquisition presents challenges in certain domains due to various reasons. due to copyright restrictions, data distribution issues, and messy formats, among other factors. To address these issues, we present SynCSE, a contrastive learning framework that trains sentence embeddings with synthetic data. Specifically, we explore utilizing large language models to synthesize the required data samples for contrastive learning, including (1) producing positive and negative annotations given unlabeled sentences SynCSE-partial, and (2) generating sentences along with their corresponding annotations from scratch SynCSE-scratch. Notably, SynCSE-scratch constitutes the first contrastive learning method to learn sentence embeddings from scratch without manually collecting any data sample. Experimental results on sentence similarity and reranking tasks indicate that both SynCSE-partial and SynCSE-scratch greatly outperform unsupervised baselines, and SynCSE-partial even achieves comparable performance to the supervised models in most settings.", "keywords": "contrastive learning;sentence embeddings", "primary_area": "", "supplementary_material": "", "author": "Junlei Zhang;Zhenzhong Lan;Junxian He", "authorids": "~Junlei_Zhang1;~Zhenzhong_Lan2;~Junxian_He1", "gender": "M;;M", "homepage": ";;https://jxhe.github.io", "dblp": "197/3153.html;27/3780;188/6127.html", "google_scholar": ";tlDABkgAAAAJ;BIFGeoUAAAAJ", "or_profile": "~Junlei_Zhang1;~Zhenzhong_Lan2;~Junxian_He1", "aff": "Westlake University;Westlake University;Hong Kong University of Science and Technology", "aff_domain": "westlake.edu;westlake.edu.cn;ust.hk", "position": "PhD student;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nzhang2023contrastive,\ntitle={Contrastive Learning of Sentence Embeddings from Scratch},\nauthor={Junlei Zhang and Zhenzhong Lan and Junxian He},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=kY7lpT8z1E}\n}", "github": "", "project": "", "reviewers": "DJxQ;YEGU;P81C", "site": "https://openreview.net/forum?id=kY7lpT8z1E", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;3", "excitement": "4;4;4", "reproducibility": "4;4;3", "correctness": "4;4;4", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 4.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": ";;", "aff_unique_index": "0;0;1", "aff_unique_norm": "Westlake University;Hong Kong University of Science and Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.westlake.edu.cn;https://www.ust.hk", "aff_unique_abbr": "WU;HKUST", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "kZob2CsZXm", "title": "COVID-19 Vaccine 
Misinformation in Middle Income Countries", "track": "main", "status": "Long Main", "tldr": "", "abstract": "This paper introduces a multilingual dataset of COVID-19 vaccine misinformation, consisting of annotated tweets from three middle-income countries: Brazil, Indonesia, and Nigeria. The expertly curated dataset includes annotations for 5,952 tweets, assessing their relevance to COVID-19 vaccines, presence of misinformation, and the themes of the misinformation. To address challenges posed by domain specificity, the low-resource setting, and data imbalance, we adopt two approaches for developing COVID-19 vaccine misinformation detection models: domain-specific pre-training and text augmentation using a large language model. Our best misinformation detection models demonstrate improvements ranging from 2.7 to 15.9 percentage points in macro F1-score compared to the baseline models. Additionally, we apply our misinformation detection models in a large-scale study of 19 million unlabeled tweets from the three countries between 2020 and 2022, showcasing the practical application of our dataset and models for detecting and analyzing vaccine misinformation in multiple countries and languages. Our analysis indicates that percentage changes in the number of new COVID-19 cases are positively associated with COVID-19 vaccine misinformation rates in a staggered manner for Brazil and Indonesia, and there are significant positive associations between the misinformation rates across the three countries.", "keywords": "COVID-19;vaccine;misinformation;NLP applications;domain-specific pre-training;text augmentation;distributed lag model", "primary_area": "", "supplementary_material": "", "author": "Jongin Kim;Byeo Rhee Bak;Aditya Agrawal;Jiaxi Wu;Veronika J. Wirtz;Traci Hong;Derry Wijaya", "authorids": "~Jongin_Kim1;~Byeo_Rhee_Bak1;~Aditya_Agrawal1;~Jiaxi_Wu2;~Veronika_J._Wirtz1;~Traci_Hong1;~Derry_Wijaya1", "gender": "M;M;M;;F;F;F", "homepage": "https://zzoliman.github.io/;https://sites.google.com/view/byeorheebak/home;;;https://www.bu.edu/sph/profile/veronika-wirtz/;;https://derrywijaya.github.io/", "dblp": "140/1754;;;;;;https://dblp.org/pers/w/Wijaya:Derry", "google_scholar": "https://scholar.google.com/citations?hl=ko;;;Z-CTMmsAAAAJ;https://scholar.google.com/citations?hl=en;9vqoC4sAAAAJ;8lmWWD0AAAAJ", "or_profile": "~Jongin_Kim1;~Byeo_Rhee_Bak1;~Aditya_Agrawal1;~Jiaxi_Wu2;~Veronika_J._Wirtz1;~Traci_Hong1;~Derry_Wijaya1", "aff": "Boston University, Boston University;Boston University, Boston University;Boston University, Boston University;Boston University, Boston University;National Institute of Public Health ;Boston University, Boston University;Boston University", "aff_domain": "bu.edu;bu.edu;bu.edu;bu.edu;insp.mx;bu.edu;bu.edu", "position": "PhD student;PhD student;MS student;PhD student;Assistant Professor;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\nkim2023covid,\ntitle={{COVID}-19 Vaccine Misinformation in Middle Income Countries},\nauthor={Jongin Kim and Byeo Rhee Bak and Aditya Agrawal and Jiaxi Wu and Veronika J. 
Wirtz and Traci Hong and Derry Wijaya},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=kZob2CsZXm}\n}", "github": "", "project": "", "reviewers": "Knzu;XBsM;cr9A", "site": "https://openreview.net/forum?id=kZob2CsZXm", "pdf_size": 0, "rating": "5;5;5", "confidence": "5;4;3", "excitement": "4;3;4", "reproducibility": "4;2;4", "correctness": "3;3;4", "rating_avg": 5.0, "confidence_avg": 4.0, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.3333333333333335, "replies_avg": 10, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0003-0794-7427;;;;0000-0002-0863-8768;0000-0001-9107-1880;0000-0002-0848-4703", "linkedin": "jongin-kim-267929146/;;aditya-a-4a5112135/;;veronika-wirtz-9970a58a/;tracijhong/;derry-wijaya-577b80178/", "aff_unique_index": "0;0;0;0;1;0;0", "aff_unique_norm": "Boston University;National Institute of Public Health", "aff_unique_dep": ";", "aff_unique_url": "https://www.bu.edu;https://www.rivm.nl", "aff_unique_abbr": "BU;", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Boston;", "aff_country_unique_index": "0;0;0;0;1;0;0", "aff_country_unique": "United States;Netherlands" }, { "id": "kayoyzcsTa", "title": "$\\textit{Lost in Translation, Found in Spans}$: Identifying Claims in Multilingual Social Media", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Claim span identification (CSI) is an important step in fact-checking pipelines, aiming to identify text segments that contain a check-worthy claim or assertion in a social media post. \nDespite its importance to journalists and human fact-checkers, it remains a severely understudied problem, and the scarce research on this topic so far has only focused on English. 
\nHere we aim to bridge this gap by creating a novel dataset, X-CLAIM, consisting of 7K real-world claims collected from numerous social media platforms in five Indian languages and English.\nWe report strong baselines with state-of-the-art encoder-only language models (e.g., XLM-R) and we demonstrate the benefits of training on multiple languages over alternative cross-lingual transfer methods such as zero-shot transfer, or training on translated data, from a high-resource language such as English.\nWe evaluate generative large language models from the GPT series using prompting methods on the X-CLAIM dataset and we find that they underperform the smaller encoder-only language models for low-resource languages.", "keywords": "Claim Span Identification;Multilinguality;Social Media;Claims;Low-resource Languages", "primary_area": "", "supplementary_material": "", "author": "Shubham Mittal;Megha Sundriyal;Preslav Nakov", "authorids": "~Shubham_Mittal1;~Megha_Sundriyal1;~Preslav_Nakov2", "gender": "M;F;M", "homepage": "https://sm354.github.io/;;https://mbzuai.ac.ae/study/faculty/preslav-nakov/", "dblp": "98/1960-1;284/1031;https://dblp.uni-trier.de/pid/19/1947", "google_scholar": "l_bIdRcAAAAJ;vbmdVSAAAAAJ;DfXsKZ4AAAAJ", "or_profile": "~Shubham_Mittal1;~Megha_Sundriyal1;~Preslav_Nakov2", "aff": "Google;Indraprastha Institute of Information Technology, Delhi;Mohamed bin Zayed University of Artificial Intelligence", "aff_domain": "google.com;iiitd.ac.in;mbzuai.ac.ae", "position": "Researcher;PhD student;Full Professor", "bibtex": "@inproceedings{\nmittal2023textitlost,\ntitle={\\${\\textbackslash}textit\\{Lost in Translation, Found in Spans\\}\\$: Identifying Claims in Multilingual Social Media},\nauthor={Shubham Mittal and Megha Sundriyal and Preslav Nakov},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=kayoyzcsTa}\n}", "github": "", "project": "", "reviewers": "G3UE;bWC1;oZSC", "site": "https://openreview.net/forum?id=kayoyzcsTa", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;2;3", "excitement": "4;3;4", "reproducibility": "3;4;4", "correctness": "3;4;4", "rating_avg": 5.0, "confidence_avg": 3.0, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-2268-0137;0000-0002-3600-1510", "linkedin": "sm354/;sundriyalmegha/;preslavnakov/", "aff_unique_index": "0;1;2", "aff_unique_norm": "Google;Indraprastha Institute of Information Technology;Mohamed bin Zayed University of Artificial Intelligence", "aff_unique_dep": "Google;;", "aff_unique_url": "https://www.google.com;http://www.iiitd.ac.in;https://mbzuai.ac.ae", "aff_unique_abbr": "Google;IIIT-D;MBZUAI", "aff_campus_unique_index": "0;1", "aff_campus_unique": "Mountain View;Delhi;", "aff_country_unique_index": "0;1;2", "aff_country_unique": "United States;India;United Arab Emirates" }, { "id": "kc2YhavobV", "title": "Continual Generalized Intent Discovery: Marching Towards Dynamic and Open-world Intent Recognition", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "In a practical dialogue system, users may input out-of-domain (OOD) queries. The Generalized Intent Discovery (GID) task aims to discover OOD intents from OOD queries and extend them to the in-domain (IND) classifier. 
However, GID only considers one stage of OOD learning, and needs to utilize the data in all previous stages for joint training, which limits its wide application in reality. In this paper, we introduce a new task, Continual Generalized Intent Discovery (CGID), which aims to continuously and automatically discover OOD intents from dynamic OOD data streams and then incrementally add them to the classifier with almost no previous data, thus moving towards dynamic intent recognition in an open world. Next, we propose a method called Prototype-guided Learning with Replay and Distillation (PLRD) for CGID, which bootstraps new intent discovery through class prototypes and balances new and old intents through data replay and feature distillation. Finally, we conduct detailed experiments and analysis to verify the effectiveness of PLRD and understand the key challenges of CGID for future research.", "keywords": "Out of Domain;Intent classification;Continual Learning", "primary_area": "", "supplementary_material": "", "author": "Xiaoshuai Song;Yutao Mou;Keqing He;Yueyan Qiu;Jinxu Zhao;Pei Wang;Weiran Xu", "authorids": "~Xiaoshuai_Song1;~Yutao_Mou1;~Keqing_He1;~Yueyan_Qiu1;~Jinxu_Zhao1;~Pei_Wang12;~Weiran_Xu1", "gender": "M;;;F;M;;M", "homepage": ";;https://helicqin.github.io/about/index.html;https://github.com/aqiua94;https://pris-nlp.github.io/author/%E8%B5%B5%E9%87%91%E6%97%AD/;;", "dblp": "45/9576;;79/2314;;;;41/5448", "google_scholar": "https://scholar.google.com/citations?view_op=list_works;;811USNoAAAAJ;;;;https://scholar.google.com/citations?view_op=list_works", "or_profile": "~Xiaoshuai_Song1;~Yutao_Mou1;~Keqing_He1;~Yueyan_Qiu1;~Jinxu_Zhao1;~Pei_Wang12;~Weiran_Xu1", "aff": "Beijing University of Posts and Telecommunications;;Meituan Group;Beijing University of Posts and Telecommunications;Beijing University of Posts and Telecommunications;Beijing University of Posts and Telecommunications;Beijing University of Post and Telecommunication", "aff_domain": "bupt.edu.cn;;meituan.com;bupt.edu.cn;bupt.edu;bupt.edu.cn;bupt.edu.cn", "position": "MS student;;Researcher;Undergrad student;MS student;MS student;Associate Professor", "bibtex": "@inproceedings{\nsong2023continual,\ntitle={Continual Generalized Intent Discovery: Marching Towards Dynamic and Open-world Intent Recognition},\nauthor={Xiaoshuai Song and Yutao Mou and Keqing He and Yueyan Qiu and Jinxu Zhao and Pei Wang and Weiran Xu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=kc2YhavobV}\n}", "github": "", "project": "", "reviewers": "3pu5;Ldu4;fQPy", "site": "https://openreview.net/forum?id=kc2YhavobV", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;4;4", "excitement": "2;3;3", "reproducibility": "4;4;4", "correctness": "3;3;3", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 2.6666666666666665, "reproducibility_avg": 4.0, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;;0000-0002-9416-7666", "linkedin": ";;;;;https://www.linkedin.cn/in/%E9%9C%88-%E7%8E%8B-18a94a174;", "aff_unique_index": "0;1;0;0;0;0", "aff_unique_norm": "Beijing University of Posts and Telecommunications;Meituan Group", "aff_unique_dep": ";", "aff_unique_url": "http://www.bupt.edu.cn/;https://www.meituan.com", "aff_unique_abbr": "BUPT;Meituan", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Beijing;", "aff_country_unique_index": "0;0;0;0;0;0", 
"aff_country_unique": "China" }, { "id": "kda8szucLZ", "title": "Continual Dialogue State Tracking via Example-Guided Question Answering", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Dialogue systems are frequently updated to accommodate new services, but naively updating them by continually training with data for new services in diminishing performance on previously learnt services. Motivated by the insight that dialogue state tracking (DST), a crucial component of dialogue systems that estimates the user's goal as a conversation proceeds, is a simple natural language understanding task, we propose reformulating it as a bundle of granular example-guided question answering tasks to minimize the task shift between services and thus benefit continual learning. Our approach alleviates service-specific memorization and teaches a model to contextualize the given question and example to extract the necessary information from the conversation. We find that a model with just 60M parameters can achieve a significant boost by learning to learn from in-context examples retrieved by a retriever trained to identify turns with similar dialogue state changes. Combining our method with dialogue-level memory replay, our approach attains state of the art performance on DST continual learning metrics without relying on any complex regularization or parameter expansion methods.", "keywords": "dialogue state tracking;dialogue;natural language processing;continual learning", "primary_area": "", "supplementary_material": "", "author": "Hyundong Justin Cho;Andrea Madotto;Zhaojiang Lin;Khyathi Chandu;Satwik Kottur;Jing Xu;Jonathan May;Chinnadhurai Sankar", "authorids": "~Hyundong_Justin_Cho1;~Andrea_Madotto1;~Zhaojiang_Lin1;~Khyathi_Chandu1;~Satwik_Kottur1;~Jing_Xu5;~Jonathan_May1;~Chinnadhurai_Sankar2", "gender": "M;M;M;;M;F;M;M", "homepage": "https://justin-cho.com;http://andreamad8.github.io/;https://zlinao.github.io;;https://satwikkottur.github.io/;;http://jonmay.net;https://chinnadhurai.github.io/", "dblp": "263/6759;174/2905;228/9217;;172/1012;;00/4758;155/0592", "google_scholar": "https://scholar.google.com/citations?hl=en;https://scholar.google.it/citations?user=JBnyLicAAAAJ;https://scholar.google.co.uk/citations?user=cPtgl3wAAAAJ;;iQxXG8kAAAAJ;https://scholar.google.com/citations?hl=en;tmK5EPEAAAAJ;KynAS2gAAAAJ", "or_profile": "~Hyundong_Justin_Cho1;~Andrea_Madotto1;~Zhaojiang_Lin1;~Khyathi_Chandu1;~Satwik_Kottur1;~Jing_Xu5;~Jonathan_May1;~Chinnadhurai_Sankar2", "aff": "USC/ISI;FAIR;Meta;;Meta Facebook;FAIR;USC/ISI;Slicex AI", "aff_domain": "isi.edu;meta.com;meta.com;;facebook.com;meta.com;isi.edu;slicex.ai", "position": "PhD student;Researcher;Researcher;;Research Scientist;Researcher;Research Scientist;Principal Researcher", "bibtex": "@inproceedings{\ncho2023continual,\ntitle={Continual Dialogue State Tracking via Example-Guided Question Answering},\nauthor={Hyundong Justin Cho and Andrea Madotto and Zhaojiang Lin and Khyathi Chandu and Satwik Kottur and Jing Xu and Jonathan May and Chinnadhurai Sankar},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=kda8szucLZ}\n}", "github": "", "project": "", "reviewers": "GgpV;88YD;6XWq", "site": "https://openreview.net/forum?id=kda8szucLZ", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;3;3", "excitement": "3;4;4", "reproducibility": "3;3;4", "correctness": "3;3;3", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 
3.6666666666666665, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 8, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;0000-0001-8289-1852;0000-0002-5284-477X;", "linkedin": ";;;;;jing-xu-818022a1;jonmayjonmay/;chinnadhuraisankar/", "aff_unique_index": "0;1;1;1;1;0;2", "aff_unique_norm": "University of Southern California;Meta;Slicex AI", "aff_unique_dep": ";Facebook AI Research;", "aff_unique_url": "https://isi.usc.edu;https://research.facebook.com;", "aff_unique_abbr": "USC;FAIR;", "aff_campus_unique_index": "0;0", "aff_campus_unique": "ISI;", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States;" }, { "id": "kdjSXbypKX", "title": "TRAVEL: Tag-Aware Conversational FAQ Retrieval via Reinforcement Learning", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Efficiently retrieving FAQ questions that match users' intent is essential for online customer service. \nExisting methods aim to fully utilize the dynamic conversation context to enhance the semantic association between the user query and FAQ questions.\nHowever, the conversation context contains noise, e.g., users may click questions they don't like, leading to inaccurate semantics modeling.\nTo tackle this, we introduce tags of FAQ questions, which can help us eliminate irrelevant information. \nWe later integrate them into a reinforcement learning framework and minimize the negative impact of irrelevant information in the dynamic conversation context. \nWe experimentally demonstrate our efficiency and effectiveness on conversational FAQ retrieval compared to other baselines.", "keywords": "FAQ retrieval;Conversational FAQ Retrieval;Dialogue System;Human-machine interaction", "primary_area": "", "supplementary_material": "", "author": "Yue Chen;Dingnan Jin;Chen Huang;Jia Liu;Wenqiang Lei", "authorids": "~Yue_Chen10;~Dingnan_Jin1;~Chen_Huang7;~Jia_Liu4;~Wenqiang_Lei1", "gender": "M;M;;M;M", "homepage": "https://github.com/SCU-ChenYue;https://github.com/ijinmao;;;https://sites.google.com/view/wenqianghome/home", "dblp": ";350/0152;;;167/9604", "google_scholar": ";;;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com.hk/citations?user=qexdxuEAAAAJ", "or_profile": "~Yue_Chen10;~Dingnan_Jin1;~Chen_Huang7;~Jia_Liu4;~Wenqiang_Lei1", "aff": "Sichuan University;Alibaba Group;;Ant Group;Sichuan University", "aff_domain": "scu.edu.cn;antgroup.com;;antgroup.com;scu.edu.cn", "position": "MS student;Researcher;;Principal Researcher;Full Professor", "bibtex": "@inproceedings{\nchen2023travel,\ntitle={{TRAVEL}: Tag-Aware Conversational {FAQ} Retrieval via Reinforcement Learning},\nauthor={Yue Chen and Dingnan Jin and Chen Huang and Jia Liu and Wenqiang Lei},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=kdjSXbypKX}\n}", "github": "", "project": "", "reviewers": "LGK8;TqE8;M6qM", "site": "https://openreview.net/forum?id=kdjSXbypKX", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;4;3", "excitement": "4;3;3", "reproducibility": "3;2;4", "correctness": "3;3;3", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.0, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;", "linkedin": ";;;;", "aff_unique_index": "0;1;2;0", "aff_unique_norm": "Sichuan 
University;Alibaba Group;Ant Group", "aff_unique_dep": ";;", "aff_unique_url": "https://www.scu.edu.cn;https://www.alibaba.com;https://www.antgroup.com", "aff_unique_abbr": "SCU;Alibaba;Ant Group", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "kgxtMJHe7w", "title": "Selective Labeling: How to Radically Lower Data-Labeling Costs for Document Extraction Models", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Building automatic extraction models for visually rich documents like invoices, receipts, bills, tax forms, etc. has received significant attention lately. A key bottleneck in developing extraction models for new document types is the cost of acquiring the several thousand high-quality labeled documents that are needed to train a model with acceptable accuracy. In this paper, we propose selective labeling as a solution to this problem. The key insight is to simplify the labeling task to provide \u201cyes/no\u201d labels for candidate extractions predicted by a model trained on partially labeled documents. We combine this with a custom active learning strategy to find the predictions that the model is most uncertain about. We show through experiments on document types drawn from 3 different domains that selective labeling can reduce the cost of acquiring labeled data by 10\u00d7 with a negligible loss in accuracy.", "keywords": "information extraction;selective labeling;data efficiency;annotation efficiency", "primary_area": "", "supplementary_material": "", "author": "Yichao Zhou;James Bradley Wendt;Navneet Potti;Jing Xie;Sandeep Tata", "authorids": "~Yichao_Zhou2;~James_Bradley_Wendt1;~Navneet_Potti1;~Jing_Xie2;~Sandeep_Tata1", "gender": "M;M;;F;M", "homepage": "https://yz-joey.github.io/;https://jameswendt.com/;;;https://sandeeptata.blogspot.com/", "dblp": "146/9862-1;79/11278;;56/2373;20/1055", "google_scholar": "jneypZ8AAAAJ;7CotKHgAAAAJ;;Q6SGkZAAAAAJ;K5VpjOsAAAAJ", "or_profile": "~Yichao_Zhou2;~James_Bradley_Wendt1;~Navneet_Potti1;~Jing_Xie2;~Sandeep_Tata1", "aff": "Google;Research, Google;;Google;Google", "aff_domain": "google.com;research.google.com;;google.com;google.com", "position": "Researcher;Researcher;;Researcher;Researcher", "bibtex": "@inproceedings{\nzhou2023selective,\ntitle={Selective Labeling: How to Radically Lower Data-Labeling Costs for Document Extraction Models},\nauthor={Yichao Zhou and James Bradley Wendt and Navneet Potti and Jing Xie and Sandeep Tata},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=kgxtMJHe7w}\n}", "github": "", "project": "", "reviewers": "5HoU;b6MD;Gbq2", "site": "https://openreview.net/forum?id=kgxtMJHe7w", "pdf_size": 0, "rating": "5;5;5", "confidence": "2;2;4", "excitement": "2;3;4", "reproducibility": "1;1;2", "correctness": "3;4;4", "rating_avg": 5.0, "confidence_avg": 2.6666666666666665, "excitement_avg": 3.0, "reproducibility_avg": 1.3333333333333333, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0009-0003-8632-446X;;;;", "linkedin": ";jameswendt;;;", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google", "aff_unique_url": "https://www.google.com", "aff_unique_abbr": "Google", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Mountain View", "aff_country_unique_index": "0;0;0;0", 
"aff_country_unique": "United States" }, { "id": "kj4MRgh2K5", "title": "Transparency at the Source: Evaluating and Interpreting Language Models With Access to the True Distribution", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "We present a setup for training, evaluating and interpreting neural language models, that uses artificial, language-like data. The data is generated using a massive probabilistic grammar (based on state-split PCFGs), that is itself derived from a large natural language corpus, but also provides us complete control over the generative process. We describe and release both grammar and corpus, and test for the naturalness of our generated data. This approach allows us define closed-form expressions to efficiently compute exact lower bounds on obtainable perplexity using both causal and masked language modelling. Our results show striking differences between neural language modelling architectures and training objectives in how closely they allow approximating the lower bound on perplexity. Our approach also allows us to directly compare learned representations to symbolic rules in the underlying source. We experiment with various techniques for interpreting model behaviour and learning dynamics. With access to the underlying true source, our results show striking differences and outcomes in learning dynamics between different classes of words.", "keywords": "language models;perplexity;interpretability;PCFG;learning dynamics;synthetic data", "primary_area": "", "supplementary_material": "", "author": "Jaap Jumelet;Willem Zuidema", "authorids": "~Jaap_Jumelet1;~Willem_Zuidema1", "gender": "M;M", "homepage": "https://jumelet.ai/;https://staff.fnwi.uva.nl/w.zuidema/", "dblp": "225/7711;67/1016", "google_scholar": "i2wNV20AAAAJ;MBkG_FYAAAAJ", "or_profile": "~Jaap_Jumelet1;~Willem_Zuidema1", "aff": "University of Amsterdam;University of Amsterdam", "aff_domain": "uva.nl;uva.nl", "position": "PhD student;Associate Professor", "bibtex": "@inproceedings{\njumelet2023transparency,\ntitle={Transparency at the Source: Evaluating and Interpreting Language Models With Access to the True Distribution},\nauthor={Jaap Jumelet and Willem Zuidema},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=kj4MRgh2K5}\n}", "github": "", "project": "", "reviewers": "ZP1G;45J7;ybVx", "site": "https://openreview.net/forum?id=kj4MRgh2K5", "pdf_size": 0, "rating": "2;2;2", "confidence": "4;3;3", "excitement": "3;3;4", "reproducibility": "4;4;4", "correctness": "2;4;4", "rating_avg": 2.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-2362-5447", "linkedin": ";", "aff_unique_index": "0;0", "aff_unique_norm": "University of Amsterdam", "aff_unique_dep": "", "aff_unique_url": "https://www.uva.nl", "aff_unique_abbr": "UvA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Netherlands" }, { "id": "kp1U6wBPXq", "title": "Adapting Language Models to Compress Contexts", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Transformer-based language models (LMs) are powerful and widely-applicable tools, but their usefulness is constrained by a finite context window and the expensive computational cost of 
processing long text documents. We propose to adapt pre-trained LMs into AutoCompressors. These language models are capable of compressing long contexts into summary vectors, which are then accessible to the model as soft prompts. Summary vectors are trained with an unsupervised objective, whereby long documents are processed in segments, and summary vectors from all previous segments are used in language modeling. We fine-tune OPT and Llama-2 models on sequences of up to 30,720 tokens and show that AutoCompressors can utilize long contexts to improve perplexity. We evaluate AutoCompressors on in-context learning by compressing task demonstrations and find that summary vectors are good substitutes for plain-text demonstrations, increasing accuracy while reducing inference costs. Finally, we explore the benefits of pre-computing summary vectors for large corpora by applying summary vectors to retrieval-augmented language modeling and a passage re-ranking task.\nOverall, AutoCompressors emerge as a simple and inexpensive solution to extend the context window of LMs while speeding up inference over long contexts.", "keywords": "language models;transformers;long-range language modeling;retrieval-augmented language modeling;in-context learning", "primary_area": "", "supplementary_material": "", "author": "Alexis Chevalier;Alexander Wettig;Anirudh Ajith;Danqi Chen", "authorids": "~Alexis_Chevalier1;~Alexander_Wettig1;~Anirudh_Ajith1;~Danqi_Chen1", "gender": "Not Specified;;M;F", "homepage": "https://www.ias.edu/scholars/alexis-chevalier;https://www.cs.princeton.edu/~awettig/;https://anirudhajith.github.io;https://www.cs.princeton.edu/~danqic/", "dblp": ";302/0235;348/5792;87/7949", "google_scholar": ";N_jSE08AAAAJ;KarsBWAAAAAJ;sVR8ktkAAAAJ", "or_profile": "~Alexis_Chevalier1;~Alexander_Wettig1;~Anirudh_Ajith1;~Danqi_Chen1", "aff": "Institute for Advanced Study, Princeton;Princeton University;Princeton University;Princeton University", "aff_domain": "ias.edu;princeton.edu;princeton.edu;cs.princeton.edu", "position": "Postdoc;PhD student;MS student;Assistant Professor", "bibtex": "@inproceedings{\nchevalier2023adapting,\ntitle={Adapting Language Models to Compress Contexts},\nauthor={Alexis Chevalier and Alexander Wettig and Anirudh Ajith and Danqi Chen},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=kp1U6wBPXq}\n}", "github": "", "project": "", "reviewers": "PB37;9A3j;Db2M", "site": "https://openreview.net/forum?id=kp1U6wBPXq", "pdf_size": 0, "rating": "5;5;5", "confidence": "3;3;4", "excitement": "3;4;4", "reproducibility": "4;3;5", "correctness": "4;4;4", "rating_avg": 5.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.0, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;", "linkedin": "alexchvl;alexander-wettig/;anirudhajith/;", "aff_unique_index": "0;1;1;1", "aff_unique_norm": "Institute for Advanced Study;Princeton University", "aff_unique_dep": ";", "aff_unique_url": "https://www.ias.edu;https://www.princeton.edu", "aff_unique_abbr": "IAS;Princeton", "aff_campus_unique_index": "0", "aff_campus_unique": "Princeton;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "kqm0SOisFq", "title": "Information Extraction from Legal Wills: How Well Does GPT-4 Do?", "track": "main", "status": "Short Findings", "tldr": "", "abstract": 
"This work presents a manually annotated dataset for Information Extraction (IE) from legal wills, and relevant in-context learning experiments on the dataset. The dataset consists of entities, binary relations between the entities (e.g., relations between testator and beneficiary), and n-ary events (e.g., bequest) extracted from 45 legal wills from two US states. This dataset can serve as a foundation for downstream tasks in the legal domain. Another use case of this dataset is evaluating the performance of large language models (LLMs) on this IE task. We evaluated GPT-4 with our dataset to investigate its ability to extract information from legal wills. Our evaluation result demonstrates that the model is capable of handling the task reasonably well. When given instructions and examples as a prompt, GPT-4 shows decent performance for both entity extraction and relation extraction tasks. Nevertheless, the evaluation result also reveals that the model is not perfect. We observed inconsistent outputs (given a prompt) as well as prompt over-generalization.", "keywords": "Information Extraction;Legal Natural Language Processing", "primary_area": "", "supplementary_material": "", "author": "Alice Saebom Kwak;Cheonkam Jeong;Gaetano Vincent Forte;Derek Bambauer;Clayton T Morrison;Mihai Surdeanu", "authorids": "~Alice_Saebom_Kwak1;~Cheonkam_Jeong1;~Gaetano_Vincent_Forte1;~Derek_Bambauer1;~Clayton_T_Morrison1;~Mihai_Surdeanu1", "gender": "F;F;M;M;;M", "homepage": "https://linguistics.arizona.edu/people/alice-kwak;https://cheonkamjeong.blogspot.com/;;https://law.arizona.edu/derek-bambauer;http://surdeanu.info/mihai/;https://ml4ai.github.io/", "dblp": "311/4160;;;;18/3479;", "google_scholar": "https://scholar.google.com/citations?hl=en;BwJokOEAAAAJ;;;https://scholar.google.com/citations?hl=en;", "or_profile": "~Alice_Saebom_Kwak1;~Cheonkam_Jeong1;~Gaetano_Vincent_Forte1;~Derek_Bambauer1;~Mihai_Surdeanu1;~Clayton_Morrison1", "aff": "University of Arizona;University of Arizona;University of Arizona;University of Arizona;University of Arizona;University of Arizona", "aff_domain": "arizona.edu;arizona.edu;arizona.edu;arizona.edu;arizona.edu;arizona.edu", "position": "PhD student;PhD student;PhD student;Full Professor;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nkwak2023information,\ntitle={Information Extraction from Legal Wills: How Well Does {GPT}-4 Do?},\nauthor={Alice Saebom Kwak and Cheonkam Jeong and Gaetano Vincent Forte and Derek Bambauer and Clayton T Morrison and Mihai Surdeanu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=kqm0SOisFq}\n}", "github": "", "project": "", "reviewers": "ZHWR;shKx;Mcw9", "site": "https://openreview.net/forum?id=kqm0SOisFq", "pdf_size": 0, "rating": "3;3;3", "confidence": "5;3;4", "excitement": "3;2;2", "reproducibility": "4;4;5", "correctness": "3;3;2", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 2.3333333333333335, "reproducibility_avg": 4.333333333333333, "correctness_avg": 2.6666666666666665, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;0000-0002-3606-0078", "linkedin": "alice-s-kwak/;cheonkamjeong/;gaetano-forte-96b626148/;;;", "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "University of Arizona", "aff_unique_dep": "", "aff_unique_url": "https://www.arizona.edu", "aff_unique_abbr": "UA", "aff_campus_unique_index": "", "aff_campus_unique": "", 
"aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "kspXkK9PtA", "title": "Enhancing Task-oriented Dialogue Systems with Generative Post-processing Networks", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Recently, post-processing networks (PPNs), which modify the outputs of arbitrary modules including non-differentiable ones in task-oriented dialogue systems, have been proposed. PPNs have successfully improved the dialogue performance by post-processing natural language understanding (NLU), dialogue state tracking (DST), and dialogue policy (Policy) modules with a classification-based approach. However, they cannot be applied to natural language generation (NLG) modules because the post-processing of the utterance output by the NLG module requires a generative approach. In this study, we propose a new post-processing component for NLG, generative post-processing networks (GenPPNs). For optimizing GenPPNs via reinforcement learning, the reward function incorporates dialogue act contribution, a new measure to evaluate the contribution of GenPPN-generated utterances with regard to task completion in dialogue. Through simulation and human evaluation experiments based on the MultiWOZ dataset, we confirmed that GenPPNs improve the task completion performance of task-oriented dialogue systems.", "keywords": "Task-oriented Dialogue System;Reinforcement Learning;Natural Language Generation", "primary_area": "", "supplementary_material": "", "author": "Atsumoto Ohashi;Ryuichiro Higashinaka", "authorids": "~Atsumoto_Ohashi1;~Ryuichiro_Higashinaka1", "gender": "M;M", "homepage": "https://ohashi56225.github.io/;https://www.ds.is.i.nagoya-u.ac.jp/", "dblp": ";35/4482", "google_scholar": "cl4T6vIAAAAJ;ycBiJn8AAAAJ", "or_profile": "~Atsumoto_Ohashi1;~Ryuichiro_Higashinaka1", "aff": "Nagoya University;NTT", "aff_domain": "nagoya-u.ac.jp;ntt.co.jp", "position": "MS student;Researcher", "bibtex": "@inproceedings{\nohashi2023enhancing,\ntitle={Enhancing Task-oriented Dialogue Systems with Generative Post-processing Networks},\nauthor={Atsumoto Ohashi and Ryuichiro Higashinaka},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=kspXkK9PtA}\n}", "github": "", "project": "", "reviewers": "zF36;SfSx;LAnm;Bdnj", "site": "https://openreview.net/forum?id=kspXkK9PtA", "pdf_size": 0, "rating": "4;4;4;4", "confidence": "5;4;4;3", "excitement": "3;3;3;3", "reproducibility": "4;4;4;4", "correctness": "4;4;4;4", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.0, "reproducibility_avg": 4.0, "correctness_avg": 4.0, "replies_avg": 12, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";", "linkedin": ";", "aff_unique_index": "0;1", "aff_unique_norm": "Nagoya University;NTT Corporation", "aff_unique_dep": ";", "aff_unique_url": "https://www.nagoya-u.ac.jp;https://www.ntt.co.jp", "aff_unique_abbr": "Nagoya U;NTT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Japan" }, { "id": "ktzudN7JmJ", "title": "ROBBIE: Robust Bias Evaluation of Large Generative Language Models", "track": "main", "status": "Long Main", "tldr": "", "abstract": "As generative large language models (LLMs) grow more performant and prevalent, we must develop comprehensive enough tools to measure and improve their fairness. 
Different prompt-based datasets can be used to measure social bias across multiple text domains and demographic axes, meaning that testing LLMs on more datasets can potentially help us characterize their biases more fully, and better ensure equal and equitable treatment of marginalized demographic groups. In this work, our focus is two-fold:\n\n(1) Benchmarking: a comparison of 6 different prompt-based bias and toxicity metrics across 12 demographic axes and 5 families of generative LLMs. Out of those 6 metrics, AdvPromptSet and HolisticBiasR are novel datasets proposed in the paper. The comparison of those benchmarks gives us insights about the bias and toxicity of the compared models. Therefore, we explore the frequency of demographic terms in common LLM pre-training corpora and how this may relate to model biases.\n\n(2) Mitigation: we conduct a comprehensive study of how well 3 bias/toxicity mitigation techniques perform across our suite of measurements. ROBBIE aims to provide insights for practitioners while deploying a model, emphasizing the need to not only measure potential harms, but also understand how they arise by characterizing the data, mitigate harms once found, and balance any trade-offs. We open-source our analysis code in hopes of encouraging broader measurements of bias in future LLMs.", "keywords": "bias;fairness;toxicity;natural language generation;large language models;llm;evaluation", "primary_area": "", "supplementary_material": "", "author": "David Esiobu;Xiaoqing Tan;Saghar Hosseini;Megan Ung;Yuchen Zhang;Jude Fernandes;Jane Dwivedi-Yu;Eleonora Presani;Adina Williams;Eric Michael Smith", "authorids": "~David_Esiobu1;~Xiaoqing_Tan1;~Saghar_Hosseini1;~Megan_Ung1;~Yuchen_Zhang7;~Jude_Fernandes1;~Jane_Dwivedi-Yu1;~Eleonora_Presani1;~Adina_Williams1;~Eric_Michael_Smith1", "gender": ";F;F;;F;Non-Binary;F;Non-Binary;F;Non-Binary", "homepage": "https://github.com/davides;http://ellenxtan.github.io/;https://saghar-hosseini.com/;;;;http://www.adinawilliams.com;;https://janedwivedi.github.io/;", "dblp": ";;125/5437;;;;199/2104;;215/3352;", "google_scholar": ";_zvwtKAAAAAJ;XhTT61UAAAAJ;6_OfaGgAAAAJ;;;MUtbKt0AAAAJ;uOK8DfQAAAAJ;ev8Ilx0AAAAJ;PI_dKeAAAAAJ", "or_profile": "~David_Esiobu1;~Xiaoqing_Tan1;~Saghar_Hosseini1;~Megan_Ung1;~Yuchen_Zhang7;~Eleonora_Presani1;~Adina_Williams1;~Eric_Michael_Smith1;~Jane_Yu1;~Jude_Roque_Fernandes1", "aff": "Meta Facebook;Meta AI;Microsoft;Facebook AI Research;Meta Facebook;Meta Facebook;FAIR (Meta Platforms Inc.);Meta AI;Meta AI ;Fundamental AI Research", "aff_domain": "facebook.com;meta.com;microsoft.com;facebook.com;meta.com;meta.com;facebook.com;meta.com;meta.com;meta.com", "position": "Researcher;Researcher;Senior Researcher;Researcher;Researcher;Researcher;Research Scientist;Researcher;Researcher;Researcher", "bibtex": "@inproceedings{\nesiobu2023robbie,\ntitle={{ROBBIE}: Robust Bias Evaluation of Large Generative Language Models},\nauthor={David Esiobu and Xiaoqing Tan and Saghar Hosseini and Megan Ung and Yuchen Zhang and Jude Fernandes and Jane Dwivedi-Yu and Eleonora Presani and Adina Williams and Eric Michael Smith},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=ktzudN7JmJ}\n}", "github": "", "project": "", "reviewers": "4RU4;n5Eu;SybY", "site": "https://openreview.net/forum?id=ktzudN7JmJ", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;3;3", "excitement": "4;3;3", "reproducibility": "3;1;3", "correctness": "4;4;4", "rating_avg": 4.0, 
"confidence_avg": 3.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 2.3333333333333335, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 10, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;0000-0001-6254-9603;0000-0001-5281-3343;;;", "linkedin": "davidesio/;xiaoqing-tan/;sagharh/;meganung/;;epresani/;;;janeaisleyyu/;jude-f", "aff_unique_index": "0;0;1;0;0;0;0;0;0;2", "aff_unique_norm": "Meta;Microsoft;Fundamental AI Research", "aff_unique_dep": "Meta Platforms, Inc.;Microsoft Corporation;", "aff_unique_url": "https://meta.com;https://www.microsoft.com;", "aff_unique_abbr": "Meta;Microsoft;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States;" }, { "id": "kuYRp78Qnp", "title": "Non-compositional Expression Generation Based on Curriculum Learning and Continual Learning", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Non-compositional expressions, by virtue of their non-compositionality, are a classic `pain in the neck' for NLP systems. Different from the general language modeling and generation tasks that are primarily compositional, generating non-compositional expressions is more challenging for current neural models, including large pre-trained language models. The main reasons are 1) their non-compositionality, and 2) the limited data resources. Therefore, to make the best use of available data for modeling non-compositionality, we propose a dynamic curriculum learning framework, which learns training examples from easy ones to harder ones thus optimizing the learning step by step but suffers from the forgetting problem. To alleviate the forgetting problem brought by the arrangement of training examples, we also apply a continual learning method into our curriculum learning framework. Our proposed method combined curriculum and continual learning, to gradually improve the model's performance on the task of non-compositional expression generation. Experiments on idiomatic expression generation and metaphor generation affirm the effectiveness of our proposed curriculum learning framework and the application of continual learning. 
Our codes are available at https://github.com/zhjjn/CL2Gen.git.", "keywords": "Non-compositional expression;Curriculum learning;continual learning", "primary_area": "", "supplementary_material": "", "author": "Jianing Zhou;Ziheng Zeng;Hongyu Gong;Suma Bhat", "authorids": "~Jianing_Zhou1;~Ziheng_Zeng1;~Hongyu_Gong1;~Suma_Bhat1", "gender": "M;M;F;", "homepage": "https://www.zhjjn.com/;;https://hongyugong.github.io/;", "dblp": "159/6589;;163/7318;66/9013", "google_scholar": "5LrgBS8AAAAJ;;Jam1IpgAAAAJ;https://scholar.google.com/citations?hl=en", "or_profile": "~Jianing_Zhou1;~Ziheng_Zeng1;~Hongyu_Gong1;~Suma_Bhat1", "aff": "University of Illinois, Urbana Champaign;University of Illinois, Urbana Champaign;FAIR at Meta;University of Illinois, Urbana Champaign", "aff_domain": "illinois.edu;illinois.edu;meta.com;illinois.edu", "position": "PhD student;PhD student;Researcher;Assistant Professor", "bibtex": "@inproceedings{\nzhou2023noncompositional,\ntitle={Non-compositional Expression Generation Based on Curriculum Learning and Continual Learning},\nauthor={Jianing Zhou and Ziheng Zeng and Hongyu Gong and Suma Bhat},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=kuYRp78Qnp}\n}", "github": "", "project": "", "reviewers": "ZgeL;vrK3;gkQD", "site": "https://openreview.net/forum?id=kuYRp78Qnp", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;4", "excitement": "3;3;4", "reproducibility": "3;2;4", "correctness": "3;3;3", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.0, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;", "linkedin": ";;;", "aff_unique_index": "0;0;1;0", "aff_unique_norm": "University of Illinois Urbana-Champaign;Meta", "aff_unique_dep": ";AI Research", "aff_unique_url": "https://illinois.edu;https://ai.facebook.com", "aff_unique_abbr": "UIUC;FAIR", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Urbana-Champaign;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "kuwz9k061u", "title": "Measuring the Knowledge Acquisition-Utilization Gap in Pretrained Language Models", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "While pre-trained language models (PLMs) have shown evidence of acquiring vast amounts of knowledge, it remains unclear how much of this parametric knowledge is actually usable in performing downstream tasks. We propose a systematic framework to measure parametric knowledge utilization in PLMs. Our framework first extracts knowledge from a PLM's parameters and subsequently constructs a downstream task around this extracted knowledge. Performance on this task thus depends exclusively on utilizing the model's possessed knowledge, avoiding confounding factors like insufficient signal. As an instantiation, we study factual knowledge of PLMs and measure utilization across 125M to 13B parameter PLMs. We observe that: (1) PLMs exhibit two gaps - in acquired vs. utilized knowledge, (2) they show limited robustness in utilizing knowledge under distribution shifts, and (3) larger models close the acquired knowledge gap but the utilized knowledge gap remains. 
Overall, our study provides insights into PLMs' capabilities beyond their acquired knowledge.", "keywords": "Knowledge Acquisition;Knowledge Utilization;Pretrained Language Models", "primary_area": "", "supplementary_material": "", "author": "Amirhossein Kazemnejad;Mehdi Rezagholizadeh;Prasanna Parthasarathi;Sarath Chandar", "authorids": "~Amirhossein_Kazemnejad1;~Mehdi_Rezagholizadeh1;~Prasanna_Parthasarathi2;~Sarath_Chandar1", "gender": ";M;M;M", "homepage": ";;https://www.cs.mcgill.ca/~pparth2/;http://sarathchandar.in/", "dblp": ";;211/7503;45/8542", "google_scholar": ";MvXlF6kAAAAJ;https://scholar.google.co.in/citations?hl=en;https://scholar.google.co.in/citations?user=yxWtZLAAAAAJ", "or_profile": "~Amirhossein_Kazemnejad1;~Mehdi_Rezagholizadeh1;~Prasanna_Parthasarathi2;~Sarath_Chandar1", "aff": ";Huawei Technologies Ltd.;Huawei Technologies Ltd.;\u00c9cole Polytechnique de Montr\u00e9al", "aff_domain": ";huawei.com;huawei.com;polymtl.ca", "position": ";Principal Researcher;Researcher;Assistant Professor", "bibtex": "@inproceedings{\nkazemnejad2023measuring,\ntitle={Measuring the Knowledge Acquisition-Utilization Gap in Pretrained Language Models},\nauthor={Amirhossein Kazemnejad and Mehdi Rezagholizadeh and Prasanna Parthasarathi and Sarath Chandar},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=kuwz9k061u}\n}", "github": "", "project": "", "reviewers": "GyXC;twpn;STEM", "site": "https://openreview.net/forum?id=kuwz9k061u", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;3;3", "excitement": "4;3;3", "reproducibility": "4;4;5", "correctness": "4;3;3", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.333333333333333, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;", "linkedin": ";;prasanna-parthasarathi/;", "aff_unique_index": "0;0;1", "aff_unique_norm": "Huawei;\u00c9cole Polytechnique de Montr\u00e9al", "aff_unique_dep": "Huawei Technologies;", "aff_unique_url": "https://www.huawei.com;https://www.polymtl.ca", "aff_unique_abbr": "Huawei;Polytechnique Montr\u00e9al", "aff_campus_unique_index": "1", "aff_campus_unique": ";Montr\u00e9al", "aff_country_unique_index": "0;0;1", "aff_country_unique": "China;Canada" }, { "id": "kyHwalUpPu", "title": "Empowering Psychotherapy with Large Language Models: Cognitive Distortion Detection through Diagnosis of Thought Prompting", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "Mental illness remains one of the most critical public health issues of our time, due to the severe scarcity and accessibility limit of professionals. Psychotherapy requires high-level expertise to conduct deep, complex reasoning and analysis on the cognition modeling of the patients. In the era of Large Language Models, we believe it is the right time to develop AI assistance for computational psychotherapy. We study the task of cognitive distortion detection and propose the Diagnosis of Thought (DoT) prompting. DoT performs diagnosis on the patient's speech via three stages: subjectivity assessment to separate the facts and the thoughts; contrastive reasoning to elicit the reasoning processes supporting and contradicting the thoughts; and schema analysis to summarize the cognition schemas. 
\nThe generated diagnosis rationales through the three stages are essential for assisting the professionals. Experiments demonstrate that DoT obtains significant improvements over ChatGPT for cognitive distortion detection, while generating high-quality rationales approved by human experts.", "keywords": "Psychotherapy;Cognitive distortion;Cognitive behavior therapy;Large language models", "primary_area": "", "supplementary_material": "", "author": "Zhiyu Chen;Yujie Lu;William Yang Wang", "authorids": "~Zhiyu_Chen1;~Yujie_Lu1;~William_Yang_Wang2", "gender": "F;;M", "homepage": "https://czyssrs.github.io/;https://yujielu10.github.io/;https://www.cs.ucsb.edu/~william/", "dblp": "71/1661-2.html;;08/9282", "google_scholar": "Wusd9LgAAAAJ;pcmr6GMAAAAJ;gf8Ms_8AAAAJ", "or_profile": "~Zhiyu_Chen1;~Yujie_Lu1;~William_Wang1", "aff": "Meta Facebook;UC Santa Barbara;UC Santa Barbara", "aff_domain": "facebook.com;ucsb.edu;ucsb.edu", "position": "Researcher;PhD student;Full Professor", "bibtex": "@inproceedings{\nchen2023empowering,\ntitle={Empowering Psychotherapy with Large Language Models: Cognitive Distortion Detection through Diagnosis of Thought Prompting},\nauthor={Zhiyu Chen and Yujie Lu and William Yang Wang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=kyHwalUpPu}\n}", "github": "", "project": "", "reviewers": "7GRj;sC35;eTZW", "site": "https://openreview.net/forum?id=kyHwalUpPu", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;4;4", "excitement": "2;4;2", "reproducibility": "4;2;3", "correctness": "2;3;2", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 2.6666666666666665, "reproducibility_avg": 3.0, "correctness_avg": 2.3333333333333335, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": "zhiyu-zoey-chen-904805124/;;", "aff_unique_index": "0;1;1", "aff_unique_norm": "Meta;University of California, Santa Barbara", "aff_unique_dep": "Meta Platforms, Inc.;", "aff_unique_url": "https://meta.com;https://www.ucsb.edu", "aff_unique_abbr": "Meta;UCSB", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Santa Barbara", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "l4eviuXtBd", "title": "HadSkip: Homotopic and Adaptive Layer Skipping of Pre-trained Language Models for Efficient Inference", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Pre-trained language models~(LMs) have brought remarkable performance on numerous NLP tasks. However, they require significant resources and entail high computational costs for inference, making them challenging to deploy in real-world and real-time systems. Existing early exiting methods aim to reduce computational complexity by selecting the layer at which to exit, but suffer from the limitation that they have to sequentially traverse through all layers prior to the selected exit layer, which lacks flexibility and degrades their performance. To solve this problem, we propose a \\textbf{h}omotopic and \\textbf{ad}aptive layer \\textbf{skip}ping fine-tuning method named HadSkip. HadSkip adaptively selects the layers to skip based on a predefined budget. Specifically, we introduce a learnable gate before each layer of the LM to determine whether the current layer should be skipped. 
To tackle various challenges in training such as discrete gates and the budget constraint, we propose a fine-grained initialization strategy and homotopic optimization strategy. We conduct extensive experiments on the GLUE benchmark, and experimental results demonstrate the proposed HadSkip outperforms all state-of-the-art baselines significantly.", "keywords": "language model inference;efficiency", "primary_area": "", "supplementary_material": "", "author": "Haoyu Wang;Yaqing Wang;Tianci Liu;Tuo Zhao;Jing Gao", "authorids": "~Haoyu_Wang6;~Yaqing_Wang1;~Tianci_Liu1;~Tuo_Zhao2;~Jing_Gao2", "gender": "M;M;M;F;M", "homepage": "https://sites.google.com/view/haoyuwang/home;https://yaqingwang.github.io/;https://lliutianc.github.io;https://engineering.purdue.edu/~jinggao/;http://www2.isye.gatech.edu/~tzhao80", "dblp": "50/8499-4;147/1393;148/1911-3;67/4834-4;", "google_scholar": "https://scholar.google.com.hk/citations?user=5Lw9_jcAAAAJ;_Rfg2CAAAAAJ;;Ftj1h4cAAAAJ;EJXN6tYAAAAJ", "or_profile": "~Haoyu_Wang6;~Yaqing_Wang1;~Tianci_Liu1;~Jing_Gao2;~Tuo_Zhao1", "aff": "Purdue University;Research, Google;Purdue University;Purdue University;Georgia Institute of Technology", "aff_domain": "purdue.edu;research.google.com;purdue.edu;purdue.edu;gatech.edu", "position": "PhD student;Research Scientist;PhD student;Associate Professor;Associate Professor", "bibtex": "@inproceedings{\nwang2023hadskip,\ntitle={HadSkip: Homotopic and Adaptive Layer Skipping of Pre-trained Language Models for Efficient Inference},\nauthor={Haoyu Wang and Yaqing Wang and Tianci Liu and Tuo Zhao and Jing Gao},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=l4eviuXtBd}\n}", "github": "", "project": "", "reviewers": "6pFJ;ah3X;a8ng", "site": "https://openreview.net/forum?id=l4eviuXtBd", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;3;4", "excitement": "4;4;2", "reproducibility": "4;3;2", "correctness": "4;4;2", "rating_avg": 5.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0001-7485-6213;;;;", "linkedin": ";;;;", "aff_unique_index": "0;1;0;0;2", "aff_unique_norm": "Purdue University;Google;Georgia Institute of Technology", "aff_unique_dep": ";Google Research;", "aff_unique_url": "https://www.purdue.edu;https://research.google;https://www.gatech.edu", "aff_unique_abbr": "Purdue;Google;Georgia Tech", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "lBAc5JgyMI", "title": "Narrative Style and the Spread of Health Misinformation on Twitter", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Using a narrative style is an effective way to communicate health information both on and off social media. Given the amount of misinformation being spread online and its potential negative effects, it is crucial to investigate the interplay between narrative communication style and misinformative health content on user engagement on social media platforms. To explore this in the context of Twitter, we start with previously annotated health misinformation tweets (n \u224815,000) and annotate a subset of the data (n=3,000) for the presence of narrative style. 
We then use these manually assigned labels to train text classifiers, experimenting with supervised fine-tuning and in-context learning for automatic narrative detection. We use our best model to label the remaining portion of the dataset, then statistically analyze the relationship between narrative style, misinformation, and user-level features on engagement, finding that narrative use is connected to increased tweet engagement and can, in some cases, lead to increased engagement with misinformation. Finally, we analyze the general categories of language used in narratives and health misinformation in our dataset.", "keywords": "narrative communication;misinformation;computational social sciences;natural language processing;linguistic analysis;classification", "primary_area": "", "supplementary_material": "", "author": "Achyutarama R Ganti;Eslam Ali Hassan Hussein;Steven R. Wilson;Zexin Ma;Xinyan Zhao", "authorids": "~Achyutarama_R_Ganti1;~Eslam_Ali_Hassan_Hussein1;~Steven_R._Wilson2;~Zexin_Ma1;~Xinyan_Zhao3", "gender": "M;;F;F;", "homepage": "https://achyutganti.com;https://eslam-hussein.me/;;https://evazhaoxy.wixsite.com/zhao;https://steverw.com", "dblp": ";199/6341.html;;;163/6279.html", "google_scholar": "xrxlVNAAAAAJ;https://scholar.google.com/citations?hl=en;MWio1qEAAAAJ;;xO0OWLEAAAAJ", "or_profile": "~Achyutarama_R_Ganti1;~Eslam_Ali_Hassan_Hussein1;~Zexin_Ma1;~Xinyan_Zhao3;~Steven_R_Wilson1", "aff": "Oakland University;Virginia Polytechnic Institute and State University;Oakland University;University of North Carolina at Chapel Hill;Oakland University (Michigan)", "aff_domain": "oakland.edu;vt.edu;oakland.edu;unc.edu;oakland.edu", "position": "PhD student;PhD student;Assistant Professor;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nganti2023narrative,\ntitle={Narrative Style and the Spread of Health Misinformation on Twitter},\nauthor={Achyutarama R Ganti and Eslam Ali Hassan Hussein and Steven R. 
Wilson and Zexin Ma and Xinyan Zhao},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=lBAc5JgyMI}\n}", "github": "", "project": "", "reviewers": "93eC;uo6d;eVvg", "site": "https://openreview.net/forum?id=lBAc5JgyMI", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;3", "excitement": "3;3;4", "reproducibility": "4;2;3", "correctness": "3;3;4", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0009-0004-7076-0459;0000-0003-1669-4839;0000-0002-6203-2740;;0000-0002-2458-0439", "linkedin": "achyut-ganti-5292bb113/;;;;steven-wilson-29a623b5/", "aff_unique_index": "0;1;0;2;0", "aff_unique_norm": "Oakland University;Virginia Tech;University of North Carolina", "aff_unique_dep": ";;", "aff_unique_url": "https://www.oakland.edu;https://www.vt.edu;https://www.unc.edu", "aff_unique_abbr": "OU;VT;UNC", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Chapel Hill;Michigan", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "lC4vFCM2VA", "title": "Towards Zero-shot Relation Extraction in Web Mining: A Multimodal Approach with Relative XML Path", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "The rapid growth of web pages and the increasing complexity of their structure poses a challenge for web mining models. Web mining models are required to understand semi-structured web pages, particularly when little is known about the subject or template of a new page. Current methods migrate language models to web mining by embedding the XML source code into the transformer or encoding the rendered layout with graph neural networks. However, these approaches do not take into account the relationships between text nodes within and across pages. In this paper, we propose a new approach, ReXMiner, for zero-shot relation extraction in web mining. ReXMiner encodes the shortest relative paths in the Document Object Model (DOM) tree of the web page which is a more accurate and efficient signal for key-value pair extraction within a web page. It also incorporates the popularity of each text node by counting the occurrence of the same text node across different web pages. We use contrastive learning to address the issue of sparsity in relation extraction. 
Extensive experiments on public benchmarks show that our method, ReXMiner, outperforms the state-of-the-art baselines in the task of zero-shot relation extraction in web mining.", "keywords": "web mining;xml path;document ai;zero shot;transfer learning", "primary_area": "", "supplementary_material": "", "author": "Zilong Wang;Jingbo Shang", "authorids": "~Zilong_Wang1;~Jingbo_Shang2", "gender": "M;M", "homepage": "https://zilongwang.me;https://shangjingbo1226.github.io/", "dblp": "42/898-2;151/3145.html", "google_scholar": "S_wQccsAAAAJ;0SkFI4MAAAAJ", "or_profile": "~Zilong_Wang1;~Jingbo_Shang2", "aff": "University of California, San Diego;University of California, San Diego", "aff_domain": "ucsd.edu;ucsd.edu", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\nwang2023towards,\ntitle={Towards Zero-shot Relation Extraction in Web Mining: A Multimodal Approach with Relative {XML} Path},\nauthor={Zilong Wang and Jingbo Shang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=lC4vFCM2VA}\n}", "github": "", "project": "", "reviewers": "tb1s;3bEb;KQqa;uVRd", "site": "https://openreview.net/forum?id=lC4vFCM2VA", "pdf_size": 0, "rating": "2;2;2;2", "confidence": "4;3;4;4", "excitement": "3;4;3;3", "reproducibility": "4;4;3;2", "correctness": "3;3;4;4", "rating_avg": 2.0, "confidence_avg": 3.75, "excitement_avg": 3.25, "reproducibility_avg": 3.25, "correctness_avg": 3.5, "replies_avg": 14, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-1614-0943;", "linkedin": ";", "aff_unique_index": "0;0", "aff_unique_norm": "University of California, San Diego", "aff_unique_dep": "", "aff_unique_url": "https://www.ucsd.edu", "aff_unique_abbr": "UCSD", "aff_campus_unique_index": "0;0", "aff_campus_unique": "San Diego", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "lCy3RwscMn", "title": "Deep Natural Language Feature Learning for Interpretable Prediction", "track": "main", "status": "Long Main", "tldr": "", "abstract": "We propose a general method to break down a main complex task into a set of intermediary easier sub-tasks, which are formulated in natural language as binary questions related to the final target task. Our method allows for representing each example by a vector consisting of the answers to these questions. We call this representation Natural Language Learned Features (NLLF). \nNLLF is generated by a small transformer language model (e.g., BERT) that has been trained in a Natural Language Inference (NLI) fashion, using weak labels automatically obtained from a Large Language Model (LLM). We show that the LLM normally struggles for the main task using in-context learning, but can handle these easiest subtasks and produce useful weak labels to train a BERT. \nThe NLI-like training of the BERT allows for tackling zero-shot inference with any binary question, and not necessarily the ones seen during the training.\nWe show that this NLLF vector not only helps to reach better performances by enhancing any classifier, but that it can be used as input of an easy-to-interpret machine learning model like a decision tree. 
This decision tree is interpretable but also reaches high performances, surpassing those of a pre-trained transformer in some cases.\nWe have successfully applied this method to two completely different tasks: detecting incoherence in students' answers to open-ended mathematics exam questions, and screening abstracts for a systematic literature review of scientific papers on climate change and agroecology.", "keywords": "Large Language Model;Reasoning;Explanability;BERT", "primary_area": "", "supplementary_material": "", "author": "Felipe Urrutia;Cristian Buc Calderon;Valentin Barriere", "authorids": "~Felipe_Urrutia2;~Cristian_Buc_Calderon1;~Valentin_Barriere1", "gender": "M;M;", "homepage": "http://www.dim.uchile.cl/~furrutia/;;", "dblp": ";;169/0432", "google_scholar": "Go9aRCIAAAAJ;https://scholar.google.be/citations?hl=en;https://scholar.google.fr/citations?user=5HX-EfcAAAAJ", "or_profile": "~Felipe_Urrutia2;~Cristian_Buc_Calderon1;~Valentin_Barriere1", "aff": "Universidad de Chile;Centro Nacional de Inteligencia Artificial;CENIA", "aff_domain": "uchile.cl;cenia.cl;cenia.cl", "position": "Undergrad student;Researcher;Researcher", "bibtex": "@inproceedings{\nurrutia2023deep,\ntitle={Deep Natural Language Feature Learning for Interpretable Prediction},\nauthor={Felipe Urrutia and Cristian Buc Calderon and Valentin Barriere},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=lCy3RwscMn}\n}", "github": "", "project": "", "reviewers": "kRx7;wign;rrHR", "site": "https://openreview.net/forum?id=lCy3RwscMn", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;3;4", "excitement": "4;4;3", "reproducibility": "3;3;4", "correctness": "3;4;4", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0003-0809-5334;;", "linkedin": ";;", "aff_unique_index": "0;1;2", "aff_unique_norm": "Universidad de Chile;Centro Nacional de Inteligencia Artificial;CENIA", "aff_unique_dep": ";;", "aff_unique_url": "https://www.uchile.cl;;https://www.cenia.cz", "aff_unique_abbr": "UCH;;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2", "aff_country_unique": "Chile;Spain;Czech Republic" }, { "id": "lKPReKSJio", "title": "FREDSum: A Dialogue Summarization Corpus for French Political Debates", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Recent advances in deep learning, and especially the invention of encoder-decoder architectures, have significantly improved the performance of abstractive summarization systems. While the majority of research has focused on written documents, we have observed an increasing interest in the summarization of dialogues and multi-party conversations over the past few years. In this paper, we present a dataset of French political debates for the purpose of enhancing resources for multi-lingual dialogue summarization. Our dataset consists of manually transcribed and annotated political debates, covering a range of topics and perspectives. We highlight the importance of high-quality transcription and annotations for training accurate and effective dialogue summarization models, and emphasize the need for multilingual resources to support dialogue summarization in non-English languages. 
We also provide baseline experiments using state-of-the-art methods, and encourage further research in this area to advance the field of dialogue summarization. Our dataset will be made publicly available for use by the research community, enabling further advances in multilingual dialogue summarization.", "keywords": "Dialogue;Summarization;Corpus;Dataset;Political Debate", "primary_area": "", "supplementary_material": "", "author": "Virgile Rennard;Guokan Shang;Damien Grari;Julie Hunter;Michalis Vazirgiannis", "authorids": "~Virgile_Rennard1;~Guokan_Shang1;~Damien_Grari1;~Julie_Hunter1;~Michalis_Vazirgiannis1", "gender": "M;M;M;F;M", "homepage": ";;;https://www.juliejhunter.com/;", "dblp": ";220/3989;;;v/MVazirgiannis", "google_scholar": "https://scholar.google.com/citations?hl=en;EcBibPkAAAAJ;;https://scholar.google.fr/citations?user=A0t7pxQAAAAJ;https://scholar.google.gr/citations?user=aWGJYcMAAAAJ", "or_profile": "~Virgile_Rennard1;~Guokan_Shang1;~Damien_Grari1;~Julie_Hunter1;~Michalis_Vazirgiannis1", "aff": "\u00c9cole Polytechnique;LINAGORA;;LINAGORA;Ecole Polytechnique, France", "aff_domain": "polytechnique.fr;linagora.com;;linagora.com;polytechnique.fr", "position": "PhD student;Researcher;;Researcher;Full Professor", "bibtex": "@inproceedings{\nrennard2023fredsum,\ntitle={{FREDS}um: A Dialogue Summarization Corpus for French Political Debates},\nauthor={Virgile Rennard and Guokan Shang and Damien Grari and Julie Hunter and Michalis Vazirgiannis},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=lKPReKSJio}\n}", "github": "", "project": "", "reviewers": "jbix;Xx4E;P9qY", "site": "https://openreview.net/forum?id=lKPReKSJio", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;4", "excitement": "3;3;4", "reproducibility": "3;3;1", "correctness": "2;3;4", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 2.3333333333333335, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0000-0001-6874-0003;", "linkedin": ";guokan-shang;damien-grari-939277106?originalSubdomain=fr;;", "aff_unique_index": "0;1;1;0", "aff_unique_norm": "Ecole Polytechnique;LINAGORA", "aff_unique_dep": ";", "aff_unique_url": "https://www.polytechnique.edu;https://www.linagora.com", "aff_unique_abbr": "X;LINAGORA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "France" }, { "id": "lKi1myznJe", "title": "ReasoningLM: Enabling Structural Subgraph Reasoning in Pre-trained Language Models for Question Answering over Knowledge Graph", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Question Answering over Knowledge Graph (KGQA) aims to seek answer entities for the natural language question from a large-scale Knowledge Graph (KG).\nTo better perform reasoning on KG, recent work typically adopts a pre-trained language model (PLM) to model the question, and a graph neural network (GNN) based module to perform multi-hop reasoning on the KG.\nDespite the effectiveness, due to the divergence in model architecture, the PLM and GNN are not closely integrated, limiting the knowledge sharing and fine-grained feature interactions.\nTo solve it, we aim to simplify the above two-module approach, and develop a more capable PLM that can directly support subgraph reasoning for KGQA, namely ReasoningLM.\nIn our approach, we 
propose a subgraph-aware self-attention mechanism to imitate the GNN for performing structured reasoning, and also adopt an adaptation tuning strategy to adapt the model parameters with 20,000 subgraphs with synthesized questions.\nAfter adaptation, the PLM can be parameter-efficient fine-tuned on downstream tasks.\nExperiments show that ReasoningLM surpasses state-of-the-art models by a large margin, even with fewer updated parameters and less training data.\nOur codes and data are publicly available at https://github.com/RUCAIBox/ReasoningLM.", "keywords": "Pre-trained Language Model;Knowledge Graph Question Answering", "primary_area": "", "supplementary_material": "", "author": "Jinhao Jiang;Kun Zhou;Xin Zhao;Yaliang Li;Ji-Rong Wen", "authorids": "~Jinhao_Jiang1;~Kun_Zhou2;~Xin_Zhao10;~Yaliang_Li1;~Ji-Rong_Wen1", "gender": ";M;M;M;M", "homepage": ";https://lancelot39.github.io/;https://gsai.ruc.edu.cn/addons/teacher/index/info.html?user_id=5&ruccode=20140041&ln=cn;https://sites.google.com/site/yaliangli/;https://gsai.ruc.edu.cn/english/jrwen", "dblp": "261/6942;48/3927-2.html;https://dblp.uni-trier.de/pid/52/8700.html;https://dblp.org/pers/hd/l/Li:Yaliang;w/JRWen", "google_scholar": ";bmRJVjwAAAAJ;JNhNacoAAAAJ;CCPBcdYAAAAJ;tbxCHJgAAAAJ", "or_profile": "~Jinhao_Jiang1;~Kun_Zhou2;~Xin_Zhao10;~Yaliang_Li1;~Ji-Rong_Wen1", "aff": "Renmin University of China;Renmin University of China;Renmin University of China;Alibaba Group;Renmin University of China", "aff_domain": "ruc.edu.cn;ruc.edu.cn;ruc.edu.cn;alibaba-inc.com;ruc.edu.cn", "position": "PhD student;PhD student;Full Professor;Staff Engineer;Full Professor", "bibtex": "@inproceedings{\njiang2023reasoninglm,\ntitle={Reasoning{LM}: Enabling Structural Subgraph Reasoning in Pre-trained Language Models for Question Answering over Knowledge Graph},\nauthor={Jinhao Jiang and Kun Zhou and Xin Zhao and Yaliang Li and Ji-Rong Wen},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=lKi1myznJe}\n}", "github": "", "project": "", "reviewers": "3YnV;UNw1;hcTZ", "site": "https://openreview.net/forum?id=lKi1myznJe", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;3;3", "excitement": "4;4;3", "reproducibility": "4;3;3", "correctness": "4;3;3", "rating_avg": 4.0, "confidence_avg": 3.0, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.3333333333333335, "replies_avg": 10, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0002-8333-6196;0000-0002-4204-6096;0000-0002-9777-9676", "linkedin": ";;;;", "aff_unique_index": "0;0;0;1;0", "aff_unique_norm": "Renmin University of China;Alibaba Group", "aff_unique_dep": ";", "aff_unique_url": "http://www.ruc.edu.cn;https://www.alibaba.com", "aff_unique_abbr": "RUC;Alibaba", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "lOPMuJSVz8", "title": "Women Wearing Lipstick: Measuring the Bias Between an Object and Its Related Gender", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "In this paper, we investigate the impact of objects on gender bias in image captioning systems. Our results show that only gender-specific objects have a strong gender bias (e.g., women-lipstick). In addition, we propose a visual semantic-based gender score that measures the degree of bias and can be used as a plug-in for any image captioning system. 
Our experiments demonstrate the utility of the gender score, since we observe that our score can measure the bias relation between a caption and its related gender; therefore, our score can be used as an additional metric to the existing Object Gender Co-Occ approach.", "keywords": "image captioning;visual grounding;gender bias", "primary_area": "", "supplementary_material": "", "author": "Ahmed Sabir;Llu\u00eds Padr\u00f3", "authorids": "~Ahmed_Sabir1;~Llu\u00eds_Padr\u00f31", "gender": ";M", "homepage": "https://kodu.ut.ee/~ahmedabdulmajeed/;http://www.cs.upc.edu/~padro", "dblp": "227/6577;56/5747", "google_scholar": "https://scholar.google.es/citations?user=JJxcxCsAAAAJ;3o_UWq8AAAAJ", "or_profile": "~Ahmed_Sabir1;~Llu\u00eds_Padr\u00f31", "aff": "Universidad Polit\u00e9cnica de Cataluna;Universidad Polit\u00e9cnica de Cataluna", "aff_domain": "upc.edu;upc.edu", "position": "Postdoc;Associate Professor", "bibtex": "@inproceedings{\nsabir2023women,\ntitle={Women Wearing Lipstick: Measuring the Bias Between an Object and Its Related Gender},\nauthor={Ahmed Sabir and Llu{\\'\\i}s Padr{\\'o}},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=lOPMuJSVz8}\n}", "github": "", "project": "", "reviewers": "tUEX;EYYR;vgnb", "site": "https://openreview.net/forum?id=lOPMuJSVz8", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;3;3", "excitement": "3;4;2", "reproducibility": "4;4;3", "correctness": "3;4;3", "rating_avg": 3.0, "confidence_avg": 3.0, "excitement_avg": 3.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 10, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0003-4738-5019", "linkedin": "ahmed-sabir-00a612132/?originalSubdomain=es;", "aff_unique_index": "0;0", "aff_unique_norm": "Universitat Polit\u00e8cnica de Catalunya", "aff_unique_dep": "", "aff_unique_url": "https://www.upc.edu", "aff_unique_abbr": "UPC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Spain" }, { "id": "lReh4LaP8f", "title": "Structural Priming Demonstrates Abstract Grammatical Representations in Multilingual Language Models", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Abstract grammatical knowledge\u2014of parts of speech and grammatical patterns\u2014is key to the capacity for linguistic generalization in humans. But how abstract is grammatical knowledge in large language models? In the human literature, compelling evidence for grammatical abstraction comes from structural priming. A sentence that shares the same grammatical structure as a preceding sentence is processed and produced more readily. Because confounds exist when using stimuli in a single language, evidence of abstraction is even more compelling from crosslingual structural priming, where use of a syntactic structure in one language primes an analogous structure in another language. We measure crosslingual structural priming in large language models, comparing model behavior to human experimental results from eight crosslingual experiments covering six languages, and four monolingual structural priming experiments in three non-English languages. We find evidence for abstract monolingual and crosslingual grammatical representations in the models that function similarly to those found in humans. 
These results demonstrate that grammatical representations in multilingual language models are not only similar across languages, but they can causally influence text produced in different languages.", "keywords": "abstraction;representation;multilingual language models;psychlinguistics;linguistic structure", "primary_area": "", "supplementary_material": "", "author": "James Michaelov;Catherine Arnett;Tyler A. Chang;Ben Bergen", "authorids": "~James_Michaelov1;~Catherine_Arnett1;~Tyler_A._Chang1;~Ben_Bergen1", "gender": "M;F;M;M", "homepage": "https://jmichaelov.com/;https://catherinearnett.github.io/;https://cogsci.ucsd.edu/~bkbergen/;https://tylerachang.github.io/", "dblp": "276/5493;358/8873;12/3783-1.html;265/6105", "google_scholar": "https://scholar.google.co.uk/citations?user=_Urm8X4AAAAJ;gIDJdFAAAAAJ;pJ8u7AQAAAAJ;zkDuqfwAAAAJ", "or_profile": "~James_Michaelov1;~Catherine_Arnett1;~Benjamin_Bergen1;~Tyler_A_Chang1", "aff": "University of California, San Diego;University of California, San Diego;University of California, San Diego;University of California, San Diego", "aff_domain": "ucsd.edu;ucsd.edu;ucsd.edu;ucsd.edu", "position": "PhD student;PhD student;Full Professor;PhD student", "bibtex": "@inproceedings{\nmichaelov2023structural,\ntitle={Structural Priming Demonstrates Abstract Grammatical Representations in Multilingual Language Models},\nauthor={James Michaelov and Catherine Arnett and Tyler A. Chang and Ben Bergen},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=lReh4LaP8f}\n}", "github": "", "project": "", "reviewers": "ACY8;1oDq;HSdV", "site": "https://openreview.net/forum?id=lReh4LaP8f", "pdf_size": 0, "rating": "4;4;4", "confidence": "5;3;4", "excitement": "4;3;3", "reproducibility": "5;5;5", "correctness": "4;3;3", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 5.0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0003-2913-1103;0000-0003-0448-5415;0000-0002-9395-9151;", "linkedin": ";catherine-arnett96/;;tylerachang", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of California, San Diego", "aff_unique_dep": "", "aff_unique_url": "https://www.ucsd.edu", "aff_unique_abbr": "UCSD", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "San Diego", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "lVat423gKI", "title": "Language Representation Projection: Can We Transfer Factual Knowledge across Languages in Multilingual Language Models?", "track": "main", "status": "Short Main", "tldr": "", "abstract": "Multilingual pretrained language models serve as repositories of multilingual factual knowledge. Nevertheless, a substantial performance gap of factual knowledge probing exists between high-resource languages and low-resource languages, suggesting limited implicit factual knowledge transfer across languages in multilingual pretrained language models. This paper investigates the feasibility of explicitly transferring relatively rich factual knowledge from English to non-English languages. To accomplish this, we propose two parameter-free $\\textbf{L}$anguage $\\textbf{R}$epresentation $\\textbf{P}$rojection modules (LRP2). 
The first module converts non-English representations into English-like equivalents, while the second module reverts English-like representations back into representations of the corresponding non-English language. Experimental results on the mLAMA dataset demonstrate that LRP2 significantly improves factual knowledge retrieval accuracy and facilitates knowledge transferability across diverse non-English languages. We further investigate the working mechanism of LRP2 from the perspectives of representation space and cross-lingual knowledge neuron.", "keywords": "multilingual language models;factual knowledge;cross-lingual knowledge transfer", "primary_area": "", "supplementary_material": "", "author": "Shaoyang Xu;Junzhuo Li;Deyi Xiong", "authorids": "~Shaoyang_Xu1;~Junzhuo_Li1;~Deyi_Xiong2", "gender": "M;M;M", "homepage": "https://shaoyangxu.github.io/;https://junzhuoli.github.io/;https://dyxiong.github.io", "dblp": "360/4779;297/9738;55/6548", "google_scholar": "9BomjGkAAAAJ;;QPLO3myO5PkC", "or_profile": "~Shaoyang_Xu1;~Junzhuo_Li1;~Deyi_Xiong2", "aff": "Tianjin University;Tianjin University;Tianjin University", "aff_domain": "tju.edu.cn;tju.edu.cn;tju.edu.cn", "position": "MS student;MS student;Full Professor", "bibtex": "@inproceedings{\nxu2023language,\ntitle={Language Representation Projection: Can We Transfer Factual Knowledge across Languages in Multilingual Language Models?},\nauthor={Shaoyang Xu and Junzhuo Li and Deyi Xiong},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=lVat423gKI}\n}", "github": "", "project": "", "reviewers": "LiBy;fiJ8;iytb", "site": "https://openreview.net/forum?id=lVat423gKI", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;2", "excitement": "3;3;3", "reproducibility": "3;3;3", "correctness": "3;4;4", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.0, "reproducibility_avg": 3.0, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0002-2353-5038", "linkedin": ";;", "aff_unique_index": "0;0;0", "aff_unique_norm": "Tianjin University", "aff_unique_dep": "", "aff_unique_url": "http://www.tju.edu.cn", "aff_unique_abbr": "TJU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "lWlBAJTFOm", "title": "Small Language Models Fine-tuned to Coordinate Larger Language Models improve Complex Reasoning", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Large Language Models (LLMs) prompted to generate chain-of-thought (CoT) exhibit impressive reasoning capabilities. Recent attempts at prompt decomposition toward solving complex, multi-step reasoning problems depend on the ability of the LLM to simultaneously decompose and solve the problem. A significant disadvantage is that foundational LLMs are typically not available for fine-tuning, making adaptation computationally prohibitive. We believe (and demonstrate) that problem decomposition and solution generation are distinct capabilities, better addressed in separate modules, than by one monolithic LLM. We introduce DaSLaM, which uses a decomposition generator to decompose complex problems into subproblems that require fewer reasoning steps. These subproblems are answered by a solver. 
\nWe use a relatively small (13B parameters) LM as the decomposition generator, which we train using policy gradient optimization to interact with a solver LM (regarded as black-box) and guide it through subproblems, thereby rendering our method solver-agnostic. Evaluation on multiple different reasoning datasets reveal that with our method, a 175 billion parameter LM (text-davinci-003) can produce competitive or even better performance, compared to its orders-of-magnitude larger successor, GPT-4. Additionally, we show that DaSLaM is not limited by the solver's capabilities as a function of scale; e.g., solver LMs with diverse sizes give significant performance improvement with our solver-agnostic decomposition technique. Exhaustive ablation studies evince the superiority of our modular finetuning technique over exorbitantly large decomposer LLMs, based on prompting alone.", "keywords": "Language Model Reasoning;Problem decomposition;Multistep reasoning", "primary_area": "", "supplementary_material": "", "author": "Gurusha Juneja;Subhabrata Dutta;Soumen Chakrabarti;Sunny Manchanda;Tanmoy Chakraborty", "authorids": "~Gurusha_Juneja2;~Subhabrata_Dutta1;~Soumen_Chakrabarti1;~Sunny_Manchanda1;~Tanmoy_Chakraborty2", "gender": "M;Not Specified;M;M;F", "homepage": ";https://www.cse.iitb.ac.in/~soumen/;https://sites.google.com/view/sunnymanchanda/;http://tanmoychak.com;https://gurusha01.github.io/", "dblp": "204/6929.html;c/SChakrabarti;308/0870;65/2136-2.html;", "google_scholar": "aoaCs08AAAAJ;https://scholar.google.com.tw/citations?user=LfF2zfQAAAAJ;lAS7WHUAAAAJ;https://scholar.google.co.in/citations?user=C5S9JnIAAAAJ;https://scholar.google.com/citations?view_op=list_works", "or_profile": "~Subhabrata_Dutta1;~Soumen_Chakrabarti1;~Sunny_Manchanda1;~Tanmoy_Chakraborty2;~GURUSHA_JUNEJA1", "aff": "Jadavpur University;Indian Institute of Technology Bombay;DYSL-AI;Indian Institute of Technology, Delhi;", "aff_domain": "jdvu.ac.in;iitb.ac.in;drdo.gov.in;iitd.ac.in;", "position": "PhD student;Professor;Researcher;Associate Professor;", "bibtex": "@inproceedings{\njuneja2023small,\ntitle={Small Language Models Fine-tuned to Coordinate Larger Language Models improve Complex Reasoning},\nauthor={Gurusha Juneja and Subhabrata Dutta and Soumen Chakrabarti and Sunny Manchanda and Tanmoy Chakraborty},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=lWlBAJTFOm}\n}", "github": "", "project": "", "reviewers": "xvjb;fnE5;KUis", "site": "https://openreview.net/forum?id=lWlBAJTFOm", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;5;3", "excitement": "3;4;3", "reproducibility": "3;4;3", "correctness": "4;4;4", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0009-0009-6605-8065;0000-0002-0210-0369;", "linkedin": ";;manchandasunny/;tanmoy-chakraborty-89553324/;gurusha-juneja-7464371a0/", "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Jadavpur University;Indian Institute of Technology Bombay;DYSL-AI;Indian Institute of Technology Delhi", "aff_unique_dep": ";;;", "aff_unique_url": "http://www.jaduniv.edu.in;https://www.iitb.ac.in;;https://www.iitdelhi.ac.in", "aff_unique_abbr": "JU;IIT Bombay;;IIT Delhi", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Bombay;Delhi", "aff_country_unique_index": "0;0;0", 
"aff_country_unique": "India;" }, { "id": "lbtVebcVny", "title": "Revisiting De-Identification of Electronic Medical Records: Evaluation of Within- and Cross-Hospital Generalization", "track": "main", "status": "Short Main", "tldr": "", "abstract": "The de-identification task aims to detect and remove the protected health information from electronic medical records (EMRs). Previous studies generally focus on the within-hospital setting and achieve great successes, while the cross-hospital setting has been overlooked. This study introduces a new de-identification dataset comprising EMRs from three hospitals in China, creating a benchmark for evaluating both within- and cross-hospital generalization. We find significant domain discrepancy between hospitals. A model with almost perfect within-hospital performance struggles when transferred across hospitals. Further experiments show that pretrained language models and some domain generalization methods can alleviate this problem. We believe that our data and findings will encourage investigations on the generalization of medical NLP models.", "keywords": "De-Identification;Electronic Medical Records;Domain Generalization", "primary_area": "", "supplementary_material": "", "author": "Yiyang Liu;Jinpeng Li;Enwei Zhu", "authorids": "~Yiyang_Liu2;~Jinpeng_Li3;~Enwei_Zhu1", "gender": "M;M;M", "homepage": "https://github.com/lanyangyang93;;", "dblp": ";;227/3019", "google_scholar": ";https://scholar.google.com/citations?hl=zh-CN;", "or_profile": "~Yiyang_Liu2;~Jinpeng_Li3;~Enwei_Zhu1", "aff": "University of Chinese Academy of Sciences;University of Chinese Academy of Sciences;University of Chinese Academy of Sciences", "aff_domain": "ucas.ac.cn;ucas.ac.cn;ucas.ac.cn", "position": "Researcher;Principal Researcher;Researcher", "bibtex": "@inproceedings{\nliu2023revisiting,\ntitle={Revisiting De-Identification of Electronic Medical Records: Evaluation of Within- and Cross-Hospital Generalization},\nauthor={Yiyang Liu and Jinpeng Li and Enwei Zhu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=lbtVebcVny}\n}", "github": "", "project": "", "reviewers": "iRFx;HyJ9;FbUh", "site": "https://openreview.net/forum?id=lbtVebcVny", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;3;4", "excitement": "3;4;3", "reproducibility": "3;3;2", "correctness": "4;3;3", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 2.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 10, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0002-6391-8914", "linkedin": ";;", "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Chinese Academy of Sciences", "aff_unique_dep": "", "aff_unique_url": "http://www.ucas.ac.cn", "aff_unique_abbr": "UCAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "ldbYAF0ad0", "title": "Large Language Models are Not Yet Human-Level Evaluators for Abstractive Summarization", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "With the recent undeniable advancement in reasoning abilities in large language models (LLMs) like ChatGPT and GPT-4, there is a growing trend for using LLMs on various tasks. 
One area where LLMs can be employed is as an alternative evaluation metric for complex generative tasks, which generally demands expensive human judges to complement the traditional automatic metrics for various evaluation dimensions such as fluency and consistency. In this work, we conduct extensive analysis to investigate the stability and reliability of LLMs as automatic evaluators for abstractive summarization. We found that while ChatGPT and GPT-4 outperform the commonly used automatic metrics, they are not ready as human replacements due to significant limitations. That is, LLM evaluators rate each candidate system inconsistently and are dimension-dependent. They also struggle to compare candidates with close performance and become more unreliable with higher-quality summaries by obtaining a lower correlation with humans. In other words, with better abstractive summarization systems being introduced at a fast pace, LLMs may result in misleading and unreliable evaluations.", "keywords": "LLM evaluation;ChatGPT;abstractive summarization evaluation;GPT-4", "primary_area": "", "supplementary_material": "", "author": "Chenhui Shen;Liying Cheng;Xuan-Phi Nguyen;Yang You;Lidong Bing", "authorids": "~Chenhui_Shen2;~Liying_Cheng1;~Xuan-Phi_Nguyen1;~Yang_You1;~Lidong_Bing2", "gender": ";F;;M;", "homepage": ";https://liyingcheng95.github.io/;;https://www.comp.nus.edu.sg/~youy/;", "dblp": ";221/0115;;33/8167-1.html;", "google_scholar": ";https://scholar.google.com.sg/citations?user=xkZCRy0kBHEC;;jF4dPZwAAAAJ;", "or_profile": "~Chenhui_Shen2;~Liying_Cheng1;~Xuan-Phi_Nguyen1;~Yang_You1;~Lidong_Bing2", "aff": ";Alibaba Group;;National University of Singapore;", "aff_domain": ";alibaba-inc.com;;nus.edu.sg;", "position": ";Researcher;;Professor;", "bibtex": "@inproceedings{\nshen2023large,\ntitle={Large Language Models are Not Yet Human-Level Evaluators for Abstractive Summarization},\nauthor={Chenhui Shen and Liying Cheng and Xuan-Phi Nguyen and Yang You and Lidong Bing},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=ldbYAF0ad0}\n}", "github": "", "project": "", "reviewers": "i7an;iAX6;LdaQ", "site": "https://openreview.net/forum?id=ldbYAF0ad0", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;3", "excitement": "3;4;4", "reproducibility": "4;4;4", "correctness": "3;3;3", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.0, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;", "linkedin": ";;;yang-you-0b92914b/;", "aff_unique_index": "0;1", "aff_unique_norm": "Alibaba Group;National University of Singapore", "aff_unique_dep": ";", "aff_unique_url": "https://www.alibaba.com;https://www.nus.edu.sg", "aff_unique_abbr": "Alibaba;NUS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "China;Singapore" }, { "id": "ldtjC7TSJ5", "title": "Non-Autoregressive Sentence Ordering", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Existing sentence ordering approaches generally employ encoder-decoder frameworks with the pointer net to recover the coherence by recurrently predicting each sentence step-by-step. Such an autoregressive manner only leverages unilateral dependencies during decoding and cannot fully explore the semantic dependency between sentences for ordering. 
To overcome these limitations, in this paper, we propose a novel Non-Autoregressive Ordering Network, dubbed \\textit{NAON}, which explores bilateral dependencies between sentences and predicts the sentence for each position in parallel. We claim that the non-autoregressive manner is not just applicable but also particularly suitable to the sentence ordering task because of two peculiar characteristics of the task: 1) each generation target is in deterministic length, and 2) the sentences and positions should match exclusively. Furthermore, to address the repetition issue of the naive non-autoregressive Transformer, we introduce an exclusive loss to constrain the exclusiveness between positions and sentences. To verify the effectiveness of the proposed model, we conduct extensive experiments on several common-used datasets and the experimental results show that our method outperforms all the autoregressive approaches and yields competitive performance compared with the state-of-the-arts. The codes are available at: \\url{https://github.com/steven640pixel/nonautoregressive-sentence-ordering}.", "keywords": "sentence ordering;non-autoregressive transformer", "primary_area": "", "supplementary_material": "", "author": "Yi Bin;WENHAO SHI;Bin Ji;Jipeng Zhang;Yujuan Ding;Yang Yang", "authorids": "~Yi_Bin1;~WENHAO_SHI1;~Bin_Ji3;~Jipeng_Zhang1;~Yujuan_Ding1;~Yang_Yang37", "gender": ";M;M;M;F;M", "homepage": ";https://github.com/steven640pixel;https://jibin5167.github.io/;https://2003pro.github.io/;;http://cfm.uestc.edu.cn/~yangyang/", "dblp": "172/9392;;119/1943-2.html;;;", "google_scholar": "KDdkZKQAAAAJ;;31ZXPVQAAAAJ;q0De288AAAAJ;7cLi1BoAAAAJ;", "or_profile": "~Yi_Bin1;~WENHAO_SHI1;~Bin_Ji3;~Jipeng_Zhang1;~Yujuan_Ding1;~Yang_Yang37", "aff": "National University of Singapore;University of Electronic Science and Technology of China;National University of Singapore;Department of Computer Science and Engineering, The Hong Kong University of Science and Technology;The Hong Kong Polytechnic University;University of Electronic Science and Technology of China", "aff_domain": "nus.edu;uestc.edu.cn;nus.edu.sg;cse.ust.hk;polyu.edu.hk;uestc.edu.cn", "position": "Researcher;MS student;Postdoc;PhD student;Postdoc;Full Professor", "bibtex": "@inproceedings{\nbin2023nonautoregressive,\ntitle={Non-Autoregressive Sentence Ordering},\nauthor={Yi Bin and WENHAO SHI and Bin Ji and Jipeng Zhang and Yujuan Ding and Yang Yang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=ldtjC7TSJ5}\n}", "github": "", "project": "", "reviewers": "FhyS;8B3p;7JW7", "site": "https://openreview.net/forum?id=ldtjC7TSJ5", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;4", "excitement": "4;3;4", "reproducibility": "4;3;3", "correctness": "4;3;3", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0002-5508-5051;;;", "linkedin": ";;bin-ji-3b89a3269/;;;", "aff_unique_index": "0;1;0;2;3;1", "aff_unique_norm": "National University of Singapore;University of Electronic Science and Technology of China;Hong Kong University of Science and Technology;Hong Kong Polytechnic University", "aff_unique_dep": ";;Department of Computer Science and Engineering;", "aff_unique_url": 
"https://www.nus.edu.sg;https://www.uestc.edu.cn;https://www.ust.hk;https://www.polyu.edu.hk", "aff_unique_abbr": "NUS;UESTC;HKUST;PolyU", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;1;0;1;1;1", "aff_country_unique": "Singapore;China" }, { "id": "lhSLoOYLDv", "title": "Joint Semantic and Strategy Matching for Persuasive Dialogue", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Persuasive dialogue aims to persuade users to achieve some targets by conversations.\nWhile previous persuasion models have achieved notable successes, they mostly base themselves on utterance semantic matching, and an important aspect has been ignored, that is, the strategy of the conversations, for example, the agent can choose an \\textit{emotional-appeal} strategy to impress users.\nCompared with utterance semantics, conversation strategies are high-level concepts, which can be informative and provide complementary information to achieve effective persuasions.\nIn this paper, we propose to build a persuasion model by jointly modeling the conversation semantics and strategies, where we design a BERT-like module and an auto-regressive predictor to match the semantics and strategies, respectively.\nExperimental results indicate that our proposed approach can significantly improve the state-of-the-art baseline by 5\\% on a small dataset and 37\\% on a large dataset in terms of Recall@1. Detailed analyses show that the auto-regressive predictor contributes most to the final performance.", "keywords": "Persuasive dialogue;Application of dialogue;Retrieval-based dialogue", "primary_area": "", "supplementary_material": "", "author": "Chuhao Jin;Yutao Zhu;Lingzhen Kong;Shijie Li;Xiao Zhang;Ruihua Song;Xu Chen;huan chen;Yuchong Sun;Yu Chen;Jun Xu", "authorids": "~Chuhao_Jin1;~Yutao_Zhu1;~Lingzhen_Kong1;~Shijie_Li4;~Xiao_Zhang7;~Ruihua_Song1;~Xu_Chen13;~huan_chen5;~Yuchong_Sun1;~Yu_Chen29;~Jun_Xu1", "gender": "M;M;F;M;M;F;M;M;M;;M", "homepage": "https://chuhaojin.github.io/;https://daod.github.io;https://github.com/Thecw089;https://github.com/sparkingarthur;https://pinkfloyd1989.github.io/Xiao_Zhang/;;https://gsai.ruc.edu.cn/chenxu;http://none.com;;;https://gsai.ruc.edu.cn/~junxu", "dblp": "287/4999;71/9704-1;;;49/4478-34;s/RuihuaSong;83/6331-17;;206/8045;;90/514-1", "google_scholar": "qJmotIoAAAAJ;tBqVOWsAAAAJ;;;https://scholar.google.com.hk/citations?user=5FZ6wbAAAAAJ;v5LctN8AAAAJ;loPoqy0AAAAJ;;DuSxNqgAAAAJ;;su14mcEAAAAJ", "or_profile": "~Chuhao_Jin1;~Yutao_Zhu1;~Lingzhen_Kong1;~Shijie_Li4;~Xiao_Zhang7;~Ruihua_Song1;~Xu_Chen13;~huan_chen5;~Yuchong_Sun1;~Yu_Chen29;~Jun_Xu1", "aff": "Renmin University of China;University of Montreal;Renmin University of China;;Renmin University of China;Renmin University of China;Renmin University of China;meituan;Renmin University of China;Meituan;Renmin University of China", "aff_domain": "ruc.edu.cn;umontreal.ca;ruc.edu.cn;;ruc.edu.cn;ruc.edu.cn;ruc.edu.cn;meituan.com;ruc.edu.cn;meituan.com;ruc.edu.cn", "position": "MS student;PhD student;MS student;;Assistant Professor;Associate Professor;Associate Professor;Researcher;PhD student;Technical Director;Full Professor", "bibtex": "@inproceedings{\njin2023joint,\ntitle={Joint Semantic and Strategy Matching for Persuasive Dialogue},\nauthor={Chuhao Jin and Yutao Zhu and Lingzhen Kong and Shijie Li and Xiao Zhang and Ruihua Song and Xu Chen and huan chen and Yuchong Sun and Yu Chen and Jun Xu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural 
Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=lhSLoOYLDv}\n}", "github": "", "project": "", "reviewers": "zkSK;xjnT;urL8;pxCx", "site": "https://openreview.net/forum?id=lhSLoOYLDv", "pdf_size": 0, "rating": "3;3;3;3", "confidence": "3;3;3;4", "excitement": "2;3;3;4", "reproducibility": "2;4;3;3", "correctness": "3;2;3;3", "rating_avg": 3.0, "confidence_avg": 3.25, "excitement_avg": 3.0, "reproducibility_avg": 3.0, "correctness_avg": 2.75, "replies_avg": 14, "authors#_avg": 11, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-9432-3251;;;0000-0001-7397-5632;;0000-0003-0144-1775;;;0000-0003-3780-278X;", "linkedin": "chuhao-jin-995a97181/;;;;;;;;;;", "aff_unique_index": "0;1;0;0;0;0;2;0;2;0", "aff_unique_norm": "Renmin University of China;University of Montreal;Meituan", "aff_unique_dep": ";;", "aff_unique_url": "http://www.ruc.edu.cn;https://wwwumontreal.ca;https://www.meituan.com", "aff_unique_abbr": "RUC;UM;Meituan", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;0;0;0;0;0;0", "aff_country_unique": "China;Canada" }, { "id": "ljjy0Sw5sx", "title": "Descriptive Prompt Paraphrasing for Target-Oriented Multimodal Sentiment Classification", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Target-Oriented Multimodal Sentiment Classification (TMSC) aims to perform sentiment polarity on a target jointly considering its corresponding multiple modalities including text, image, and others. Current researches mainly work on either of two types of targets in a decentralized manner. One type is entity, such as a person name, a location name, etc. and the other is aspect, such as 'food', 'service', etc. We believe that this target type based division in task modelling is not necessary because the sentiment polarity of the specific target is not governed by its type but its context. For this reason, we propose a unified model for target-oriented multimodal sentiment classification, so called UnifiedTMSC. It is prompt-based language modelling and performs well on four datasets spanning the above two target types. Specifically, we design descriptive prompt paraphrasing to reformulate TMSC task via (1) task paraphrasing, which obtains paraphrased prompts based on the task description through a paraphrasing rule, and (2) image prefix tuning, which optimizes a small continuous image vector throughout the multimodal representation space of text and images. 
Conducted on two entity-level multimodal datasets: Twitter-2015 and Twitter-2017, and two aspect-level multimodal datasets: Multi-ZOL and MASAD, the experimental results show the effectiveness of our UnifiedTMSC.", "keywords": "Target-Oriented Multimodal Sentiment Classification;Prompt Learning;Multimodal Sentiment Analysis", "primary_area": "", "supplementary_material": "", "author": "Dan Liu;Lin Li;Xiaohui Tao;Jian Cui;Qing Xie", "authorids": "~Dan_Liu6;~Lin_Li7;~Xiaohui_Tao1;~Jian_Cui3;~Qing_Xie1", "gender": "F;F;M;M;M", "homepage": ";http://cst.whut.edu.cn/xygk/szdw/201505/t20150527_876923.shtml;https://tao-xiaohui.info;;", "dblp": ";73/2252-1;58/3976;;98/2931-2.html", "google_scholar": ";https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com/citations?hl=en;;", "or_profile": "~Dan_Liu6;~Lin_Li7;~Xiaohui_Tao1;~Jian_Cui3;~Qing_Xie1", "aff": "Wuhan University of Technology;Wuhan University of Technology;University of Southern Queensland;Wuhan University of Technology;Wuhan University of Technology", "aff_domain": "whut.edu.cn;whut.edu.cn;usq.edu.au;whut.edu.cn;whut.edu.cn", "position": "MS student;Full Professor;Full Professor;MS student;Associate Professor", "bibtex": "@inproceedings{\nliu2023descriptive,\ntitle={Descriptive Prompt Paraphrasing for Target-Oriented Multimodal Sentiment Classification},\nauthor={Dan Liu and Lin Li and Xiaohui Tao and Jian Cui and Qing Xie},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=ljjy0Sw5sx}\n}", "github": "", "project": "", "reviewers": "5uYa;KMtj;xjQS", "site": "https://openreview.net/forum?id=ljjy0Sw5sx", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;5;3", "excitement": "3;3;4", "reproducibility": "4;4;4", "correctness": "3;3;3", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.0, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0001-5171-6736;0000-0001-7553-6916;0000-0002-0020-077X;0009-0009-3968-8061;0000-0003-4530-588X", "linkedin": ";;tao-xiaohui/;;", "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "Wuhan University of Technology;University of Southern Queensland", "aff_unique_dep": ";", "aff_unique_url": "http://www.wut.edu.cn;https://www.usq.edu.au", "aff_unique_abbr": "WUT;USQ", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0", "aff_country_unique": "China;Australia" }, { "id": "ljsGKc8cVR", "title": "Longtriever: a Pre-trained Long Text Encoder for Dense Document Retrieval", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Pre-trained language models (PLMs) have achieved the preeminent position in dense retrieval due to their powerful capacity in modeling intrinsic semantics. However, most existing PLM-based retrieval models encounter substantial computational costs and are infeasible for processing long documents. In this paper, a novel retrieval model Longtriever is proposed to embrace three core challenges of long document retrieval: substantial computational cost, incomprehensive document understanding, and scarce annotations. Longtriever splits long documents into short blocks and then efficiently models the local semantics within a block and the global context semantics across blocks in a tightly-coupled manner. 
A pre-training phase is further proposed to empower Longtriever to achieve a better understanding of underlying semantic correlations. Experimental results on two popular benchmark datasets demonstrate the superiority of our proposal.", "keywords": "dense retrieval;document retrieval", "primary_area": "", "supplementary_material": "", "author": "Junhan Yang;Zheng Liu;Chaozhuo Li;Guangzhong Sun;Xing Xie", "authorids": "~Junhan_Yang1;~Zheng_Liu4;~Chaozhuo_Li1;~Guangzhong_Sun1;~Xing_Xie3", "gender": ";;;M;M", "homepage": ";https://www.microsoft.com/en-us/research/people/zhengliu/;https://scss.bupt.edu.cn/info/1063/5534.htm;;http://research.microsoft.com/en-us/people/xingx/", "dblp": "194/8898.html;06/3580-11;316/1269.html;44/1372;08/6809-1", "google_scholar": "https://scholar.google.com.hk/citations?user=8QHe4YYAAAAJ;https://scholar.google.com.hk/citations?user=k2SF4M0AAAAJ;https://scholar.google.com/citations?hl=zh-CN;;5EQfAFIAAAAJ", "or_profile": "~Junhan_Yang1;~Zheng_Liu4;~Chaozhuo_Li1;~Guangzhong_Sun1;~Xing_Xie3", "aff": "University of Science and Technology of China;Microsoft Research;Beijing University of Posts and Telecommunications;University of Science and Technology of China;Microsoft Research Asia", "aff_domain": "ustc.edu.cn;research.microsoft.com;bupt.edu.cn;ustc.edu.cn;microsoft.com", "position": "PhD student;Researcher;Associate Professor;Full Professor;Senior Principal Researcher", "bibtex": "@inproceedings{\nyang2023longtriever,\ntitle={Longtriever: a Pre-trained Long Text Encoder for Dense Document Retrieval},\nauthor={Junhan Yang and Zheng Liu and Chaozhuo Li and Guangzhong Sun and Xing Xie},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=ljsGKc8cVR}\n}", "github": "", "project": "", "reviewers": "fFzZ;kLS1;Yhi7", "site": "https://openreview.net/forum?id=ljsGKc8cVR", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;4", "excitement": "3;3;4", "reproducibility": "3;4;2", "correctness": "4;4;4", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.0, "correctness_avg": 4.0, "replies_avg": 10, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0001-7765-8466;0000-0002-8179-7503;0000-0002-0794-7681;0000-0002-8608-8482", "linkedin": ";;;;xingx/", "aff_unique_index": "0;1;2;0;1", "aff_unique_norm": "University of Science and Technology of China;Microsoft;Beijing University of Posts and Telecommunications", "aff_unique_dep": ";Microsoft Research;", "aff_unique_url": "http://www.ustc.edu.cn;https://www.microsoft.com/en-us/research;http://www.bupt.edu.cn/", "aff_unique_abbr": "USTC;MSR;BUPT", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Beijing;Asia", "aff_country_unique_index": "0;1;0;0;0", "aff_country_unique": "China;United States" }, { "id": "llv2GnH5bD", "title": "Retrofitting Light-weight Language Models for Emotions using Supervised Contrastive Learning", "track": "main", "status": "Long Main", "tldr": "", "abstract": "We present a novel retrofitting method to induce emotion aspects into pre-trained language models (PLMs) such as BERT and RoBERTa. Our method updates pre-trained network weights using contrastive learning so that the text fragments exhibiting similar emotions are encoded nearby in the representation space, and the fragments with different emotion content are pushed apart. 
While doing so, it also ensures that the linguistic knowledge already present in PLMs is not inadvertently perturbed. The language models retrofitted by our method, i.e., BERTEmo and RoBERTaEmo, produce emotion-aware text representations, as evaluated through different clustering and retrieval metrics. For the downstream tasks on sentiment analysis and sarcasm detection, they perform better than their pre-trained counterparts (about 1% improvement in F1-score) and other existing approaches. Additionally, a more significant boost in performance is observed for the retrofitted models over pre-trained ones in the few-shot learning setting.", "keywords": "Emotion;Contrastive Learning;Retrofitting", "primary_area": "", "supplementary_material": "", "author": "Sapan Shah;Sreedhar Reddy;Pushpak Bhattacharyya", "authorids": "~Sapan_Shah1;~Sreedhar_Reddy1;~Pushpak_Bhattacharyya1", "gender": "M;;M", "homepage": ";https://www.linkedin.com/in/sreedhar-reddy-96a83326/;https://www.cse.iitb.ac.in/~pb/", "dblp": "20/3551;;p/PushpakBhattacharyya", "google_scholar": "https://scholar.google.co.in/citations?user=75eYxkAAAAAJ;;https://scholar.google.com.tw/citations?user=vvg-pAkAAAAJ", "or_profile": "~Sapan_Shah1;~Sreedhar_Reddy1;~Pushpak_Bhattacharyya1", "aff": "Tata Consultancy Services Limited, India;;Indian Institute of Technology, Bombay, Dhirubhai Ambani Institute Of Information and Communication Technology", "aff_domain": "tcs.com;;iitb.ac.in", "position": "Researcher;;Full Professor", "bibtex": "@inproceedings{\nshah2023retrofitting,\ntitle={Retrofitting Light-weight Language Models for Emotions using Supervised Contrastive Learning},\nauthor={Sapan Shah and Sreedhar Reddy and Pushpak Bhattacharyya},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=llv2GnH5bD}\n}", "github": "", "project": "", "reviewers": "HtpC;tXoc;MQD5", "site": "https://openreview.net/forum?id=llv2GnH5bD", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;3", "excitement": "3;4;3", "reproducibility": "3;4;4", "correctness": "4;4;4", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 4.0, "replies_avg": 10, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": ";;pushpakbh/?originalSubdomain=in", "aff_unique_index": "0;1", "aff_unique_norm": "Tata Consultancy Services Limited;Indian Institute of Technology, Bombay", "aff_unique_dep": ";", "aff_unique_url": "https://www.tcs.com;https://www.iitb.ac.in", "aff_unique_abbr": "TCS;IIT Bombay", "aff_campus_unique_index": "1", "aff_campus_unique": ";Bombay", "aff_country_unique_index": "0;0", "aff_country_unique": "India" }, { "id": "lojtRAQOls", "title": "Using LLM for Improving Key Event Discovery: Temporal-Guided News Stream Clustering with Event Summaries", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "Understanding and characterizing the discussions around key events in news streams is important for analyzing political discourse. In this work, we study the problem of identification of such key events and the news articles associated with those events from news streams. We propose a generic framework for news stream clustering that analyzes the temporal trend of news articles to automatically extract the underlying key news events that draw significant media attention. 
We characterize such key events by generating event summaries, based on which we form document clusters in an unsupervised fashion. We evaluate our simple yet effective framework, and show that it produces more coherent event-focused clusters. To demonstrate the utility of our approach, and facilitate future research along the line, we use our framework to construct KeyEvents, a dataset of 40k articles with 611 key events from 11 topics.", "keywords": "news stream clustering;event discovery;political discourse characterization;LLM", "primary_area": "", "supplementary_material": "", "author": "Nishanth Sridhar Nakshatri;Siyi Liu;Sihao Chen;Dan Roth;Dan Goldwasser;Daniel Hopkins", "authorids": "~Nishanth_Sridhar_Nakshatri1;~Siyi_Liu2;~Sihao_Chen1;~Dan_Roth3;~Dan_Goldwasser1;~Daniel_Hopkins1", "gender": "M;M;M;M;M;M", "homepage": "https://nnakshat.github.io;https://liusiyi641.github.io/;https://sihaoc.github.io;https://www.cis.upenn.edu/~danroth/;https://www.cs.purdue.edu/homes/dgoldwas/;https://web.sas.upenn.edu/danhop/", "dblp": "283/4986;;;r/DanRoth;38/3382;203/9711", "google_scholar": "z0tNU24AAAAJ;Z4vfzqkAAAAJ;PQ9dRCgAAAAJ;E-bpPWgAAAAJ;https://scholar.google.com.tw/citations?user=u8358QgAAAAJ;4XO7BBcAAAAJ", "or_profile": "~Nishanth_Sridhar_Nakshatri1;~Siyi_Liu2;~Sihao_Chen1;~Dan_Roth3;~Dan_Goldwasser1;~Daniel_Hopkins1", "aff": "Purdue University;University of Pennsylvania;Tencent AI Lab;Amazon;Purdue University;University of Pennsylvania", "aff_domain": "cs.purdue.edu;seas.upenn.edu;tencent.com;amazon.com;purdue.edu;upenn.edu", "position": "PhD student;PhD student;Intern;VP and Distinguished Scientist;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nnakshatri2023using,\ntitle={Using {LLM} for Improving Key Event Discovery: Temporal-Guided News Stream Clustering with Event Summaries},\nauthor={Nishanth Sridhar Nakshatri and Siyi Liu and Sihao Chen and Dan Roth and Dan Goldwasser and Daniel Hopkins},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=lojtRAQOls}\n}", "github": "", "project": "", "reviewers": "9es6;DScZ;RPBd", "site": "https://openreview.net/forum?id=lojtRAQOls", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;3", "excitement": "3;3;3", "reproducibility": "4;3;3", "correctness": "4;3;4", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.0, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-2540-3231;;;;;", "linkedin": "nishanth-nakshatri;;;dan-roth-8667361/;;", "aff_unique_index": "0;1;2;3;0;1", "aff_unique_norm": "Purdue University;University of Pennsylvania;Tencent;Amazon", "aff_unique_dep": ";;Tencent AI Lab;Amazon.com, Inc.", "aff_unique_url": "https://www.purdue.edu;https://www.upenn.edu;https://ai.tencent.com;https://www.amazon.com", "aff_unique_abbr": "Purdue;UPenn;Tencent AI Lab;Amazon", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0;0", "aff_country_unique": "United States;China" }, { "id": "lpyU0zyEsS", "title": "Identifying Informational Sources in News Articles", "track": "main", "status": "Long Main", "tldr": "", "abstract": "News articles are driven by the informational sources journalists use in reporting. 
Modeling when, how and why sources get used together in stories can help us better understand the information we consume and even help journalists with the task of producing it.\nIn this work, we take steps toward this goal by constructing the largest and widest-ranging annotated dataset, to date, of informational sources used in news writing. We first show that our dataset can be used to train high-performing models for information detection and source attribution. Then, we introduce a novel task, source prediction, to study the compositionality of sources in news articles -- i.e. how they are chosen to complement each other. We show good modeling performance on this task, indicating that there is a pattern to the way different sources are used \\textit{together} in news storytelling. This insight opens the door for a focus on sources in narrative science (i.e. planning-based language generation) and computational journalism (i.e. a source-recommendation system to aid journalists writing stories). All data and model code can be found at https://github.com/alex2awesome/source-exploration.", "keywords": "computational journalism;source prediction;document-level modeling", "primary_area": "", "supplementary_material": "", "author": "Alexander Spangher;Nanyun Peng;Emilio Ferrara;Jonathan May", "authorids": "~Alexander_Spangher2;~Nanyun_Peng1;~Emilio_Ferrara1;~Jonathan_May1", "gender": "M;F;;M", "homepage": "http://alexander-spangher.com/;https://violetpeng.github.io/;;http://jonmay.net", "dblp": "227/2512;117/4036;38/8773;00/4758", "google_scholar": "https://scholar.google.com/citations?hl=en;XxRXvX0AAAAJ;;tmK5EPEAAAAJ", "or_profile": "~Alexander_Spangher2;~Nanyun_Peng1;~Emilio_Ferrara1;~Jonathan_May1", "aff": "University of Southern California;University of California, Los Angeles;University of Southern California;USC/ISI", "aff_domain": "usc.edu;ucla.edu;usc.edu;isi.edu", "position": "PhD student;Assistant Professor;Full Professor;Research Scientist", "bibtex": "@inproceedings{\nspangher2023identifying,\ntitle={Identifying Informational Sources in News Articles},\nauthor={Alexander Spangher and Nanyun Peng and Emilio Ferrara and Jonathan May},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=lpyU0zyEsS}\n}", "github": "", "project": "", "reviewers": "6Nn9;d5Gh;5ZFc", "site": "https://openreview.net/forum?id=lpyU0zyEsS", "pdf_size": 0, "rating": "5;5;5", "confidence": "3;3;3", "excitement": "4;4;4", "reproducibility": "3;3;3", "correctness": "4;3;3", "rating_avg": 5.0, "confidence_avg": 3.0, "excitement_avg": 4.0, "reproducibility_avg": 3.0, "correctness_avg": 3.3333333333333335, "replies_avg": 9, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0000-0002-5284-477X", "linkedin": ";;;jonmayjonmay/", "aff_unique_index": "0;1;0;0", "aff_unique_norm": "University of Southern California;University of California, Los Angeles", "aff_unique_dep": ";", "aff_unique_url": "https://www.usc.edu;https://www.ucla.edu", "aff_unique_abbr": "USC;UCLA", "aff_campus_unique_index": "0;0;0;1", "aff_campus_unique": "Los Angeles;ISI", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "lqe06F5OiU", "title": "Chain-of-Thought Embeddings for Stance Detection on Social Media", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "Stance detection on social media is challenging for Large Language Models (LLMs), as emerging slang and 
colloquial language in online conversations often contain deeply implicit stance labels. Chain-of-Thought (COT) prompting has recently been shown to improve performance on stance detection tasks --- alleviating some of these issues. However, COT prompting still struggles with implicit stance identification. This challenge arises because many samples are initially challenging to comprehend before a model becomes familiar with the slang and evolving knowledge related to different topics, all of which need to be acquired through the training data. In this study, we address this problem by introducing COT Embeddings which improve COT performance on stance detection tasks by embedding COT reasonings and integrating them into a traditional RoBERTa-based stance detection pipeline. Our analysis demonstrates that 1) text encoders can leverage COT reasonings with minor errors or hallucinations that would otherwise distort the COT output label. 2) Text encoders can overlook misleading COT reasoning when a sample's prediction heavily depends on domain-specific patterns. Our model achieves SOTA performance on multiple stance detection datasets collected from social media.", "keywords": "chain of thought;prompting;chatgpt;stance detection", "primary_area": "", "supplementary_material": "", "author": "Joseph Gatto;Omar Sharif;Sarah Masud Preum", "authorids": "~Joseph_Gatto1;~Omar_Sharif2;~Sarah_Masud_Preum1", "gender": "M;M;", "homepage": "https://josephgatto.github.io;https://omar-sharif03.github.io/;https://web.cs.dartmouth.edu/people/sarah-masud-preum", "dblp": "254/2754;270/0002;165/8174.html", "google_scholar": "QOCT60gAAAAJ;TBBRv2wAAAAJ;TyO23NgAAAAJ", "or_profile": "~Joseph_Gatto1;~Omar_Sharif2;~Sarah_Masud_Preum1", "aff": "Dartmouth College;Dartmouth College;Dartmouth College", "aff_domain": "dartmouth.edu;dartmouth.edu;dartmouth.edu", "position": "PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\ngatto2023chainofthought,\ntitle={Chain-of-Thought Embeddings for Stance Detection on Social Media},\nauthor={Joseph Gatto and Omar Sharif and Sarah Masud Preum},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=lqe06F5OiU}\n}", "github": "", "project": "", "reviewers": "XojB;pvkN;bcgD;n7UD", "site": "https://openreview.net/forum?id=lqe06F5OiU", "pdf_size": 0, "rating": "3;3;3;3", "confidence": "3;4;4;3", "excitement": "4;3;3;3", "reproducibility": "5;3;3;3", "correctness": "4;3;2;3", "rating_avg": 3.0, "confidence_avg": 3.5, "excitement_avg": 3.25, "reproducibility_avg": 3.5, "correctness_avg": 3.0, "replies_avg": 14, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0001-7013-2445;0000-0002-1971-6522;0000-0002-7771-8323", "linkedin": ";omar1303/;", "aff_unique_index": "0;0;0", "aff_unique_norm": "Dartmouth College", "aff_unique_dep": "", "aff_unique_url": "https://www.dartmouth.edu", "aff_unique_abbr": "Dartmouth", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "ls4Pfsl2jZ", "title": "Multi-step Jailbreaking Privacy Attacks on ChatGPT", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "With the rapid progress of large language models (LLMs), many downstream NLP tasks can be well solved given appropriate prompts. 
Though model developers and researchers work hard on dialog safety to avoid generating harmful content from LLMs, it is still challenging to steer AI-generated content (AIGC) for the human good. As powerful LLMs are devouring existing text data from various domains (e.g., GPT-3 is trained on 45TB texts), it is natural to doubt whether the private information is included in the training data and what privacy threats can these LLMs and their downstream applications bring. In this paper, we study the privacy threats from OpenAI's ChatGPT and the New Bing enhanced by ChatGPT and show that application-integrated LLMs may cause new privacy threats. To this end, we conduct extensive experiments to support our claims and discuss LLMs' privacy implications.", "keywords": "Large language models;privacy;extraction attacks", "primary_area": "", "supplementary_material": "", "author": "Haoran Li;Dadi Guo;Wei Fan;Mingshi Xu;Jie Huang;Fanpu Meng;Yangqiu Song", "authorids": "~Haoran_Li3;~Dadi_Guo1;~Wei_Fan7;~Mingshi_Xu2;~Jie_Huang3;~Fanpu_Meng1;~Yangqiu_Song1", "gender": "M;M;M;;;M;M", "homepage": "https://hlibt.student.ust.hk/;http://guodadi.github.io;https://alexfan.cn/;https://jeffhj.github.io/;;https://www.cse.ust.hk/~yqsong/;", "dblp": "50/10038-3.html;;;29/6643-9;;86/2159;", "google_scholar": "O3JRO9AAAAAJ;;ivOeRAkAAAAJ;GIoPkMoAAAAJ;;MdQZ-q8AAAAJ;", "or_profile": "~Haoran_Li3;~Dadi_Guo1;~Wei_Fan7;~Jie_Huang3;~Fanpu_Meng1;~Yangqiu_Song1;~MINGSHI_XU1", "aff": "Hong Kong University of Science and Technology;Peking University;Hong Kong University of Science and Technology;University of Illinois, Urbana Champaign;University of Notre Dame;Hong Kong University of Science and Technology;Hong Kong University of Science and Technology", "aff_domain": "connect.ust.hk;stu.pku.edu.cn;ust.hk;illinois.edu;nd.edu;ust.hk;ust.hk", "position": "PhD student;MS student;MS student;PhD student;PhD student;Associate Professor;Undergrad student", "bibtex": "@inproceedings{\nli2023multistep,\ntitle={Multi-step Jailbreaking Privacy Attacks on Chat{GPT}},\nauthor={Haoran Li and Dadi Guo and Wei Fan and Mingshi Xu and Jie Huang and Fanpu Meng and Yangqiu Song},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=ls4Pfsl2jZ}\n}", "github": "", "project": "", "reviewers": "ur2W;5zeZ;fTiA", "site": "https://openreview.net/forum?id=ls4Pfsl2jZ", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;3;4", "excitement": "3;4;3", "reproducibility": "3;3;5", "correctness": "4;3;4", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.6666666666666665, "replies_avg": 10, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0003-1656-1278;;0009-0008-1900-7081;;;0000-0002-7818-6090;", "linkedin": "%E6%B5%A9%E7%84%B6-%E6%9D%8E-9b743817a/;;;jie-huang-4b0104151/;fanpu-meng-/;yqsong/;%E6%98%8E%E7%9F%B3-%E5%BE%90-341708236/", "aff_unique_index": "0;1;0;2;3;0;0", "aff_unique_norm": "Hong Kong University of Science and Technology;Peking University;University of Illinois Urbana-Champaign;University of Notre Dame", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.ust.hk;http://www.pku.edu.cn;https://illinois.edu;https://www.nd.edu", "aff_unique_abbr": "HKUST;Peking U;UIUC;Notre Dame", "aff_campus_unique_index": "0;0;2;0;0", "aff_campus_unique": "Hong Kong SAR;;Urbana-Champaign", "aff_country_unique_index": 
"0;0;0;1;1;0;0", "aff_country_unique": "China;United States" }, { "id": "m1TV5K9Cvc", "title": "Evaluating Emotion Arcs Across Languages: Bridging the Global Divide in Sentiment Analysis", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Emotion arcs capture how an individual (or a population) feels over time. They are widely used in industry and research; however, there is little work on evaluating the automatically generated arcs. This is because of the difficulty of establishing the true (gold) emotion arc. Our work, for the first time, systematically and quantitatively evaluates automatically generated emotion arcs. We also compare two common ways of generating emotion arcs: Machine-Learning (ML) models and Lexicon-Only (LexO) methods. By running experiments on 18 diverse datasets in 9 languages, we show that despite being markedly poor at instance level emotion classification, LexO methods are highly accurate at generating emotion arcs when aggregating information from hundreds of instances. We also show, through experiments on six indigenous African languages, as well as Arabic, and Spanish, that automatic translations of English emotion lexicons can be used to generate high-quality emotion arcs in less-resource languages. This opens up avenues for work on emotions in languages from around the world; which is crucial for commerce, public policy, and health research in service of speakers often left behind. Code and resources: https://github.com/dteodore/EmotionArcs", "keywords": "Emotion Arcs;Sentiment Analysis;Low-Resource NLP;Multilingual;Emotion Lexicons", "primary_area": "", "supplementary_material": "", "author": "Daniela Teodorescu;Saif M. Mohammad", "authorids": "~Daniela_Teodorescu1;~Saif_M._Mohammad1", "gender": ";M", "homepage": ";http://saifmohammad.com", "dblp": ";58/380", "google_scholar": ";zJHymXh9EVwC", "or_profile": "~Daniela_Teodorescu1;~Saif_M._Mohammad1", "aff": ";National Research Council Canada", "aff_domain": ";nrc-cnrc.gc.ca", "position": ";Researcher", "bibtex": "@inproceedings{\nteodorescu2023evaluating,\ntitle={Evaluating Emotion Arcs Across Languages: Bridging the Global Divide in Sentiment Analysis},\nauthor={Daniela Teodorescu and Saif M. 
Mohammad},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=m1TV5K9Cvc}\n}", "github": "", "project": "", "reviewers": "ZKef;MGk7;QjH9", "site": "https://openreview.net/forum?id=m1TV5K9Cvc", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;3;4", "excitement": "3;2;4", "reproducibility": "2;2;4", "correctness": "3;3;4", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.0, "reproducibility_avg": 2.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0003-2716-7516", "linkedin": ";", "aff_unique_index": "0", "aff_unique_norm": "National Research Council Canada", "aff_unique_dep": "", "aff_unique_url": "https://www.nrc-cnrc.gc.ca", "aff_unique_abbr": "NRC-CNRC", "aff_country_unique_index": "0", "aff_country_unique": "Canada" }, { "id": "mAxs9qiXbo", "title": "Debiasing Multimodal Models via Causal Information Minimization", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Most existing debiasing methods for multimodal models, including causal intervention and inference methods, utilize approximate heuristics to represent the biases, such as shallow features from early stages of training or unimodal features for multimodal tasks like VQA, etc., which may not be accurate. In this paper, we study bias arising from confounders in a causal graph for multimodal data, and examine a novel approach that leverages causally-motivated information minimization to learn the confounder representations. Robust predictive features contain diverse information that helps a model generalize to out-of-distribution data. Hence, minimizing the information content of features obtained from a pretrained biased model helps learn the simplest predictive features that capture the underlying data distribution. We treat these features as confounder representations and use them via methods motivated by causal theory to remove bias from models. We find that the learned confounder representations indeed capture dataset biases and the proposed debiasing methods improve out-of-distribution (OOD) performance on multiple multimodal datasets without sacrificing in-distribution performance. 
Additionally, we introduce a novel metric to quantify the sufficiency of spurious features in models' predictions that further demonstrates the effectiveness of our proposed methods.", "keywords": "Multimodal;Causality;Debiasing;Out-of-distribution", "primary_area": "", "supplementary_material": "", "author": "Vaidehi Patil;Adyasha Maharana;Mohit Bansal", "authorids": "~Vaidehi_Patil1;~Adyasha_Maharana1;~Mohit_Bansal2", "gender": "F;F;M", "homepage": "https://vaidehi99.github.io/;https://adymaharana.github.io/;https://www.cs.unc.edu/~mbansal/", "dblp": "294/5205;204/6962.html;32/5243.html", "google_scholar": "wCt6wSAAAAAJ;1sh3MMgAAAAJ;DN8QtscAAAAJ", "or_profile": "~Vaidehi_Patil1;~Adyasha_Maharana1;~Mohit_Bansal2", "aff": "Department of Computer Science, University of North Carolina at Chapel Hill;Department of Computer Science, University of North Carolina, Chapel Hill;University of North Carolina at Chapel Hill", "aff_domain": "cs.unc.edu;cs.unc.edu;unc.edu", "position": "PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\npatil2023debiasing,\ntitle={Debiasing Multimodal Models via Causal Information Minimization},\nauthor={Vaidehi Patil and Adyasha Maharana and Mohit Bansal},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=mAxs9qiXbo}\n}", "github": "", "project": "", "reviewers": "JDbk;nFie;kR71", "site": "https://openreview.net/forum?id=mAxs9qiXbo", "pdf_size": 0, "rating": "4;4;4", "confidence": "2;4;5", "excitement": "2;3;3", "reproducibility": "3;4;4", "correctness": "3;3;2", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 2.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 2.6666666666666665, "replies_avg": 10, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": ";;", "aff_unique_index": "0;1;1", "aff_unique_norm": "University of North Carolina at Chapel Hill;University of North Carolina", "aff_unique_dep": "Department of Computer Science;Department of Computer Science", "aff_unique_url": "https://www.unc.edu;https://www.unc.edu", "aff_unique_abbr": "UNC Chapel Hill;UNC", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Chapel Hill", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "mCnBRLJuhY", "title": "The Curious Case of Hallucinatory (Un)answerability: Finding Truths in the Hidden States of Over-Confident Large Language Models", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Large language models (LLMs) have been shown to possess impressive capabilities, while also raising crucial concerns about the faithfulness of their responses. A primary issue arising in this context is the management of (un)answerable queries by LLMs, which often results in hallucinatory behavior due to overconfidence. In this paper, we explore the behavior of LLMs when presented with (un)answerable queries. We ask: do models \\textit{represent} the fact that the question is (un)answerable when generating a hallucinatory answer?\nOur results show strong indications that such models encode the answerability of an input query, with the representation of the first decoded token often being a strong indicator. These findings shed new light on the spatial organization within the latent representations of LLMs, unveiling previously unexplored facets of these models. 
Moreover, they pave the way for the development of improved decoding techniques with better adherence to factual generation, particularly in scenarios where query (un)answerability is a concern.", "keywords": "LLMs;explainability;answerability;hallucinations", "primary_area": "", "supplementary_material": "", "author": "Aviv Slobodkin;Omer Goldman;Avi Caciularu;Ido Dagan;Shauli Ravfogel", "authorids": "~Aviv_Slobodkin2;~Omer_Goldman1;~Avi_Caciularu1;~Ido_Dagan1;~Shauli_Ravfogel1", "gender": "M;;M;M;M", "homepage": "https://lovodkin93.github.io/;;http://aviclu.github.io/;http://u.cs.biu.ac.il/~dagan/;https://github.com/Shaul1321", "dblp": "290/2100.html;;https://dblp.uni-trier.de/pid/207/8509;95/284;227/2231", "google_scholar": "oAy77cgAAAAJ;;https://scholar.google.co.il/citations?user=fPG_0aQAAAAJ;https://scholar.google.com.tw/citations?user=YzGAGtoAAAAJ;", "or_profile": "~Aviv_Slobodkin2;~Omer_Goldman1;~Avi_Caciularu1;~Ido_Dagan1;~Shauli_Ravfogel1", "aff": "Bar-Ilan University;;Google;Bar-Ilan University;Bar-Ilan University", "aff_domain": "biu.ac.il;;google.com;biu.ac.il;biu.ac.il", "position": "PhD student;;Researcher;Full Professor;PhD student", "bibtex": "@inproceedings{\nslobodkin2023the,\ntitle={The Curious Case of Hallucinatory (Un)answerability: Finding Truths in the Hidden States of Over-Confident Large Language Models},\nauthor={Aviv Slobodkin and Omer Goldman and Avi Caciularu and Ido Dagan and Shauli Ravfogel},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=mCnBRLJuhY}\n}", "github": "", "project": "", "reviewers": "9CW2;CavA;t5Sd", "site": "https://openreview.net/forum?id=mCnBRLJuhY", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;3;3", "excitement": "4;4;4", "reproducibility": "4;4;3", "correctness": "4;4;4", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 4.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;", "linkedin": "aviv-slobodkin-73926515a/;;avicaciularu/;;", "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Bar-Ilan University;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.biu.ac.il;https://www.google.com", "aff_unique_abbr": "BIU;Google", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "Israel;United States" }, { "id": "mDPUF7ubAv", "title": "An Empirical Study of Instruction-tuning Large Language Models in Chinese", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "The success of ChatGPT validates the potential of large language models (LLMs) in artificial general intelligence (AGI). Subsequently, the release of LLMs has sparked the open-source community's interest in instruction-tuning, which is deemed to accelerate ChatGPT's replication process. However, research on instruction-tuning LLMs in Chinese, the world's most spoken language, is still in its early stages. Therefore, this paper makes an in-depth empirical study of instruction-tuning LLMs in Chinese, which can serve as a cookbook that provides valuable findings for effectively customizing LLMs that can better respond to Chinese instructions. Specifically, we systematically explore the impact of LLM bases, parameter-efficient methods, instruction data types, which are the three most important elements for instruction-tuning. 
Besides, we also conduct experiments to study the impact of other factors, e.g., chain-of-thought data and human-value alignment. We hope that this empirical study can make a modest contribution to the open Chinese version of ChatGPT. This paper will release a powerful Chinese LLM that is comparable to ChatGLM. The code and data are available at https://github.com/PhoebusSi/Alpaca-CoT.", "keywords": "large language models;instruction fine-tune", "primary_area": "", "supplementary_material": "", "author": "Qingyi Si;Tong Wang;Zheng Lin;Xu Zhang;Yanan Cao;Weiping Wang", "authorids": "~Qingyi_Si1;~Tong_Wang13;~Zheng_Lin5;~Xu_Zhang27;~Yanan_Cao1;~Weiping_Wang4", "gender": "M;M;M;F;M;F", "homepage": "https://phoebussi.github.io/;https://fullstack.love/;https://github.com/AiLMe-AI;;https://teacher.ucas.ac.cn/~0012246;http://people.ucas.edu.cn/~linzheng", "dblp": "227/6822.html;;;97/5152-1;72/4134-5.html;51/3740-1.html", "google_scholar": "5oH_wMEAAAAJ;;;;zH_wmdwAAAAJ;", "or_profile": "~Qingyi_Si1;~Tong_Wang13;~Xu_Zhang27;~Yanan_Cao1;~Weiping_Wang4;~zheng_Lin4", "aff": "Institute of Information Engineering, Chinese Academy of Sciences, Beijing, China;University of Chinese Academy of Sciences;APUS AI Lab;Institute of Information Engineering, Chinese Academy of Sciences;IIE;Institute of Information Engineering, Chinese Academy of Sciences", "aff_domain": "iie.ac.cn;ucas.ac.cn;apusai.com;iie.ac.cn;iie.ac.cn;iie.ac.cn", "position": "PhD student;MS student;Chief Scientist;Full Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nsi2023an,\ntitle={An Empirical Study of Instruction-tuning Large Language Models in Chinese},\nauthor={Qingyi Si and Tong Wang and Zheng Lin and Xu Zhang and Yanan Cao and Weiping Wang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=mDPUF7ubAv}\n}", "github": "", "project": "", "reviewers": "CCaJ;3VDv;L1A9", "site": "https://openreview.net/forum?id=mDPUF7ubAv", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;3;4", "excitement": "3;3;3", "reproducibility": "4;4;4", "correctness": "4;3;3", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.0, "reproducibility_avg": 4.0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0000-0003-3534-1094;0000-0002-8618-4992;0000-0002-8432-1658", "linkedin": ";;;;;", "aff_unique_index": "0;1;2;0;3;0", "aff_unique_norm": "Chinese Academy of Sciences;University of Chinese Academy of Sciences;American Public University System;Institute of Industrial Engineers", "aff_unique_dep": "Institute of Information Engineering;;AI Lab;", "aff_unique_url": "http://www.cas.cn;http://www.ucas.ac.cn;https://www.apus.edu;https://www.iie.org", "aff_unique_abbr": "CAS;UCAS;APUS;IIE", "aff_campus_unique_index": "0", "aff_campus_unique": "Beijing;", "aff_country_unique_index": "0;0;1;0;1;0", "aff_country_unique": "China;United States" }, { "id": "mDgLGrL6ze", "title": "ECHo: A Visio-Linguistic Dataset for Event Causality Inference via Human-Centric Reasoning", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "We introduce ECHo (Event Causality Inference via Human-Centric Reasoning), a diagnostic dataset of event causality inference grounded in visio-linguistic social scenarios. ECHo employs real-world human-centric deductive information building on a television crime drama. 
ECHo requires the Theory-of-Mind (ToM) ability to understand and reason about social interactions based on multimodal information. Using ECHo, we propose a unified Chain-of-Thought (CoT) framework to assess the reasoning capability of current AI systems. Our ToM-enhanced CoT pipeline accommodates various large foundation models in both zero-shot and few-shot visio-linguistic reasoning. We use this framework to scrutinize recent large foundation models such as InstructGPT and MiniGPT-4 on three diagnostic human-centric tasks. Further analysis demonstrates ECHo as a challenging dataset to expose imperfections and inconsistencies in reasoning. Our data and code are publicly available at [https://github.com/YuxiXie/ECHo](https://github.com/YuxiXie/ECHo).", "keywords": "visio-linguistic commonsense reasoning;theory of mind;chain of thought", "primary_area": "", "supplementary_material": "", "author": "Yuxi Xie;Guanzhen Li;Min-Yen Kan", "authorids": "~Yuxi_Xie1;~Guanzhen_Li1;~Min-Yen_Kan1", "gender": "F;M;M", "homepage": "https://yuxixie.github.io/;;https://www.comp.nus.edu.sg/~kanmy/", "dblp": ";;k/MinYenKan", "google_scholar": "LNLECx0AAAAJ;https://scholar.google.com/citations?view_op=list_works;https://scholar.google.com.tw/citations?user=aNVcd3EAAAAJ", "or_profile": "~Yuxi_Xie1;~Guanzhen_Li1;~Min-Yen_Kan1", "aff": "National University of Singapore;National University of Singapore;National University of Singapore", "aff_domain": "u.nus.edu;u.nus.edu;nus.edu.sg", "position": "PhD student;MS student;Associate Professor", "bibtex": "@inproceedings{\nxie2023echo,\ntitle={{ECH}o: A Visio-Linguistic Dataset for Event Causality Inference via Human-Centric Reasoning},\nauthor={Yuxi Xie and Guanzhen Li and Min-Yen Kan},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=mDgLGrL6ze}\n}", "github": "", "project": "", "reviewers": "CzKR;Jxpn;DSz5", "site": "https://openreview.net/forum?id=mDgLGrL6ze", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;3;2", "excitement": "3;4;4", "reproducibility": "4;4;4", "correctness": "4;3;3", "rating_avg": 4.0, "confidence_avg": 3.0, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": "yuxi-xie-494265181;;", "aff_unique_index": "0;0;0", "aff_unique_norm": "National University of Singapore", "aff_unique_dep": "", "aff_unique_url": "https://www.nus.edu.sg", "aff_unique_abbr": "NUS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Singapore" }, { "id": "mERmlOPxPY", "title": "Definitions Matter: Guiding GPT for Multi-label Classification", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "Large language models have recently risen in popularity due to their ability to perform many natural language tasks without requiring any fine-tuning. In this work, we focus on two novel ideas: (1) generating definitions from examples and using them for zero-shot classification, and (2) investigating how an LLM makes use of the definitions. We thoroughly analyze the performance of the GPT-3 model for fine-grained multi-label conspiracy theory classification of tweets using zero-shot labeling. In doing so, we assess how to improve the labeling by providing minimal but meaningful context in the form of the definitions of the labels. 
We compare descriptive noun phrases, human-crafted definitions, introduce a new method to help the model generate definitions from examples, and propose a method to evaluate GPT-3's understanding of the definitions. We demonstrate that improving definitions of class labels has a direct consequence on the downstream classification results.", "keywords": "GPT-3;zero-shot classification;LLM", "primary_area": "", "supplementary_material": "", "author": "Youri Peskine;Damir Koren\u010di\u0107;Ivan Grubisic;Paolo Papotti;Raphael Troncy;Paolo Rosso", "authorids": "~Youri_Peskine1;~Damir_Koren\u010di\u01071;~Ivan_Grubisic1;~Paolo_Papotti1;~Raphael_Troncy1;~Paolo_Rosso1", "gender": "M;M;M;M;M;M", "homepage": ";;https://www.irb.hr/O-IRB-u/Ljudi/Ivan-Grubisic;http://www.eurecom.fr/en/people/papotti-paolo;http://www.eurecom.fr/~troncy/;http://personales.upv.es/prosso/", "dblp": "267/8146;;;p/PaoloPapotti.html;01/1768;05/3463", "google_scholar": "https://scholar.google.com/citations?hl=fr;PJLphUQAAAAJ;;https://scholar.google.com.tw/citations?user=YwoezYX7JVgJ;1BxhcigAAAAJ;https://scholar.google.es/citations?user=HFKXPH8AAAAJ", "or_profile": "~Youri_Peskine1;~Damir_Koren\u010di\u01071;~Ivan_Grubisic1;~Paolo_Papotti1;~Raphael_Troncy1;~Paolo_Rosso1", "aff": "Eurecom;Universidad Polit\u00e9cnica de Valencia;Faculty of Electrical Engineering and Computing, University of Zagreb;Eurecom;Eurecom;Universitat Polit\u00e8cnica de Val\u00e8ncia", "aff_domain": "eurecom.fr;upv.es;fer.hr;eurecom.fr;eurecom.fr;upv.es", "position": "PhD student;Postdoc;PhD student;Associate Professor;Associate Professor;Full Professor", "bibtex": "@inproceedings{\npeskine2023definitions,\ntitle={Definitions Matter: Guiding {GPT} for Multi-label Classification},\nauthor={Youri Peskine and Damir Koren{\\v{c}}i{\\'c} and Ivan Grubisic and Paolo Papotti and Raphael Troncy and Paolo Rosso},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=mERmlOPxPY}\n}", "github": "", "project": "", "reviewers": "hx9P;rBbv;u3c7", "site": "https://openreview.net/forum?id=mERmlOPxPY", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;3;4", "excitement": "3;3;2", "reproducibility": "3;4;4", "correctness": "4;3;3", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 2.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 10, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0009-0002-8160-019X;0000-0003-4645-2937;;;0000-0003-0457-1436;0000-0002-8922-1242", "linkedin": ";damirkorencic/;;papotti/;troncy/;paolo-rosso-753b1016/?originalSubdomain=es", "aff_unique_index": "0;1;2;0;0;3", "aff_unique_norm": "EURECOM;Universidad Polit\u00e9cnica de Valencia;University of Zagreb;Universitat Polit\u00e8cnica de Val\u00e8ncia", "aff_unique_dep": ";;Faculty of Electrical Engineering and Computing;", "aff_unique_url": "https://www.eurecom.fr;https://www.upv.es;https://www.unizg.hr;https://www.upv.es", "aff_unique_abbr": ";UPV;UNIZG;UPV", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2;0;0;1", "aff_country_unique": "France;Spain;Croatia" }, { "id": "mGEfAu17Rk", "title": "Hallucination Detection for Grounded Instruction Generation", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "We investigate the problem of generating instructions to guide humans to navigate in simulated residential environments. 
A major issue with current models is hallucination: they generate references to actions or objects that are inconsistent with what a human follower would perform or encounter along the described path. We develop a model that detects these hallucinated references by adopting a model pre-trained on a large corpus of image-text pairs, and fine-tuning it with a contrastive loss that separates correct instructions from instructions containing synthesized hallucinations. Our final model outperforms several baselines, including using word probability estimated by the instruction-generation model, and supervised models based on LSTM and Transformer.", "keywords": "Hallucination detection;multimodality;natural language generation", "primary_area": "", "supplementary_material": "", "author": "Lingjun Zhao;Khanh Xuan Nguyen;Hal Daum\u00e9 III", "authorids": "~Lingjun_Zhao2;~Khanh_Xuan_Nguyen1;~Hal_Daum\u00e9_III1", "gender": "F;M;M", "homepage": "https://lingjunzhao.github.io;http://machineslearner.com;http://hal3.name", "dblp": ";53/6791;77/2856.html", "google_scholar": "XdD7-6IAAAAJ;SmqouhIAAAAJ;PbEw81gAAAAJ", "or_profile": "~Lingjun_Zhao2;~Khanh_Xuan_Nguyen1;~Hal_Daum\u00e9_III1", "aff": "University of Maryland, College Park;Princeton University;Microsoft", "aff_domain": "umd.edu;princeton.edu;microsoft.com", "position": "PhD student;Postdoc;Senior Principle Researcher", "bibtex": "@inproceedings{\nzhao2023hallucination,\ntitle={Hallucination Detection for Grounded Instruction Generation},\nauthor={Lingjun Zhao and Khanh Xuan Nguyen and Hal Daum{\\'e} III},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=mGEfAu17Rk}\n}", "github": "", "project": "", "reviewers": "Kkpf;CKdk;ieLz", "site": "https://openreview.net/forum?id=mGEfAu17Rk", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;3;4", "excitement": "3;2;3", "reproducibility": "3;3;4", "correctness": "3;3;4", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 2.6666666666666665, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": ";;", "aff_unique_index": "0;1;2", "aff_unique_norm": "University of Maryland;Princeton University;Microsoft", "aff_unique_dep": ";;Microsoft Corporation", "aff_unique_url": "https://www/umd.edu;https://www.princeton.edu;https://www.microsoft.com", "aff_unique_abbr": "UMD;Princeton;Microsoft", "aff_campus_unique_index": "0", "aff_campus_unique": "College Park;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "mIsrzEjeG4", "title": "When Do Decompositions Help for Machine Reading?", "track": "main", "status": "Short Main", "tldr": "", "abstract": "Answering complex questions often requires multi-step reasoning in order to obtain the final answer. Most research into decompositions of complex questions involves open-domain systems, which have shown success in using these decompositions for improved retrieval. In the machine reading setting, however, work to understand when decompositions are helpful is understudied. We conduct experiments on decompositions in machine reading to unify recent work in this space, using a range of models and datasets. We find that decompositions can be helpful in zero or limited-data settings, giving several points of improvement in exact match. 
However, we also show that when models are given access to around a few hundred or more examples, decompositions are not helpful (and can actually be detrimental). Thus, our analysis implies that models can learn decompositions implicitly even with limited data.", "keywords": "decomposition;machine reading;question answering", "primary_area": "", "supplementary_material": "", "author": "Kangda Wei;Dawn Lawrie;Benjamin Van Durme;Yunmo Chen;Orion Weller", "authorids": "~Kangda_Wei1;~Dawn_Lawrie1;~Benjamin_Van_Durme2;~Yunmo_Chen1;~Orion_Weller1", "gender": "M;F;;M;M", "homepage": "https://weikangda.github.io/kangda.github.io/;https://hltcoe.jhu.edu/researcher/dawn-lawrie/;;https://omnuy.me;https://orionweller.github.io/", "dblp": "276/1064.html;l/DawnLawrie.html;;252/7831;248/7910", "google_scholar": "hQ1bio8AAAAJ;Ij9zwyoAAAAJ;;V-g2Tx8AAAAJ;SYYd4iAAAAAJ", "or_profile": "~Kangda_Wei1;~Dawn_Lawrie1;~Benjamin_Van_Durme2;~Yunmo_Chen1;~Orion_Weller1", "aff": "Department of Computer Science, University of North Carolina at Chapel Hill;Johns Hopkins University;;Johns Hopkins University;Johns Hopkins University", "aff_domain": "cs.unc.edu;jhu.edu;;jhu.edu;jhu.edu", "position": "MS student;Researcher;;PhD student;PhD student", "bibtex": "@inproceedings{\nwei2023when,\ntitle={When Do Decompositions Help for Machine Reading?},\nauthor={Kangda Wei and Dawn Lawrie and Benjamin Van Durme and Yunmo Chen and Orion Weller},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=mIsrzEjeG4}\n}", "github": "", "project": "", "reviewers": "dFMJ;mgx8;SCmL", "site": "https://openreview.net/forum?id=mIsrzEjeG4", "pdf_size": 0, "rating": "5;5;5", "confidence": "5;4;3", "excitement": "3;3;3", "reproducibility": "5;4;3", "correctness": "3;4;4", "rating_avg": 5.0, "confidence_avg": 4.0, "excitement_avg": 3.0, "reproducibility_avg": 4.0, "correctness_avg": 3.6666666666666665, "replies_avg": 10, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0001-7347-7086;;;", "linkedin": "kangdawei-40ab541b4/;dawnjlawrie/;;yunmochen;", "aff_unique_index": "0;1;1;1", "aff_unique_norm": "University of North Carolina at Chapel Hill;Johns Hopkins University", "aff_unique_dep": "Department of Computer Science;", "aff_unique_url": "https://www.unc.edu;https://www.jhu.edu", "aff_unique_abbr": "UNC Chapel Hill;JHU", "aff_campus_unique_index": "0", "aff_campus_unique": "Chapel Hill;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "mJCXoiIeJU", "title": "On the Automatic Generation and Simplification of Children's Stories", "track": "main", "status": "Long Main", "tldr": "", "abstract": "With recent advances in large language models (LLMs), the concept of automatically generating children\u2019s educational materials has become increasingly realistic. Working toward the goal of age-appropriate simplicity in generated educational texts, we first examine the ability of several popular LLMs to generate stories with properly adjusted lexical and readability levels. We find that, in spite of the growing capabilities of LLMs, they do not yet possess the ability to limit their vocabulary to levels appropriate for younger age groups. As a second experiment, we explore the ability of state-of-the-art lexical simplification models to generalize to the domain of children\u2019s stories and, thus, create an efficient pipeline for their automatic generation. 
In order to test these models, we develop a dataset of child-directed lexical simplification instances, with examples taken from the LLM-generated stories in our first experiment. We find that, while the strongest-performing current lexical simplification models do not perform as well on material designed for children due to their reliance on large language models behind the scenes, some models that still achieve fairly strong results on general data can mimic or even improve their performance on children-directed data with proper fine-tuning, which we conduct using our newly created child-directed simplification dataset.", "keywords": "Natural Language Generation;NLP for Education;LLMs", "primary_area": "", "supplementary_material": "", "author": "Maria Valentini;Jennifer Weber;Jesus Salcido;T\u00e9a Wright;Eliana Colunga;Katharina von der Wense", "authorids": "~Maria_Valentini1;~Jennifer_Weber1;~Jesus_Salcido1;~T\u00e9a_Wright1;~Eliana_Colunga2;~Katharina_von_der_Wense1", "gender": "F;;;F;F;", "homepage": "https://www.colorado.edu/ics/maria-valentini;;;;https://www.colorado.edu/psych-neuro/eliana-colunga;", "dblp": ";;;;72/8702;", "google_scholar": "JZL5_aIAAAAJ;;;;;", "or_profile": "~Maria_Valentini1;~Jennifer_Weber1;~Jesus_Salcido1;~T\u00e9a_Wright1;~Eliana_Colunga2;~Katharina_von_der_Wense1", "aff": "University of Colorado at Boulder;University of Colorado Boulder;;University of Colorado at Boulder;University of Colorado at Boulder;", "aff_domain": "colorado.edu;colorado.edu;;colorado.edu;colorado.edu;", "position": "PhD student;PhD student;;Undergrad student;Associate Professor;", "bibtex": "@inproceedings{\nvalentini2023on,\ntitle={On the Automatic Generation and Simplification of Children's Stories},\nauthor={Maria Valentini and Jennifer Weber and Jesus Salcido and T{\\'e}a Wright and Eliana Colunga and Katharina von der Wense},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=mJCXoiIeJU}\n}", "github": "", "project": "", "reviewers": "6sAM;Psov;Si5t", "site": "https://openreview.net/forum?id=mJCXoiIeJU", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;5;4", "excitement": "3;4;3", "reproducibility": "3;4;3", "correctness": "3;4;4", "rating_avg": 4.0, "confidence_avg": 4.333333333333333, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;0000-0003-2818-9389;", "linkedin": "maria-valentini-057756139;;;teawright;;", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Colorado", "aff_unique_dep": "", "aff_unique_url": "https://www.colorado.edu", "aff_unique_abbr": "CU", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Boulder", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "mKuH13Oq3x", "title": "Adaptive Gating in Mixture-of-Experts based Language Models", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Large language models have demonstrated exceptional language understanding capabilities in many NLP tasks. Sparsely activated mixture-of-experts (MoE) has emerged as a promising solution for scaling models while maintaining a constant number of computational operations. Existing MoE models adopt a fixed gating network where each token is computed by the same number of experts. 
This contradicts our intuition that the tokens in each sequence vary in terms of their linguistic complexity and, consequently, require different computational costs. Little is discussed in prior research on the trade-off between computation per token and model performance. \n\nThis paper introduces adaptive gating in MoE, a flexible training strategy that allows tokens to be processed by a variable number of experts based on expert probability distribution. Adaptive gating preserves sparsity while improving training efficiency. We further draw upon curriculum learning to better align the order of training samples and maximize the training time savings. Extensive experiments on diverse NLP tasks show that adaptive gating reduces at most 22.5% training time while maintaining inference quality. Moreover, we conduct a comprehensive analysis of the gating decisions and present our insights on which tokens are inherently difficult to process, depending on the specific language task.", "keywords": "Mixture of Experts;Adaptive Computation;Training Efficiency", "primary_area": "", "supplementary_material": "", "author": "Jiamin Li;Qiang Su;Yitao Yang;Yimin Jiang;Cong Wang;Hong Xu", "authorids": "~Jiamin_Li1;~Qiang_Su2;~Yitao_Yang2;~Yimin_Jiang1;~Cong_Wang10;~Hong_Xu3", "gender": "F;M;M;M;;", "homepage": "https://serendipitycoding.github.io/;https://qiangsu97.github.io/;https://yyyyyt123.github.io/;https://www.linkedin.com/incareer/in/yimin-jiang-409a2a151;;https://henryhxu.github.io/", "dblp": "81/3437-2;;;;;01/5265-1", "google_scholar": "2-LOeCMAAAAJ;https://scholar.google.com/citations?hl=en;;;;BZHzIFIAAAAJ", "or_profile": "~Jiamin_Li1;~Qiang_Su2;~Yitao_Yang2;~Yimin_Jiang1;~Cong_Wang10;~Hong_Xu3", "aff": "City University of Hong Kong;City University of Hong Kong;Northwest Polytechnical University Xi'an;ByteDance;;The Chinese University of Hong Kong", "aff_domain": "cityu.edu.hk;cityu.edu.hk;nwpu.edu.cn;bytedance.com;;cuhk.edu.hk", "position": "PhD student;PhD student;Undergrad student;Researcher;;Associate Professor", "bibtex": "@inproceedings{\nli2023adaptive,\ntitle={Adaptive Gating in Mixture-of-Experts based Language Models},\nauthor={Jiamin Li and Qiang Su and Yitao Yang and Yimin Jiang and Cong Wang and Hong Xu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=mKuH13Oq3x}\n}", "github": "", "project": "", "reviewers": "P1xP;ggKr;SPuz", "site": "https://openreview.net/forum?id=mKuH13Oq3x", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;2;2", "excitement": "3;3;2", "reproducibility": "4;4;4", "correctness": "3;3;3", "rating_avg": 4.0, "confidence_avg": 2.6666666666666665, "excitement_avg": 2.6666666666666665, "reproducibility_avg": 4.0, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0001-8110-2436;0000-0002-4482-6248;;;;", "linkedin": "jiamin-li-9998ab129/;;;;;", "aff_unique_index": "0;0;1;2;3", "aff_unique_norm": "City University of Hong Kong;Northwest Polytechnical University;ByteDance;Chinese University of Hong Kong", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.cityu.edu.hk;http://www.nwpu.edu.cn;https://www.bytedance.com;https://www.cuhk.edu.hk", "aff_unique_abbr": "CityU;NWPU;ByteDance;CUHK", "aff_campus_unique_index": "0;0;1;0", "aff_campus_unique": "Hong Kong SAR;Xi'an;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "mLJOMUwQyz", "title": "INFORM : 
Information eNtropy based multi-step reasoning FOR large language Models", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Large language models (LLMs) have demonstrated exceptional performance in reasoning tasks with dedicated Chain-of-Thought (CoT) prompts. Further enhancing CoT prompts with exquisite exemplars can significantly improve reasoning performance. However, the effectiveness of CoT prompts may fluctuate dramatically with different choices of in-context examples. Additionally, manual construction of rationale steps can be time-consuming, presenting challenges for the widespread adoption of CoT prompting. In this work, we propose a novel approach by introducing information entropy (IE) as a criterion for CoT prompt selection. We extend this criterion to the CoT generation and inference stages, automatically generating CoT prompts with higher information entropy scores and adaptively determining the number of samples. These three stages together form our proposed information-entropy-based multi-step reasoning for large language models, named INFORM. Our experiments across seven reasoning benchmarks utilizing two language models (GPT-3.5-Turbo and text-davinci-003) demonstrate the superiority of INFORM both in performance and efficiency.", "keywords": "Chain-of-Thoughts;Multi-Step Reasoning;Large Language Models;Prompting;In-context Learning", "primary_area": "", "supplementary_material": "", "author": "Chuyue Zhou;WangJie You;Juntao Li;Jing Ye;Kehai Chen;Min Zhang", "authorids": "~Chuyue_Zhou1;~WangJie_You1;~Juntao_Li2;~Jing_Ye2;~Kehai_Chen2;~Min_Zhang9", "gender": ";M;M;;M;M", "homepage": "https://github.com/oneningt;https://github.com/Moriarty0923;https://lijuntaopku.github.io/;https://github.com/1245244103;https://chenkehai.github.io;https://zhangmin-nlp-ai.github.io/", "dblp": ";357/3296.html;;;78/9623;83/5342-5", "google_scholar": ";;sZSygsYAAAAJ;;_M4Am0AAAAAJ;https://scholar.google.com/citations?hl=zh-CN", "or_profile": "~Chuyue_Zhou1;~WangJie_You1;~Juntao_Li2;~Jing_Ye2;~Kehai_Chen2;~Min_Zhang9", "aff": "Suzhou University;Soochow University;Soochow University, China;Soochow University;Harbin Institute of Technology (Shenzhen);Harbin Institute of Technology, Shenzhen", "aff_domain": "suda.edu.cn;stu.suda.edu.cn;suda.edu.cn;stu.suda.edu.cn;hit.edu.cn;hit.edu.cn", "position": "MS student;Undergrad student;Associate Professor;MS student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nzhou2023inform,\ntitle={{INFORM} : Information eNtropy based multi-step reasoning {FOR} large language Models},\nauthor={Chuyue Zhou and WangJie You and Juntao Li and Jing Ye and Kehai Chen and Min Zhang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=mLJOMUwQyz}\n}", "github": "", "project": "", "reviewers": "ZjNo;tD1V;ZqC6", "site": "https://openreview.net/forum?id=mLJOMUwQyz", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;5;3", "excitement": "3;4;4", "reproducibility": "2;4;4", "correctness": "4;4;4", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 4.0, "replies_avg": 10, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0002-6286-7529;;0000-0002-4346-7618;", "linkedin": ";;;;;", "aff_unique_index": "0;1;1;1;2;2", "aff_unique_norm": "Suzhou University;Soochow University;Harbin Institute of Technology", "aff_unique_dep": ";;", 
"aff_unique_url": "https://www.suda.edu.cn;https://www.soochow.edu.cn;http://en.hhit.edu.cn/", "aff_unique_abbr": "Suda;Soochow U;HIT", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Shenzhen", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "mLlJavL0PB", "title": "InstructExcel: A Benchmark for Natural Language Instruction in Excel", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "With the evolution of Large Language Models (LLMs) we can solve increasingly more complex NLP tasks across various domains, including spreadsheets. This work investigates whether LLMs can generate code (Excel OfficeScripts, a TypeScript API for executing many tasks in Excel) that solves Excel specific tasks provided via natural language user instructions. To do so we introduce a new large-scale benchmark, InstructExcel, created by leveraging the \u2018Automate\u2019 feature in Excel to automatically generate OfficeScripts from users\u2019 actions. Our benchmark includes over 10k samples covering 170+ Excel operations across 2,000 publicly available Excel spreadsheets. Experiments across various zero-shot and few-shot settings show that InstructExcel is a hard benchmark for state of the art models like GPT-4. We observe that (1) using GPT-4 over GPT-3.5, (2) providing more in-context examples, and (3) dynamic prompting can help improve performance on this benchmark.", "keywords": "Code generation;program synthesis;benchmark;large language models", "primary_area": "", "supplementary_material": "", "author": "Justin Payan;Swaroop Mishra;Mukul Singh;Carina Suzana Negreanu;Christian Poelitz;Chitta Baral;Subhro Roy;Rasika Chakravarthy;Benjamin Van Durme;Elnaz Nouri", "authorids": "~Justin_Payan1;~Swaroop_Mishra1;~Mukul_Singh1;~Carina_Suzana_Negreanu1;~Christian_Poelitz1;~Chitta_Baral1;~Subhro_Roy1;~Rasika_Chakravarthy1;~Benjamin_Van_Durme2;~Elnaz_Nouri1", "gender": "M;M;M;F;M;M;M;;;Not Specified", "homepage": "http://justinpayan.github.io;https://swarooprm.github.io/;https://www.microsoft.com/research/people/singhmukul;;;http://chitta.orissalinks.com;https://sroy9.github.io/;https://www.linkedin.com/in/rasika-m-chakravarthy;;https://www.microsoft.com/en-us/research/people/elnouri/", "dblp": "289/0987;249/2784;291/1609;276/1629;66/4776.html;b/ChittaBaral;47/9962;54/7528;;131/8497", "google_scholar": "gc9w_eYAAAAJ;-7LK2SwAAAAJ;3O7KjiIAAAAJ;63f9xyYAAAAJ;https://scholar.google.de/citations?user=xkzwZeoAAAAJ;9Yd716IAAAAJ;l2pAq_0AAAAJ;;;roaZYUcAAAAJ", "or_profile": "~Justin_Payan1;~Swaroop_Mishra1;~Mukul_Singh1;~Carina_Suzana_Negreanu1;~Christian_Poelitz1;~Chitta_Baral1;~Subhro_Roy1;~Rasika_Chakravarthy1;~Benjamin_Van_Durme2;~Elnaz_Nouri1", "aff": "University of Massachusetts Amherst;Arizona State University;Microsoft;Microsoft;Microsoft Research;Arizona State University;Microsoft Semantic Machines;Microsoft;;Microsoft Research", "aff_domain": "umass.edu;asu.edu;microsoft.com;microsoft.com;research.microsoft.com;asu.edu;microsoft.com;microsoft.com;;research.microsoft.com", "position": "PhD student;PhD student;Researcher;Researcher;Researcher;Full Professor;Senior Researcher;Data Scientist;;Researcher", "bibtex": "@inproceedings{\npayan2023instructexcel,\ntitle={InstructExcel: A Benchmark for Natural Language Instruction in Excel},\nauthor={Justin Payan and Swaroop Mishra and Mukul Singh and Carina Suzana Negreanu and Christian Poelitz and Chitta Baral and Subhro Roy and Rasika Chakravarthy and Benjamin Van Durme and Elnaz Nouri},\nbooktitle={The 2023 
Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=mLlJavL0PB}\n}", "github": "", "project": "", "reviewers": "em7S;6cHD;DQNq", "site": "https://openreview.net/forum?id=mLlJavL0PB", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;3;4", "excitement": "3;3;3", "reproducibility": "4;4;5", "correctness": "4;4;4", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.0, "reproducibility_avg": 4.333333333333333, "correctness_avg": 4.0, "replies_avg": 10, "authors#_avg": 10, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0001-7601-3500;;0000-0001-9510-4512;;;0000-0002-7549-723X;;;;", "linkedin": "justin-payan-028b43a0;;mukulsingh105/;;christian-p\u00f6litz-6bb370127/;chitta-baral-8a8438b;;rasika-m-chakravarthy;;", "aff_unique_index": "0;1;2;2;2;1;2;2;2", "aff_unique_norm": "University of Massachusetts Amherst;Arizona State University;Microsoft", "aff_unique_dep": ";;Microsoft Corporation", "aff_unique_url": "https://www.umass.edu;https://www.asu.edu;https://www.microsoft.com", "aff_unique_abbr": "UMass Amherst;ASU;Microsoft", "aff_campus_unique_index": "0", "aff_campus_unique": "Amherst;", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "mN62FSvZVW", "title": "Sociocultural Norm Similarities and Differences via Situational Alignment and Explainable Textual Entailment", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Designing systems that can reason across cultures requires that they are grounded in the norms of the contexts in which they operate. However, current research on developing computational models of social norms has primarily focused on American society. Here, we propose a novel approach to discover and compare descriptive social norms across Chinese and American cultures. We demonstrate our approach by leveraging discussions on a Chinese Q\\&A platform\u2014\u77e5\u4e4e (Zhihu)\u2014and the existing SocialChemistry dataset as proxies for contrasting cultural axes, align social situations cross-culturally, and extract social norms from texts using in-context learning. Embedding Chain-of-Thought prompting in a human-AI collaborative framework, we build a high-quality dataset of 3,069 social norms aligned with social situations across Chinese and American cultures alongside corresponding free-text explanations. To test the ability of models to reason about social norms across cultures, we introduce the task of explainable social norm entailment, showing that existing models under 3B parameters have significant room for improvement in both automatic and human evaluation. 
Further analysis of cross-cultural norm differences based on our dataset shows empirical alignment with the social orientations framework, revealing several situational and descriptive nuances in norms across these cultures.", "keywords": "social norms;culture;chain-of-thought reasoning", "primary_area": "", "supplementary_material": "", "author": "Sky CH-Wang;Arkadiy Saakyan;Oliver Li;Zhou Yu;Smaranda Muresan", "authorids": "~Sky_CH-Wang1;~Arkadiy_Saakyan1;~Oliver_Li1;~Zhou_Yu1;~Smaranda_Muresan3", "gender": "M;M;M;F;", "homepage": "https://skywang.me;https://asaakyan.github.io/;https://github.com/Aochong-Li/aochong-li.github.io;http://www.cs.columbia.edu/~zhouyu/;http://www.cs.columbia.edu/~smara/", "dblp": "301/9138;294/5397;308/3312;83/3205;44/70", "google_scholar": "6lHNfVoAAAAJ;oPegqXQAAAAJ;rZ186jcAAAAJ;https://scholar.google.com.tw/citations?user=jee2Dy0AAAAJ;Esbx2VcAAAAJ", "or_profile": "~Sky_CH-Wang1;~Arkadiy_Saakyan1;~Oliver_Li1;~Zhou_Yu1;~Smaranda_Muresan3", "aff": "Columbia University;Amazon;Columbia University;Columbia University;Columbia University", "aff_domain": "columbia.edu;amazon.com;columbia.edu;columbia.edu;columbia.edu", "position": "PhD student;Intern;Undergrad student;Assistant Professor;Principal Researcher", "bibtex": "@inproceedings{\nch-wang2023sociocultural,\ntitle={Sociocultural Norm Similarities and Differences via Situational Alignment and Explainable Textual Entailment},\nauthor={Sky CH-Wang and Arkadiy Saakyan and Oliver Li and Zhou Yu and Smaranda Muresan},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=mN62FSvZVW}\n}", "github": "", "project": "", "reviewers": "7iE3;nK7w;3gbm", "site": "https://openreview.net/forum?id=mN62FSvZVW", "pdf_size": 0, "rating": "4;4;4", "confidence": "5;3;4", "excitement": "3;3;3", "reproducibility": "4;4;3", "correctness": "4;4;3", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;", "linkedin": "skychwang/;;;;", "aff_unique_index": "0;1;0;0;0", "aff_unique_norm": "Columbia University;Amazon", "aff_unique_dep": ";Amazon.com, Inc.", "aff_unique_url": "https://www.columbia.edu;https://www.amazon.com", "aff_unique_abbr": "Columbia;Amazon", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "mNMwIwydgr", "title": "Are we biased on bias? Characterizing social bias research in the ACL community", "track": "main", "status": "Reject", "tldr": "", "abstract": "Recent events in business, politics and society have shed light on the importance and potential dangers of Natural Language Processing (NLP) in the real world. NLP applications have gained unprecedented popularity not just among scientists and practitioners, but also the general public. As we develop new methodologies and curate new benchmarks and datasets, it is more important than ever to consider the implications and societal impact of our work. In this paper, we characterize the landscape of societal bias research within the ACL community and provide a quantitative and qualitative survey by analyzing a categorized corpus of \textit{348} papers. 
More specifically, we present a definition of social bias based on ethical principles and investigate (i) types of bias, (ii) languages, and (iii) type of paper. We find that there is significantly more work on gender biases and on English than on other languages. Finally, we discuss the possible causes behind our findings and provide pointers to future opportunities.", "keywords": "Social Bias;Survey;Ethics", "primary_area": "", "supplementary_material": "", "author": "Annika Marie Schoene;Ricardo A. Baeza-Yates;Kenneth Church;Laura Haaber Ihle;Cansu Canca", "authorids": "~Annika_Marie_Schoene1;~Ricardo_A._Baeza-Yates1;~Kenneth_Church1;~Laura_Haaber_Ihle1;~Cansu_Canca1", "gender": ";;;;F", "homepage": ";;;;https://aiethicslab.com/cansu-canca/", "dblp": ";;;;", "google_scholar": ";;;;", "or_profile": "~Annika_Marie_Schoene1;~Ricardo_A._Baeza-Yates1;~Kenneth_Church1;~Laura_Haaber_Ihle1;~Cansu_Canca1", "aff": ";;;;Northeastern University", "aff_domain": ";;;;northeastern.edu", "position": ";;;;Associate Professor", "bibtex": "@misc{\nschoene2023are,\ntitle={Are we biased on bias? Characterizing social bias research in the {ACL} community },\nauthor={Annika Marie Schoene and Ricardo A. Baeza-Yates and Kenneth Church and Laura Haaber Ihle and Cansu Canca},\nyear={2023},\nurl={https://openreview.net/forum?id=mNMwIwydgr}\n}", "github": "", "project": "", "reviewers": "2HNJ;RQ6n;ysLf", "site": "https://openreview.net/forum?id=mNMwIwydgr", "pdf_size": 0, "rating": "1;1;1", "confidence": "5;3;4", "excitement": "2;3;2", "reproducibility": "4;2;2", "correctness": "3;2;2", "rating_avg": 1.0, "confidence_avg": 4.0, "excitement_avg": 2.3333333333333335, "reproducibility_avg": 2.6666666666666665, "correctness_avg": 2.3333333333333335, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;", "linkedin": ";;;;", "aff_unique_index": "0", "aff_unique_norm": "Northeastern University", "aff_unique_dep": "", "aff_unique_url": "https://www.northeastern.edu", "aff_unique_abbr": "NEU", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "id": "mPaNp1eglz", "title": "Comparing the Evaluation and Production of Loophole Behavior in Humans and Large Language Models", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "In law, lore, and everyday life, loopholes are commonplace. When people exploit a loophole, they understand the intended meaning or goal of another person, but choose to go with a different interpretation. Past and current AI research has shown that artificial intelligence engages in what seems superficially like the exploitation of loopholes, but this is likely anthropomorphization. It remains unclear to what extent current models, especially Large Language Models (LLMs), capture the pragmatic understanding required for engaging in loopholes. We examined the performance of LLMs on two metrics developed for studying loophole behavior in humans: evaluation (ratings of trouble, upset, and humor), and generation (coming up with new loopholes in a given context). We conducted a fine-grained comparison of state-of-the-art LLMs to humans, and find that while many of the models rate loophole behaviors as resulting in less trouble and upset than outright non-compliance (in line with adults), they struggle to recognize the humor in the creative exploitation of loopholes in the way that humans do. 
Furthermore, only two of the models, GPT 3 and 3.5, are capable of generating loopholes of their own, with GPT3.5 performing closest to the human baseline.", "keywords": "theory of mind;pragmatics;social reasoning;loopholes;large-language models;artificial intelligence", "primary_area": "", "supplementary_material": "", "author": "Sonia Krishna Murthy;Kiera Maria Parece;Sophie Bridgers;Peng Qian;Tomer Ullman", "authorids": "~Sonia_Krishna_Murthy1;~Kiera_Maria_Parece1;~Sophie_Bridgers1;~Peng_Qian1;~Tomer_Ullman1", "gender": "F;F;;;", "homepage": "https://www.soniamurthy.com/;https://eccl.mit.edu/team-profiles/kiera-parece;;;", "dblp": ";;;74/8662;", "google_scholar": "MFLudqMAAAAJ;;1Wlj18oAAAAJ;;", "or_profile": "~Sonia_Krishna_Murthy1;~Kiera_Maria_Parece1;~Sophie_Bridgers1;~Peng_Qian1;~Tomer_Ullman1", "aff": "Harvard University, Harvard University;Harvard University;Google DeepMind;Harvard University;", "aff_domain": "g.harvard.edu;harvard.edu;google.com;harvard.edu;", "position": "PhD student;Researcher;Researcher;Postdoc;", "bibtex": "@inproceedings{\nmurthy2023comparing,\ntitle={Comparing the Evaluation and Production of Loophole Behavior in Humans and Large Language Models},\nauthor={Sonia Krishna Murthy and Kiera Maria Parece and Sophie Bridgers and Peng Qian and Tomer Ullman},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=mPaNp1eglz}\n}", "github": "", "project": "", "reviewers": "HszC;4KS3;4z98", "site": "https://openreview.net/forum?id=mPaNp1eglz", "pdf_size": 0, "rating": "2;2;2", "confidence": "4;4;4", "excitement": "3;4;4", "reproducibility": "3;4;4", "correctness": "2;4;3", "rating_avg": 2.0, "confidence_avg": 4.0, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;", "linkedin": ";;;;", "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Harvard University;Google", "aff_unique_dep": ";Google DeepMind", "aff_unique_url": "https://www.harvard.edu;https://deepmind.com", "aff_unique_abbr": "Harvard;DeepMind", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "United States;United Kingdom" }, { "id": "mQxqo1di63", "title": "KAPALM: Knowledge grAPh enhAnced Language Models for Fake News Detection", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Social media has not only facilitated news consumption, but also led to the wide spread of fake news. Because news articles in social media are usually condensed and full of knowledge entities, existing methods of fake news detection use external entity knowledge. However, the majority of these methods focus on news entity information and ignore the structured knowledge among news entities. To address this issue, in this work, we propose a Knowledge grAPh enhAnced Language Model (KAPALM), which is a novel model that fuses coarse- and fine-grained representations of entity knowledge from Knowledge Graphs (KGs). Firstly, we identify entities in news content and link them to entities in KGs. Then, a subgraph of KGs is extracted to provide structured knowledge of entities in KGs and fed into a graph neural network to obtain the coarse-grained knowledge representation. This subgraph is pruned to provide fine-grained knowledge and fed into the attentive graph and graph pooling layer. 
Finally, we integrate the coarse- and fine-grained entity knowledge representations with the textual representation for fake news detection. The experimental results on two benchmark datasets show that our method is superior to state-of-the-art baselines. In addition, it is competitive in the few-shot scenario.", "keywords": "Knowledge Graph; Fake news detection", "primary_area": "", "supplementary_material": "", "author": "Jing Ma;Chen Chen;Chunyan Hou;Xiaojie Yuan", "authorids": "~Jing_Ma6;~Chen_Chen47;~Chunyan_Hou1;~Xiaojie_Yuan1", "gender": "F;;;", "homepage": "https://github.com/SilenceMJ99;;;https://dbis.nankai.edu.cn/2023/0322/c12139a506919/page.htm", "dblp": ";65/4423-12;66/3541;79/2280", "google_scholar": ";;;", "or_profile": "~Jing_Ma6;~Chen_Chen47;~Chunyan_Hou1;~Xiaojie_Yuan1", "aff": "Nankai University;Nankai University;Tianjin University of Technology;Nankai University", "aff_domain": "nku.nankai.edu.cn;nankai.edu.cn;tjut.edu.cn;nankai.edu.cn", "position": "MS student;Associate Professor;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nma2023kapalm,\ntitle={{KAPALM}: Knowledge gr{AP}h enhAnced~Language~Models for Fake News Detection},\nauthor={Jing Ma and Chen Chen and Chunyan Hou and Xiaojie Yuan},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=mQxqo1di63}\n}", "github": "", "project": "", "reviewers": "nxyk;oZm4;4XMg", "site": "https://openreview.net/forum?id=mQxqo1di63", "pdf_size": 0, "rating": "2;2;2", "confidence": "4;4;3", "excitement": "1;2;3", "reproducibility": "3;2;4", "correctness": "2;2;3", "rating_avg": 2.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 2.0, "reproducibility_avg": 3.0, "correctness_avg": 2.3333333333333335, "replies_avg": 10, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0000-0002-5876-6856", "linkedin": ";;;", "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Nankai University;Tianjin University of Technology", "aff_unique_dep": ";", "aff_unique_url": "http://www.nankai.edu.cn;http://www.tjut.edu.cn", "aff_unique_abbr": "NKU;TUT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "mRETTyZEJa", "title": "GROVE: A Retrieval-augmented Complex Story Generation Framework with A Forest of Evidence", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Conditional story generation is significant in human-machine interaction, particularly in producing stories with complex plots. While Large language models (LLMs) perform well on multiple NLP tasks, including story generation, it is challenging to generate stories with both complex and creative plots. Existing methods often rely on detailed prompts to guide LLMs to meet target conditions, which inadvertently restrict the creative potential of the generated stories. We argue that leveraging information from exemplary human-written stories facilitates generating more diverse plotlines. Delving deeper into story details helps build complex and credible plots. In this paper, we propose a retrieval-auGmented stoRy generation framework with a fOrest of eVidEnce (GROVE) to enhance stories' complexity. We build a retrieval repository for target conditions to produce few-shot examples to prompt LLMs. 
Additionally, we design an \"asking-why\" prompting scheme that extracts a forest of evidence, providing compensation for the ambiguities that may occur in the generated story. This iterative process uncovers underlying story backgrounds. Finally, we select the most fitting chains of evidence from the evidence forest and integrate them into the generated story, thereby enhancing the narrative's complexity and credibility. Experimental results and numerous examples verify the effectiveness of our method.", "keywords": "story generation;large language model;iterative prompting", "primary_area": "", "supplementary_material": "", "author": "Zhihua Wen;Zhiliang Tian;Wei Wu;Yuxin Yang;Yanqi Shi;Zhen Huang;Dongsheng Li", "authorids": "~Zhihua_Wen2;~Zhiliang_Tian2;~Wei_Wu30;~Yuxin_Yang4;~Yanqi_Shi1;~Zhen_Huang3;~Dongsheng_Li3", "gender": ";M;F;F;;M;", "homepage": ";https://scholar.google.com.hk/citations?hl=en&user=ClvGvccAAAAJ#;https://github.com/Rowena-929;https://github.com/cloudacardia;https://github.com/Sconcer;;", "dblp": ";203/9265;;;;22/3870-6;", "google_scholar": ";https://scholar.google.com.hk/citations?hl=en;;;;;", "or_profile": "~Zhihua_Wen2;~Zhiliang_Tian2;~Wei_Wu30;~Yuxin_Yang4;~Yanqi_Shi1;~Zhen_Huang3;~Dongsheng_Li3", "aff": ";National University of Defense Technology;Central China Normal University;National University of Defense Technology;National University of Defense Technology;National University of Defense Technology;", "aff_domain": ";nudt.edu.cn;ccnu.edu.cn;nudt.edu.cn;nudt.edu.cn;nudt.edu.cn;", "position": ";Assistant Professor;Undergrad student;Undergrad student;MS student;Full Professor;", "bibtex": "@inproceedings{\nwen2023grove,\ntitle={{GROVE}: A Retrieval-augmented Complex Story Generation Framework with A Forest of Evidence},\nauthor={Zhihua Wen and Zhiliang Tian and Wei Wu and Yuxin Yang and Yanqi Shi and Zhen Huang and Dongsheng Li},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=mRETTyZEJa}\n}", "github": "", "project": "", "reviewers": "C2GE;ALpk;xerB", "site": "https://openreview.net/forum?id=mRETTyZEJa", "pdf_size": 0, "rating": "3;3;3", "confidence": "2;3;3", "excitement": "3;3;3", "reproducibility": "3;3;4", "correctness": "4;2;4", "rating_avg": 3.0, "confidence_avg": 2.6666666666666665, "excitement_avg": 3.0, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;0000-0003-4819-373X;", "linkedin": ";;;;;;", "aff_unique_index": "0;1;0;0;0", "aff_unique_norm": "National University of Defense Technology;Central China Normal University", "aff_unique_dep": ";", "aff_unique_url": "http://www.nudt.edu.cn/;http://www.ccnu.edu.cn", "aff_unique_abbr": "NUDT;CCNU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "mTiHLHu3sP", "title": "GPT-RE: In-context Learning for Relation Extraction using Large Language Models", "track": "main", "status": "Long Main", "tldr": "", "abstract": "In spite of the potential for ground-breaking achievements offered by large language models (LLMs) (e.g., GPT-3) via in-context learning (ICL), they still lag significantly behind fully-supervised baselines (e.g., fine-tuned BERT) in relation extraction (RE). 
This is due to the two major shortcomings of ICL for RE: (1) low relevance regarding entity and relation in existing sentence-level demonstration retrieval approaches for ICL; and (2) the lack of explaining input-label mappings of demonstrations leading to poor ICL effectiveness. \nIn this paper, we propose GPT-RE to successfully address the aforementioned issues by (1) incorporating task-aware representations in demonstration retrieval; and (2) enriching the demonstrations with gold label-induced reasoning logic. We evaluate GPT-RE on four widely-used RE datasets, and observe that GPT-RE achieves improvements over not only existing GPT-3 baselines, but also fully-supervised baselines as in Figure 1. Specifically, GPT-RE achieves SOTA performances on the Semeval and SciERC datasets, and competitive performances on the TACRED and ACE05 datasets. Additionally, a critical issue of LLMs revealed by previous work, the strong inclination to wrongly classify NULL examples into other pre-defined labels, is substantially alleviated by our method. We show an empirical analysis.", "keywords": "Large language models;relation extraction;in-context learning", "primary_area": "", "supplementary_material": "", "author": "Zhen Wan;Fei Cheng;Zhuoyuan Mao;Qianying Liu;Haiyue Song;Jiwei Li;Sadao Kurohashi", "authorids": "~Zhen_Wan1;~Fei_Cheng2;~Zhuoyuan_Mao1;~Qianying_Liu1;~Haiyue_Song1;~Jiwei_Li1;~Sadao_Kurohashi1", "gender": "M;M;M;F;M;M;M", "homepage": ";https://researchmap.jp/chengfei?lang=en;;https://yiyunya.github.io;https://shyyhs.github.io/;https://nlp.stanford.edu/~bdlijiwei/;https://nlp.ist.i.kyoto-u.ac.jp/member/kuro/index.html", "dblp": ";06/5591-2.html;256/9496;227/6808;https://dblp.org/pers/s/Song:Haiyue.html;73/5746-1;42/2149", "google_scholar": "OH_1qwMAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.co.jp/citations?user=gIzJ2sQAAAAJ;DTm20CoAAAAJ;https://scholar.google.co.jp/citations?user=IP5UyqcAAAAJ;PwU16JEAAAAJ;https://scholar.google.co.jp/citations?user=gpKS5P0AAAAJ", "or_profile": "~Zhen_Wan1;~Fei_Cheng2;~Zhuoyuan_Mao1;~Qianying_Liu1;~Haiyue_Song1;~Jiwei_Li1;~Sadao_Kurohashi1", "aff": "Kyoto University;Kyoto University;Apple;Kyoto University;Kyoto University;Zhejiang University;Kyoto University", "aff_domain": "kyoto-u.ac.jp;kyoto-u.ac.jp;apple.com;kyoto-u.ac.jp;kyoto-u.ac.jp;zju.edu.cn;kyoto-u.ac.jp", "position": "MS student;Assistant Professor;Intern;PhD student;PhD student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nwan2023gptre,\ntitle={{GPT}-{RE}: In-context Learning for Relation Extraction using Large Language Models},\nauthor={Zhen Wan and Fei Cheng and Zhuoyuan Mao and Qianying Liu and Haiyue Song and Jiwei Li and Sadao Kurohashi},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=mTiHLHu3sP}\n}", "github": "", "project": "", "reviewers": "wnQk;Ucne;UZWY", "site": "https://openreview.net/forum?id=mTiHLHu3sP", "pdf_size": 0, "rating": "5;5;5", "confidence": "3;4;3", "excitement": "4;4;4", "reproducibility": "4;4;5", "correctness": "4;4;4", "rating_avg": 5.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 4.0, "reproducibility_avg": 4.333333333333333, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;0000-0003-1159-0918;;0000-0001-5398-8399", "linkedin": "zhen-wan-8531251ab/;;zhuoyuan-mao-44124720b/;;haiyue-song-844a74186/;;", "aff_unique_index": 
"0;0;1;0;0;2;0", "aff_unique_norm": "Kyoto University;Apple;Zhejiang University", "aff_unique_dep": ";Apple Inc.;", "aff_unique_url": "https://www.kyoto-u.ac.jp;https://www.apple.com;https://www.zju.edu.cn", "aff_unique_abbr": "Kyoto U;Apple;ZJU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0;2;0", "aff_country_unique": "Japan;United States;China" }, { "id": "mU6C04mAJk", "title": "ToViLaG: Your Visual-Language Generative Model is Also An Evildoer", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Recent large-scale Visual-Language Generative Models (VLGMs) have achieved unprecedented improvement in multimodal image/text generation. However, these models might also generate toxic content, e.g., offensive text and pornography images, raising significant ethical risks. Despite exhaustive studies on toxic degeneration of language models, this problem remains largely unexplored within the context of visual-language generation. This work delves into the propensity for toxicity generation and susceptibility to toxic data across various VLGMs. For this purpose, we built ToViLaG, a dataset comprising 32K co-toxic/mono-toxic text-image pairs and 1K innocuous but evocative text that tends to stimulate toxicity. Furthermore, we propose WInToRe, a novel toxicity metric tailored to visual-language generation, which theoretically reflects different aspects of toxicity considering both input and output. On such a basis, we benchmarked the toxicity of a diverse spectrum of VLGMs and discovered that some models do more evil than expected while some are more vulnerable to infection, underscoring the necessity of VLGMs detoxification. Therefore, we develop an innovative bottleneck-based detoxification method. 
Our method could reduce toxicity while maintaining comparable generation quality, providing a promising initial solution to this line of research.", "keywords": "Toxicity;text-to-image generation;image-to-text generation;Detoxification;Multimodality;generation", "primary_area": "", "supplementary_material": "", "author": "Xinpeng Wang;Xiaoyuan Yi;Han Jiang;Shanlin Zhou;Zhihua Wei;Xing Xie", "authorids": "~Xinpeng_Wang2;~Xiaoyuan_Yi1;~Han_Jiang2;~Shanlin_Zhou2;~Zhihua_Wei1;~Xing_Xie3", "gender": "M;M;;M;F;M", "homepage": "https://victorup.github.io/;;https://github.com/Salomeeeee;;;http://research.microsoft.com/en-us/people/xingx/", "dblp": "156/1668-1.html;179/2248;;;55/3674-1;08/6809-1", "google_scholar": "https://scholar.google.com.hk/citations?user=2euMY5oAAAAJ;BdpXcLgAAAAJ;;;;5EQfAFIAAAAJ", "or_profile": "~Xinpeng_Wang2;~Xiaoyuan_Yi1;~Han_Jiang2;~Shanlin_Zhou2;~Zhihua_Wei1;~Xing_Xie3", "aff": "Tongji University;Microsoft Research;Tongji University;Tongji University;Tongji University;Microsoft Research Asia", "aff_domain": "tongji.edu.cn;research.microsoft.com;tongji.edu.cn;tongji.edu.cn;tongji.edu.cn;microsoft.com", "position": "PhD student;Researcher;MS student;PhD student;Full Professor;Senior Principal Researcher", "bibtex": "@inproceedings{\nwang2023tovilag,\ntitle={ToViLaG: Your Visual-Language Generative Model is Also An Evildoer},\nauthor={Xinpeng Wang and Xiaoyuan Yi and Han Jiang and Shanlin Zhou and Zhihua Wei and Xing Xie},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=mU6C04mAJk}\n}", "github": "", "project": "", "reviewers": "D6Ca;jEZr;9Kaw", "site": "https://openreview.net/forum?id=mU6C04mAJk", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;4;4", "excitement": "4;3;3", "reproducibility": "5;3;4", "correctness": "4;4;4", "rating_avg": 5.0, "confidence_avg": 4.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.0, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0003-1857-0346;0000-0003-2710-1613;;0000-0003-1016-1281;;0000-0002-8608-8482", "linkedin": ";xiaoyuan-yi-471212a5/;;;;xingx/", "aff_unique_index": "0;1;0;0;0;1", "aff_unique_norm": "Tongji University;Microsoft", "aff_unique_dep": ";Microsoft Research", "aff_unique_url": "https://www.tongji.edu.cn;https://www.microsoft.com/en-us/research", "aff_unique_abbr": "Tongji;MSR", "aff_campus_unique_index": "1", "aff_campus_unique": ";Asia", "aff_country_unique_index": "0;1;0;0;0;0", "aff_country_unique": "China;United States" }, { "id": "mW5M8qkAxt", "title": "Confidence-based Ensembling of Perspective-aware Models", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Research in the field of NLP has recently focused on the variability that people show in selecting labels when performing an annotation task. Exploiting disagreements in annotations has been shown to offer advantages for accurate modelling and fair evaluation. In this paper, we propose a strongly perspectivist model for supervised classification of natural language utterances. Our approach combines the predictions of several perspective-aware models using key information of their individual confidence to capture the subjectivity encoded in the annotation of linguistic phenomena.\nWe validate our method through experiments on two case studies, irony and hate speech detection, in in-domain and cross-domain settings. 
The results show that confidence-based ensembling of perspective-aware models seems beneficial for classification performance in all scenarios. In addition, we demonstrate the effectiveness of our method with automatically extracted perspectives from annotations when the annotators' metadata are not available.", "keywords": "Data perspectivism;irony;hate speech;confidence;ensemble", "primary_area": "", "supplementary_material": "", "author": "Silvia Casola;Soda Marem Lo;Valerio Basile;Simona Frenda;Alessandra Teresa Cignarella;Viviana Patti;Cristina Bosco", "authorids": "~Silvia_Casola1;~Soda_Marem_Lo1;~Valerio_Basile2;~Simona_Frenda1;~Alessandra_Teresa_Cignarella1;~Viviana_Patti1;~Cristina_Bosco1", "gender": "F;F;M;F;F;F;F", "homepage": ";;http://valeriobasile.github.io/;http://www.di.unito.it/~frenda/;https://alessandrateresacignarella.github.io/;https://www.unito.it/persone/vpatti;", "dblp": "210/2666;351/5327;https://dblp.uni-trier.de/pid/86/11425.html;191/4074;206/1114.html;p/VivianaPatti;27/3697", "google_scholar": "h6Nw1QIAAAAJ;UN-oq-cAAAAJ;https://scholar.google.it/citations?user=5VCe4aAAAAAJ;https://scholar.google.it/citations?user=Zfb1b10AAAAJ;https://scholar.google.it/citations?user=SPV3SSUAAAAJ;https://scholar.google.it/citations?user=Th0j7coAAAAJ;", "or_profile": "~Silvia_Casola1;~Soda_Marem_Lo1;~Valerio_Basile2;~Simona_Frenda1;~Alessandra_Teresa_Cignarella1;~Viviana_Patti1;~Cristina_Bosco1", "aff": "Fondazione Bruno Kessler;University of Turin;University of Turin;University of Turin;University of Turin;University of Turin;University of Turin", "aff_domain": "fbk.eu;unito.it;unito.it;unito.it;unito.it;di.unito.it;unito.it", "position": "PhD student;PhD student;Assistant Professor;Postdoc;Postdoc;Associate Professor;Associate Professor", "bibtex": "@inproceedings{\ncasola2023confidencebased,\ntitle={Confidence-based Ensembling of Perspective-aware Models},\nauthor={Silvia Casola and Soda Marem Lo and Valerio Basile and Simona Frenda and Alessandra Teresa Cignarella and Viviana Patti and Cristina Bosco},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=mW5M8qkAxt}\n}", "github": "", "project": "", "reviewers": "ZE2f;cKGw;17tq", "site": "https://openreview.net/forum?id=mW5M8qkAxt", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;4", "excitement": "3;4;2", "reproducibility": "4;4;4", "correctness": "4;4;3", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.0, "reproducibility_avg": 4.0, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-5810-0093;0000-0001-8110-6832;0000-0002-6215-3374;0000-0002-4409-6679;0000-0001-5991-370X;0000-0002-8857-4484", "linkedin": ";soda-marem-lo/;valeriobasile/;simona-frenda-95948712b/;;viviana-patti-9757652/;", "aff_unique_index": "0;1;1;1;1;1;1", "aff_unique_norm": "Fondazione Bruno Kessler;University of Turin", "aff_unique_dep": ";", "aff_unique_url": "https://www.fbk.eu;https://www.unito.it", "aff_unique_abbr": "FBK;UNITO", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "Italy" }, { "id": "mYniPxMGLL", "title": "Dancing Between Success and Failure: Edit-level Simplification Evaluation using SALSA", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Large language models (e.g., GPT-4) are uniquely capable of producing highly rated text 
simplification, yet current human evaluation methods fail to provide a clear understanding of systems' specific strengths and weaknesses. To address this limitation, we introduce SALSA, an edit-based human annotation framework that enables holistic and fine-grained text simplification evaluation. We develop twenty one linguistically grounded edit types, covering the full spectrum of success and failure across dimensions of conceptual, syntactic and lexical simplicity. Using SALSA, we collect 19K edit annotations on 840 simplifications, revealing discrepancies in the distribution of simplification strategies performed by fine-tuned models, prompted LLMs and humans, and find GPT-3.5 performs more quality edits than humans, but still exhibits frequent errors. Using our fine-grained annotations, we develop LENS-SALSA, a reference-free automatic simplification metric, trained to predict sentence- and word-level quality simultaneously. Additionally, we introduce word-level quality estimation for simplification and report promising baseline results. Our data, new metric, and annotation toolkit are available at https://salsa-eval.com.", "keywords": "model evaluation;text simplification;fine-grained annotation;language model analysis;human evaluation", "primary_area": "", "supplementary_material": "", "author": "David Heineman;Yao Dou;Mounica Maddela;Wei Xu", "authorids": "~David_Heineman1;~Yao_Dou1;~Mounica_Maddela1;~Wei_Xu5", "gender": "M;M;F;F", "homepage": "https://davidheineman.com;https://yao-dou.github.io/;https://mounicam.github.io/;https://cocoxu.github.io/", "dblp": "336/4616;262/0556;228/5563;32/1213-4.html", "google_scholar": "JO2Q6CUAAAAJ;6_aoS74AAAAJ;ajCtVL0AAAAJ;BfOdG-oAAAAJ", "or_profile": "~David_Heineman1;~Yao_Dou1;~Mounica_Maddela1;~Wei_Xu5", "aff": "Georgia Institute of Technology;Georgia Institute of Technology;Georgia Institute of Technology;Georgia Institute of Technology", "aff_domain": "gatech.edu;gatech.edu;gatech.edu;gatech.edu", "position": "Undergrad student;PhD student;PhD student;Associate Professor", "bibtex": "@inproceedings{\nheineman2023dancing,\ntitle={Dancing Between Success and Failure: Edit-level Simplification Evaluation using {SALSA}},\nauthor={David Heineman and Yao Dou and Mounica Maddela and Wei Xu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=mYniPxMGLL}\n}", "github": "", "project": "", "reviewers": "nj2X;6XdU;AAK9;v4Lp", "site": "https://openreview.net/forum?id=mYniPxMGLL", "pdf_size": 0, "rating": "5;5;5;5", "confidence": "4;3;4;4", "excitement": "4;4;3;3", "reproducibility": "4;4;4;3", "correctness": "5;3;4;3", "rating_avg": 5.0, "confidence_avg": 3.75, "excitement_avg": 3.5, "reproducibility_avg": 3.75, "correctness_avg": 3.75, "replies_avg": 14, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;", "linkedin": ";;mounica-maddela-2485aa90/;", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Georgia Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.gatech.edu", "aff_unique_abbr": "Georgia Tech", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "mb35Pb69e8", "title": "TCFLE-8: a Corpus of Learner Written Productions for French as a Foreign Language and its Application to Automated Essay Scoring", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Automated Essay Scoring (AES) 
aims to automatically assess the quality of essays. Automation enables large-scale assessment, improvements in consistency, reliability, and standardization. Those characteristics are of particular relevance in the context of language certification exams. However, a major bottleneck in the development of AES systems is the availability of corpora, which, unfortunately, are scarce, especially for languages other than English. In this paper, we aim to foster the development of AES for French by providing the TCFLE-8 corpus, a corpus of 6.5k essays collected in the context of the \\textit{Test de Connaissance du Fran\u00e7ais} (TCF - French Knowledge Test) certification exam. We report the strict quality procedure that led to the scoring of each essay by at least two raters according to the CEFR levels and to the creation of a balanced corpus. In addition, we describe how linguistic properties of the essays relate to the learners' proficiency in TCFLE-8. We also advance the state-of-the-art performance for the AES task in French by experimenting with two strong baselines (i.e. RoBERTa and feature-based). Finally, we discuss the challenges of AES using TCFLE-8.", "keywords": "Learner corpus;AES;French;Learner written essays;French certification exam", "primary_area": "", "supplementary_material": "", "author": "Rodrigo Wilkens;Alice Pintard;David Alfter;Vincent Folny;Thomas Fran\u00e7ois", "authorids": "~Rodrigo_Wilkens1;~Alice_Pintard1;~David_Alfter1;~Vincent_Folny2;~Thomas_Fran\u00e7ois1", "gender": "M;;;M;M", "homepage": "https://sites.google.com/view/rodrigowilkens;https://uclouvain.be/fr/repertoires/alice.pintard;;;https://thomasfrancoisucl.wixsite.com/homepage", "dblp": "16/8694;;168/0437;;70/4215", "google_scholar": "-sIkqlEAAAAJ;;LL_Y7TYAAAAJ;;https://scholar.google.be/citations?user=weONVQMAAAAJ", "or_profile": "~Rodrigo_Wilkens1;~Alice_Pintard1;~David_Alfter1;~Vincent_Folny2;~Thomas_Fran\u00e7ois1", "aff": "UCL;Universit\u00e9 Catholique de Louvain;UCL;;UCL", "aff_domain": "uclouvain.be;uclouvain.be;uclouvain.be;;uclouvain.be", "position": "Postdoc;PhD student;Postdoc;;Associate Professor", "bibtex": "@inproceedings{\nwilkens2023tcfle,\ntitle={{TCFLE}-8: a Corpus of Learner Written Productions for French as a Foreign Language and its Application to Automated Essay Scoring},\nauthor={Rodrigo Wilkens and Alice Pintard and David Alfter and Vincent Folny and Thomas Fran{\\c{c}}ois},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=mb35Pb69e8}\n}", "github": "", "project": "", "reviewers": "cs99;5vDV;p5TN", "site": "https://openreview.net/forum?id=mb35Pb69e8", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;3;4", "excitement": "4;4;4", "reproducibility": "4;4;5", "correctness": "5;5;5", "rating_avg": 5.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 4.0, "reproducibility_avg": 4.333333333333333, "correctness_avg": 5.0, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0003-4366-1215;;;;0000-0001-9415-7639", "linkedin": ";;david-alfter-48a92117a/;vincent-folny-50446746/?originalSubdomain=fr;", "aff_unique_index": "0;1;0;0", "aff_unique_norm": "University College London;Universit\u00e9 catholique de Louvain", "aff_unique_dep": ";", "aff_unique_url": "https://www.ucl.ac.uk;https://www.uclouvain.be", "aff_unique_abbr": "UCL;UCLouvain", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": 
"0;1;0;0", "aff_country_unique": "United Kingdom;Belgium" }, { "id": "mkEkfHveEL", "title": "Interview Evaluation: A Novel Approach for Automatic Evaluation of Conversational Question Answering Models", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Conversational Question Answering (CQA) aims to provide natural language answers to users in information-seeking dialogues. Existing CQA benchmarks often evaluate models using pre-collected human-human conversations. However, replacing the model-predicted dialogue history with ground truth compromises the naturalness and sustainability of CQA evaluation. While previous studies proposed using predicted history and rewriting techniques to address unresolved coreferences and incoherencies, this approach renders the question self-contained from the conversation. In this paper, we propose a novel automatic evaluation approach, interview evaluation. Specifically, ChatGPT acts as the interviewer (Q agent) with a set of carefully designed prompts, and the CQA model under test serves as the interviewee (A agent). During the interview evaluation, questions are dynamically generated by the Q agent to guide the A agent in predicting the correct answer through an interactive process. We evaluated four different models on QuAC and two models on CoQA in our experiments. The experiment results demonstrate that our interview evaluation has advantages over previous CQA evaluation approaches, particularly in terms of naturalness and coherence. The source code is made publicly available.", "keywords": "Conversational Question Answering;Evaluation metrics;Conversational history;Conversational question generation;Prompting", "primary_area": "", "supplementary_material": "", "author": "Xibo Li;Bowei Zou;Yifan Fan;Yanling Li;AiTi Aw;Yu Hong", "authorids": "~Xibo_Li2;~Bowei_Zou1;~Yifan_Fan1;~Yanling_Li2;~AiTi_Aw1;~Yu_Hong1", "gender": ";M;F;F;;M", "homepage": ";;;https://github.com/YanLingLi-AI;;", "dblp": ";136/9191;https://dblp.org/;;;66/5306", "google_scholar": ";https://scholar.google.com/citations?hl=zh-CN;;;;", "or_profile": "~Xibo_Li2;~Bowei_Zou1;~Yifan_Fan1;~Yanling_Li2;~AiTi_Aw1;~Yu_Hong1", "aff": ";A*STAR;Suzhou University;Suzhou University;;Suzhou University", "aff_domain": ";a-star.edu.sg;suda.edu.cn;suda.edu.cn;;suda.edu.cn", "position": ";Researcher;PhD student;MS student;;Full Professor", "bibtex": "@inproceedings{\nli2023interview,\ntitle={Interview Evaluation: A Novel Approach for Automatic Evaluation of Conversational Question Answering Models},\nauthor={Xibo Li and Bowei Zou and Yifan Fan and Yanling Li and AiTi Aw and Yu Hong},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=mkEkfHveEL}\n}", "github": "", "project": "", "reviewers": "deJG;uuh1;5zAa", "site": "https://openreview.net/forum?id=mkEkfHveEL", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;2;3", "excitement": "4;3;3", "reproducibility": "3;2;3", "correctness": "4;3;3", "rating_avg": 4.0, "confidence_avg": 2.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 2.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;", "linkedin": ";;;;;", "aff_unique_index": "0;1;1;1", "aff_unique_norm": "Agency for Science, Technology and Research;Suzhou University", "aff_unique_dep": ";", "aff_unique_url": 
"https://www.a-star.edu.sg;https://www.suda.edu.cn", "aff_unique_abbr": "A*STAR;Suda", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1", "aff_country_unique": "Singapore;China" }, { "id": "mmlQICRJMc", "title": "AdaSent: Efficient Domain-Adapted Sentence Embeddings for Few-Shot Classification", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Recent work has found that few-shot sentence classification based on pre-trained Sentence Encoders (SEs) is efficient, robust, and effective. In this work, we investigate strategies for domain-specialization in the context of few-shot sentence classification with SEs. \nWe first establish that unsupervised Domain-Adaptive Pre-Training (DAPT) of a base Pre-trained Language Model (PLM) (i.e., not an SE) substantially improves the accuracy of few-shot sentence classification by up to 8.4 points. However, applying DAPT on SEs, on the one hand, disrupts the effects of their (general-domain) Sentence Embedding Pre-Training (SEPT). On the other hand, applying general-domain SEPT on top of a domain-adapted base PLM (i.e., after DAPT) is effective but inefficient, since the computationally expensive SEPT needs to be executed on top of a DAPT-ed PLM of each domain. As a solution, we propose AdaSent, which decouples SEPT from DAPT by training a SEPT adapter on the base PLM. The adapter can be inserted into DAPT-ed PLMs from any domain. We demonstrate AdaSent's effectiveness in extensive experiments on 17 different few-shot sentence classification datasets. AdaSent matches or surpasses the performance of full SEPT on DAPT-ed PLM, while substantially reducing the training costs. The code for AdaSent is available.", "keywords": "few-shot text classification;sentence embedding;domain adaptation;parameter-efficient fine-tuning", "primary_area": "", "supplementary_material": "", "author": "Yongxin Huang;Kexin Wang;Sourav Dutta;Raj Nath Patel;Goran Glava\u0161;Iryna Gurevych", "authorids": "~Yongxin_Huang1;~Kexin_Wang1;~Sourav_Dutta1;~Raj_Nath_Patel1;~Goran_Glava\u01611;~Iryna_Gurevych1", "gender": ";M;M;M;M;", "homepage": "https://www.informatik.tu-darmstadt.de/ukp/ukp_home/staff_ukp/ukp_home_content_staff_1_details_125056.en.jsp;https://kwang2049.github.io/;;https://patelrajnath.github.io/;https://sites.google.com/view/goranglavas;", "dblp": ";44/8799;62/8171;185/5563;50/11059;", "google_scholar": ";3gqLwNUAAAAJ;9y1l5IoAAAAJ;https://scholar.google.com/citations?hl=en;Ym0myOwAAAAJ;", "or_profile": "~Yongxin_Huang1;~Kexin_Wang1;~Sourav_Dutta1;~Raj_Nath_Patel1;~Goran_Glava\u01611;~Iryna_Gurevych1", "aff": "Technische Universit\u00e4t Darmstadt;Ubiquitous Knowledge Processing Lab, Technical University of Darmstadt;Huawei Research Center;Huawei Technologies Ltd.;Julius-Maximilians-Universit\u00e4t W\u00fcrzburg;", "aff_domain": "tu-darmstadt.de;ukp.informatik.tu-darmstadt.de;huawei.com;huawei.com;uni-wuerzburg.de;", "position": "PhD student;PhD student;Principal Scientist;NLP Researcher Scientist;Full Professor;", "bibtex": "@inproceedings{\nhuang2023adasent,\ntitle={AdaSent: Efficient Domain-Adapted Sentence Embeddings for Few-Shot Classification},\nauthor={Yongxin Huang and Kexin Wang and Sourav Dutta and Raj Nath Patel and Goran Glava{\\v{s}} and Iryna Gurevych},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=mmlQICRJMc}\n}", "github": "", "project": "", "reviewers": "jeTA;gtNi;cXZX", "site": 
"https://openreview.net/forum?id=mmlQICRJMc", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;3;4", "excitement": "4;4;3", "reproducibility": "4;5;4", "correctness": "4;3;3", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.333333333333333, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0003-1175-7829;0000-0002-8934-9166;0000-0001-7560-9601;;", "linkedin": ";kexin-wang-191318184/;;raj-nath-patel-2262b024/?originalSubdomain=ie;goran-glava\u0161-8484b420;", "aff_unique_index": "0;1;2;2;3", "aff_unique_norm": "Technische Universit\u00e4t Darmstadt;Technical University of Darmstadt;Huawei;Julius-Maximilians-Universit\u00e4t W\u00fcrzburg", "aff_unique_dep": ";Ubiquitous Knowledge Processing Lab;Research Center;", "aff_unique_url": "https://www.tu-darmstadt.de;https://www.tu-darmstadt.de;https://www.huawei.com/en/;https://www.uni-wuerzburg.de", "aff_unique_abbr": "TUD;TUD;Huawei;JMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;1;0", "aff_country_unique": "Germany;China" }, { "id": "mnzjuOhkR2", "title": "Struct-XLM: A Structure Discovery Multilingual Language Model for Enhancing Cross-lingual Transfer through Reinforcement Learning", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Cross-lingual transfer learning heavily relies on well-aligned cross-lingual representations. The syntactic structure is recognized as beneficial for cross-lingual transfer, but limited researches utilize it for aligning representation in multilingual pre-trained language models (PLMs). Additionally, existing methods require syntactic labels that are difficult to obtain and of poor quality for low-resource languages. To address this gap, we propose Struct-XLM, a novel multilingual language model that leverages reinforcement learning (RL) to autonomously discover universal syntactic structures for improving the cross-lingual representation alignment of PLM. Struct-XLM integrates a policy network (PNet) and a translation ranking task. The PNet is designed to discover structural information and integrate it into the last layer of the PLM through the structural multi-head attention module to obtain structural representation. The translation ranking task obtains a delayed reward based on the structural representation to optimize the PNet while improving the alignment of cross-lingual representation. 
Experiments show the effectiveness of the proposed approach for enhancing cross-lingual transfer of multilingual PLM on the XTREME benchmark.", "keywords": "Cross-lingual Representaion Alignment;Cross-lingual Transfer Learning;Reinforcement Learning", "primary_area": "", "supplementary_material": "", "author": "Linjuan Wu;Weiming Lu", "authorids": "~Linjuan_Wu1;~Weiming_Lu1", "gender": "F;", "homepage": ";", "dblp": "https://dblp.uni-trier.de/pid/262/2608;", "google_scholar": "https://scholar.google.com.hk/citations?user=lZbrKQEAAAAJ;", "or_profile": "~Linjuan_Wu1;~Weiming_Lu1", "aff": "Zhejiang University;", "aff_domain": "zju.edu.cn;", "position": "PhD student;", "bibtex": "@inproceedings{\nwu2023structxlm,\ntitle={Struct-{XLM}: A Structure Discovery Multilingual Language Model for Enhancing Cross-lingual Transfer through Reinforcement Learning},\nauthor={Linjuan Wu and Weiming Lu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=mnzjuOhkR2}\n}", "github": "", "project": "", "reviewers": "xL1S;Jy7e;eQtC", "site": "https://openreview.net/forum?id=mnzjuOhkR2", "pdf_size": 0, "rating": "5;5;5", "confidence": "3;3;4", "excitement": "4;4;4", "reproducibility": "4;3;3", "correctness": "4;4;4", "rating_avg": 5.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 4.0, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";", "linkedin": ";", "aff_unique_index": "0", "aff_unique_norm": "Zhejiang University", "aff_unique_dep": "", "aff_unique_url": "https://www.zju.edu.cn", "aff_unique_abbr": "ZJU", "aff_country_unique_index": "0", "aff_country_unique": "China" }, { "id": "mpL9ikuYez", "title": "Proto-lm: A Prototypical Network-Based Framework for Built-in Interpretability in Large Language Models", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Large Language Models (LLMs) have significantly advanced the field of Natural Language Processing (NLP), but their lack of interpretability has been a major concern. Current methods for interpreting LLMs are post hoc, applied after inference time, and have limitations such as their focus on low-level features and lack of explainability at higher-level text units. In this work, we introduce proto-lm, a prototypical network-based white-box framework that allows LLMs to learn immediately interpretable embeddings during the fine-tuning stage while maintaining competitive performance. Our method's applicability and interpretability are demonstrated through experiments on a wide range of NLP tasks, and our results indicate a new possibility of creating interpretable models without sacrificing performance. This novel approach to interpretability in LLMs can pave the way for more interpretable models without the need to sacrifice performance. 
We release our code at https://github.com/yx131/proto-lm.", "keywords": "Prototypes;NLP;Interpretability;Faithfulness", "primary_area": "", "supplementary_material": "", "author": "Sean Xie;Soroush Vosoughi;Saeed Hassanpour", "authorids": "~Sean_Xie1;~Soroush_Vosoughi1;~Saeed_Hassanpour1", "gender": ";;", "homepage": ";https://www.cs.dartmouth.edu/~soroush/;", "dblp": "360/0267;01/1709;", "google_scholar": ";45DAXkwAAAAJ;", "or_profile": "~Sean_Xie1;~Soroush_Vosoughi1;~Saeed_Hassanpour1", "aff": "Dartmouth College;Dartmouth College;", "aff_domain": "dartmouth.edu;dartmouth.edu;", "position": "PhD student;Assistant Professor;", "bibtex": "@inproceedings{\nxie2023protolm,\ntitle={Proto-lm: A Prototypical Network-Based Framework for Built-in Interpretability in Large Language Models},\nauthor={Sean Xie and Soroush Vosoughi and Saeed Hassanpour},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=mpL9ikuYez}\n}", "github": "", "project": "", "reviewers": "9tN3;pjWZ;9W3P", "site": "https://openreview.net/forum?id=mpL9ikuYez", "pdf_size": 0, "rating": "2;2;2", "confidence": "3;3;3", "excitement": "3;4;3", "reproducibility": "4;5;5", "correctness": "3;4;4", "rating_avg": 2.0, "confidence_avg": 3.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.666666666666667, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-2564-8909;", "linkedin": ";;", "aff_unique_index": "0;0", "aff_unique_norm": "Dartmouth College", "aff_unique_dep": "", "aff_unique_url": "https://www.dartmouth.edu", "aff_unique_abbr": "Dartmouth", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "mqnK19Dm80", "title": "Generative Emotion Cause Triplet Extraction in Conversations with Commonsense Knowledge", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Emotion Cause Triplet Extraction in Conversations (ECTEC) aims to simultaneously extract emotion utterances, emotion categories, and cause utterances from conversations.\nHowever, existing studies mainly decompose the ECTEC task into multiple subtasks and solve them in a pipeline manner.\nMoreover, since conversations tend to contain many informal and implicit expressions, it often requires external knowledge and reasoning-based inference to accurately identify emotional and causal clues implicitly mentioned in the context, which are ignored by previous work. To address these limitations, in this paper, we propose a commonSense knowledge-enHanced generAtive fRameworK named SHARK, which formulates the ECTEC task as an index generation problem and generates the emotion-cause-category triplets in an end-to-end manner with a sequence-to-sequence model.\nFurthermore, we propose to incorporate both retrieved and generated commonsense knowledge into the generative model via a dual-view gate mechanism and a graph attention layer.\nExperimental results show that our SHARK model consistently outperforms several competitive systems on two benchmark datasets.
Our source codes are publicly released at https://github.com/NUSTM/SHARK.", "keywords": "Emotion Cause Analysis;Commonsense Knowledge;Emotion Recognition in Conversations", "primary_area": "", "supplementary_material": "", "author": "Fanfan Wang;Jianfei Yu;Rui Xia", "authorids": "~Fanfan_Wang1;~Jianfei_Yu1;~Rui_Xia1", "gender": "F;;M", "homepage": ";;http://www.nustm.cn/member/rxia/", "dblp": "278/4915;;", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;;https://scholar.google.com.hk/citations?user=Znde6gwAAAAJ", "or_profile": "~Fanfan_Wang1;~Jianfei_Yu1;~Rui_Xia1", "aff": "Nanjing University of Science and Technology;;Nanjing University of Science and Technology", "aff_domain": "njust.edu.cn;;njust.edu.cn", "position": "PhD student;;Full Professor", "bibtex": "@inproceedings{\nwang2023generative,\ntitle={Generative Emotion Cause Triplet Extraction in Conversations with Commonsense Knowledge},\nauthor={Fanfan Wang and Jianfei Yu and Rui Xia},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=mqnK19Dm80}\n}", "github": "", "project": "", "reviewers": "yf8h;xHNc;63gB", "site": "https://openreview.net/forum?id=mqnK19Dm80", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;5;4", "excitement": "3;3;3", "reproducibility": "4;3;3", "correctness": "3;4;4", "rating_avg": 3.0, "confidence_avg": 4.333333333333333, "excitement_avg": 3.0, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-8955-9411;;", "linkedin": ";;", "aff_unique_index": "0;0", "aff_unique_norm": "Nanjing University of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "http://www.nust.edu.cn/", "aff_unique_abbr": "NUST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "id": "mrARDvuKi2", "title": "2INER: Instructive and In-Context Learning on Few-Shot Named Entity Recognition", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Prompt-based learning has emerged as a powerful technique in natural language processing (NLP) due to its ability to leverage pre-training knowledge for downstream few-shot tasks. In this paper, we propose 2INER, a novel text-to-text framework for Few-Shot Named Entity Recognition (NER) tasks. Our approach employs instruction finetuning based on InstructionNER to enable the model to effectively comprehend and process task-specific instructions, including both main and auxiliary tasks. We also introduce a new auxiliary task, called Type Extracting, to enhance the model's understanding of entity types in the overall semantic context of a sentence. To facilitate in-context learning, we concatenate examples to the input, enabling the model to learn from additional contextual information. 
Experimental results on four datasets demonstrate that our approach outperforms existing Few-Shot NER methods and remains competitive with state-of-the-art standard NER algorithms.", "keywords": "prompt-based learning;instruction finetuning;in-context learning;NER", "primary_area": "", "supplementary_material": "", "author": "Jiasheng Zhang;Xikai Liu;Xinyi Lai;Yan Gao;Shusen Wang;Yao Hu;YIQING LIN", "authorids": "~Jiasheng_Zhang3;~Xikai_Liu1;~Xinyi_Lai1;~Yan_Gao10;~Shusen_Wang1;~Yao_Hu4;~YIQING_LIN1", "gender": ";M;F;M;M;M;M", "homepage": "https://github.com/ZhangMaoTai;https://github.com/Xikai-Liu;https://laixinyi.github.io/;;http://wangshusen.github.io;https://www.math.sjtu.edu.cn/Default/teachershow/tags/MDAwMDAwMDAwMLKIdpc;", "dblp": ";;;;77/9625;;", "google_scholar": ";;;https://scholar.google.com.hk/citations?hl=zh-CN;HAf4pEoAAAAJ;;LIu7k7wAAAAJ", "or_profile": "~Jiasheng_Zhang3;~Xikai_Liu1;~Xinyi_Lai1;~Yan_Gao10;~Shusen_Wang1;~YIQING_LIN1;~Yao_Hu1", "aff": "Shanghai Jiaotong University;;Chongqing University;Xiaohongshu;Xiaohongshu;Shanghai Jiaotong University;Zhejiang University of Technology", "aff_domain": "sjtu.edu.cn;;cqu.edu.cn;xiaohongshu.com;xiaohongshu.com;sjtu.edu.cn;zjut.edu.cn", "position": "MS student;;MS student;Researcher;Researcher;Associate Professor;Researcher", "bibtex": "@inproceedings{\nzhang2023iner,\ntitle={2{INER}: Instructive and In-Context Learning on Few-Shot Named Entity Recognition},\nauthor={Jiasheng Zhang and Xikai Liu and Xinyi Lai and Yan Gao and Shusen Wang and Yao Hu and YIQING LIN},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=mrARDvuKi2}\n}", "github": "", "project": "", "reviewers": "aEJr;WRbf;Cuzv", "site": "https://openreview.net/forum?id=mrARDvuKi2", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;3;3", "excitement": "3;3;3", "reproducibility": "4;5;4", "correctness": "3;4;3", "rating_avg": 3.0, "confidence_avg": 3.0, "excitement_avg": 3.0, "reproducibility_avg": 4.333333333333333, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0009-0004-5960-1684;;;0009-0006-1274-7111", "linkedin": ";;;;;;", "aff_unique_index": "0;1;2;2;0;3", "aff_unique_norm": "Shanghai Jiao Tong University;Chongqing University;Xiaohongshu;Zhejiang University of Technology", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.sjtu.edu.cn;https://www.cqu.edu.cn;https://www.xiaohongshu.com;https://www.zjut.edu.cn", "aff_unique_abbr": "SJTU;CQU;XHS;ZJUT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "mrD5HN7ZNR", "title": "APP: Adaptive Prototypical Pseudo-Labeling for Few-shot OOD Detection", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Detecting out-of-domain (OOD) intents from user queries is essential for a task-oriented dialogue system. Previous OOD detection studies generally work on the assumption that plenty of labeled IND intents exist. In this paper, we focus on a more practical few-shot OOD setting where there are only a few labeled IND data and massive unlabeled mixed data that may belong to IND or OOD. The new scenario carries two key challenges: learning discriminative representations using limited IND data and leveraging unlabeled mixed data. 
Therefore, we propose an adaptive prototypical pseudo-labeling (APP) method for few-shot OOD detection, including a prototypical OOD detection framework (ProtoOOD) to facilitate low-resource OOD detection using limited IND data, and an adaptive pseudo-labeling method to produce high-quality pseudo OOD and IND labels. Extensive experiments and analysis demonstrate the effectiveness of our method for few-shot OOD detection.", "keywords": "OOD;Intent Detection;Few-shot;Prototype", "primary_area": "", "supplementary_material": "", "author": "Pei Wang;Keqing He;Yutao Mou;Xiaoshuai Song;Yanan Wu;Jingang Wang;Yunsen Xian;Xunliang Cai;Weiran Xu", "authorids": "~Pei_Wang12;~Keqing_He1;~Yutao_Mou1;~Xiaoshuai_Song1;~Yanan_Wu2;~Jingang_Wang1;~Yunsen_Xian1;~Xunliang_Cai1;~Weiran_Xu1", "gender": ";;;M;;M;;M;M", "homepage": ";https://helicqin.github.io/about/index.html;;;;https://sites.google.com/site/bitwjg/;http://faculty.dlut.edu.cn/GuoHe/en/xsxx/791119/content/132173.htm;https://maimai.cn/contact/share/card?u=fudmdwckxlwi;", "dblp": ";79/2314;;45/9576;135/9598;59/7807;;;41/5448", "google_scholar": ";811USNoAAAAJ;;https://scholar.google.com/citations?view_op=list_works;aucxPZEAAAAJ;janU39IAAAAJ;;;https://scholar.google.com/citations?view_op=list_works", "or_profile": "~Pei_Wang12;~Keqing_He1;~Yutao_Mou1;~Xiaoshuai_Song1;~Yanan_Wu2;~Jingang_Wang1;~Yunsen_Xian1;~Xunliang_Cai1;~Weiran_Xu1", "aff": "Beijing University of Posts and Telecommunications;Meituan Group;;Beijing University of Posts and Telecommunications;Beijing University of Posts and Telecommunications;Meituan;;Meituan;Beijing University of Post and Telecommunication", "aff_domain": "bupt.edu.cn;meituan.com;;bupt.edu.cn;bupt.edu.cn;meituan.com;;meituan.com;bupt.edu.cn", "position": "MS student;Researcher;;MS student;MS student;Researcher;;Principal Researcher;Associate Professor", "bibtex": "@inproceedings{\nwang2023app,\ntitle={{APP}: Adaptive Prototypical Pseudo-Labeling for Few-shot {OOD} Detection},\nauthor={Pei Wang and Keqing He and Yutao Mou and Xiaoshuai Song and Yanan Wu and Jingang Wang and Yunsen Xian and Xunliang Cai and Weiran Xu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=mrD5HN7ZNR}\n}", "github": "", "project": "", "reviewers": "2RaW;tHSS;LnTL", "site": "https://openreview.net/forum?id=mrD5HN7ZNR", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;3;3", "excitement": "2;3;3", "reproducibility": "2;3;3", "correctness": "2;3;3", "rating_avg": 3.0, "confidence_avg": 3.0, "excitement_avg": 2.6666666666666665, "reproducibility_avg": 2.6666666666666665, "correctness_avg": 2.6666666666666665, "replies_avg": 12, "authors#_avg": 9, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;;;;0000-0002-9416-7666", "linkedin": "https://www.linkedin.cn/in/%E9%9C%88-%E7%8E%8B-18a94a174;;;;;;;;", "aff_unique_index": "0;1;0;0;2;2;0", "aff_unique_norm": "Beijing University of Posts and Telecommunications;Meituan Group;Meituan", "aff_unique_dep": ";;", "aff_unique_url": "http://www.bupt.edu.cn/;https://www.meituan.com;https://www.meituan.com", "aff_unique_abbr": "BUPT;Meituan;Meituan", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Beijing;", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "muTWDq9bVs", "title": "Speculative Decoding: Exploiting Speculative Execution for Accelerating Seq2seq Generation", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "We
propose Speculative Decoding (SpecDec), for the first time ever, to formally study exploiting the idea of speculative execution to accelerate autoregressive (AR) decoding. Speculative Decoding has two innovations: Spec-Drafter -- an independent model specially optimized for efficient and accurate drafting -- and Spec-Verification -- a reliable method for verifying the drafted tokens efficiently in the decoding paradigm. Experimental results on various seq2seq tasks including machine translation and abstractive summarization show our approach can achieve around 5x speedup for the popular Transformer architectures with comparable generation quality to beam search decoding, refreshing the impression that the draft-then-verify paradigm introduces only 1.4x~2x speedup. In addition to the remarkable speedup, we also demonstrate 3 additional advantages of SpecDec, revealing its practical value for accelerating generative models in real-world applications. Our models and codes are available at https://github.com/hemingkx/SpecDec.", "keywords": "speculative decoding;efficient seq2seq generation", "primary_area": "", "supplementary_material": "", "author": "Heming Xia;Tao Ge;Peiyi Wang;Si-Qing Chen;Furu Wei;Zhifang Sui", "authorids": "~Heming_Xia1;~Tao_Ge1;~Peiyi_Wang1;~Si-Qing_Chen1;~Furu_Wei1;~Zhifang_Sui1", "gender": "M;M;M;F;M;F", "homepage": "https://hemingkx.github.io/;https://getao.github.io/;;;https://www.microsoft.com/en-us/research/people/fuwei/;http://eecs.pku.edu.cn/EN/People/Faculty/Detail/?ID=6024", "dblp": "278/2940;136/7923;236/6569.html;;72/5870;", "google_scholar": "6r2ESKkAAAAJ;LYbs7Q8AAAAJ;K0uQ3ygAAAAJ;;G-V1VpwAAAAJ;", "or_profile": "~Heming_Xia1;~Tao_Ge1;~Peiyi_Wang1;~Si-Qing_Chen1;~Furu_Wei1;~Zhifang_Sui1", "aff": "Peking University;Microsoft Research;Peking University;Microsoft;Microsoft Research;Peking University", "aff_domain": "pku.edu.cn;microsoft.com;pku.edu.cn;microsoft.com;microsoft.com;pku.edu.cn", "position": "MS student;Principal Researcher;PhD student;Partner Applied Science Manager;Distinguished Scientist;Full Professor", "bibtex": "@inproceedings{\nxia2023speculative,\ntitle={Speculative Decoding: Exploiting Speculative Execution for Accelerating Seq2seq Generation},\nauthor={Heming Xia and Tao Ge and Peiyi Wang and Si-Qing Chen and Furu Wei and Zhifang Sui},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=muTWDq9bVs}\n}", "github": "", "project": "", "reviewers": "PLjh;c6y8;u3hL", "site": "https://openreview.net/forum?id=muTWDq9bVs", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;2;4", "excitement": "3;3;3", "reproducibility": "3;4;3", "correctness": "2;3;4", "rating_avg": 3.0, "confidence_avg": 3.0, "excitement_avg": 3.0, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0001-5074-3441;;;0000-0002-6945-4540;;", "linkedin": ";;;si-qing-chen-seattle/;;", "aff_unique_index": "0;1;0;1;1;0", "aff_unique_norm": "Peking University;Microsoft", "aff_unique_dep": ";Microsoft Research", "aff_unique_url": "http://www.pku.edu.cn;https://www.microsoft.com/en-us/research", "aff_unique_abbr": "Peking U;MSR", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;1;1;0", "aff_country_unique": "China;United States" }, { "id": "mvtjk1mlrq", "title": "Knowledge Rumination for Pre-trained Language Models", "track": "main", 
"status": "Long Main", "tldr": "", "abstract": "Previous studies have revealed that vanilla pre-trained language models (PLMs) lack the capacity to handle knowledge-intensive NLP tasks alone; thus, several works have attempted to integrate external knowledge into PLMs. However, despite the promising outcome, we empirically observe that PLMs may have already encoded rich knowledge in their pre-trained parameters but fails to fully utilize them when applying to knowledge-intensive tasks. In this paper, we propose a new paradigm dubbed \\textbf{Knowledge Rumination} to help the pre-trained language model utilize that related latent knowledge without retrieving them from the external corpus. By simply adding a prompt like \\emph{``As far as I know''} to the PLMs, we try to review related latent knowledge and inject them back into the model for knowledge consolidation. We apply the proposed knowledge rumination to various language models, including RoBERTa, DeBERTa, and GPT-3. Experimental results on six commonsense reasoning tasks and GLUE benchmarks demonstrate the effectiveness of our proposed approach, which proves that the knowledge stored in PLMs can be better exploited to enhance performance\\footnote{Code is in the supplementary and will be released.}.", "keywords": "Knowledge rumination;pretrained language model", "primary_area": "", "supplementary_material": "", "author": "Yunzhi Yao;Peng Wang;Shengyu Mao;Chuanqi Tan;Fei Huang;Huajun Chen;Ningyu Zhang", "authorids": "~Yunzhi_Yao1;~Peng_Wang28;~Shengyu_Mao1;~Chuanqi_Tan3;~Fei_Huang1;~Huajun_Chen1;~Ningyu_Zhang1", "gender": "M;M;M;M;M;M;M", "homepage": "http://yyzcowtodd.cn;;;;;https://person.zju.edu.cn/en/ningyu;https://sites.google.com/view/fei-huang", "dblp": "295/9476;;;148/4497;94/5089;139/4181-1.html;h/FeiHuang.html", "google_scholar": "https://scholar.google.com.hk/citations?user=nAagIwEAAAAJ;vLN6gsMAAAAJ;n_424pEAAAAJ;tOfo4ncAAAAJ;;xQDOPvsAAAAJ;9r98PpoAAAAJ", "or_profile": "~Yunzhi_Yao1;~Peng_Wang28;~Shengyu_Mao1;~Chuanqi_Tan3;~Huajun_Chen1;~Ningyu_Zhang1;~Fei_Huang2", "aff": "Zhejiang University;Zhejiang University;Zhejiang University;Alibaba Group;Zhejiang University;Zhejiang University;Alibaba Group US", "aff_domain": "zju.edu.cn;zju.edu.cn;zju.edu.cn;alibaba-inc.com;zju.edu.cn;zju.edu.cn;alibaba-inc.com", "position": "PhD student;MS student;MS student;Full-time employee;Full Professor;Associate Professor;Senior Research Director", "bibtex": "@inproceedings{\nyao2023knowledge,\ntitle={Knowledge Rumination for Pre-trained Language Models},\nauthor={Yunzhi Yao and Peng Wang and Shengyu Mao and Chuanqi Tan and Fei Huang and Huajun Chen and Ningyu Zhang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=mvtjk1mlrq}\n}", "github": "", "project": "", "reviewers": "urGt;4uX8;6u65;3JN6", "site": "https://openreview.net/forum?id=mvtjk1mlrq", "pdf_size": 0, "rating": "4;4;4;4", "confidence": "1;4;3;3", "excitement": "4;4;4;4", "reproducibility": "4;4;4;4", "correctness": "4;5;4;4", "rating_avg": 4.0, "confidence_avg": 2.75, "excitement_avg": 4.0, "reproducibility_avg": 4.0, "correctness_avg": 4.25, "replies_avg": 14, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0000-0002-6676-3057;;0000-0002-1970-0678;", "linkedin": ";;;;;ningyuzhang/;fei-huang-cas-cmu", "aff_unique_index": "0;0;0;1;0;0;1", "aff_unique_norm": "Zhejiang University;Alibaba Group", "aff_unique_dep": ";", "aff_unique_url": 
"https://www.zju.edu.cn;https://www.alibaba.com", "aff_unique_abbr": "ZJU;Alibaba", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;1", "aff_country_unique": "China;United States" }, { "id": "mx0ltXW10S", "title": "TopWORDS-Poetry: Simultaneous Text Segmentation and Word Discovery for Classical Chinese Poetry via Bayesian Inference", "track": "main", "status": "Long Main", "tldr": "", "abstract": "As a precious cultural heritage of human beings, classical Chinese poetry has a very unique writing style and often contains special words that rarely appear in general Chinese texts, posting critical challenges for natural language processing.\nLittle effort has been made in the literature for processing texts from classical Chinese poetry.\nThis study fills in this gap with TopWORDS-Poetry, an unsupervised method that can achieve reliable text segmentation and word discovery for classical Chinese poetry simultaneously without pre-given vocabulary or training corpus.\nExperimental studies confirm that TopWORDS-Poetry can successfully recognize unique poetry words, such as named entities and literary allusions, from metrical poems of\u300a\u5168\u5510\u8bd7\u300b(*Complete Tang Poetry*) and segment these poetry lines into sequences of meaningful words with high quality.", "keywords": "Word Discovery;Text Segmentation;Classical Chinese Poetry;Bayesian Inference;Unsupervised Method", "primary_area": "", "supplementary_material": "", "author": "Changzai Pan;Feiyue Li;Ke Deng", "authorids": "~Changzai_Pan1;~Feiyue_Li1;~Ke_Deng3", "gender": "M;M;M", "homepage": ";https://www.zhongwen.tsinghua.edu.cn/info/1172/1394.htm;http://www.stat.tsinghua.edu.cn/kdeng/", "dblp": "320/5196;;", "google_scholar": "https://scholar.google.com/citations?view_op=list_works;;", "or_profile": "~Changzai_Pan1;~Feiyue_Li1;~Ke_Deng3", "aff": "Tsinghua University;Tsinghua University;Tsinghua University", "aff_domain": "tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn", "position": "PhD student;Associate Professor;Associate Professor", "bibtex": "@inproceedings{\npan2023topwordspoetry,\ntitle={Top{WORDS}-Poetry: Simultaneous Text Segmentation and Word Discovery for Classical Chinese Poetry via Bayesian Inference},\nauthor={Changzai Pan and Feiyue Li and Ke Deng},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=mx0ltXW10S}\n}", "github": "", "project": "", "reviewers": "BAFH;qve5;m3qs", "site": "https://openreview.net/forum?id=mx0ltXW10S", "pdf_size": 0, "rating": "5;5;5", "confidence": "3;4;4", "excitement": "3;3;4", "reproducibility": "3;3;4", "correctness": "4;4;4", "rating_avg": 5.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0002-4383-8319", "linkedin": ";;", "aff_unique_index": "0;0;0", "aff_unique_norm": "Tsinghua University", "aff_unique_dep": "", "aff_unique_url": "https://www.tsinghua.edu.cn", "aff_unique_abbr": "THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "n1Sx9ZjJRs", "title": "TOD-Flow: Modeling the Structure of Task-Oriented Dialogues", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Task-Oriented Dialogue (TOD) systems have become crucial components in 
interactive artificial intelligence applications. While recent advances have capitalized on pre-trained language models (PLMs), they exhibit limitations regarding transparency and controllability. To address these challenges, we propose a novel approach focusing on inferring the TOD-flow graph from dialogue data annotated with dialog acts, uncovering the underlying task structure in the form of a graph. The inferred TOD-flow graph can be easily integrated with any dialogue model to improve its prediction performance, transparency, and controllability. Our TOD-flow graph learns what a model can, should, and should not predict, effectively reducing the search space and providing a rationale for the model's prediction. We show that the proposed TOD-flow graph better resemble human-annotated graphs compared to prior approaches. Furthermore, when combined with several dialogue policies and end-to-end dialogue models, we demonstrate that our approach significantly improves dialog act classification and end-to-end response generation performance in the MultiWOZ and SGD benchmarks.", "keywords": "Task-oriented Dialogue;Dialog policy learning;interpretability;precondition inference", "primary_area": "", "supplementary_material": "", "author": "Sungryull Sohn;Yiwei Lyu;Anthony Zhe Liu;Lajanugen Logeswaran;Dong-Ki Kim;Dongsub Shim;Honglak Lee", "authorids": "~Sungryull_Sohn1;~Yiwei_Lyu1;~Anthony_Zhe_Liu1;~Lajanugen_Logeswaran1;~Dong-Ki_Kim1;~Dongsub_Shim1;~Honglak_Lee2", "gender": "M;M;;M;;;M", "homepage": ";;https://anthliu.github.io/;https://sites.google.com/umich.edu/llajan/;https://dkkim93.github.io/;;http://web.eecs.umich.edu/~honglak", "dblp": "172/9884;230/7891;264/2652.html;157/3603;199/2089;274/1579;58/2562", "google_scholar": "https://scholar.google.com/citations?hl=en;fV5fYpsAAAAJ;TjEqCOAAAAAJ;dcv4kpIAAAAJ;https://scholar.google.com/citations?hl=en;NxE-ZasAAAAJ;fmSHtE8AAAAJ", "or_profile": "~Sungryull_Sohn1;~Yiwei_Lyu1;~Anthony_Zhe_Liu1;~Lajanugen_Logeswaran1;~Dong-Ki_Kim1;~Dongsub_Shim1;~Honglak_Lee1", "aff": "LG AI Research;University of Michigan - Ann Arbor;University of Michigan;LG AI Research;Massachusetts Institute of Technology;LG AI Research;University of Michigan", "aff_domain": "lgresearch.ai;umich.edu;umich.edu;lgresearch.ai;mit.edu;lgresearch.ai;umich.edu", "position": "Researcher;PhD student;PhD student;Researcher;PhD student;Researcher;Associate Professor", "bibtex": "@inproceedings{\nsohn2023todflow,\ntitle={{TOD}-Flow: Modeling the Structure of Task-Oriented Dialogues},\nauthor={Sungryull Sohn and Yiwei Lyu and Anthony Zhe Liu and Lajanugen Logeswaran and Dong-Ki Kim and Dongsub Shim and Honglak Lee},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=n1Sx9ZjJRs}\n}", "github": "", "project": "", "reviewers": "uYLX;CAsX;irFm", "site": "https://openreview.net/forum?id=n1Sx9ZjJRs", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;3;4", "excitement": "4;3;3", "reproducibility": "3;4;3", "correctness": "3;4;4", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.6666666666666665, "replies_avg": 10, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;0009-0006-8178-0476;", "linkedin": ";yiwei-lyu-209176151/;;;;;", "aff_unique_index": "0;1;1;0;2;0;1", "aff_unique_norm": "LG;University of Michigan;Massachusetts Institute of Technology", 
"aff_unique_dep": "LG AI Research;;", "aff_unique_url": "https://www.lgaires.com;https://www.umich.edu;https://web.mit.edu", "aff_unique_abbr": "LG AI;UM;MIT", "aff_campus_unique_index": "1", "aff_campus_unique": ";Ann Arbor", "aff_country_unique_index": "0;1;1;0;1;0;1", "aff_country_unique": "South Korea;United States" }, { "id": "n20PghmZaD", "title": "A New Benchmark and Reverse Validation Method for Passage-level Hallucination Detection", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Large Language Models (LLMs) have shown their ability to collaborate effectively with humans in real-world scenarios. However, LLMs are apt to generate hallucinations, i.e., makeup incorrect text and unverified information, which can cause significant damage when deployed for mission-critical tasks.\nIn this paper, we propose a self-check approach based on reverse validation to detect factual errors automatically in a zero-resource fashion. To facilitate future studies and assess different methods, we construct a hallucination detection benchmark named PHD, which is generated by ChatGPT and annotated by human annotators. Contrasting previous studies of zero-resource hallucination detection, our method and benchmark concentrate on passage-level detection instead of sentence-level.\nWe empirically evaluate our method and existing zero-resource detection methods on two datasets. The experimental results demonstrate that the proposed method considerably outperforms the baselines while costing fewer tokens and less time. Furthermore, we manually analyze some hallucination cases that LLM failed to capture, revealing the shared limitation of zero-resource methods.", "keywords": "hallucination detection; LLM", "primary_area": "", "supplementary_material": "", "author": "Shiping Yang;Renliang Sun;Xiaojun Wan", "authorids": "~Shiping_Yang1;~Renliang_Sun2;~Xiaojun_Wan1", "gender": "M;;M", "homepage": "https://maybenotime.github.io/;;https://wanxiaojun.github.io", "dblp": ";;07/1521", "google_scholar": "https://scholar.google.cz/citations?hl=zh-CN;;lTTeBdkAAAAJ", "or_profile": "~Shiping_Yang1;~Renliang_Sun2;~Xiaojun_Wan1", "aff": "Peking University;;Peking University", "aff_domain": "pku.edu.cn;;pku.edu.cn", "position": "Intern;;Full Professor", "bibtex": "@inproceedings{\nyang2023a,\ntitle={A New Benchmark and Reverse Validation Method for Passage-level Hallucination Detection},\nauthor={Shiping Yang and Renliang Sun and Xiaojun Wan},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=n20PghmZaD}\n}", "github": "", "project": "", "reviewers": "2PhM;JYbz;KooH", "site": "https://openreview.net/forum?id=n20PghmZaD", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;4;5", "excitement": "3;3;3", "reproducibility": "2;5;3", "correctness": "4;4;3", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.0, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0009-0005-9589-7408;;", "linkedin": "shiping-yang-8b6b27263/;;", "aff_unique_index": "0;0", "aff_unique_norm": "Peking University", "aff_unique_dep": "", "aff_unique_url": "http://www.pku.edu.cn", "aff_unique_abbr": "Peking U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "id": "n6qiOfZVYp", "title": "VIBE: Topic-Driven Temporal 
Adaptation for Twitter Classification", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Language features are evolving in real-world social media, resulting in the deteriorating performance of text classification in dynamics. To address this challenge, we study temporal adaptation, where models trained on past data are tested in the future. Most prior work focused on continued pretraining or knowledge updating, which may compromise their performance on noisy social media data. To tackle this issue, we reflect feature change via modeling latent topic evolution and propose a novel model, VIBE: Variational Information Bottleneck for Evolutions. Concretely, we first employ two Information Bottleneck (IB) regularizers to distinguish past and future topics. Then, the distinguished topics work as adaptive features via multi-task training with timestamp and class label prediction. In adaptive learning, VIBE utilizes retrieved unlabeled data from online streams created posterior to training data time. Substantial Twitter experiments on three classification tasks show that our model, with only 3% of data, significantly outperforms previous state-of-the-art continued-pretraining methods.", "keywords": "temporal adaptation; neural topic model; social media; twitter classification", "primary_area": "", "supplementary_material": "", "author": "Yuji Zhang;Jing Li;Wenjie Li", "authorids": "~Yuji_Zhang2;~Jing_Li18;~Wenjie_Li1", "gender": ";F;F", "homepage": ";http://www4.comp.polyu.edu.hk/~jing1li/;https://web.comp.polyu.edu.hk/cswjli/", "dblp": ";181/2820-49;33/3999-2.html", "google_scholar": ";jvjOLx4AAAAJ;Rx5swD4AAAAJ", "or_profile": "~Yuji_Zhang2;~Jing_Li18;~Wenjie_Li1", "aff": ";The Hong Kong Polytechnic University;The Hong Kong Polytechnic University, The Hong Kong Polytechnic University", "aff_domain": ";polyu.edu.hk;comp.polyu.edu.hk", "position": ";Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nzhang2023vibe,\ntitle={{VIBE}: Topic-Driven Temporal Adaptation for Twitter Classification},\nauthor={Yuji Zhang and Jing Li and Wenjie Li},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=n6qiOfZVYp}\n}", "github": "", "project": "", "reviewers": "7trR;vJ1w;QHXk;MQvq", "site": "https://openreview.net/forum?id=n6qiOfZVYp", "pdf_size": 0, "rating": "5;5;5;5", "confidence": "4;3;3;4", "excitement": "4;4;4;4", "reproducibility": "4;4;4;4", "correctness": "5;4;4;4", "rating_avg": 5.0, "confidence_avg": 3.5, "excitement_avg": 4.0, "reproducibility_avg": 4.0, "correctness_avg": 4.25, "replies_avg": 14, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-8044-2284;0000-0002-7360-8864", "linkedin": ";jing-li-b815b7a5/;", "aff_unique_index": "0;0", "aff_unique_norm": "Hong Kong Polytechnic University", "aff_unique_dep": "", "aff_unique_url": "https://www.polyu.edu.hk", "aff_unique_abbr": "PolyU", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Hong Kong SAR", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "id": "n9y4IDFcCr", "title": "GROOViST: A Metric for Grounding Objects in Visual Storytelling", "track": "main", "status": "Short Main", "tldr": "", "abstract": "A proper evaluation of stories generated for a sequence of images---the task commonly referred to as visual storytelling---must consider multiple aspects, such as coherence, grammatical correctness, and visual grounding. 
In this work, we focus on evaluating the degree of grounding, that is, the extent to which a story is about the entities shown in the images. We analyze current metrics, both designed for this purpose and for general vision-text alignment. Given their observed shortcomings, we propose a novel evaluation tool, GROOViST, that accounts for cross-modal dependencies, \\textit{temporal misalignments} (the fact that the order in which entities appear in the story and the image sequence may not match), and human intuitions on visual grounding. An additional advantage of GROOViST is its modular design, where the contribution of each component can be assessed and interpreted individually.", "keywords": "visual storytelling;grounding;NLG evaluation", "primary_area": "", "supplementary_material": "", "author": "Aditya Kaushik Surikuchi;Sandro Pezzelle;Raquel Fern\u00e1ndez", "authorids": "~Aditya_Kaushik_Surikuchi3;~Sandro_Pezzelle1;~Raquel_Fern\u00e1ndez1", "gender": ";M;F", "homepage": "https://akskuchi.github.io/;https://sandropezzelle.github.io/;http://www.illc.uva.nl/~raquel", "dblp": ";182/2260;02/5384", "google_scholar": "bnJ-B9wAAAAJ;https://scholar.google.it/citations?user=PW6eQ6YAAAAJ;https://scholar.google.com/citations?hl=en", "or_profile": "~Aditya_Kaushik_Surikuchi3;~Sandro_Pezzelle1;~Raquel_Fern\u00e1ndez1", "aff": "University of Amsterdam;University of Amsterdam;University of Amsterdam", "aff_domain": "uva.nl;uva.nl;uva.nl", "position": "PhD student;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nsurikuchi2023groovist,\ntitle={{GROOV}i{ST}: A Metric for Grounding Objects in Visual Storytelling},\nauthor={Aditya Kaushik Surikuchi and Sandro Pezzelle and Raquel Fern{\\'a}ndez},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=n9y4IDFcCr}\n}", "github": "", "project": "", "reviewers": "Pc9w;sXd8;G7ME", "site": "https://openreview.net/forum?id=n9y4IDFcCr", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;3;3", "excitement": "4;3;3", "reproducibility": "4;4;4", "correctness": "4;3;4", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.0, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-3969-7445;0000-0001-5540-5943", "linkedin": "adityasurikuchi;;raquel-fernandez-13578148/", "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Amsterdam", "aff_unique_dep": "", "aff_unique_url": "https://www.uva.nl", "aff_unique_abbr": "UvA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Netherlands" }, { "id": "nC47EZVfAw", "title": "Low-Resource Comparative Opinion Quintuple Extraction by Data Augmentation with Prompting", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "Comparative Opinion Quintuple Extraction (COQE) aims to predict comparative opinion quintuples from comparative sentences. These quintuples include subject, object, shareable aspect, comparative opinion, and preference. The existing pipeline-based COQE method fails in error propagation. In addition, the complexity and insufficient amounts of annotated data hinder the performance of COQE models. In this paper, we introduce a novel approach called low-resource comparative opinion quintuple extraction by Data Augmentation with Prompting (DAP). 
Firstly, we present an end-to-end model architecture that is better suited to the data augmentation method from triplets to quintuples and can effectively avoid error propagation. Additionally, we introduce a data-centric augmentation approach that leverages the robust generative abilities of ChatGPT and integrates transfer learning techniques. Experimental results over three datasets (Camera, Car, Ele) demonstrate that our approach yields substantial improvements and achieves state-of-the-art results. The source code and data are publicly released at: https://github.com/qtxu-nlp/COQE-DAP.", "keywords": "Comparative opinion quintuple extraction;Low-resource;Data augmentation;Large language models;Sentiment analysis", "primary_area": "", "supplementary_material": "", "author": "Qingting Xu;Yu Hong;Fubang Zhao;Kaisong Song;Yangyang Kang;Jiaxiang Chen;Guodong Zhou", "authorids": "~Qingting_Xu1;~Yu_Hong1;~Fubang_Zhao3;~Kaisong_Song1;~Yangyang_Kang1;~Jiaxiang_Chen1;~Guodong_Zhou1", "gender": ";M;M;M;M;M;M", "homepage": "https://blog.csdn.net/weixin_41862755;;https://sites.google.com/site/kaisongsong;;https://github.com/chenjx56;http://nlp.suda.edu.cn/~gdzhou/;", "dblp": ";66/5306;30/11037;162/0109;;;https://dblp.uni-trier.de/pid/249/5765.html", "google_scholar": ";;Ms678voAAAAJ;https://scholar.google.com/citations?hl=zh-CN;;;", "or_profile": "~Qingting_Xu1;~Yu_Hong1;~Kaisong_Song1;~Yangyang_Kang1;~Jiaxiang_Chen1;~Guodong_Zhou1;~FUBANG_ZHAO2", "aff": "Suzhou University;Suzhou University;Alibaba Group;Alibaba Group;Suzhou University;Soochow University, China;Alibaba Group", "aff_domain": "suda.edu.cn;suda.edu.cn;alibaba-inc.com;alibaba.com;suda.edu.cn;suda.edu.cn;alibaba-inc.com", "position": "PhD student;Full Professor;Algorithm Expert;Staff Algorithm Engineer;MS student;Full Professor;Researcher", "bibtex": "@inproceedings{\nxu2023lowresource,\ntitle={Low-Resource Comparative Opinion Quintuple Extraction by Data Augmentation with Prompting},\nauthor={Qingting Xu and Yu Hong and Fubang Zhao and Kaisong Song and Yangyang Kang and Jiaxiang Chen and Guodong Zhou},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=nC47EZVfAw}\n}", "github": "", "project": "", "reviewers": "1sq6;ttGM;Azi6;wHff", "site": "https://openreview.net/forum?id=nC47EZVfAw", "pdf_size": 0, "rating": "4;4;4;4", "confidence": "4;4;3;4", "excitement": "4;3;3;3", "reproducibility": "4;4;4;3", "correctness": "4;3;2;3", "rating_avg": 4.0, "confidence_avg": 3.75, "excitement_avg": 3.25, "reproducibility_avg": 3.75, "correctness_avg": 3.0, "replies_avg": 13, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0002-5979-7769;;;;", "linkedin": ";;;;;;", "aff_unique_index": "0;0;1;1;0;2;1", "aff_unique_norm": "Suzhou University;Alibaba Group;Soochow University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.suda.edu.cn;https://www.alibaba.com;https://www.soochow.edu.cn", "aff_unique_abbr": "Suda;Alibaba;Soochow U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "nC8WUrpWjG", "title": "Answer-state Recurrent Relational Network (AsRRN) for Constructed Response Assessment and Feedback Grouping", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "STEM educators must trade off the ease of assessing selected response (SR) questions, like multiple choice, with constructed response (CR) questions,
where students articulate their own reasoning. Our work addresses a CR type new to NLP but common in college STEM, consisting of multiple questions per context. To relate the context, the questions, the reference responses, and students' answers, we developed an Answer-state Recurrent Relational Network (AsRRN). In recurrent time-steps, relation vectors are learned for specific dependencies in a computational graph, where the nodes encode the distinct types of text input. AsRRN incorporates contrastive loss for better representation learning, which improves performance and supports student feedback. AsRRN was developed on a new dataset of 6,532 student responses to three, two-part CR questions. AsRRN outperforms classifiers based on LLMs, a previous relational network for CR questions, and few-shot learning with GPT-3.5. Ablation studies show the distinct contributions of AsRRN's dependency structure, the number of time steps in the recurrence, and the contrastive loss.", "keywords": "Writing assessment;relation networks;contrastive learning", "primary_area": "", "supplementary_material": "", "author": "Zhaohui Li;Susan E Lloyd;Matthew D Beckman;Rebecca J. Passonneau", "authorids": "~Zhaohui_Li1;~Susan_E_Lloyd1;~Matthew_D_Beckman1;~Rebecca_J._Passonneau1", "gender": "M;;;F", "homepage": ";https://science.psu.edu/stat/people/sel5591;;https://sites.psu.edu/becky/", "dblp": "46/4444;;;04/696", "google_scholar": ";;;https://scholar.google.com/citations?hl=en", "or_profile": "~Zhaohui_Li1;~Susan_E_Lloyd1;~Matthew_D_Beckman1;~Rebecca_J._Passonneau1", "aff": "Pennsylvania State University;The Pennsylvania State University;Pennsylvania State University;Pennsylvania State University", "aff_domain": "psu.edu;psu.edu;psu.edu;psu.edu", "position": "PhD student;PhD student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nli2023answerstate,\ntitle={Answer-state Recurrent Relational Network (As{RRN}) for Constructed Response Assessment and Feedback Grouping},\nauthor={Zhaohui Li and Susan E Lloyd and Matthew D Beckman and Rebecca J. Passonneau},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=nC8WUrpWjG}\n}", "github": "", "project": "", "reviewers": "tuki;ui5s;DeHo", "site": "https://openreview.net/forum?id=nC8WUrpWjG", "pdf_size": 0, "rating": "1;1;1", "confidence": "1;4;2", "excitement": "3;2;3", "reproducibility": "3;3;4", "correctness": "2;3;3", "rating_avg": 1.0, "confidence_avg": 2.3333333333333335, "excitement_avg": 2.6666666666666665, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 2.6666666666666665, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0002-6747-3289;0000-0001-8626-811X", "linkedin": ";;;rebecca-j-passonneau-8967091/", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Pennsylvania State University", "aff_unique_dep": "", "aff_unique_url": "https://www.psu.edu", "aff_unique_abbr": "PSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "nE9aUYqz6k", "title": "What Else Do I Need to Know? The Effect of Background Information on Users\u2019 Reliance on QA Systems", "track": "main", "status": "Long Main", "tldr": "", "abstract": "NLP systems have shown impressive performance at answering questions by retrieving relevant context. 
However, with the increasingly large models, it is impossible and often undesirable to constrain models' knowledge or reasoning to only the retrieved context. This leads to a mismatch between the information that \\textit{the models} access to derive the answer and the information that is available to \\textit{the user} to assess the model predicted answer. In this work, we study how users interact with QA systems in the absence of sufficient information to assess their predictions. Further, we ask whether adding the requisite background helps mitigate users' over-reliance on predictions. Our study reveals that users rely on model predictions even in the absence of sufficient information needed to assess the model's correctness. Providing the relevant background, however, helps users better catch model errors, reducing over-reliance on incorrect predictions. On the flip side, background information also increases users' confidence in their accurate as well as inaccurate judgments. Our work highlights that supporting users' verification of QA predictions is an important, yet challenging, problem.", "keywords": "human-centered NLP;over-reliance;explainability", "primary_area": "", "supplementary_material": "", "author": "Navita Goyal;Eleftheria Briakou;Amanda Stephanie Liu;Connor Baumler;Claire Bonial;Jeffrey Micher;Clare R. Voss;Marine Carpuat;Hal Daum\u00e9 III", "authorids": "~Navita_Goyal1;~Eleftheria_Briakou1;~Amanda_Stephanie_Liu1;~Connor_Baumler1;~Claire_Bonial1;~Jeffrey_Micher1;~Clare_R._Voss1;~Marine_Carpuat1;~Hal_Daum\u00e9_III1", "gender": "F;F;F;Not Specified;F;M;F;F;M", "homepage": "https://navitagoyal.github.io/;https://elbria.github.io;;https://ctbaumler.github.io/;;;;http://www.cs.umd.edu/~marine/;http://hal3.name", "dblp": "277/1584;217/4858;;320/5752;40/8157;89/8161;41/3792;71/1827;77/2856.html", "google_scholar": "YrvZ2E0AAAAJ;bxqqNFEAAAAJ;;V1pX-PwAAAAJ;PqD57uMAAAAJ;;https://scholar.google.com/citations?hl=en;iPAX6jcAAAAJ;PbEw81gAAAAJ", "or_profile": "~Navita_Goyal1;~Eleftheria_Briakou1;~Amanda_Stephanie_Liu1;~Connor_Baumler1;~Claire_Bonial1;~Jeffrey_Micher1;~Clare_R._Voss1;~Marine_Carpuat1;~Hal_Daum\u00e9_III1", "aff": "University of Maryland, College Park;Department of Computer Science, University of Maryland, College Park;University of Maryland, College Park;University of Maryland, College Park;Army Research Lab;US Army Research Lab;ARL;University of Maryland, College Park;Microsoft", "aff_domain": "umd.edu;cs.umd.edu;umd.edu;umd.edu;army.mil;army.mil;army.mil;umd.edu;microsoft.com", "position": "PhD student;PhD student;Undergrad student;PhD student;Researcher;Researcher;Principal Researcher;Associate Professor;Senior Principle Researcher", "bibtex": "@inproceedings{\ngoyal2023what,\ntitle={What Else Do I Need to Know? The Effect of Background Information on Users{\\textquoteright} Reliance on {QA} Systems},\nauthor={Navita Goyal and Eleftheria Briakou and Amanda Stephanie Liu and Connor Baumler and Claire Bonial and Jeffrey Micher and Clare R. 
Voss and Marine Carpuat and Hal Daum{\\'e} III},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=nE9aUYqz6k}\n}", "github": "", "project": "", "reviewers": "7seo;Dbbf;bvZJ", "site": "https://openreview.net/forum?id=nE9aUYqz6k", "pdf_size": 0, "rating": "4;4;4", "confidence": "5;3;4", "excitement": "3;4;4", "reproducibility": "4;3;4", "correctness": "3;4;5", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 9, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;;0000-0001-5023-6474;;", "linkedin": ";;asl3/;;claire-bonial-093424100/;;;;", "aff_unique_index": "0;1;0;0;2;3;2;0;4", "aff_unique_norm": "University of Maryland;University of Maryland, College Park;Army Research Laboratory;US Army Research Laboratory;Microsoft", "aff_unique_dep": ";Department of Computer Science;;Research;Microsoft Corporation", "aff_unique_url": "https://www/umd.edu;https://www/umd.edu;https://www.arl.army.mil;https://www.arl.army.mil;https://www.microsoft.com", "aff_unique_abbr": "UMD;UMD;ARL;ARL;Microsoft", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "College Park;", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "nFagtplIb8", "title": "Improving Chinese Pop Song and Hokkien Gezi Opera Singing Voice Synthesis by Enhancing Local Modeling", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Singing Voice Synthesis (SVS) strives to synthesize pleasing vocals based on music scores and lyrics. The current acoustic models based on Transformer usually process the entire sequence globally and use a simple L1 loss. However, this approach overlooks the significance of local modeling within the sequence and the local optimization of the hard-to-synthesize parts in the predicted mel-spectrogram. Consequently, the synthesized audio exhibits local incongruities (\\textsl{e.g.}, local pronunciation jitter or local noise). To address this problem, we propose two methods to enhance local modeling in the acoustic model. First, we devise a nearest neighbor local attention, where each phoneme token focuses only on the adjacent phoneme tokens located before and after it. Second, we propose a phoneme-level local adaptive weights loss function that enables the model to focus more on the hard-to-synthesize parts of the mel-spectrogram. We have verified the universality of our methods on public Chinese pop song and Hokkien Gezi Opera datasets. Extensive experiments have demonstrated the effectiveness of our methods, resulting in significant improvements in both objective and subjective evaluations when compared to the strong baselines. 
Our code and demonstration samples are available at \n\\href{https://github.com/baipeng1/SVSELM}{https://github.com/baipeng1/SVSELM}.", "keywords": "Singing voice synthesis;local modeling enhancement;local adaptive weights loss;Hokkien Gezi Opera;Chinese pop song", "primary_area": "", "supplementary_material": "", "author": "Peng Bai;Yue Zhou;Meizhen Zheng;Wujin Sun;Xiaodong Shi", "authorids": "~Peng_Bai3;~Yue_Zhou9;~Meizhen_Zheng1;~Wujin_Sun1;~Xiaodong_Shi2", "gender": ";M;M;M;M", "homepage": "https://www.scholat.com/baipeng;;;http://nlp.xmu.edu.cn/group.html;", "dblp": ";;;;73/5055", "google_scholar": ";https://scholar.google.com.hk/citations?user=JZXXAJAAAAAJ;;;", "or_profile": "~Peng_Bai3;~Yue_Zhou9;~Meizhen_Zheng1;~Wujin_Sun1;~Xiaodong_Shi2", "aff": "Xiamen University;Xiamen University;Xiamen University;Xiamen University;Xiamen University, Tsinghua University", "aff_domain": "xmu.edu.cn;xmu.edu.cn;xmu.edu.cn;xmu.edu.cn;xmu.edu.cn", "position": "PhD student;PhD student;MS student;MS student;Full Professor", "bibtex": "@inproceedings{\nbai2023improving,\ntitle={Improving Chinese Pop Song and Hokkien Gezi Opera Singing Voice Synthesis by Enhancing Local Modeling},\nauthor={Peng Bai and Yue Zhou and Meizhen Zheng and Wujin Sun and Xiaodong Shi},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=nFagtplIb8}\n}", "github": "", "project": "", "reviewers": "knLR;DiTD;cHLv;SqYD", "site": "https://openreview.net/forum?id=nFagtplIb8", "pdf_size": 0, "rating": "3;3;3;3", "confidence": "5;4;3;4", "excitement": "3;3;4;3", "reproducibility": "5;3;3;4", "correctness": "3;3;3;3", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 3.25, "reproducibility_avg": 3.75, "correctness_avg": 3.0, "replies_avg": 14, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0002-1447-7358;;", "linkedin": ";;;;", "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Xiamen University", "aff_unique_dep": "", "aff_unique_url": "https://www.xmu.edu.cn", "aff_unique_abbr": "XMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "nGCwDjinT8", "title": "Adaptive Hinge Balance Loss for Document-Level Relation Extraction", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "Document-Level Relation Extraction aims at predicting relations between entities from multiple sentences. A common practice is to select multi-label classification thresholds to decide whether a relation exists between an entity pair. However, in the document-level task, most entity pairs do not express any relations, resulting in a highly imbalanced distribution between positive and negative classes. We argue that the imbalance problem affects threshold selection and may lead to incorrect \"no-relation\" predictions. In this paper, we propose to down-weight the easy negatives by utilizing a distance between the classification threshold and the predicted score of each relation. Our novel Adaptive Hinge Balance Loss measures the difficulty of each relation class with the distance, putting more focus on hard, misclassified relations, i.e. the minority positive relations. Experiment results on Re-DocRED demonstrate the superiority of our approach over other balancing methods. 
Source codes are available at https://github.com/Jize-W/HingeABL.", "keywords": "document-level relation extraction;multi-label classification;balancing methods;loss function design", "primary_area": "", "supplementary_material": "", "author": "Jize Wang;Xinyi Le;Xiaodi Peng;Cailian Chen", "authorids": "~Jize_Wang1;~Xinyi_Le1;~Xiaodi_Peng3;~Cailian_Chen1", "gender": "F;F;F;M", "homepage": "https://jize-w.github.io/;;https://iwin.sjtu.edu.cn/;", "dblp": "350/4823;;;", "google_scholar": "T95fIJYAAAAJ;MGZyMf4AAAAJ;;", "or_profile": "~Jize_Wang1;~Xinyi_Le1;~Cailian_Chen1;~xiaodi_peng2", "aff": "Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University;", "aff_domain": "sjtu.edu.cn;sjtu.edu;sjtu.edu.cn;", "position": "MS student;Associate Professor;Full Professor;", "bibtex": "@inproceedings{\nwang2023adaptive,\ntitle={Adaptive Hinge Balance Loss for Document-Level Relation Extraction},\nauthor={Jize Wang and Xinyi Le and Xiaodi Peng and Cailian Chen},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=nGCwDjinT8}\n}", "github": "", "project": "", "reviewers": "cr9k;zSKD;yRY8", "site": "https://openreview.net/forum?id=nGCwDjinT8", "pdf_size": 0, "rating": "4;4;4", "confidence": "5;3;4", "excitement": "3;3;3", "reproducibility": "4;4;4", "correctness": "3;4;3", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.0, "reproducibility_avg": 4.0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;", "linkedin": ";;;https://www.linkedin.cn/incareer/in/ACoAAEQ67BsBa-81Uzz3bdH_YXMutyjcllQaFEs", "aff_unique_index": "0;0;0", "aff_unique_norm": "Shanghai Jiao Tong University", "aff_unique_dep": "", "aff_unique_url": "https://www.sjtu.edu.cn", "aff_unique_abbr": "SJTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "nGFQ7IqOyg", "title": "Non-Autoregressive Math Word Problem Solver with Unified Tree Structure", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Existing MWP solvers employ sequence or binary tree to present the solution expression and decode it from given problem description. However, such structures fail to handle the variants that can be derived via mathematical manipulation, e.g., $(a_1+a_2)*a_3$ and $a_1 * a_3+a_2 * a_3$ can both be possible valid solutions for a same problem but formulated as different expression sequences or trees. The multiple solution variants depicting different possible solving procedures for the same input problem would raise two issues: 1) making it hard for the model to learn the mapping function between the input and output spaces effectively, and 2) wrongly indicating \\textit{wrong} when evaluating a valid expression variant. To address these issues, we introduce a unified tree structure to present a solution expression, where the elements are permutable and identical for all the expression variants. We propose a novel non-autoregressive solver, named \\textit{MWP-NAS}, to parse the problem and deduce the solution expression based on the unified tree. For evaluating the possible expression variants, we design a path-based metric to evaluate the partial accuracy of expressions of a unified tree. The results from extensive experiments conducted on Math23K and MAWPS demonstrate the effectiveness of our proposed MWP-NAS. 
The codes and checkpoints are available at: \\url{https://github.com/mengqunhan/MWP-NAS}.", "keywords": "MWP solving;non-autoregressive solver;unified tree structure", "primary_area": "", "supplementary_material": "", "author": "Yi Bin;Mengqun Han;WENHAO SHI;Lei Wang;Yang Yang;See-Kiong Ng;Heng Tao Shen", "authorids": "~Yi_Bin1;~Mengqun_Han1;~WENHAO_SHI1;~Lei_Wang28;~Yang_Yang37;~See-Kiong_Ng1;~Heng_Tao_Shen3", "gender": ";;M;M;M;M;M", "homepage": ";;https://github.com/steven640pixel;https://demoleiwang.github.io/HomePage/;http://cfm.uestc.edu.cn/~yangyang/;https://www.comp.nus.edu.sg/~ngsk/;https://cfm.uestc.edu.cn/~shenht/", "dblp": "172/9392;;;;;00/5480;s/HTShen", "google_scholar": "KDdkZKQAAAAJ;;;VidA02oAAAAJ;;https://scholar.google.com.tw/citations?user=_wsommYAAAAJ;https://scholar.google.com.au/citations?user=krryaDkAAAAJ", "or_profile": "~Yi_Bin1;~Mengqun_Han1;~WENHAO_SHI1;~Lei_Wang28;~Yang_Yang37;~See-Kiong_Ng1;~Hengtao_Shen1", "aff": "National University of Singapore;University of Electronic Science and Technology of China;University of Electronic Science and Technology of China;Singapore Management University;University of Electronic Science and Technology of China;National University of Singapore;", "aff_domain": "nus.edu;uestc.edu.cn;uestc.edu.cn;smu.edu.sg;uestc.edu.cn;nus.edu.sg;", "position": "Researcher;MS student;MS student;PhD student;Full Professor;Full Professor;", "bibtex": "@inproceedings{\nbin2023nonautoregressive,\ntitle={Non-Autoregressive Math Word Problem Solver with Unified Tree Structure},\nauthor={Yi Bin and Mengqun Han and WENHAO SHI and Lei Wang and Yang Yang and See-Kiong Ng and Heng Tao Shen},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=nGFQ7IqOyg}\n}", "github": "", "project": "", "reviewers": "P6mx;jd6M;fZF3", "site": "https://openreview.net/forum?id=nGFQ7IqOyg", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;4", "excitement": "4;4;3", "reproducibility": "3;3;3", "correctness": "4;4;3", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.0, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-4737-3467;;;;0000-0001-6565-7511;", "linkedin": ";;;;;seekiong/?originalSubdomain=sg;", "aff_unique_index": "0;1;1;2;1;0", "aff_unique_norm": "National University of Singapore;University of Electronic Science and Technology of China;Singapore Management University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.nus.edu.sg;https://www.uestc.edu.cn;https://www.smu.edu.sg", "aff_unique_abbr": "NUS;UESTC;SMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;0;1;0", "aff_country_unique": "Singapore;China" }, { "id": "nI0X5IZOQA", "title": "Oolong: Investigating What Makes Transfer Learning Hard with Controlled Studies", "track": "main", "status": "Short Main", "tldr": "", "abstract": "When we transfer a pretrained language model to a new language, there are many axes of variation that change at once. To disentangle the impact of different factors like syntactic similarity and vocabulary similarity, we propose a set of \\emph{controlled transfer studies}: we systematically transform the language of the GLUE benchmark, altering one axis of crosslingual variation at a time, and then measure the resulting drops in a pretrained model's downstream performance. 
We find that models can largely recover from syntactic-style shifts, but cannot recover from vocabulary misalignment and embedding matrix re-initialization, even with continued pretraining on 15 million tokens.\nMoreover, good-quality tokenizers in the transfer language do not make vocabulary alignment easier. Our experiments provide insights into the factors of cross-lingual transfer that researchers should most focus on when designing language transfer scenarios.", "keywords": "crosslingual;knowledge transfer;language model;finetuning", "primary_area": "", "supplementary_material": "", "author": "Zhengxuan Wu;Alex Tamkin;Isabel Papadimitriou", "authorids": "~Zhengxuan_Wu1;~Alex_Tamkin1;~Isabel_Papadimitriou1", "gender": "M;;F", "homepage": "https://cs.stanford.edu/~wuzhengx/;;https://www.isabelpapad.com/", "dblp": "234/4650;;264/0034", "google_scholar": "CBvE6lwAAAAJ;;", "or_profile": "~Zhengxuan_Wu1;~Alex_Tamkin1;~Isabel_Papadimitriou1", "aff": "Stanford University;;Stanford University", "aff_domain": "stanford.edu;;stanford.edu", "position": "PhD student;;PhD student", "bibtex": "@inproceedings{\nwu2023oolong,\ntitle={Oolong: Investigating What Makes Transfer Learning Hard with Controlled Studies},\nauthor={Zhengxuan Wu and Alex Tamkin and Isabel Papadimitriou},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=nI0X5IZOQA}\n}", "github": "", "project": "", "reviewers": "rSVE;qFy2;h6Fy;4H2z", "site": "https://openreview.net/forum?id=nI0X5IZOQA", "pdf_size": 0, "rating": "4;4;4;4", "confidence": "4;4;4;4", "excitement": "4;2;3;4", "reproducibility": "4;4;4;3", "correctness": "4;3;4;4", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.25, "reproducibility_avg": 3.75, "correctness_avg": 3.75, "replies_avg": 13, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0003-0214-0659", "linkedin": ";;", "aff_unique_index": "0;0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "", "aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Stanford", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "nIp7wkMeMP", "title": "Enhancing Low-resource Fine-grained Named Entity Recognition by Leveraging Coarse-grained Datasets", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Named Entity Recognition (NER) frequently suffers from the problem of insufficient labeled data, particularly in fine-grained NER scenarios.\nAlthough $K$-shot learning techniques can be applied, their performance tends to saturate when the number of annotations exceeds several tens of labels. \nTo overcome this problem, we utilize existing coarse-grained datasets that offer a large number of annotations.\nA straightforward approach to address this problem is pre-finetuning, which employs coarse-grained data for representation learning.\nHowever, it cannot directly utilize the relationships between fine-grained and coarse-grained entities, although a fine-grained entity type is likely to be a subcategory of a coarse-grained entity type. \nWe propose a fine-grained NER model with a Fine-to-Coarse(F2C) mapping matrix to leverage the hierarchical structure explicitly. 
\nIn addition, we present an inconsistency filtering method to eliminate coarse-grained entities that are inconsistent with fine-grained entity types to avoid performance degradation.\nOur experimental results show that our method outperforms both $K$-shot learning and supervised learning methods when dealing with a small number of fine-grained annotations.", "keywords": "Named Entity Recognition;Fine-grained NER;Low-resource scenario", "primary_area": "", "supplementary_material": "", "author": "Su Ah Lee;Seokjin Oh;Woohwan Jung", "authorids": "~Su_Ah_Lee2;~Seokjin_Oh1;~Woohwan_Jung1", "gender": "F;M;M", "homepage": ";;https://sites.google.com/view/whjung/", "dblp": ";;193/7295", "google_scholar": ";;KsU7NzIAAAAJ", "or_profile": "~Su_Ah_Lee2;~Seokjin_Oh1;~Woohwan_Jung1", "aff": "Hanyang University;Hanyang University;Hanyang University", "aff_domain": "hanyang.ac.kr;hanyang.ac.kr;hanyang.ac.kr", "position": "MS student;MS student;Assistant Professor", "bibtex": "@inproceedings{\nlee2023enhancing,\ntitle={Enhancing Low-resource Fine-grained Named Entity Recognition by Leveraging Coarse-grained Datasets},\nauthor={Su Ah Lee and Seokjin Oh and Woohwan Jung},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=nIp7wkMeMP}\n}", "github": "", "project": "", "reviewers": "Y7sL;ZwPd;D3qZ", "site": "https://openreview.net/forum?id=nIp7wkMeMP", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;3;4", "excitement": "4;3;4", "reproducibility": "4;4;3", "correctness": "4;3;3", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0003-4561-2214", "linkedin": "su-ah-lee-603974267/;seokjinoh;", "aff_unique_index": "0;0;0", "aff_unique_norm": "Hanyang University", "aff_unique_dep": "", "aff_unique_url": "https://www.hanyang.ac.kr", "aff_unique_abbr": "HYU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "South Korea" }, { "id": "nIuJXuSdhn", "title": "Can LLMs Facilitate Interpretation of Pre-trained Language Models?", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Work done to uncover the knowledge encoded within pre-trained language models rely on annotated corpora or human-in-the-loop methods. However, these approaches are limited in terms of scalability and the scope of interpretation. We propose using a large language model, ChatGPT, as an annotator to enable fine-grained interpretation analysis of pre-trained language models. We discover latent concepts within pre-trained language models by applying agglomerative hierarchical clustering over contextualized representations and then annotate these concepts using ChatGPT. Our findings demonstrate that ChatGPT produces accurate and semantically richer annotations compared to human-annotated concepts. Additionally, we showcase how GPT-based annotations empower interpretation analysis methodologies of which we demonstrate two: probing frameworks and neuron interpretation. 
To facilitate further exploration and experimentation in the field, we make available a substantial ConceptNet dataset (TCN) comprising 39,000 annotated concepts.", "keywords": "Interpretation;explainability;Large Language Models;Neuron Analysis", "primary_area": "", "supplementary_material": "", "author": "Basel Mousi;Nadir Durrani;Fahim Dalvi", "authorids": "~Basel_Mousi1;~Nadir_Durrani1;~Fahim_Dalvi1", "gender": "M;M;M", "homepage": ";https://nadirdurrani.github.io/;https://fdalvi.github.io", "dblp": "322/3648.html;54/9012;194/2537", "google_scholar": "c5nG1eIAAAAJ;https://scholar.google.co.uk/citations?user=K6uisFAAAAAJ;uQGCv10AAAAJ", "or_profile": "~Basel_Mousi1;~Nadir_Durrani1;~Fahim_Dalvi1", "aff": "Qatar Computing Research Institute;Qatar Computing Research Institute;Hamad Bin Khalifa University", "aff_domain": "hbku.edu.qa;hbku.edu.qa;hbku.edu.qa", "position": "Researcher;Scientist;Researcher", "bibtex": "@inproceedings{\nmousi2023can,\ntitle={Can {LLM}s Facilitate Interpretation of Pre-trained Language Models?},\nauthor={Basel Mousi and Nadir Durrani and Fahim Dalvi},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=nIuJXuSdhn}\n}", "github": "", "project": "", "reviewers": "ZW2Y;zd3p;VLTK", "site": "https://openreview.net/forum?id=nIuJXuSdhn", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;2;4", "excitement": "4;3;3", "reproducibility": "4;3;3", "correctness": "4;4;4", "rating_avg": 5.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-9378-4128;", "linkedin": "basel-mousi-8a5113193/;nadir-durrani-04048744/;", "aff_unique_index": "0;0;1", "aff_unique_norm": "Qatar Computing Research Institute;Hamad Bin Khalifa University", "aff_unique_dep": ";", "aff_unique_url": "https://www.qcri.org;https://www.hbku.edu.qa", "aff_unique_abbr": "QCRI;HBKU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Qatar" }, { "id": "nMjktU5AiP", "title": "IndiSocialFT: Multilingual Word Representation for Indian languages in code-mixed environment", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "The increasing number of Indian language users on the internet necessitates the development of Indian language technologies. In response to this demand, our paper presents a generalized representation vector for diverse text characteristics, including native scripts, transliterated text, multilingual, code-mixed, and social media-related attributes. We gather text from both social media and well-formed sources and utilize the FastText model to create the \"IndiSocialFT\" embedding. Through intrinsic and extrinsic evaluation methods, we compare IndiSocialFT with three popular pretrained embeddings trained over Indian languages. 
Our findings show that the proposed embedding surpasses the baselines in most cases and languages, demonstrating its suitability for various NLP applications.", "keywords": "Indian Languages;Multilingual Word Embedding;Code-mixed;Social Media Text", "primary_area": "", "supplementary_material": "", "author": "Saurabh Kumar;Ranbir Singh Sanasam;Sukumar Nandi", "authorids": "~Saurabh_Kumar3;~Ranbir_Singh_Sanasam1;~Sukumar_Nandi1", "gender": "M;M;M", "homepage": "https://iitg.ac.in/stud/saurabh1003/;https://www.iitg.ac.in/ranbir/research/;https://www.iitg.ac.in/sukumar", "dblp": ";81/2295.html;20/4077.html", "google_scholar": "PhgrPnAAAAAJ;https://scholar.google.co.in/citations?user=Es12tcUAAAAJ;https://scholar.google.co.in/citations?hl=en", "or_profile": "~Saurabh_Kumar3;~Ranbir_Singh_Sanasam1;~Sukumar_Nandi1", "aff": "Indian Institute of Technology, Guwahati;Indian Institute of Technology, Guwahati, Dhirubhai Ambani Institute Of Information and Communication Technology;Indian Institute of Technology, Guwahati", "aff_domain": "iitg.ac.in;iitg.ac.in;iitg.ac.in", "position": "PhD student;Full Professor;Full Professor", "bibtex": "@inproceedings{\nkumar2023indisocialft,\ntitle={IndiSocial{FT}: Multilingual Word Representation for Indian languages in code-mixed environment},\nauthor={Saurabh Kumar and Ranbir Singh Sanasam and Sukumar Nandi},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=nMjktU5AiP}\n}", "github": "", "project": "", "reviewers": "NDo2;tX8d;T51M", "site": "https://openreview.net/forum?id=nMjktU5AiP", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;4;4", "excitement": "3;4;2", "reproducibility": "3;3;3", "correctness": "3;2;3", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.0, "reproducibility_avg": 3.0, "correctness_avg": 2.6666666666666665, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-0717-9512;0000-0003-0484-2144;0000-0002-5869-1057", "linkedin": ";;sukumar-nandi-b28b853/", "aff_unique_index": "0;1;0", "aff_unique_norm": "Indian Institute of Technology Guwahati;Indian Institute of Technology, Guwahati", "aff_unique_dep": ";", "aff_unique_url": "https://www.iitg.ac.in;https://www.iitg.ac.in", "aff_unique_abbr": "IIT Guwahati;IIT Guwahati", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Guwahati", "aff_country_unique_index": "0;0;0", "aff_country_unique": "India" }, { "id": "nPzrjWrtlz", "title": "The Truth, The Whole Truth, and Nothing but the Truth: A New Benchmark Dataset for Hebrew Text Credibility Assessment", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "In the age of information overload, it is more important than ever to discern fact from fiction. From the internet to traditional media, we are constantly confronted with a deluge of information, much of which comes from politicians and other public figures who wield significant influence. In this paper, we introduce HeTrue: a new, publicly available dataset for evaluating the credibility of statements made by Israeli public figures and politicians. This dataset consists of 1021 statements, manually annotated by Israeli professional journalists, for their credibility status. Using this corpus, we set out to assess whether the credibility of statements can be predicted based on the text alone. 
To establish a baseline, we compare text-only methods with others using additional data like metadata, context, and evidence. Furthermore, we develop several credibility assessment models, including a feature-based model that utilizes linguistic features, and state-of-the-art transformer-based models with contextualized embeddings from a pre-trained encoder. Empirical results demonstrate improved performance when models integrate statement and context, outperforming those relying on the statement text alone. Our best model, which also integrates evidence, achieves a 48.3 F1 Score, suggesting that HeTrue is a challenging benchmark, calling for further work on this task.", "keywords": "NLP and social media;NLP application;Fake news detection;Fact Checking;Credibility Assessment", "primary_area": "", "supplementary_material": "", "author": "Ben Hagag;Reut Tsarfaty", "authorids": "~Ben_Hagag1;~Reut_Tsarfaty1", "gender": "M;F", "homepage": ";", "dblp": "332/1789;21/3716", "google_scholar": ";", "or_profile": "~Ben_Hagag1;~Reut_Tsarfaty1", "aff": "Bar-Ilan University;Bar-Ilan University, Technion", "aff_domain": "biu.ac.il;biu.ac.il", "position": "MS student;Associate Professor", "bibtex": "@inproceedings{\nhagag2023the,\ntitle={The Truth, The Whole Truth, and Nothing but the Truth: A New Benchmark Dataset for Hebrew Text Credibility Assessment},\nauthor={Ben Hagag and Reut Tsarfaty},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=nPzrjWrtlz}\n}", "github": "", "project": "", "reviewers": "BPdV;UQD5;uzQB", "site": "https://openreview.net/forum?id=nPzrjWrtlz", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;5;3", "excitement": "3;2;3", "reproducibility": "4;1;3", "correctness": "3;1;3", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 2.6666666666666665, "reproducibility_avg": 2.6666666666666665, "correctness_avg": 2.3333333333333335, "replies_avg": 10, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";", "linkedin": ";", "aff_unique_index": "0;0", "aff_unique_norm": "Bar-Ilan University", "aff_unique_dep": "", "aff_unique_url": "https://www.biu.ac.il", "aff_unique_abbr": "BIU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Israel" }, { "id": "nRB8VpeM7b", "title": "Pushdown Layers: Encoding Recursive Structure in Transformer Language Models", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Recursion is a prominent feature of human language, and fundamentally challenging for self-attention due to the lack of an explicit recursive-state tracking mechanism. Consequently, Transformer language models poorly capture long-tail recursive structure and exhibit sample-inefficient syntactic generalization. This work introduces Pushdown Layers, a new self-attention layer that models recursive state via a stack tape that tracks estimated depths of every token in an incremental parse of the observed prefix. Transformer LMs with Pushdown Layers are syntactic language models that autoregressively and synchronously update this stack tape as they predict new tokens, in turn using the stack tape to softly modulate attention over tokens---for instance, learning to ``skip'' over closed constituents. 
When trained on a corpus of strings annotated with silver constituency parses, Transformers equipped with Pushdown Layers achieve dramatically better and 3-5x more sample-efficient syntactic generalization, while maintaining similar perplexities. Pushdown Layers are a drop-in replacement for standard self-attention. We illustrate this by finetuning GPT2-medium with Pushdown Layers on an automatically parsed WikiText-103, leading to improvements on several GLUE text classification tasks.", "keywords": "recursive structure;syntactic language models;generalization", "primary_area": "", "supplementary_material": "", "author": "Shikhar Murty;Pratyusha Sharma;Jacob Andreas;Christopher D Manning", "authorids": "~Shikhar_Murty1;~Pratyusha_Sharma1;~Jacob_Andreas1;~Christopher_D_Manning1", "gender": "M;F;M;M", "homepage": "https://murtyshikhar.github.io/;https://pratyushasharma.github.io/;http://web.mit.edu/jda/www;https://nlp.stanford.edu/~manning/", "dblp": "202/2040;228/7904;97/8154;m/ChristopherDManning", "google_scholar": "https://scholar.google.ca/citations?user=ubAcojQAAAAJ;RGiCLUgAAAAJ;dnZ8udEAAAAJ;1zmDOdwAAAAJ", "or_profile": "~Shikhar_Murty1;~Pratyusha_Sharma1;~Jacob_Andreas1;~Christopher_D_Manning1", "aff": "Stanford University;Massachusetts Institute of Technology;Microsoft;Computer Science Department, Stanford University", "aff_domain": "cs.stanford.edu;mit.edu;microsoft.com;cs.stanford.edu", "position": "PhD student;PhD student;Researcher;Full Professor", "bibtex": "@inproceedings{\nmurty2023pushdown,\ntitle={Pushdown Layers: Encoding Recursive Structure in Transformer Language Models},\nauthor={Shikhar Murty and Pratyusha Sharma and Jacob Andreas and Christopher D Manning},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=nRB8VpeM7b}\n}", "github": "", "project": "", "reviewers": "tLfe;iyy7;HLRB", "site": "https://openreview.net/forum?id=nRB8VpeM7b", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;4", "excitement": "4;4;3", "reproducibility": "3;3;3", "correctness": "4;3;3", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0000-0001-6155-649X", "linkedin": ";;;christopher-manning-011575/", "aff_unique_index": "0;1;2;0", "aff_unique_norm": "Stanford University;Massachusetts Institute of Technology;Microsoft", "aff_unique_dep": ";;Microsoft Corporation", "aff_unique_url": "https://www.stanford.edu;https://web.mit.edu;https://www.microsoft.com", "aff_unique_abbr": "Stanford;MIT;Microsoft", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Stanford;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "nSPsxWVe4k", "title": "SLOG: A Structural Generalization Benchmark for Semantic Parsing", "track": "main", "status": "Long Main", "tldr": "", "abstract": "The goal of compositional generalization benchmarks is to evaluate how well models generalize to new complex linguistic expressions. 
Existing benchmarks often focus on lexical generalization, the interpretation of novel lexical items in syntactic structures familiar from training; structural generalization tasks, where a model needs to interpret syntactic structures that are themselves unfamiliar from training, are often underrepresented, resulting in overly optimistic perceptions of how well models can generalize. We introduce SLOG, a semantic parsing dataset that extends COGS (Kim and Linzen, 2020) with 17 structural generalization cases. In our experiments, the generalization accuracy of Transformer models, including pretrained ones, only reaches 40.6%, while a structure-aware parser only achieves 70.8%. These results are far from the near-perfect accuracy existing models achieve on COGS, demonstrating the role of SLOG in foregrounding the large discrepancy between models' lexical and structural generalization capacities.", "keywords": "compositional generalization;structural generalization;long-distance dependencies;recursion;semantic parsing", "primary_area": "", "supplementary_material": "", "author": "Bingzhi Li;Lucia Donatelli;Alexander Koller;Tal Linzen;Yuekun Yao;Najoung Kim", "authorids": "~Bingzhi_Li1;~Lucia_Donatelli1;~Alexander_Koller2;~Tal_Linzen1;~Yuekun_Yao2;~Najoung_Kim1", "gender": "F;;;M;;F", "homepage": ";;;http://tallinzen.net;;https://najoungkim.github.io", "dblp": "290/6351;;;169/3438;;194/1249", "google_scholar": "yy_bLOMAAAAJ;;;5mJDXjoAAAAJ;;Uod-_B8AAAAJ", "or_profile": "~Bingzhi_Li1;~Lucia_Donatelli1;~Alexander_Koller2;~Tal_Linzen1;~Yuekun_Yao2;~Najoung_Kim1", "aff": "Universit\u00e9 de Paris;;;New York University;;Google", "aff_domain": "etu.u-paris.fr;;;nyu.edu;;google.com", "position": "PhD student;;;Assistant Professor;;Researcher", "bibtex": "@inproceedings{\nli2023slog,\ntitle={{SLOG}: A Structural Generalization Benchmark for Semantic Parsing},\nauthor={Bingzhi Li and Lucia Donatelli and Alexander Koller and Tal Linzen and Yuekun Yao and Najoung Kim},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=nSPsxWVe4k}\n}", "github": "", "project": "", "reviewers": "t68a;b6uL;up6o", "site": "https://openreview.net/forum?id=nSPsxWVe4k", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;3;4", "excitement": "4;4;4", "reproducibility": "4;5;3", "correctness": "4;4;4", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 4.0, "reproducibility_avg": 4.0, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;", "linkedin": ";;;;;", "aff_unique_index": "0;1;2", "aff_unique_norm": "Universit\u00e9 de Paris;New York University;Google", "aff_unique_dep": ";;Google", "aff_unique_url": "https://www.universitedeparis.fr;https://www.nyu.edu;https://www.google.com", "aff_unique_abbr": "UP;NYU;Google", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;1;1", "aff_country_unique": "France;United States" }, { "id": "nT4S0wgrwp", "title": "Understanding Translationese in Cross-Lingual Summarization", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Given a document in a source language, cross-lingual summarization (CLS) aims at generating a concise summary in a different target language. Unlike monolingual summarization (MS), naturally occurring source-language documents paired with target-language summaries are rare. 
To collect large-scale CLS data, existing datasets typically involve translation in their creation. However, the translated text is distinguished from the text originally written in that language, i.e., translationese. In this paper, we first confirm that different approaches of constructing CLS datasets will lead to different degrees of translationese. Then we systematically investigate how translationese affects CLS model evaluation and performance when it appears in source documents or target summaries. In detail, we find that (1) the translationese in documents or summaries of test sets might lead to the discrepancy between human judgment and automatic evaluation; (2) the translationese in training sets would harm model performance in real-world applications; (3) though machine-translated documents involve translationese, they are very useful for building CLS systems on low-resource languages under specific training strategies. Lastly, we give suggestions for future CLS research including dataset and model developments. We hope that our work could let researchers notice the phenomenon of translationese in CLS and take it into account in the future.", "keywords": "summarization;multi-lingual summarization;cross-lingual summarization", "primary_area": "", "supplementary_material": "", "author": "Jiaan Wang;Fandong Meng;Yunlong Liang;Tingyi Zhang;Jiarong Xu;Zhixu Li;Jie Zhou", "authorids": "~Jiaan_Wang1;~Fandong_Meng3;~Yunlong_Liang1;~Tingyi_Zhang1;~Jiarong_Xu2;~Zhixu_Li2;~Jie_Zhou8", "gender": "M;M;M;F;F;M;M", "homepage": "https://wangjiaan.cn/;http://fandongmeng.github.io/;;;https://galina0217.github.io/;http://demigroup.cn/staff/ZhiXuLi;", "dblp": "296/2112;117/4056.html;177/5130.html;;;38/3988;00/5012-16", "google_scholar": "5S8h7qAAAAAJ;sA8U4S0AAAAJ;P5iDDGIAAAAJ;tXmhiscAAAAJ;;https://scholar.google.com.hk/citations?hl=zh-CN;https://scholar.google.com.hk/citations?user=OijxQCMAAAAJ", "or_profile": "~Jiaan_Wang1;~Fandong_Meng3;~Yunlong_Liang1;~Tingyi_Zhang1;~Jiarong_Xu2;~Zhixu_Li2;~Jie_Zhou8", "aff": "Soochow University;WeChat AI, Tencent Inc.;Beijing Jiaotong University;Soochow University;Fudan University;Fudan University;WeChat AI, Tencent Inc.", "aff_domain": "suda.edu.cn;tencent.com;bjtu.edu.cn;suda.edu.cn;fudan.edu.cn;fudan.edu.cn;tencent.com", "position": "MS student;Principal Researcher;PhD student;MS student;Assistant Professor;Researcher;Principal Researcher", "bibtex": "@inproceedings{\nwang2023understanding,\ntitle={Understanding Translationese in Cross-Lingual Summarization},\nauthor={Jiaan Wang and Fandong Meng and Yunlong Liang and Tingyi Zhang and Jiarong Xu and Zhixu Li and Jie Zhou},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=nT4S0wgrwp}\n}", "github": "", "project": "", "reviewers": "AJcm;WQsK;pmRA;pBBk", "site": "https://openreview.net/forum?id=nT4S0wgrwp", "pdf_size": 0, "rating": "4;4;4;4", "confidence": "4;3;3;4", "excitement": "4;4;3;4", "reproducibility": "5;4;3;5", "correctness": "3;3;3;4", "rating_avg": 4.0, "confidence_avg": 3.5, "excitement_avg": 3.75, "reproducibility_avg": 4.25, "correctness_avg": 3.25, "replies_avg": 14, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-2587-7648;0000-0002-8158-2377;0000-0003-2311-7642;0000-0002-0087-3835;0000-0003-2973-1889;0000-0003-2355-288X;0000-0002-5899-5165", "linkedin": ";;;;;;", "aff_unique_index": "0;1;2;0;3;3;1", "aff_unique_norm": "Soochow University;Tencent;Beijing 
Jiao Tong University;Fudan University", "aff_unique_dep": ";WeChat AI;;", "aff_unique_url": "https://www.soochow.edu.cn;https://www.tencent.com;http://www.njtu.edu.cn/en;https://www.fudan.edu.cn", "aff_unique_abbr": "Soochow U;Tencent;BJTU;Fudan", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "nTKRAgssvX", "title": "SiMFy: A Simple Yet Effective Approach for Temporal Knowledge Graph Reasoning", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Temporal Knowledge Graph (TKG) reasoning, which focuses on leveraging temporal information to infer future facts in knowledge graphs, plays a vital role in knowledge graph completion. Typically, existing works for this task design graph neural networks and recurrent neural networks to respectively capture the structural and temporal information in KGs. Despite their effectiveness, in our practice, we find that they tend to suffer the issues of low training efficiency and insufficient generalization ability, which can be attributed to the over design of model architectures. To this end, this paper aims to figure out whether the current complex model architectures are necessary for temporal knowledge graph reasoning. As a result, we put forward a simple yet effective approach (termed SiMFy), which simply utilizes multilayer perceptron (MLP) to model the structural dependencies of events and adopts a fixed-frequency strategy to incorporate historical frequency during inference. Extensive experiments on real-world datasets demonstrate that our SiMFy can reach state-of-the-art performance with the following strengths: 1) faster convergence speed and better generalization ability; 2) a much smaller time consumption in the training process; and 3) better ability to capture the structural dependencies of events in KGs. 
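The SiMFy record above attributes its gains to a deliberately simple design: an MLP scores candidate objects for a (subject, relation) query, and a fixed-frequency term injects historical co-occurrence counts at inference time. The snippet below is a hedged reconstruction of that idea in PyTorch; the embedding size, the count normalization, and the mixing weight `alpha` are assumptions made for illustration, not the paper's exact configuration.

```python
import torch
import torch.nn as nn

class SiMFyStyleScorer(nn.Module):
    """Illustrative MLP scorer with fixed historical-frequency mixing (not the official SiMFy code)."""

    def __init__(self, num_entities: int, num_relations: int, dim: int = 200, alpha: float = 0.5):
        super().__init__()
        self.ent_emb = nn.Embedding(num_entities, dim)
        self.rel_emb = nn.Embedding(num_relations, dim)
        # A plain MLP models the structural dependencies of (subject, relation) queries.
        self.mlp = nn.Sequential(
            nn.Linear(2 * dim, dim), nn.ReLU(), nn.Linear(dim, num_entities)
        )
        self.alpha = alpha  # assumed fixed weight blending the MLP score with history

    def forward(self, subj: torch.Tensor, rel: torch.Tensor, hist_freq: torch.Tensor) -> torch.Tensor:
        # hist_freq: (batch, num_entities) counts of how often each object appeared
        # with (subj, rel) at earlier timestamps, pre-computed from the training KG.
        x = torch.cat([self.ent_emb(subj), self.rel_emb(rel)], dim=-1)
        mlp_score = torch.softmax(self.mlp(x), dim=-1)
        freq_score = hist_freq / hist_freq.sum(dim=-1, keepdim=True).clamp(min=1.0)
        return self.alpha * mlp_score + (1 - self.alpha) * freq_score
```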
These results provide evidence that the substitution of complex models with simpler counterparts is a feasible strategy.", "keywords": "Temporal Knowledge Graph;Reasoning;Multilayer Perceptron;Historical Frequency", "primary_area": "", "supplementary_material": "", "author": "Zhengtao Liu;Lei Tan;Mengfan Li;Yao Wan;Hai Jin;Xuanhua Shi", "authorids": "~Zhengtao_Liu1;~Lei_Tan5;~Mengfan_Li2;~Yao_Wan2;~Hai_Jin1;~Xuanhua_Shi1", "gender": ";;;M;M;M", "homepage": "https://github.com/Nickyii;https://github.com/Stones-Tan;;http://wanyao.me;http://www.linkedin.com/in/jinhust;https://sites.google.com/view/xhshi/home", "dblp": ";;;167/0275.html;98/4156;85/5317.html", "google_scholar": ";;;c3MtqtMAAAAJ;;https://scholar.google.com/citations?hl=en", "or_profile": "~Zhengtao_Liu1;~Lei_Tan5;~Mengfan_Li2;~Yao_Wan2;~Hai_Jin1;~Xuanhua_Shi1", "aff": "Huazhong University of Science and Technology;Huazhong University of Science and Technology;;Huazhong University of Science and Technology;Huazhong University of Science and Technology;Huazhong University of Science and Technology", "aff_domain": "hust.edu.cn;hust.edu.cn;;hust.edu.cn;hust.edu.cn;hust.edu.cn", "position": "MS student;MS student;;Assistant Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nliu2023simfy,\ntitle={Si{MF}y: A Simple Yet Effective Approach for Temporal Knowledge Graph Reasoning},\nauthor={Zhengtao Liu and Lei Tan and Mengfan Li and Yao Wan and Hai Jin and Xuanhua Shi},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=nTKRAgssvX}\n}", "github": "", "project": "", "reviewers": "AXyf;Tw4C;G2Sv", "site": "https://openreview.net/forum?id=nTKRAgssvX", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;4;5", "excitement": "4;3;4", "reproducibility": "4;4;5", "correctness": "4;3;3", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.333333333333333, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0000-0001-6937-4180;0000-0002-3934-7605;0000-0001-8451-8656", "linkedin": ";;;;jinhust;", "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Huazhong University of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "http://www.hust.edu.cn", "aff_unique_abbr": "HUST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "nWXMv949ZH", "title": "Logic-LM: Empowering Large Language Models with Symbolic Solvers for Faithful Logical Reasoning", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Large Language Models (LLMs) have shown human-like reasoning abilities but still struggle with complex logical problems. This paper introduces a novel framework, Logic-LM, which integrates LLMs with symbolic solvers to improve logical problem-solving. Our method first utilizes LLMs to translate a natural language problem into a symbolic formulation. Afterward, a deterministic symbolic solver performs inference on the formulated problem. We also introduce a self-refinement module, which utilizes the symbolic solver's error messages to revise symbolic formalizations. We demonstrate Logic-LM's effectiveness on five logical reasoning datasets: ProofWriter, PrOntoQA, FOLIO, LogicalDeduction, and AR-LSAT. 
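The Logic-LM record above describes a translate-solve-refine pipeline: an LLM first formalizes the natural-language problem into a symbolic program, a deterministic solver performs inference, and the solver's error messages drive a self-refinement step. The sketch below captures only that control flow; the callables, the `SolverResult` shape, and the retry budget are illustrative assumptions rather than the authors' released implementation.

```python
from dataclasses import dataclass
from typing import Callable

@dataclass
class SolverResult:
    ok: bool
    answer: str = ""
    error_message: str = ""

def translate_solve_refine(
    problem: str,
    formalize: Callable[[str], str],          # LLM call: NL problem -> symbolic program
    solve: Callable[[str], SolverResult],     # deterministic symbolic solver
    refine: Callable[[str, str, str], str],   # LLM call: revise program given solver error
    fallback: Callable[[str], str],           # e.g. plain chain-of-thought prompting
    max_refinements: int = 3,
) -> str:
    """Illustrative control flow in the spirit of the Logic-LM abstract."""
    program = formalize(problem)
    for _ in range(max_refinements):
        result = solve(program)
        if result.ok:
            return result.answer
        # Self-refinement: feed the solver's error message back to the LLM.
        program = refine(problem, program, result.error_message)
    # If the formalization never becomes executable, fall back to direct prompting.
    return fallback(problem)
```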
On average, Logic-LM achieves a significant performance boost of 39.2% over using LLM alone with standard prompting and 18.4% over LLM with chain-of-thought prompting. Our findings suggest that Logic-LM, by combining LLMs with symbolic logic, offers a promising avenue for faithful logical reasoning.", "keywords": "Logical Reasoning;Language Model;Symbolic Language;Self-Refinement", "primary_area": "", "supplementary_material": "", "author": "Liangming Pan;Alon Albalak;Xinyi Wang;William Yang Wang", "authorids": "~Liangming_Pan1;~Alon_Albalak1;~Xinyi_Wang2;~William_Yang_Wang2", "gender": "M;;F;M", "homepage": "https://liangmingpan.bio;https://alon-albalak.github.io/;https://wangxinyilinda.github.io/;https://www.cs.ucsb.edu/~william/", "dblp": "186/9707;283/4427;;08/9282", "google_scholar": "JcjjOTUAAAAJ;F6J_7d8AAAAJ;3vvbplcAAAAJ;gf8Ms_8AAAAJ", "or_profile": "~Liangming_Pan1;~Alon_Albalak1;~Xinyi_Wang2;~William_Wang1", "aff": "University of California, Santa Barbara;University of California, Santa Barbara;Microsoft;UC Santa Barbara", "aff_domain": "ucsb.edu;ucsb.edu;microsoft.com;ucsb.edu", "position": "Postdoc;PhD student;Intern;Full Professor", "bibtex": "@inproceedings{\npan2023logiclm,\ntitle={Logic-{LM}: Empowering Large Language Models with Symbolic Solvers for Faithful Logical Reasoning},\nauthor={Liangming Pan and Alon Albalak and Xinyi Wang and William Yang Wang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=nWXMv949ZH}\n}", "github": "", "project": "", "reviewers": "sDiS;udfF;Yhix;TXB8", "site": "https://openreview.net/forum?id=nWXMv949ZH", "pdf_size": 0, "rating": "4;4;4;4", "confidence": "4;4;4;4", "excitement": "3;4;3;3", "reproducibility": "3;5;4;4", "correctness": "3;4;3;4", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.25, "reproducibility_avg": 4.0, "correctness_avg": 3.5, "replies_avg": 14, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0003-0809-1704;;", "linkedin": ";alonalbalak;xinyi-wang-444385133/;", "aff_unique_index": "0;0;1;0", "aff_unique_norm": "University of California, Santa Barbara;Microsoft", "aff_unique_dep": ";Microsoft Corporation", "aff_unique_url": "https://www.ucsb.edu;https://www.microsoft.com", "aff_unique_abbr": "UCSB;Microsoft", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Santa Barbara;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "nYbOG9EaxD", "title": "A Question Answering Framework for Decontextualizing User-facing Snippets from Scientific Documents", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Many real-world applications (e.g., note taking, search) require extracting a sentence or paragraph from a document and showing that snippet to a human outside of the source document. Yet, users may find snippets difficult to understand as they lack context from the original document. In this work, we use language models to rewrite snippets from scientific documents to be read on their own. First, we define the requirements and challenges for this user-facing decontextualization task, such as clarifying where edits occur and handling references to other documents. Second, we propose a framework that decomposes the task into three stages: question generation, question answering, and rewriting. Using this framework, we collect gold decontextualizations from experienced scientific article readers. 
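The decontextualization record above decomposes the task into question generation, question answering, and rewriting, which is also the shape of the QaDecontext prompting strategy mentioned later in the abstract. A schematic of that three-stage flow is given below; the three callables stand in for prompted models, and the Q/A formatting is an assumption for illustration.

```python
from typing import Callable, List

def decontextualize_snippet(
    snippet: str,
    source_document: str,
    generate_questions: Callable[[str], List[str]],         # stage 1: what must a reader know?
    answer_question: Callable[[str, str], str],              # stage 2: answer each question from the document
    rewrite_with_answers: Callable[[str, List[str]], str],   # stage 3: fold the answers back into the snippet
) -> str:
    """Illustrative three-stage pipeline: question generation -> QA -> rewriting."""
    questions = generate_questions(snippet)
    answers = [answer_question(q, source_document) for q in questions]
    # The rewriter receives the snippet plus question-answer pairs as additional context.
    qa_context = [f"Q: {q}\nA: {a}" for q, a in zip(questions, answers)]
    return rewrite_with_answers(snippet, qa_context)
```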
We then conduct a range of experiments across state-of-the-art commercial and open-source language models to identify how to best provide missing-but-relevant information to models for our task. Finally, we develop QaDecontext, a simple prompting strategy inspired by our framework that improves over end-to-end prompting. We conclude with analysis that finds, while rewriting is easy, question generation and answering remain challenging for today\u2019s models.", "keywords": "decontextualization;snippets;text-simplification", "primary_area": "", "supplementary_material": "", "author": "Benjamin Newman;Luca Soldaini;Raymond Fok;Arman Cohan;Kyle Lo", "authorids": "~Benjamin_Newman1;~Luca_Soldaini1;~Raymond_Fok1;~Arman_Cohan1;~Kyle_Lo1", "gender": ";Non-Binary;M;M;", "homepage": "http://blnewman.com;https://soldaini.net;https://rayfok.github.io/;http://www.armancohan.com;https://kyleclo.github.io/", "dblp": "126/5109;160/1741;;160/1727;220/2020", "google_scholar": "QehvrDoAAAAJ;3KPvwcgAAAAJ;BuE1X6oAAAAJ;https://scholar.google.com/citations?hl=en;VJS12uMAAAAJ", "or_profile": "~Benjamin_Newman1;~Luca_Soldaini1;~Raymond_Fok1;~Arman_Cohan1;~Kyle_Lo1", "aff": "Allen Institute for Artificial Intelligence;Allen Institute for Artificial Intelligence;University of Washington;Allen Institute for Artificial Intelligence;Allen Institute for Artificial Intelligence", "aff_domain": "allenai.org;allenai.org;cs.washington.edu;allenai.org;allenai.org", "position": "Researcher;Researcher;PhD student;Research Scientist;Researcher", "bibtex": "@inproceedings{\nnewman2023a,\ntitle={A Question Answering Framework for Decontextualizing User-facing Snippets from Scientific Documents},\nauthor={Benjamin Newman and Luca Soldaini and Raymond Fok and Arman Cohan and Kyle Lo},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=nYbOG9EaxD}\n}", "github": "", "project": "", "reviewers": "BSQ8;uGwc;ELeZ", "site": "https://openreview.net/forum?id=nYbOG9EaxD", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;4;3", "excitement": "3;4;5", "reproducibility": "3;4;4", "correctness": "3;4;5", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 4.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0001-6998-9863;0000-0002-0612-5782;;", "linkedin": ";soldni/;;;kylelo/", "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "Allen Institute for Artificial Intelligence;University of Washington", "aff_unique_dep": ";", "aff_unique_url": "https://allenai.org;https://www.washington.edu", "aff_unique_abbr": "AI2;UW", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "nYgu408UIo", "title": "Why LLMs Hallucinate, and How to Get (Evidential) Closure: Perceptual, Intensional, and Extensional Learning for Faithful Natural Language Generation", "track": "main", "status": "Long Main", "tldr": "", "abstract": "We show that LLMs hallucinate because their output is not constrained to be synonymous with claims for which they have evidence: a condition that we call evidential closure. Information about the truth or falsity of sentences is not statistically identified in the standard neural language generation setup, and so cannot be conditioned on to generate new strings. 
We then show how to constrain LLMs to produce output that satisfies evidential closure. A multimodal LLM must learn about the external world (perceptual learning); it must learn a mapping from strings to states of the world (extensional learning); and, to achieve fluency when generalizing beyond a body of evidence, it must learn mappings from strings to their synonyms (intensional learning). The output of a unimodal LLM must be synonymous with strings in a validated evidence set. Finally, we present a heuristic procedure, Learn-Babble-Prune, that yields faithful output from an LLM by rejecting output that is not synonymous with claims for which the LLM has evidence.", "keywords": "Truth;Semantics;Meaning;Hallucination;Identification;Natural Language Generation", "primary_area": "", "supplementary_material": "", "author": "Adam Bouyamourn", "authorids": "~Adam_Bouyamourn1", "gender": "M", "homepage": "https://abouyamourn.github.io", "dblp": "", "google_scholar": "", "or_profile": "~Adam_Bouyamourn1", "aff": "University of California, Berkeley", "aff_domain": "berkeley.edu", "position": "PhD student", "bibtex": "@inproceedings{\nbouyamourn2023why,\ntitle={Why {LLM}s Hallucinate, and How to Get (Evidential) Closure: Perceptual, Intensional, and Extensional Learning for Faithful Natural Language Generation},\nauthor={Adam Bouyamourn},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=nYgu408UIo}\n}", "github": "", "project": "", "reviewers": "gLhZ;Leq6;L6mo", "site": "https://openreview.net/forum?id=nYgu408UIo", "pdf_size": 0, "rating": "5;5;5", "confidence": "1;3;2", "excitement": "4;4;4", "reproducibility": "0;0;3", "correctness": "4;4;3", "rating_avg": 5.0, "confidence_avg": 2.0, "excitement_avg": 4.0, "reproducibility_avg": 1.0, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 1, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "", "linkedin": "", "aff_unique_index": "0", "aff_unique_norm": "University of California, Berkeley", "aff_unique_dep": "", "aff_unique_url": "https://www.berkeley.edu", "aff_unique_abbr": "UC Berkeley", "aff_campus_unique_index": "0", "aff_campus_unique": "Berkeley", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "id": "neRWI1hWyO", "title": "Allies: Prompting Large Language Model with Beam Search", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "With the advance of large language models (LLMs), the research field of LLM applications has become increasingly popular, and the idea of constructing pipelines to accomplish complex tasks by stacking LLM API calls has come true.\nHowever, such methods face two limitations: narrow information coverage and low fault tolerance.\nIn this work, we propose a novel method called ALLIES.\nGiven an input query, ALLIES leverages LLMs to iteratively generate new queries related to the original query, enabling an iterative reasoning process.\nBy iteratively refining and expanding the scope of the original query, ALLIES captures and utilizes hidden knowledge that may not be directly obtainable through retrieval.\nWe take zero-shot open-domain question answering (ODQA) as an application scenario and evaluate ALLIES on widely-used benchmarks such as NQ, WebQ and TriviaQA.\nThe experimental results demonstrate that ALLIES significantly outperforms other zero-shot baselines, indicating its effectiveness in tackling those challenges.\nOur code is
available in https://github.com/microsoft/SimXNS/tree/main/ALLIES.", "keywords": "Large Language Model;Question Answering;Beam Search", "primary_area": "", "supplementary_material": "", "author": "Hao Sun;Xiao Liu;Yeyun Gong;Yan Zhang;Daxin Jiang;Linjun Yang;Nan Duan", "authorids": "~Hao_Sun9;~Xiao_Liu14;~Yeyun_Gong2;~Yan_Zhang14;~Daxin_Jiang2;~Linjun_Yang2;~Nan_Duan1", "gender": "M;M;M;M;M;M;M", "homepage": "https://sunhaopku.github.io/;https://xiaoliunlc.github.io/;;https://sai.pku.edu.cn/szdw/zzjs/zy.htm;https://www.microsoft.com/en-us/research/people/djiang/;;https://nanduan.github.io/", "dblp": "82/2248-15;82/1364-29;06/10400.html;04/3348-4;77/5094;65/5947;", "google_scholar": "at9AB50AAAAJ;https://scholar.google.com.sg/citations?user=cn1k7gYAAAAJ;piUkwMYAAAAJ;K8AfHKkAAAAJ;N-wAHCoAAAAJ;;Qaa6OxIAAAAJ", "or_profile": "~Hao_Sun9;~Xiao_Liu14;~Yeyun_Gong2;~Yan_Zhang14;~Daxin_Jiang2;~Linjun_Yang2;~Nan_Duan1", "aff": "Peking University;Microsoft Research Asia;Microsoft;Peking University;Microsoft;;Microsoft Research Asia", "aff_domain": "pku.edu.cn;microsoft.com;microsoft.com;pku.edu.cn;microsoft.com;;microsoft.com", "position": "PhD student;Researcher;Researcher;Full Professor;Researcher/Scientist;;Principal Researcher", "bibtex": "@inproceedings{\nsun2023allies,\ntitle={Allies: Prompting Large Language Model with Beam Search},\nauthor={Hao Sun and Xiao Liu and Yeyun Gong and Yan Zhang and Daxin Jiang and Linjun Yang and Nan Duan},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=neRWI1hWyO}\n}", "github": "", "project": "", "reviewers": "7GwV;KsFN;rUD4;2w8K", "site": "https://openreview.net/forum?id=neRWI1hWyO", "pdf_size": 0, "rating": "3;3;3;3", "confidence": "4;4;4;4", "excitement": "3;4;3;3", "reproducibility": "4;3;4;4", "correctness": "4;3;3;3", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 3.25, "reproducibility_avg": 3.75, "correctness_avg": 3.25, "replies_avg": 13, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0001-8456-7925;0000-0002-8893-366X;;;;;", "linkedin": ";xiao-liu-71357b72/;;;;;", "aff_unique_index": "0;1;1;0;1;1", "aff_unique_norm": "Peking University;Microsoft", "aff_unique_dep": ";Research", "aff_unique_url": "http://www.pku.edu.cn;https://www.microsoft.com/en-us/research/group/asia", "aff_unique_abbr": "Peking U;MSR Asia", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Asia", "aff_country_unique_index": "0;0;1;0;1;0", "aff_country_unique": "China;United States" }, { "id": "newk6aDMRi", "title": "Teacher Perception of Automatically Extracted Grammar Concepts for L2 Language Learning", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "One of the challenges in language teaching is how best to organize rules regarding syntax, semantics, or phonology in a meaningful manner.\nThis not only requires content creators to have pedagogical skills, but also have that language's deep understanding.\nWhile comprehensive materials to develop such curricula are available in English and some broadly spoken languages, for many other languages, teachers need to manually create them in response to their students' needs.\nThis is challenging because i) it requires that such experts be accessible and have the necessary resources, and ii) describing all the intricacies of a language is time-consuming and prone to omission.\nIn this work, we aim to facilitate this process by automatically discovering and 
visualizing grammar descriptions.\nWe extract descriptions from a natural text corpus that answer questions about morphosyntax (learning of word order, agreement, case marking, or word formation) and semantics (learning of vocabulary).\nWe apply this method for teaching two Indian languages, Kannada and Marathi, which, unlike English, do not have well-developed resources for second language learning.\nTo assess the perceived utility of the extracted material, we enlist the help of language educators from schools in North America to perform a manual evaluation, who find the materials have potential to be used for their lesson preparation and learner evaluation.", "keywords": "NLP for pedagogy;second language learning;low-resource languages", "primary_area": "", "supplementary_material": "", "author": "Aditi Chaudhary;Arun Sampath;Ashwin Sheshadri;Antonios Anastasopoulos;Graham Neubig", "authorids": "~Aditi_Chaudhary1;~Arun_Sampath1;~Ashwin_Sheshadri1;~Antonios_Anastasopoulos1;~Graham_Neubig1", "gender": ";M;M;M;M", "homepage": ";;https://www.kannadaacademy.com;http://www.cs.gmu.edu/~antonis/;http://phontron.com", "dblp": "225/7684;;;148/9479;03/8155", "google_scholar": "iNuUxiwAAAAJ;;;g_G_SNAAAAAJ;wlosgkoAAAAJ", "or_profile": "~Aditi_Chaudhary1;~Arun_Sampath1;~Ashwin_Sheshadri1;~Antonios_Anastasopoulos1;~Graham_Neubig1", "aff": "Google;;kannadaacademy.com;George Mason University;Carnegie Mellon University", "aff_domain": "google.com;;kannadaacademy.com;gmu.edu;cmu.edu", "position": "Researcher;;Researcher;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nchaudhary2023teacher,\ntitle={Teacher Perception of Automatically Extracted Grammar Concepts for L2 Language Learning},\nauthor={Aditi Chaudhary and Arun Sampath and Ashwin Sheshadri and Antonios Anastasopoulos and Graham Neubig},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=newk6aDMRi}\n}", "github": "", "project": "", "reviewers": "61GB;r9y4;iWaN", "site": "https://openreview.net/forum?id=newk6aDMRi", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;4;3", "excitement": "4;4;3", "reproducibility": "3;4;4", "correctness": "3;4;3", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0000-0002-8544-246X;", "linkedin": ";arun-gopalpura-sampath-0008563/;ashwin-sheshadri-57b7625;;", "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Google;Kannada Academy;George Mason University;Carnegie Mellon University", "aff_unique_dep": "Google;;;", "aff_unique_url": "https://www.google.com;http://www.kannadaacademy.com;https://www.gmu.edu;https://www.cmu.edu", "aff_unique_abbr": "Google;;GMU;CMU", "aff_campus_unique_index": "0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "United States;India" }, { "id": "nmSvzxwfRZ", "title": "FinePrompt: Unveiling the Role of Finetuned Inductive Bias on Compositional Reasoning in GPT-4", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "Compositional reasoning across texts has been a long-standing challenge in natural language processing. 
With large language models like GPT-4 taking over the field, prompting techniques such as chain-of-thought (CoT) were proposed to unlock compositional, multi-step reasoning capabilities of LLMs. Despite their success, the prompts demand significant human effort to discover and validate them. Our work draws attention to the idea of transferring task-specific inductive biases from finetuned models to prompts, as a way of improving GPT-4's compositional reasoning capabilities. To leverage these inductive biases, we formulate prompt templates to ease the transfer of inductive biases. The experimental results on multi-hop question answering and numerical reasoning over text show that our proposed prompt scheme shows competitive zero-shot and few-shot performances compared to existing prompts on complicated reasoning tasks, highlighting the importance of adopting the validated biases of the previous paradigm.", "keywords": "Large Language Model;Prompt Learning;Fine-tuning;Compositional Reasoning;Question Answering", "primary_area": "", "supplementary_material": "", "author": "Jeonghwan Kim;Giwon Hong;Sung-Hyon Myaeng;Joyce Jiyoung Whang", "authorids": "~Jeonghwan_Kim2;~Giwon_Hong1;~Sung-Hyon_Myaeng1;~Joyce_Jiyoung_Whang2", "gender": "M;M;M;F", "homepage": "https://wjdghks950.github.io/;https://honggiwon.github.io/;http://ir.kaist.ac.kr/member/professor/;http://bdi-lab.kaist.ac.kr/", "dblp": ";;;121/4230", "google_scholar": "CcnGNN8AAAAJ;uBu5iKIAAAAJ;6pdKebMAAAAJ;TLrKglQAAAAJ", "or_profile": "~Jeonghwan_Kim2;~Giwon_Hong1;~Sung-Hyon_Myaeng1;~Joyce_Jiyoung_Whang2", "aff": "University of Illinois, Urbana Champaign;Korea Advanced Institute of Science & Technology;;KAIST", "aff_domain": "illinois.edu;kaist.ac.kr;;kaist.ac.kr", "position": "PhD student;Researcher;;Assistant Professor", "bibtex": "@inproceedings{\nkim2023fineprompt,\ntitle={FinePrompt: Unveiling the Role of Finetuned Inductive Bias on Compositional Reasoning in {GPT}-4},\nauthor={Jeonghwan Kim and Giwon Hong and Sung-Hyon Myaeng and Joyce Jiyoung Whang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=nmSvzxwfRZ}\n}", "github": "", "project": "", "reviewers": "wpwG;fz2V;cDaD", "site": "https://openreview.net/forum?id=nmSvzxwfRZ", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;4;3", "excitement": "3;3;3", "reproducibility": "3;5;4", "correctness": "2;3;3", "rating_avg": 3.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.0, "reproducibility_avg": 4.0, "correctness_avg": 2.6666666666666665, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0009-0002-7277-2168;;;0000-0002-4773-3194", "linkedin": ";giwon-hong-101337201/;;", "aff_unique_index": "0;1;1", "aff_unique_norm": "University of Illinois Urbana-Champaign;Korea Advanced Institute of Science and Technology", "aff_unique_dep": ";", "aff_unique_url": "https://illinois.edu;https://www.kaist.ac.kr", "aff_unique_abbr": "UIUC;KAIST", "aff_campus_unique_index": "0", "aff_campus_unique": "Urbana-Champaign;", "aff_country_unique_index": "0;1;1", "aff_country_unique": "United States;South Korea" }, { "id": "nmnPI4eNuh", "title": "DeSIQ: Towards an Unbiased, Challenging Benchmark for Social Intelligence Understanding", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Social intelligence is essential for understanding and reasoning about human expressions, intents and interactions. 
\nOne representative benchmark for its study is Social Intelligence Queries (Social-IQ), a dataset of multiple-choice questions on videos of complex social interactions. \nWe define a comprehensive methodology to study the soundness of Social-IQ, as the soundness of such benchmark datasets is crucial to the investigation of the underlying research problem. \nOur analysis reveals that Social-IQ contains substantial biases, which can be exploited by a moderately strong language model to learn spurious correlations to achieve perfect performance without being given the context or even the question. \nWe introduce DeSIQ, a new challenging dataset, constructed by applying simple perturbations to Social-IQ. \nOur empirical analysis shows DeSIQ significantly reduces the biases in the original Social-IQ dataset. \nFurthermore, we examine and shed light on the effect of model size, model style, learning settings, commonsense knowledge, and multi-modality on the new benchmark performance. \nOur new dataset, observations and findings open up important research questions for the study of social intelligence.", "keywords": "Question Answering; Social Intelligence; Multimodal Learning", "primary_area": "", "supplementary_material": "", "author": "Xiao-Yu Guo;Yuan-Fang Li;Gholamreza Haffari", "authorids": "~Xiao-Yu_Guo1;~Yuan-Fang_Li1;~Gholamreza_Haffari2", "gender": ";M;", "homepage": ";https://users.monash.edu.au/~yli/;", "dblp": ";20/2537;", "google_scholar": ";https://scholar.google.com.tw/citations?user=wufXO1kAAAAJ;", "or_profile": "~Xiao-Yu_Guo1;~Yuan-Fang_Li1;~Gholamreza_Haffari2", "aff": ";Monash University;", "aff_domain": ";monash.edu;", "position": ";Associate Professor;", "bibtex": "@inproceedings{\nguo2023desiq,\ntitle={De{SIQ}: Towards an Unbiased, Challenging Benchmark for Social Intelligence Understanding},\nauthor={Xiao-Yu Guo and Yuan-Fang Li and Gholamreza Haffari},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=nmnPI4eNuh}\n}", "github": "", "project": "", "reviewers": "BsAc;tCWT;hZcY", "site": "https://openreview.net/forum?id=nmnPI4eNuh", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;3;3", "excitement": "4;4;4", "reproducibility": "3;3;3", "correctness": "4;4;3", "rating_avg": 4.0, "confidence_avg": 3.0, "excitement_avg": 4.0, "reproducibility_avg": 3.0, "correctness_avg": 3.6666666666666665, "replies_avg": 10, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": ";;", "aff_unique_index": "0", "aff_unique_norm": "Monash University", "aff_unique_dep": "", "aff_unique_url": "https://www.monash.edu", "aff_unique_abbr": "Monash", "aff_country_unique_index": "0", "aff_country_unique": "Australia" }, { "id": "nntsSuRSPb", "title": "TextMixer: Mixing Multiple Inputs for Privacy-Preserving Inference", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Pre-trained language models (PLMs) are often deployed as cloud services, enabling users to upload textual data and perform inference remotely. 
\nHowever, users' personal text often contains sensitive information, and sharing such data directly with the service providers can lead to serious privacy leakage.\nTo address this problem, we introduce a novel privacy-preserving inference framework called \\textbf{\\textit{MixPi}}, which prevents plaintext leakage during the inference phase. \nInspired by $k$-anonymity, MixPi aims to obfuscate a user's private input by mixing it with multiple other inputs, thereby confounding potential privacy attackers. \nTo achieve this, our approach involves: (1) proposing a novel encryption module, Privacy Mixer, which encrypts input from three distinct dimensions: mixing, representation, and position. \n(2) adopting a pre-trained Multi-input Multi-output network to handle mixed representations and obtain multiple predictions. \n(3) employing a Privacy Demixer to ensure only the user can decrypt the real output among the multiple predictions.\nFurthermore, we explore different ways to automatically generate synthetic inputs required for mixing.\nExperimental results on token and sentence classification tasks demonstrate that MixPi greatly surpasses existing privacy-preserving methods in both performance and privacy.", "keywords": "Privacy-preserving Inference;Multi-input Multi-output network", "primary_area": "", "supplementary_material": "", "author": "Xin Zhou;Yi Lu;Ruotian Ma;Tao Gui;Qi Zhang;Xuanjing Huang", "authorids": "~Xin_Zhou6;~Yi_Lu7;~Ruotian_Ma1;~Tao_Gui1;~Qi_Zhang8;~Xuanjing_Huang1", "gender": ";;F;M;M;F", "homepage": ";;;;http://qizhang.info;https://xuanjing-huang.github.io/", "dblp": "05/3403-12;;246/3164;135/6973;52/323-1;05/6735-1", "google_scholar": "8AWfEb0AAAAJ;;lD66qJYAAAAJ;;XfqR3yYAAAAJ;RGsMgZA4H78C", "or_profile": "~Xin_Zhou6;~Yi_Lu7;~Ruotian_Ma1;~Tao_Gui1;~Qi_Zhang8;~Xuanjing_Huang1", "aff": "Fudan University;;Fudan University;Fudan University;Fudan University;Fudan University", "aff_domain": "fudan.edu.cn;;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn", "position": "PhD student;;PhD student;Assistant Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nzhou2023textmixer,\ntitle={TextMixer: Mixing Multiple Inputs for Privacy-Preserving Inference},\nauthor={Xin Zhou and Yi Lu and Ruotian Ma and Tao Gui and Qi Zhang and Xuanjing Huang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=nntsSuRSPb}\n}", "github": "", "project": "", "reviewers": "qUde;ekHf;hBPv", "site": "https://openreview.net/forum?id=nntsSuRSPb", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;2;4", "excitement": "3;4;2", "reproducibility": "3;3;3", "correctness": "4;4;1", "rating_avg": 4.0, "confidence_avg": 3.0, "excitement_avg": 3.0, "reproducibility_avg": 3.0, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;0000-0001-9197-9426", "linkedin": ";;;;;", "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Fudan University", "aff_unique_dep": "", "aff_unique_url": "https://www.fudan.edu.cn", "aff_unique_abbr": "Fudan", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "noEKNSB8Zq", "title": "\u201cKelly is a Warm Person, Joseph is a Role Model\u201d: Gender Biases in LLM-Generated Reference Letters", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Large Language Models (LLMs) have recently emerged as an 
effective tool to assist individuals in writing various types of content, including professional documents such as recommendation letters. Though bringing convenience, this application also introduces unprecedented fairness concerns. Model-generated reference letters might be directly used by users in professional scenarios. If underlying biases exist in these model-constructed letters, using them without scrutinization could lead to direct societal harms, such as sabotaging application success rates for female applicants. In light of this pressing issue, it is imminent and necessary to comprehensively study fairness issues and associated harms in this real-world use case. In this paper, we critically examine gender biases in LLM-generated reference letters. Drawing inspiration from social science findings, we design evaluation methods to manifest biases through 2 dimensions: (1) biases in language style and (2) biases in lexical content. We further investigate the extent of bias propagation by analyzing the hallucination bias of models, a term that we define to be bias exacerbation in model-hallucinated contents. Through benchmarking evaluation on 2 popular LLMs- ChatGPT and Alpaca, we reveal significant gender biases in LLM-generated recommendation letters. Our findings not only warn against using LLMs for this application without scrutinization, but also illuminate the importance of thoroughly studying hidden biases and harms in LLM-generated professional documents.", "keywords": "fairness;reference letter generation;LLMs", "primary_area": "", "supplementary_material": "", "author": "Yixin Wan;George Pu;Jiao Sun;Aparna Garimella;Kai-Wei Chang;Nanyun Peng", "authorids": "~Yixin_Wan1;~George_Pu2;~Jiao_Sun1;~Aparna_Garimella1;~Kai-Wei_Chang1;~Nanyun_Peng1", "gender": "F;M;;F;M;F", "homepage": "https://scholar.google.com/citations?user=hZPIICQAAAAJ&hl=en;https://georgepu1.github.io/;https://sunjiao123sun.github.io/;https://research.adobe.com/person/aparna-garimella/;http://kwchang.net;https://violetpeng.github.io/", "dblp": "320/5376;;;183/5034.html;18/2428;117/4036", "google_scholar": "hZPIICQAAAAJ;;;Q4PJyXIAAAAJ;fqDBtzYAAAAJ;XxRXvX0AAAAJ", "or_profile": "~Yixin_Wan1;~George_Pu2;~Jiao_Sun1;~Aparna_Garimella1;~Kai-Wei_Chang1;~Nanyun_Peng1", "aff": "University of California, Los Angeles;University of California, Los Angeles;University of Southern California;Adobe Research;Amazon;University of California, Los Angeles", "aff_domain": "ucla.edu;ucla.edu;usc.edu;adobe.com;amazon.com;ucla.edu", "position": "PhD student;MS student;PhD student;Researcher;Researcher;Assistant Professor", "bibtex": "@inproceedings{\nwan2023kelly,\ntitle={{\\textquotedblleft}Kelly is a Warm Person, Joseph is a Role Model{\\textquotedblright}: Gender Biases in {LLM}-Generated Reference Letters},\nauthor={Yixin Wan and George Pu and Jiao Sun and Aparna Garimella and Kai-Wei Chang and Nanyun Peng},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=noEKNSB8Zq}\n}", "github": "", "project": "", "reviewers": "rSCW;kHWV;yJSC", "site": "https://openreview.net/forum?id=noEKNSB8Zq", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;3;4", "excitement": "3;3;4", "reproducibility": "4;3;2", "correctness": "3;4;4", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.0, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, 
"corr_rating_correctness": 0.0, "orcid": ";;;;0000-0001-5365-0072;", "linkedin": "elaine-yixin-wan-8032b8136/;https://linkedin.com/in/georgenpu;;aparna-garimella-639738110/;kai-wei-chang-41239040;", "aff_unique_index": "0;0;1;2;3;0", "aff_unique_norm": "University of California, Los Angeles;University of Southern California;Adobe;Amazon", "aff_unique_dep": ";;Adobe Research;Amazon.com, Inc.", "aff_unique_url": "https://www.ucla.edu;https://www.usc.edu;https://research.adobe.com;https://www.amazon.com", "aff_unique_abbr": "UCLA;USC;Adobe;Amazon", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Los Angeles;", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "noIvPGG8P1", "title": "Search Augmented Instruction Learning", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Large language models (LLMs) have been significantly improved by instruction fine-tuning, but still lack transparency and the ability to utilize up-to-date knowledge and information. In this work, we propose search-augmented instruction learning (SAIL), which grounds the language generation and instruction following abilities on complex search results generated by in-house and external search engines. With an instruction tuning corpus, we collect search results for each training case from different search APIs and domains, and construct a new search-grounded training set containing (instruction, grounding information, response) triplets. We then fine-tune the LLaMA-7B model on the constructed training set. Since the collected results contain unrelated and disputing languages, the model needs to learn to ground on trustworthy search results, filter out distracting passages, and generate the target response. The search result-denoising process entails explicit trustworthy information selection and multi-hop reasoning, since the retrieved passages might be informative but not contain the instruction-following answer. Experiments show that the fine-tuned SAIL-7B model has a strong instruction-following ability, and it performs significantly better on transparency-sensitive tasks, including open-ended question answering and fact checking.", "keywords": "large language model;instruction tuning;question answering", "primary_area": "", "supplementary_material": "", "author": "Hongyin Luo;Tianhua Zhang;Yung-Sung Chuang;Yuan Gong;Yoon Kim;Xixin Wu;Helen M. Meng;James R. 
Glass", "authorids": "~Hongyin_Luo1;~Tianhua_Zhang2;~Yung-Sung_Chuang1;~Yuan_Gong3;~Yoon_Kim1;~Xixin_Wu1;~Helen_M._Meng1;~James_R._Glass1", "gender": "M;F;M;M;;;F;", "homepage": ";;https://people.csail.mit.edu/yungsung/;;https://people.csail.mit.edu/yoonkim/;https://www1.se.cuhk.edu.hk/~wuxx/;http://www.se.cuhk.edu.hk/people/academic-staff/prof-meng-mei-ling-helen/;", "dblp": "147/4317;01/8403;64/3095;;;125/2836;92/3270;", "google_scholar": ";https://scholar.google.com.hk/citations?user=dEfp5vQAAAAJ;3ar1DOwAAAAJ;MuhvvOkAAAAJ;n_ts4eYAAAAJ;;;", "or_profile": "~Hongyin_Luo1;~Tianhua_Zhang2;~Yung-Sung_Chuang1;~Yuan_Gong3;~Yoon_Kim1;~Xixin_Wu1;~Helen_M._Meng1;~James_R._Glass1", "aff": "Massachusetts Institute of Technology;Chinese University of Hong Kong, The Chinese University of Hong Kong;Massachusetts Institute of Technology;Massachusetts Institute of Technology;Massachusetts Institute of Technology;The Chinese University of Hong Kong;The Chinese University of Hong Kong;", "aff_domain": "mit.edu;se.cuhk.edu.hk;mit.edu;mit.edu;mit.edu;cuhk.edu.hk;cuhk.edu.hk;", "position": "Postdoc;PhD student;PhD student;Postdoc;Assistant Professor;Research Assistant Professor;Full Professor;", "bibtex": "@inproceedings{\nluo2023search,\ntitle={Search Augmented Instruction Learning},\nauthor={Hongyin Luo and Tianhua Zhang and Yung-Sung Chuang and Yuan Gong and Yoon Kim and Xixin Wu and Helen M. Meng and James R. Glass},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=noIvPGG8P1}\n}", "github": "", "project": "", "reviewers": "ZXCP;o9nE;LADc", "site": "https://openreview.net/forum?id=noIvPGG8P1", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;4", "excitement": "4;4;2", "reproducibility": "3;4;4", "correctness": "4;3;3", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 9, "authors#_avg": 8, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0002-1723-5063;;;;;", "linkedin": ";;yschuang;;;;;", "aff_unique_index": "0;1;0;0;0;1;1", "aff_unique_norm": "Massachusetts Institute of Technology;Chinese University of Hong Kong", "aff_unique_dep": ";", "aff_unique_url": "https://web.mit.edu;https://www.cuhk.edu.hk", "aff_unique_abbr": "MIT;CUHK", "aff_campus_unique_index": "1;1;1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;1;0;0;0;1;1", "aff_country_unique": "United States;China" }, { "id": "noPuQXVx8Y", "title": "Exploring the Sensitivity of LLMs' Decision-Making Capabilities: Insights from Prompt Variations and Hyperparameters", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "The advancement of Large Language Models (LLMs) has led to their widespread use across a broad spectrum of tasks, including decision-making. Prior studies have compared the decision-making abilities of LLMs with those of humans from a psychological perspective. However, these studies have not always properly accounted for the sensitivity of LLMs\u2019 behavior to hyperparameters and variations in the prompt. In this study, we examine LLMs\u2019 performance on the Horizon decision-making task studied by Binz and Schulz (2023), analyzing how LLMs respond to variations in prompts and hyperparameters. 
By experimenting on three OpenAI language models possessing different capabilities, we observe that the decision-making abilities fluctuate based on the input prompts and temperature settings. Contrary to previous findings, language models display a human-like exploration\u2013exploitation tradeoff after simple adjustments to the prompt.", "keywords": "Language Models;Decision-Making;Cognitive Psychology;Chain of Thought", "primary_area": "", "supplementary_material": "", "author": "Manikanta Loya;Divya Anand Sinha;Richard Futrell", "authorids": "~Manikanta_Loya1;~Divya_Anand_Sinha1;~Richard_Futrell2", "gender": "M;M;Not Specified", "homepage": "https://manikanta-72.github.io/;;http://socsci.uci.edu/~rfutrell", "dblp": ";;169/3172", "google_scholar": "0i8jhtwAAAAJ;;BzI4ynUAAAAJ", "or_profile": "~Manikanta_Loya1;~Divya_Anand_Sinha1;~Richard_Futrell2", "aff": "University of California, Irvine;Donald Bren School of Information and Computer Sciences, University of California, Irvine;University of California, Irvine", "aff_domain": "uci.edu;ics.uci.edu;uci.edu", "position": "MS student;MS student;Associate Professor", "bibtex": "@inproceedings{\nloya2023exploring,\ntitle={Exploring the Sensitivity of {LLM}s' Decision-Making Capabilities: Insights from Prompt Variations and Hyperparameters},\nauthor={Manikanta Loya and Divya Anand Sinha and Richard Futrell},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=noPuQXVx8Y}\n}", "github": "", "project": "", "reviewers": "nAFL;AUjq;57ap;zACN", "site": "https://openreview.net/forum?id=noPuQXVx8Y", "pdf_size": 0, "rating": "3;3;3;3", "confidence": "3;5;2;4", "excitement": "4;4;3;2", "reproducibility": "3;3;4;5", "correctness": "4;4;2;2", "rating_avg": 3.0, "confidence_avg": 3.5, "excitement_avg": 3.25, "reproducibility_avg": 3.75, "correctness_avg": 3.0, "replies_avg": 14, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-0792-0948;;", "linkedin": "manikantaloya/;divya-anand-sinha-837024112/;", "aff_unique_index": "0;0;0", "aff_unique_norm": "University of California, Irvine", "aff_unique_dep": "", "aff_unique_url": "https://www.uci.edu", "aff_unique_abbr": "UCI", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Irvine", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "noUf45O1PX", "title": "BanLemma: A Word Formation Dependent Rule and Dictionary Based Bangla Lemmatizer", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Lemmatization holds significance in both natural language processing (NLP) and linguistics, as it effectively decreases data density and aids in comprehending contextual meaning. However, due to the highly inflected nature and morphological richness, lemmatization in Bangla text poses a complex challenge. In this study, we propose linguistic rules for lemmatization and utilize a dictionary along with the rules to design a lemmatizer specifically for Bangla. Our system aims to lemmatize words based on their parts of speech class within a given sentence. Unlike previous rule-based approaches, we analyzed the suffix marker occurrence according to the morpho-syntactic values and then utilized sequences of suffix markers instead of entire suffixes. To develop our rules, we analyze a large corpus of Bangla text from various domains, sources, and time periods to observe the word formation of inflected words. 
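The BanLemma record above describes stripping sequences of POS-specific suffix markers, validating each cut against a dictionary, rather than removing whole suffixes in one step. The toy function below shows that general mechanism on made-up, romanized markers; the real rules operate on Bangla script and come from the corpus analysis described in the abstract.

```python
from typing import Dict, List, Set

# Made-up, romanized suffix markers keyed by POS class (illustrative only).
SUFFIX_MARKERS: Dict[str, List[str]] = {
    "NOUN": ["ta", "ra", "ke", "er"],
    "VERB": ["chhi", "chhe", "lam", "be"],
}

def lemmatize(word: str, pos: str, dictionary: Set[str]) -> str:
    """Iteratively strip POS-specific suffix markers, accepting a cut only if the
    remaining string is a dictionary entry (rule plus dictionary validation)."""
    candidate = word
    changed = True
    while changed:
        changed = False
        for marker in SUFFIX_MARKERS.get(pos, []):
            stem = candidate[: -len(marker)]
            if candidate.endswith(marker) and stem in dictionary:
                candidate = stem
                changed = True
                break
    return candidate

# Example: lemmatize("chhelera", "NOUN", {"chhele"}) -> "chhele"
```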
The lemmatizer achieves an accuracy of 96.36% when tested against a manually annotated test dataset by trained linguists and demonstrates competitive performance on three previously published Bangla lemmatization datasets. We are making the code and datasets publicly available at https://github.com/eblict-gigatech/BanLemma in order to contribute to the further advancement of Bangla NLP.", "keywords": "Computational Linguistics;Morphology;Bangla NLP;Lemmatization", "primary_area": "", "supplementary_material": "", "author": "Sadia Afrin;Md. Shahad Mahmud Chowdhury;Md. Ekramul Islam;Faisal Ahamed Khan;Labib Imam Chowdhury;Md. Motahar Mahtab;Nazifa Nuha Chowdhury;Massud Forkan;Neelima Kundu;Hakim Arif;Mohammad Mamun Or Rashid;Mohammad Ruhul Amin;Nabeel Mohammed", "authorids": "~Sadia_Afrin1;~Md._Shahad_Mahmud_Chowdhury1;~Md._Ekramul_Islam1;~Faisal_Ahamed_Khan1;~Labib_Imam_Chowdhury1;~Md._Motahar_Mahtab3;~Nazifa_Nuha_Chowdhury1;~Massud_Forkan2;~Neelima_Kundu1;~Hakim_Arif1;~Mohammad_Mamun_Or_Rashid1;~Mohammad_Ruhul_Amin1;~Nabeel_Mohammed1", "gender": "F;M;M;M;M;M;F;M;M;M;M;M;F", "homepage": ";https://shahadmahmud.com;;;http://labibchowdhury.com;;;https://www.du.ac.bd/faculty/faculty_details/COMD/256;;https://ruhulsbu.github.io;http://ece.northsouth.edu/people/dr-nabeel-mohammed/;https://mdmotaharmahtab.github.io/;", "dblp": "167/8401.html;;;319/4243;;360/0865.html;360/1123;360/0394.html;180/5786;193/0290.html;127/2798;330/3543;360/0960", "google_scholar": ";https://scholar.google.com/citations?hl=en;TmiwxJwAAAAJ;jAk1N_YAAAAJ;wV_lM9wAAAAJ;https://scholar.google.com/citations?view_op=new_articles;giyM9YAAAAAJ;-9Pa2e4AAAAJ;jvrb40cAAAAJ;N_yWGjIAAAAJ;https://scholar.google.com.au/citations?hl=en;https://scholar.google.com/citations?hl=en;", "or_profile": "~Sadia_Afrin1;~Md._Shahad_Mahmud_Chowdhury1;~Md._Ekramul_Islam1;~Faisal_Ahamed_Khan1;~Labib_Imam_Chowdhury1;~Massud_Forkan2;~Neelima_Kundu1;~Hakim_Arif1;~Mohammad_Mamun_Or_Rashid1;~Mohammad_Ruhul_Amin1;~Nabeel_Mohammed1;~MD._Motahar_Mahtab2;~Nazifa_Nuha_Chowdhury2", "aff": "Giga Tech Limited;Giga Tech Limited;Giga Tech Limited.;Giga Tech Limited;Giga Tech Limited;Giga Tech Limited;gigatech ltd;University of Dhaka;Jahangirnagar University;Fordham University;North South University;GIGATECH, BEXIMCO ;University of Dhaka", "aff_domain": "gigatechltd.com;gigatechltd.com;gigatechltd.com;gigatechltd.com;gigatechltd.com;gigatechltd.com;gigatechltd.com;du.ac.bd;juniv.edu;fordham.edu;northsouth.edu;gigatechltd.com;du.ac.bd", "position": "Researcher;Researcher;Junior Machine Learning Engineeer;Head of R&D;Machine Learning Engineer;Associate Linguist;Linguist ;Full Professor;Assistant Professor;Assistant Professor;Associate Professor;Machine Learning Engineer;Researcher", "bibtex": "@inproceedings{\nafrin2023banlemma,\ntitle={BanLemma: A Word Formation Dependent Rule and Dictionary Based Bangla Lemmatizer},\nauthor={Sadia Afrin and Md. Shahad Mahmud Chowdhury and Md. Ekramul Islam and Faisal Ahamed Khan and Labib Imam Chowdhury and Md. 
Motahar Mahtab and Nazifa Nuha Chowdhury and Massud Forkan and Neelima Kundu and Hakim Arif and Mohammad Mamun Or Rashid and Mohammad Ruhul Amin and Nabeel Mohammed},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=noUf45O1PX}\n}", "github": "", "project": "", "reviewers": "ATsq;N1LB;5hHh", "site": "https://openreview.net/forum?id=noUf45O1PX", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;3;4", "excitement": "3;4;4", "reproducibility": "2;3;4", "correctness": "2;3;4", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.0, "correctness_avg": 3.0, "replies_avg": 10, "authors#_avg": 13, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0009-0007-6406-4619;0000-0001-8076-2803;0000-0001-8494-1809;0000-0001-7377-1226;;;;;0000-0001-6517-2039;0000-0001-6540-3415;0000-0002-7661-3570;0000-0003-1075-1783;", "linkedin": "sadia-afrin92882/;shahad-mahmud/;ekramul-islam-b824a1176/;;;massud-forkan-854b2b19a;;arif-hakim-86929683/?originalSubdomain=bd;;shajibsust/;;motahar-mahtab/;", "aff_unique_index": "0;0;0;0;0;0;1;2;3;4;5;6;2", "aff_unique_norm": "Giga Tech Limited;Gigatech Limited;University of Dhaka;Jahangirnagar University;Fordham University;North South University;GIGATECH", "aff_unique_dep": ";;;;;;", "aff_unique_url": ";;https://www.du.ac.bd;http://www.ju.ac.bd;https://www.fordham.edu;https://www.northsouth.edu/;", "aff_unique_abbr": ";;DU;JU;Fordham;NSU;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "1;1;2;1;1;1", "aff_country_unique": ";Bangladesh;United States" }, { "id": "nsupkM0ppH", "title": "Watermarking PLMs on Classification Tasks by Combining Contrastive Learning with Weight Perturbation", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Large pre-trained language models (PLMs) have achieved remarkable success, making them highly valuable intellectual property due to their expensive training costs. Consequently, model watermarking, a method developed to protect the intellectual property of neural models, has emerged as a crucial yet underexplored technique. \nThe problem of watermarking PLMs has remained unsolved since the parameters of PLMs will be updated when fine-tuned on downstream datasets, and then embedded watermarks could be removed easily due to the catastrophic forgetting phenomenon. \nThis study investigates the feasibility of watermarking PLMs by embedding backdoors that can be triggered by specific inputs. \n\nWe employ contrastive learning during the watermarking phase, allowing the representations of specific inputs to be isolated from others and mapped to a particular label after fine-tuning. 
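The watermarking record above isolates the representations of trigger-bearing inputs with a contrastive objective so that the backdoor survives downstream fine-tuning. A generic supervised-contrastive-style loss of the kind sketched below could serve that purpose; the temperature `tau` and the trigger/clean labelling scheme are assumptions, and the paper's weight-perturbation step is not shown.

```python
import torch
import torch.nn.functional as F

def trigger_contrastive_loss(reps: torch.Tensor, is_trigger: torch.Tensor, tau: float = 0.1) -> torch.Tensor:
    """Supervised-contrastive-style loss: inputs sharing the same trigger/clean label are
    treated as positives for each other (illustrative, not the paper's exact objective)."""
    reps = F.normalize(reps, dim=-1)                      # (batch, dim) unit-norm representations
    sim = reps @ reps.t() / tau                           # pairwise similarities scaled by temperature
    self_mask = torch.eye(len(reps), dtype=torch.bool, device=reps.device)
    sim = sim.masked_fill(self_mask, float("-inf"))       # exclude self-pairs
    log_prob = sim - torch.logsumexp(sim, dim=1, keepdim=True)
    positives = (is_trigger.unsqueeze(0) == is_trigger.unsqueeze(1)) & ~self_mask
    pos_counts = positives.sum(dim=1).clamp(min=1)
    per_anchor = -(log_prob.masked_fill(~positives, 0.0).sum(dim=1) / pos_counts)
    return per_anchor[positives.sum(dim=1) > 0].mean()
```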
Moreover, we demonstrate that by combining weight perturbation with the proposed method, watermarks can be embedded in a flatter region of the loss landscape, thereby increasing their robustness to watermark removal.\n\nExtensive experiments on multiple datasets demonstrate that the embedded watermarks can be robustly extracted without any knowledge about downstream tasks, and with a high success rate.", "keywords": "PLM;Warermarking;backdoor;contrastive learning;weight perturbation", "primary_area": "", "supplementary_material": "", "author": "Chenxi Gu;Xiaoqing Zheng;Jianhan Xu;Muling Wu;Cenyuan Zhang;Chengsong Huang;Hua Cai;Xuanjing Huang", "authorids": "~Chenxi_Gu2;~Xiaoqing_Zheng2;~Jianhan_Xu1;~Muling_Wu1;~Cenyuan_Zhang1;~Chengsong_Huang1;~Hua_Cai1;~Xuanjing_Huang1", "gender": ";;M;;;M;M;F", "homepage": ";;;;;https://chengsong-huang.github.io/;https://www.linkedin.com/in/hua-cai-064103b6/;https://xuanjing-huang.github.io/", "dblp": ";;278/1558.html;358/8927;293/9880;211/1188;;05/6735-1", "google_scholar": ";;G_p-oocAAAAJ;;ghu4BZcAAAAJ;https://scholar.google.com/citations?hl=en;;RGsMgZA4H78C", "or_profile": "~Chenxi_Gu2;~Xiaoqing_Zheng2;~Jianhan_Xu1;~Muling_Wu1;~Cenyuan_Zhang1;~Chengsong_Huang1;~Hua_Cai1;~Xuanjing_Huang1", "aff": ";;Fudan University;Fudan University;Fudan University;Fudan University;UniDT;Fudan University", "aff_domain": ";;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;unidt.com;fudan.edu.cn", "position": ";;MS student;MS student;MS student;Undergrad student;Researcher;Full Professor", "bibtex": "@inproceedings{\ngu2023watermarking,\ntitle={Watermarking {PLM}s on Classification Tasks by Combining Contrastive Learning with Weight Perturbation},\nauthor={Chenxi Gu and Xiaoqing Zheng and Jianhan Xu and Muling Wu and Cenyuan Zhang and Chengsong Huang and Hua Cai and Xuanjing Huang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=nsupkM0ppH}\n}", "github": "", "project": "", "reviewers": "CyZ5;oXB6;NT7H;j5wj", "site": "https://openreview.net/forum?id=nsupkM0ppH", "pdf_size": 0, "rating": "4;4;4;4", "confidence": "3;4;4;4", "excitement": "3;4;4;2", "reproducibility": "4;4;4;4", "correctness": "3;4;3;3", "rating_avg": 4.0, "confidence_avg": 3.75, "excitement_avg": 3.25, "reproducibility_avg": 4.0, "correctness_avg": 3.25, "replies_avg": 14, "authors#_avg": 8, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0009-0003-1875-6658;;;0000-0002-7738-129X;0000-0001-9197-9426", "linkedin": ";;;;;;;", "aff_unique_index": "0;0;0;0;1;0", "aff_unique_norm": "Fudan University;University of Duisburg-Essen", "aff_unique_dep": ";", "aff_unique_url": "https://www.fudan.edu.cn;https://www.uni-due.de", "aff_unique_abbr": "Fudan;UniDT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;1;0", "aff_country_unique": "China;Germany" }, { "id": "nuLtpgr9l5", "title": "Disfluent Cues for Enhanced Speech Understanding in Large Language Models", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "In computational linguistics, the common practice is to \"clean\" disfluent content from spontaneous speech. However, we hypothesize that these disfluencies might serve as more than mere noise, potentially acting as informative cues. We use a range of pre-trained models for a reading comprehension task involving disfluent queries, specifically featuring different types of speech repairs. 
The findings indicate that certain disfluencies can indeed improve model performance, particularly those stemming from context-based adjustments. However, large-scale language models struggle to handle repairs involving decision-making or the correction of lexical or syntactic errors, suggesting a crucial area for potential improvement. This paper thus highlights the importance of a nuanced approach to disfluencies, advocating for their potential utility in enhancing model performance rather than their removal.", "keywords": "disfluency detection;disfluencies;self-repairs;large language models;interruptions;contextual cues;spontaneous speech", "primary_area": "", "supplementary_material": "", "author": "Morteza Rohanian;Farhad Nooralahzadeh;Omid Rohanian;David A. Clifton;Michael Krauthammer", "authorids": "~Morteza_Rohanian1;~Farhad_Nooralahzadeh1;~Omid_Rohanian1;~David_A._Clifton1;~Michael_Krauthammer1", "gender": "Not Specified;M;Not Specified;M;M", "homepage": "http://eecs.qmul.ac.uk/profiles/rohanianmorteza.html;;;http://www.eng.ox.ac.uk/chi;https://krauthammerlab.ch", "dblp": ";151/8470;205/9192;89/6424;", "google_scholar": ";iCPxe4UAAAAJ;https://scholar.google.co.uk/citations?user=dwWwU0UAAAAJ;;", "or_profile": "~Morteza_Rohanian1;~Farhad_Nooralahzadeh1;~Omid_Rohanian1;~David_A._Clifton1;~Michael_Krauthammer1", "aff": "University of Zurich;ZHAW - Z\u00fcrcher Hochschule f\u00fcr Angewandte Wissenschaften;University of Oxford;University of Oxford;University of Zurich", "aff_domain": "uzh.ch;zhaw.ch;ox.ac.uk;ox.ac.uk;uzh.ch", "position": "Postdoc;Researcher;Postdoc;Full Professor;Full Professor", "bibtex": "@inproceedings{\nrohanian2023disfluent,\ntitle={Disfluent Cues for Enhanced Speech Understanding in Large Language Models},\nauthor={Morteza Rohanian and Farhad Nooralahzadeh and Omid Rohanian and David A. Clifton and Michael Krauthammer},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=nuLtpgr9l5}\n}", "github": "", "project": "", "reviewers": "mj8b;XA8w;wpMJ", "site": "https://openreview.net/forum?id=nuLtpgr9l5", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;5;2", "excitement": "4;1;4", "reproducibility": "4;3;4", "correctness": "4;1;3", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 2.6666666666666665, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;", "linkedin": ";;;;", "aff_unique_index": "0;1;2;2;0", "aff_unique_norm": "University of Zurich;Z\u00fcrcher Hochschule f\u00fcr Angewandte Wissenschaften;University of Oxford", "aff_unique_dep": ";;", "aff_unique_url": "https://www.unizh.ch;https://www.zhaw.ch;https://www.ox.ac.uk", "aff_unique_abbr": "UZH;ZHAW;Oxford", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;1;0", "aff_country_unique": "Switzerland;United Kingdom" }, { "id": "nuPp6jdCgg", "title": "Evaluating Large Language Models on Controlled Generation Tasks", "track": "main", "status": "Long Main", "tldr": "", "abstract": "While recent studies have looked into the abilities of large language models in various benchmark tasks, including question generation, reading comprehension, multilingual and etc, there have been few studies looking into the controllability of large language models on generation tasks. 
We present an extensive analysis of various benchmarks including a sentence planning benchmark with different granularities. After comparing large language models against state-of-the-art finetuned smaller models, we present a spectrum showing where large language models fall behind, are comparable to, or exceed the ability of smaller models. We conclude that *large language models struggle at meeting fine-grained hard constraints*.", "keywords": "Evaluation;Large Language Model;Analysis", "primary_area": "", "supplementary_material": "", "author": "Jiao Sun;Yufei Tian;Wangchunshu Zhou;Nan Xu;Qian Hu;Rahul Gupta;John Frederick Wieting;Nanyun Peng;Xuezhe Ma", "authorids": "~Jiao_Sun1;~Yufei_Tian1;~Wangchunshu_Zhou1;~Nan_Xu2;~Qian_Hu4;~Rahul_Gupta3;~John_Frederick_Wieting1;~Nanyun_Peng1;~Xuezhe_Ma1", "gender": ";;M;F;M;M;M;F;M", "homepage": "https://sunjiao123sun.github.io/;;https://michaelzhouwang.github.io;https://sites.google.com/site/xunannancy;;;;https://violetpeng.github.io/;https://xuezhemax.github.io/", "dblp": ";;245/8640.html;;;;156/0158;117/4036;127/0230", "google_scholar": ";;UebIjuQAAAAJ;https://scholar.google.co.uk/citations?hl=en;CLleKDAAAAAJ;1CFrm2YAAAAJ;;XxRXvX0AAAAJ;6_MQLIcAAAAJ", "or_profile": "~Jiao_Sun1;~Yufei_Tian1;~Wangchunshu_Zhou1;~Nan_Xu2;~Qian_Hu4;~Rahul_Gupta3;~John_Frederick_Wieting1;~Nanyun_Peng1;~Xuezhe_Ma1", "aff": "University of Southern California;;Department of Computer Science, ETHZ - ETH Zurich;University of Southern California;Amazon;Amazon;Google DeepMind;University of California, Los Angeles;USC/ISI", "aff_domain": "usc.edu;;inf.ethz.ch;usc.edu;amazon.com;amazon.com;google.com;ucla.edu;isi.edu", "position": "PhD student;;PhD student;PhD student;Researcher;Researcher;Researcher;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nsun2023evaluating,\ntitle={Evaluating Large Language Models on Controlled Generation Tasks},\nauthor={Jiao Sun and Yufei Tian and Wangchunshu Zhou and Nan Xu and Qian Hu and Rahul Gupta and John Frederick Wieting and Nanyun Peng and Xuezhe Ma},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=nuPp6jdCgg}\n}", "github": "", "project": "", "reviewers": "Hweb;rXWW;iLd9", "site": "https://openreview.net/forum?id=nuPp6jdCgg", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;4", "excitement": "3;4;3", "reproducibility": "3;4;4", "correctness": "4;4;4", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 9, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;;;;", "linkedin": ";;;https://linkedin.com/in/nan-xu-b52777125;;;;;xuezhe-ma-b5354731", "aff_unique_index": "0;1;0;2;2;3;4;0", "aff_unique_norm": "University of Southern California;ETH Zurich;Amazon;Google;University of California, Los Angeles", "aff_unique_dep": ";Department of Computer Science;Amazon.com, Inc.;Google DeepMind;", "aff_unique_url": "https://www.usc.edu;https://www.ethz.ch;https://www.amazon.com;https://deepmind.com;https://www.ucla.edu", "aff_unique_abbr": "USC;ETHZ;Amazon;DeepMind;UCLA", "aff_campus_unique_index": "0;1;0;0;3", "aff_campus_unique": "Los Angeles;Zurich;;ISI", "aff_country_unique_index": "0;1;0;0;0;2;0;0", "aff_country_unique": "United States;Switzerland;United Kingdom" }, { "id": "nucyYJZS5z", "title": "Lion: Adversarial Distillation of Proprietary Large Language Models", "track": "main", 
"status": "Long Main", "tldr": "", "abstract": "The practice of transferring knowledge from a sophisticated, proprietary large language model (LLM) to a compact, open-source LLM has garnered considerable attention. Previous works have focused on a unidirectional knowledge distillation way by aligning the responses of the student model with those of the teacher model to a set of instructions. Nevertheless, they overlooked the possibility of incorporating any \"feedback\"--identifying challenging instructions where the student model's performance falls short--to boost the student model's proficiency iteratively. To this end, we propose a novel adversarial distillation framework for a more efficient knowledge transfer. Leveraging the versatile role adaptability of LLMs, we prompt the teacher model to identify \"hard\" instructions and generate new \"hard\" instructions for the student model, creating a three-stage adversarial loop of imitation, discrimination, and generation. By applying this adversarial framework, we successfully transfer knowledge from ChatGPT to a student model (named Lion), using a mere 70k training data. Our results show that Lion-13B not only achieves comparable open-ended generation capabilities to ChatGPT but surpasses conventional state-of-the-art (SOTA) instruction-tuned models like Vicuna-13B by 55.4% in challenging zero-shot reasoning benchmarks such as BIG-Bench Hard (BBH) and 16.7% on AGIEval.", "keywords": "large language models;instruction following;knowledge distillation", "primary_area": "", "supplementary_material": "", "author": "Yuxin Jiang;Chunkit Chan;Mingyang Chen;Wei Wang", "authorids": "~Yuxin_Jiang1;~Chunkit_Chan1;~Mingyang_Chen4;~Wei_Wang55", "gender": "M;M;M;M", "homepage": "https://yjiangcm.github.io/;;http://wei-wang.net;https://chanchunkithkust.github.io", "dblp": "23/6328;125/2319;w/WeiWang00;345/9642", "google_scholar": "QnfcEEcAAAAJ;;wLtu3FYAAAAJ;5ateiVsAAAAJ", "or_profile": "~Yuxin_Jiang1;~Mingyang_Chen4;~Wei_Wang55;~Chun_Kit_Chan1", "aff": "Hong Kong University of Science and Technology;Hong Kong University of Science and Technology;Hong Kong University of Science and Technology;Hong Kong University of Science and Technology", "aff_domain": "ust.hk;ust.hk;ust.hk;hkust.edu", "position": "PhD student;PhD student;Full Professor;MS student", "bibtex": "@inproceedings{\njiang2023lion,\ntitle={Lion: Adversarial Distillation of Proprietary Large Language Models},\nauthor={Yuxin Jiang and Chunkit Chan and Mingyang Chen and Wei Wang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=nucyYJZS5z}\n}", "github": "", "project": "", "reviewers": "XLBf;nhXy;THki", "site": "https://openreview.net/forum?id=nucyYJZS5z", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;4;3", "excitement": "4;3;5", "reproducibility": "4;4;4", "correctness": "3;4;4", "rating_avg": 5.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 4.0, "reproducibility_avg": 4.0, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0003-0766-9762;0000-0002-3464-0976;0000-0002-1568-2396;0000-0002-1520-4597", "linkedin": ";;;", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Hong Kong University of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.ust.hk", "aff_unique_abbr": "HKUST", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Hong Kong SAR", 
"aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "nw6JxagUNG", "title": "Methodological Insights in Detecting Subtle Semantic Shifts with Contextualized and Static Language Models", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "In this paper, we investigate automatic detection of subtle semantic shifts between social communities of different political convictions in Dutch and English. We perform a methodological study comparing methods using static and contextualized language models. We investigate the impact of specializing contextualized models through fine-tuning on target corpora, word sense disambiguation and sentiment. We furthermore propose a new approach using masked token prediction, that relies on behavioral information, specifically the most probable substitutions, instead of geometrical comparison of representations. Our results show that methods using static models and our masked token prediction method can detect differences in connotation of politically loaded terms, whereas methods that rely on measuring the distance between contextualized representations are not providing clear signals, even in synthetic scenarios of extreme shifts.", "keywords": "semantic shift detection;contextualized embeddings;static embeddings;political communities", "primary_area": "", "supplementary_material": "", "author": "Sanne Hoeken;\u00d6zge Alacam;Antske Fokkens;Pia Sommerauer", "authorids": "~Sanne_Hoeken1;~\u00d6zge_Alacam1;~Antske_Fokkens1;~Pia_Sommerauer1", "gender": "F;F;F;F", "homepage": "https://ekvv.uni-bielefeld.de/pers_publ/publ/PersonDetail.jsp?personId=398620434;;https://piasommerauer.github.io/;", "dblp": "353/1150;41/9013;220/0959;68/7180.html", "google_scholar": "wrUOBskAAAAJ;El5nmZUAAAAJ;5UxZbeAAAAAJ;1_O0QmMAAAAJ", "or_profile": "~Sanne_Hoeken1;~Antske_Fokkens1;~Pia_Sommerauer1;~Ozge_Alacam1", "aff": "Universit\u00e4t Bielefeld;VU University Amsterdam;Vrije Universiteit Amsterdam;Ludwig-Maximilians-Universit\u00e4t M\u00fcnchen", "aff_domain": "uni-bielefeld.de;vu.nl;vu.nl;lmu.de", "position": "PhD student;Full Professor;Assistant Professor;Postdoc", "bibtex": "@inproceedings{\nhoeken2023methodological,\ntitle={Methodological Insights in Detecting Subtle Semantic Shifts with Contextualized and Static Language Models},\nauthor={Sanne Hoeken and {\\\"O}zge Alacam and Antske Fokkens and Pia Sommerauer},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=nw6JxagUNG}\n}", "github": "", "project": "", "reviewers": "af38;j7jk;ykvj", "site": "https://openreview.net/forum?id=nw6JxagUNG", "pdf_size": 0, "rating": "5;5;5", "confidence": "2;3;4", "excitement": "3;3;4", "reproducibility": "3;3;4", "correctness": "3;4;4", "rating_avg": 5.0, "confidence_avg": 3.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-6628-6916;0000-0003-3593-1465;0000-0003-1055-8334", "linkedin": ";;;", "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Universit\u00e4t Bielefeld;VU University Amsterdam;Vrije Universiteit Amsterdam;Ludwig-Maximilians-Universit\u00e4t M\u00fcnchen", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.uni-bielefeld.de/;https://www.vu.nl;https://www.vu.nl;https://www.lmu.de", "aff_unique_abbr": "Uni Bielefeld;VU;VU Amsterdam;LMU", 
"aff_campus_unique_index": "1", "aff_campus_unique": ";Amsterdam", "aff_country_unique_index": "0;1;1;0", "aff_country_unique": "Germany;Netherlands" }, { "id": "nwTqq0XW3w", "title": "Language and Mental Health: Measures of Emotion Dynamics from Text as Linguistic Biosocial Markers", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Research in psychopathology has shown that, at an aggregate level, the patterns of emotional change over time---emotion dynamics---are indicators of one's mental health. One's patterns of emotion change have traditionally been determined through self-reports of emotions; however, there are known issues with accuracy, bias, and convenience. Recent approaches to determining emotion dynamics from one's everyday utterances, addresses many of these concerns, but it is not yet known whether these measures of utterance emotion dynamics (UED) correlate with mental health diagnoses. Here, for the first time, we study the relationship between tweet emotion dynamics and mental health disorders. We find that each of the UED metrics studied varied by the user's self-disclosed diagnosis. For example: average valence was significantly higher (i.e., more positive text) in the control group compared to users with ADHD, MDD, and PTSD. Valence variability was significantly lower in the control group compared to ADHD, depression, bipolar disorder, MDD, PTSD, and OCD but not PPD. Rise and recovery rates of valence also exhibited significant differences from the control. This work provides important early evidence for how linguistic cues pertaining to emotion dynamics can play a crucial role as biosocial markers for mental illnesses and aid in the understanding, diagnosis, and management of mental health disorders.", "keywords": "Utterance Emotion Dynamics;Mental Health;Social Media;Sentiment Analysis;Emotion Arcs;Emotional Reactivity;Lexicons", "primary_area": "", "supplementary_material": "", "author": "Daniela Teodorescu;Tiffany Cheng;Alona Fyshe;Saif M. Mohammad", "authorids": "~Daniela_Teodorescu1;~Tiffany_Cheng1;~Alona_Fyshe1;~Saif_M._Mohammad1", "gender": ";F;F;M", "homepage": ";;http://webdocs.cs.ualberta.ca/~alona/;http://saifmohammad.com", "dblp": ";;30/3660;58/380", "google_scholar": ";NxmrsXEAAAAJ;https://scholar.google.ca/citations?user=Vw8z7qwAAAAJ;zJHymXh9EVwC", "or_profile": "~Daniela_Teodorescu1;~Tiffany_Cheng1;~Alona_Fyshe1;~Saif_M._Mohammad1", "aff": ";Carleton University;University of Alberta;National Research Council Canada", "aff_domain": ";carleton.ca;ualberta.ca;nrc-cnrc.gc.ca", "position": ";Researcher;Assistant Professor;Researcher", "bibtex": "@inproceedings{\nteodorescu2023language,\ntitle={Language and Mental Health: Measures of Emotion Dynamics from Text as Linguistic Biosocial Markers},\nauthor={Daniela Teodorescu and Tiffany Cheng and Alona Fyshe and Saif M. 
Mohammad},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=nwTqq0XW3w}\n}", "github": "", "project": "", "reviewers": "Vx9h;9kEM;Gsi5", "site": "https://openreview.net/forum?id=nwTqq0XW3w", "pdf_size": 0, "rating": "5;5;5", "confidence": "3;3;4", "excitement": "4;4;4", "reproducibility": "4;3;3", "correctness": "4;3;3", "rating_avg": 5.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 4.0, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0001-6209-3992;0000-0003-4367-0306;0000-0003-2716-7516", "linkedin": ";;;", "aff_unique_index": "0;1;2", "aff_unique_norm": "Carleton University;University of Alberta;National Research Council Canada", "aff_unique_dep": ";;", "aff_unique_url": "https://carleton.ca;https://www.ualberta.ca;https://www.nrc-cnrc.gc.ca", "aff_unique_abbr": "Carleton;UAlberta;NRC-CNRC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Canada" }, { "id": "o2HBfgY20b", "title": "API-Bank: A Comprehensive Benchmark for Tool-Augmented LLMs", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Recent research has demonstrated that Large Language Models (LLMs) can enhance their capabilities by utilizing external tools. However, three pivotal questions remain unanswered: (1) How effective are current LLMs in utilizing tools? (2) How can we enhance LLMs' ability to utilize tools? (3) What obstacles need to be overcome to leverage tools? To address these questions, we introduce API-Bank, a groundbreaking benchmark, specifically designed for tool-augmented LLMs. For the first question, we develop a runnable evaluation system consisting of 73 API tools. We annotate 314 tool-use dialogues with 753 API calls to assess the existing LLMs' capabilities in planning, retrieving, and calling APIs. For the second question, we construct a comprehensive training set containing 1,888 tool-use dialogues from 2,138 APIs spanning 1,000 distinct domains. Using this dataset, we train Lynx, a tool-augmented LLM initialized from Alpaca. Experimental results demonstrate that GPT-3.5 exhibits improved tool utilization compared to GPT-3, while GPT-4 excels in planning. However, there is still significant potential for further improvement. Moreover, Lynx surpasses Alpaca's tool utilization performance by more than 26 pts and approaches the effectiveness of GPT-3.5. 
Through error analysis, we highlight the key challenges for future research in this field to answer the third question.", "keywords": "Tool-Augmented LLMs;Large Language Model;Benchmark", "primary_area": "", "supplementary_material": "", "author": "Minghao Li;Yingxiu Zhao;Bowen Yu;Feifan Song;Hangyu Li;Haiyang Yu;Zhoujun Li;Fei Huang;Yongbin Li", "authorids": "~Minghao_Li1;~Yingxiu_Zhao1;~Bowen_Yu3;~Feifan_Song1;~Hangyu_Li4;~Haiyang_Yu3;~Zhoujun_Li1;~Fei_Huang1;~Yongbin_Li2", "gender": "M;F;M;;;M;M;;M", "homepage": "https://minghao.li/;;https://yubowen-ph.github.io/;;;;;;https://yongbin-li.github.io/", "dblp": "91/1271;;95/10266-2.html;;;90/6643-3;76/2866-1;;", "google_scholar": "kyr6njcAAAAJ;https://scholar.google.com/citations?hl=en;oHoEp34AAAAJ;;;VhWV-1wAAAAJ;;;xF5VrokAAAAJ", "or_profile": "~Minghao_Li1;~Yingxiu_Zhao1;~Bowen_Yu3;~Feifan_Song1;~Hangyu_Li4;~Haiyang_Yu3;~Zhoujun_Li1;~Fei_Huang1;~Yongbin_Li2", "aff": "Beihang University;Hong Kong University of Science and Technology;Alibaba Group;;;Alibaba Group;Beihang University;;Alibaba Group", "aff_domain": "buaa.edu.cn;ust.hk;alibaba-inc.com;;;alibaba-inc.com;buaa.edu.cn;;alibaba-inc.com", "position": "PhD student;PhD student;Researcher;;;Researcher;Full Professor;;Researcher", "bibtex": "@inproceedings{\nli2023apibank,\ntitle={{API}-Bank: A Comprehensive Benchmark for Tool-Augmented {LLM}s},\nauthor={Minghao Li and Yingxiu Zhao and Bowen Yu and Feifan Song and Hangyu Li and Haiyang Yu and Zhoujun Li and Fei Huang and Yongbin Li},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=o2HBfgY20b}\n}", "github": "", "project": "", "reviewers": "vaMb;FRbH;1pDE;ZmrS", "site": "https://openreview.net/forum?id=o2HBfgY20b", "pdf_size": 0, "rating": "4;4;4;4", "confidence": "4;4;4;4", "excitement": "4;4;3;4", "reproducibility": "4;4;4;4", "correctness": "4;4;3;4", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.75, "reproducibility_avg": 4.0, "correctness_avg": 3.75, "replies_avg": 14, "authors#_avg": 9, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-5218-9920;0000-0002-6804-1859;;;;;;", "linkedin": ";;;;;;;;", "aff_unique_index": "0;1;2;2;0;2", "aff_unique_norm": "Beihang University;Hong Kong University of Science and Technology;Alibaba Group", "aff_unique_dep": ";;", "aff_unique_url": "http://www.buaa.edu.cn/;https://www.ust.hk;https://www.alibaba.com", "aff_unique_abbr": "BUAA;HKUST;Alibaba", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "o5LeRFe7VS", "title": "Test-time Augmentation for Factual Probing", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "Factual probing is a method that uses prompts to test if a language model ``knows'' certain world knowledge facts.\nA problem in factual probing is that small changes to the prompt can lead to large changes in model output.\nPrevious work aimed to alleviate this problem by optimizing prompts via text mining or fine-tuning.\nHowever, such approaches are relation-specific and do not generalize to unseen relation types.\nHere, we propose to use test-time augmentation (TTA) as a relation-agnostic method for reducing sensitivity to prompt variations by automatically augmenting and ensembling prompts at test time.\nExperiments show improved model calibration, i.e., with TTA, model confidence better reflects prediction 
accuracy.\nImprovements in prediction accuracy are observed for some models, but for other models, TTA leads to degradation.\nError analysis identifies the difficulty of producing high-quality prompt variations as the main challenge for TTA.", "keywords": "Factual Probing;TTA;Calibration", "primary_area": "", "supplementary_material": "", "author": "Go Kamoda;Benjamin Heinzerling;Keisuke Sakaguchi;Kentaro Inui", "authorids": "~Go_Kamoda1;~Benjamin_Heinzerling1;~Keisuke_Sakaguchi2;~Kentaro_Inui1", "gender": "M;;;M", "homepage": "https://gokamoda.github.io/;https://bheinzerling.github.io/;https://keisuke-sakaguchi.github.io/;http://www.cl.ecei.tohoku.ac.jp/~inui/", "dblp": "359/5554;165/9523;127/0185.html;90/3315", "google_scholar": "6hqExwkAAAAJ;https://scholar.google.co.jp/citations?user=w8pxkWsAAAAJ;6CRBF-MAAAAJ;https://scholar.google.co.jp/citations?user=38_o3-kAAAAJ", "or_profile": "~Go_Kamoda1;~Benjamin_Heinzerling1;~Keisuke_Sakaguchi2;~Kentaro_Inui1", "aff": "Tohoku University;RIKEN;Tohoku University;Tohoku University", "aff_domain": "tohoku.ac.jp;riken.jp;tohoku.ac.jp;tohoku.ac.jp", "position": "MS student;Researcher;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nkamoda2023testtime,\ntitle={Test-time Augmentation for Factual Probing},\nauthor={Go Kamoda and Benjamin Heinzerling and Keisuke Sakaguchi and Kentaro Inui},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=o5LeRFe7VS}\n}", "github": "", "project": "", "reviewers": "jtoE;HiLF;nPms", "site": "https://openreview.net/forum?id=o5LeRFe7VS", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;3;2", "excitement": "2;4;3", "reproducibility": "0;3;4", "correctness": "3;4;3", "rating_avg": 3.0, "confidence_avg": 3.0, "excitement_avg": 3.0, "reproducibility_avg": 2.3333333333333335, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0000-0001-6510-604X", "linkedin": ";;;kentaro-inui-52401a31/", "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Tohoku University;RIKEN", "aff_unique_dep": ";", "aff_unique_url": "https://www.tohoku.ac.jp;https://www.riken.jp", "aff_unique_abbr": "Tohoku U;RIKEN", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Japan" }, { "id": "o5bOK5a9qz", "title": "DemaFormer: Damped Exponential Moving Average Transformer with Energy-Based Modeling for Temporal Language Grounding", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Temporal Language Grounding seeks to localize video moments that semantically correspond to a natural language query. Recent advances employ the attention mechanism to learn the relations between video moments and the text query. However, naive attention might not be able to appropriately capture such relations, resulting in ineffective distributions where target video moments are difficult to separate from the remaining ones. To resolve the issue, we propose an energy-based model framework to explicitly learn moment-query distributions. Moreover, we propose DemaFormer, a novel Transformer-based architecture that utilizes exponential moving average with a learnable damping factor to effectively encode moment-query inputs. 
Comprehensive experiments on four public temporal language grounding datasets showcase the superiority of our methods over the state-of-the-art baselines.", "keywords": "temporal language grounding;energy-based modeling;exponential-moving average;transformer", "primary_area": "", "supplementary_material": "", "author": "Thong Thanh Nguyen;Xiaobao Wu;Xinshuai Dong;Cong-Duy T Nguyen;See-Kiong Ng;Anh Tuan Luu", "authorids": "~Thong_Thanh_Nguyen1;~Xiaobao_Wu1;~Xinshuai_Dong1;~Cong-Duy_T_Nguyen1;~See-Kiong_Ng1;~Anh_Tuan_Luu2", "gender": "M;;M;M;M;M", "homepage": "https://nguyentthong.github.io/;https://bobxwu.github.io/;https://dongxinshuai.github.io/;https://duyngtr16061999.github.io/;https://www.comp.nus.edu.sg/~ngsk/;https://tuanluu.github.io/", "dblp": "29/5255.html;249/8429;279/6151.html;;00/5480;81/8329.html", "google_scholar": "C2zb0lkAAAAJ;Y1oag4sAAAAJ;A7JyL1sAAAAJ;vIdT3F8AAAAJ;https://scholar.google.com.tw/citations?user=_wsommYAAAAJ;https://scholar.google.com.sg/citations?hl=en", "or_profile": "~Thong_Thanh_Nguyen1;~Xiaobao_Wu1;~Xinshuai_Dong1;~Cong-Duy_T_Nguyen1;~See-Kiong_Ng1;~Anh_Tuan_Luu2", "aff": "National University of Singapore;Nanyang Technological University;Carnegie Mellon University;School of Computer Science and Engineering, Nanyang Technological University;National University of Singapore;Nanyang Technological University", "aff_domain": "nus.edu;ntu.edu.sg;cmu.edu;scse.ntu.edu.sg;nus.edu.sg;ntu.edu.sg", "position": "PhD student;PhD student;PhD student;PhD student;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\nnguyen2023demaformer,\ntitle={DemaFormer: Damped Exponential Moving Average Transformer with Energy-Based Modeling for Temporal Language Grounding},\nauthor={Thong Thanh Nguyen and Xiaobao Wu and Xinshuai Dong and Cong-Duy T Nguyen and See-Kiong Ng and Anh Tuan Luu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=o5bOK5a9qz}\n}", "github": "", "project": "", "reviewers": "SbrT;ecVy;goDS;ZP2j", "site": "https://openreview.net/forum?id=o5bOK5a9qz", "pdf_size": 0, "rating": "3;3;3;3", "confidence": "1;4;3;5", "excitement": "3;4;3;2", "reproducibility": "3;4;3;4", "correctness": "2;4;3;3", "rating_avg": 3.0, "confidence_avg": 3.25, "excitement_avg": 3.0, "reproducibility_avg": 3.5, "correctness_avg": 3.0, "replies_avg": 13, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;0000-0001-6565-7511;", "linkedin": ";xiaobao-wu/;;;seekiong/?originalSubdomain=sg;", "aff_unique_index": "0;1;2;1;0;1", "aff_unique_norm": "National University of Singapore;Nanyang Technological University;Carnegie Mellon University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.nus.edu.sg;https://www.ntu.edu.sg;https://www.cmu.edu", "aff_unique_abbr": "NUS;NTU;CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0;0", "aff_country_unique": "Singapore;United States" }, { "id": "o6D5yTpK8w", "title": "Exploring Graph Pre-training for Aspect-based Sentiment Analysis", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Existing studies tend to extract the sentiment elements in a generative manner in order to avoid complex modeling.\nDespite their effectiveness, they ignore importance of the relationships between sentiment elements that could be crucial, making the large pre-trained generative models sub-optimal for modeling sentiment knowledge. 
\nTherefore, we introduce two pre-training paradigms to improve the generation model by exploring graph pre-training that targets strengthening the model's ability to capture the relationships between elements.\nSpecifically, we first employ an Element-level Graph Pre-training paradigm, which is designed to improve the structure awareness of the generative model. Then, we design a Task Decomposition Pre-training paradigm to make the generative model generalizable and robust against various irregular sentiment quadruples. \nExtensive experiments show the superiority of our proposed method and validate the correctness of our motivation.", "keywords": "Aspect-based Sentiment Analysis; Generative model; Graph pre-train", "primary_area": "", "supplementary_material": "", "author": "Xiaoyi Bao;Zhongqing Wang;Guodong Zhou", "authorids": "~Xiaoyi_Bao2;~Zhongqing_Wang1;~Guodong_Zhou1", "gender": "M;M;M", "homepage": "https://www.polyu.edu.hk;http://nlp.suda.edu.cn/~wangzq;http://nlp.suda.edu.cn/~gdzhou/", "dblp": "135/7034.html;20/9924;", "google_scholar": "1IudabAAAAAJ;;", "or_profile": "~Xiaoyi_Bao2;~Zhongqing_Wang1;~Guodong_Zhou1", "aff": "Soochow University;Soochow University, China;Soochow University, China", "aff_domain": "suda.edu.cn;suda.edu.cn;suda.edu.cn", "position": "MS student;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nbao2023exploring,\ntitle={Exploring Graph Pre-training for Aspect-based Sentiment Analysis},\nauthor={Xiaoyi Bao and Zhongqing Wang and Guodong Zhou},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=o6D5yTpK8w}\n}", "github": "", "project": "", "reviewers": "LibN;JuKD;Jvv2", "site": "https://openreview.net/forum?id=o6D5yTpK8w", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;5;3", "excitement": "4;3;3", "reproducibility": "5;3;3", "correctness": "4;3;3", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 10, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0009-0006-2406-1934;;", "linkedin": ";;", "aff_unique_index": "0;0;0", "aff_unique_norm": "Soochow University", "aff_unique_dep": "", "aff_unique_url": "https://www.soochow.edu.cn", "aff_unique_abbr": "Soochow U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "o7Cpy0nZZb", "title": "Improving Conversational Recommendation Systems via Bias Analysis and Language-Model-Enhanced Data Augmentation", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Conversational Recommendation System (CRS) is a rapidly growing research area that has gained significant attention alongside advancements in language modelling techniques. However, the current state of conversational recommendation faces numerous challenges due to its relative novelty and limited existing contributions. In this study, we delve into benchmark datasets for developing CRS models and address potential biases arising from the feedback loop inherent in multi-turn interactions, including selection bias and multiple popularity bias variants. Drawing inspiration from the success of generative data via language models and data augmentation techniques, we present two novel strategies, 'Once-Aug' and 'PopNudge', to enhance model performance while mitigating biases. 
Through extensive experiments on ReDial and TG-ReDial benchmark datasets, we show a consistent improvement of CRS techniques with our data augmentation approaches and offer additional insights on addressing multiple newly formulated biases.", "keywords": "Conversational Recommendation;Bias Mitigation;Generative Data;Data Augmentation", "primary_area": "", "supplementary_material": "", "author": "Xi Wang;Hossein A. Rahmani;Jiqun Liu;Emine Yilmaz", "authorids": "~Xi_Wang16;~Hossein_A._Rahmani1;~Jiqun_Liu2;~Emine_Yilmaz1", "gender": "M;M;M;F", "homepage": "https://www.xiwangeric.com/;http://rahmanidashti.github.io/;https://jiqunl.github.io/me/;https://sites.google.com/site/emineyilmaz/", "dblp": "08/5760-12;238/1568;;36/3270", "google_scholar": "nFmvLQgAAAAJ;1uzYEI0AAAAJ;;https://scholar.google.com.tw/citations?user=ocmAN4YAAAAJ", "or_profile": "~Xi_Wang16;~Hossein_A._Rahmani1;~Jiqun_Liu2;~Emine_Yilmaz1", "aff": "University College London, University of London;University College London (UCL);University of Oklahoma;Department of Computer Science, University College London", "aff_domain": "ucl.ac.uk;ucl.ac.uk;ou.edu;cs.ucl.ac.uk", "position": "Postdoc;PhD student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nwang2023improving,\ntitle={Improving Conversational Recommendation Systems via Bias Analysis and Language-Model-Enhanced Data Augmentation},\nauthor={Xi Wang and Hossein A. Rahmani and Jiqun Liu and Emine Yilmaz},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=o7Cpy0nZZb}\n}", "github": "", "project": "", "reviewers": "ZYGt;EaFv;PYLM", "site": "https://openreview.net/forum?id=o7Cpy0nZZb", "pdf_size": 0, "rating": "2;2;2", "confidence": "2;4;3", "excitement": "4;3;2", "reproducibility": "4;4;4", "correctness": "3;2;2", "rating_avg": 2.0, "confidence_avg": 3.0, "excitement_avg": 3.0, "reproducibility_avg": 4.0, "correctness_avg": 2.3333333333333335, "replies_avg": 12, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0001-5936-9919;;;", "linkedin": ";rahmanidashti/;;", "aff_unique_index": "0;0;1;0", "aff_unique_norm": "University College London;University of Oklahoma", "aff_unique_dep": ";", "aff_unique_url": "https://www.ucl.ac.uk;https://www.ou.edu", "aff_unique_abbr": "UCL;OU", "aff_campus_unique_index": "1", "aff_campus_unique": ";London", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "United Kingdom;United States" }, { "id": "o7SWorg8EM", "title": "S2abEL: A Dataset for Entity Linking from Scientific Tables", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Entity linking (EL) is the task of linking a textual mention to its corresponding entry in a knowledge base, and is critical for many knowledge-intensive NLP applications. When applied to tables in scientific papers, EL is a step toward large-scale scientific knowledge bases that could enable advanced scientific question answering and analytics. We present the first dataset for EL in scientific tables. EL for scientific tables is especially challenging because scientific knowledge bases can be very incomplete, and disambiguating table mentions typically requires understanding the paper's text in addition to the table. 
Our dataset, Scientific Table Entity Linking (S2abEL), focuses on EL in machine learning results tables and includes hand-labeled cell types, attributed sources, and entity links from the PaperswithCode taxonomy for 8,429 cells from 732 tables. We introduce a neural baseline method designed for EL on scientific tables containing many out-of-knowledge-base mentions, and show that it significantly outperforms a state-of-the-art generic table EL method. The best baselines fall below human performance, and our analysis highlights avenues for improvement.", "keywords": "Table Entity Linking;Machine Learning;Dataset", "primary_area": "", "supplementary_material": "", "author": "Yuze Lou;Bailey Kuehl;Erin Bransom;Sergey Feldman;Aakanksha Naik;Doug Downey", "authorids": "~Yuze_Lou1;~Bailey_Kuehl1;~Erin_Bransom1;~Sergey_Feldman1;~Aakanksha_Naik1;~Doug_Downey1", "gender": "M;F;F;;F;M", "homepage": ";;;http://www.data-cowboys.com;http://www.cs.cmu.edu/~anaik/;https://www.cs.northwestern.edu/~ddowney/", "dblp": ";;;81/8052;204/7137;57/5363", "google_scholar": "iAVX2YoAAAAJ;1lzjTX0AAAAJ;;C6-OMDIAAAAJ;https://scholar.google.com/citations?hl=en;E8evkcQAAAAJ", "or_profile": "~Yuze_Lou1;~Bailey_Kuehl1;~Erin_Bransom1;~Sergey_Feldman1;~Aakanksha_Naik1;~Doug_Downey1", "aff": "University of Michigan - Ann Arbor;University of California, Berkeley;Allen Institute for Artificial Intelligence;Data Cowboys;National Institutes of Health;Northwestern University", "aff_domain": "umich.edu;berkeley.edu;allenai.org;data-cowboys.com;nih.gov;northwestern.edu", "position": "PhD student;MS student;Analyst;Machine Learning Consultant;Researcher;Professor", "bibtex": "@inproceedings{\nlou2023sabel,\ntitle={S2ab{EL}: A Dataset for Entity Linking from Scientific Tables},\nauthor={Yuze Lou and Bailey Kuehl and Erin Bransom and Sergey Feldman and Aakanksha Naik and Doug Downey},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=o7SWorg8EM}\n}", "github": "", "project": "", "reviewers": "CtHC;zr1c;NRhP", "site": "https://openreview.net/forum?id=o7SWorg8EM", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;4;3", "excitement": "3;3;4", "reproducibility": "1;4;4", "correctness": "2;4;4", "rating_avg": 5.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0000-0003-0386-0922;;", "linkedin": ";baileykuehl?trk=public_profile_browsemap_profile-result-card_result-card_full-click;;;aakanksha-naik-b3494882/;", "aff_unique_index": "0;1;2;3;4;5", "aff_unique_norm": "University of Michigan;University of California, Berkeley;Allen Institute for Artificial Intelligence;Data Cowboys;National Institutes of Health;Northwestern University", "aff_unique_dep": ";;;;;", "aff_unique_url": "https://www.umich.edu;https://www.berkeley.edu;https://allenai.org;;https://www.nih.gov;https://www.northwestern.edu", "aff_unique_abbr": "UM;UC Berkeley;AI2;;NIH;NU", "aff_campus_unique_index": "0;1", "aff_campus_unique": "Ann Arbor;Berkeley;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States;" }, { "id": "o9wco8bIVN", "title": "Unsupervised Grammatical Error Correction Rivaling Supervised Methods", "track": "main", "status": "Long Main", "tldr": "", "abstract": "State-of-the-art grammatical error correction (GEC) systems rely on parallel 
training data (ungrammatical sentences and their manually corrected counterparts), which are expensive to construct. In this paper, we employ the Break-It-Fix-It (BIFI) method to build an unsupervised GEC system. The BIFI framework generates parallel data from unlabeled text using a fixer to transform ungrammatical sentences into grammatical ones, and a critic to predict sentence grammaticality. We present an unsupervised approach to build the fixer and the critic, and an algorithm that allows them to iteratively improve each other. We evaluate our unsupervised GEC system on English and Chinese GEC. Empirical results show that our GEC system outperforms previous unsupervised GEC systems, and achieves performance comparable to supervised GEC systems without ensemble. Furthermore, when combined with labeled training data, our system achieves new state-of-the-art results on the CoNLL-2014 and NLPCC-2018 test sets.", "keywords": "Unsupervised Grammatical Error Correction", "primary_area": "", "supplementary_material": "", "author": "Hannan Cao;Liping Yuan;Yuchen Zhang;Hwee Tou Ng", "authorids": "~Hannan_Cao2;~Liping_Yuan2;~Yuchen_Zhang1;~Hwee_Tou_Ng3", "gender": "M;F;M;M", "homepage": "https://michaelcaohn.github.io/;;https://www.comp.nus.edu.sg/~nght/;", "dblp": "305/9817;04/40.html;97/3037.html;09/5661-2", "google_scholar": "1fs9u0YAAAAJ;https://scholar.google.com/citations?view_op=list_works;https://scholar.google.com.tw/citations?user=FABZCeAAAAAJ;Om4Lag0AAAAJ", "or_profile": "~Hannan_Cao2;~Liping_Yuan2;~Hwee_Tou_Ng3;~Yuchen_Zhang2", "aff": "National University of Singapore;ByteDance Inc.;National University of Singapore;ByteDance Inc.", "aff_domain": "nus.edu.sg;bytedance.com;nus.edu.sg;bytedance.com", "position": "PhD student;Researcher;Professor;Researcher", "bibtex": "@inproceedings{\ncao2023unsupervised,\ntitle={Unsupervised Grammatical Error Correction Rivaling Supervised Methods},\nauthor={Hannan Cao and Liping Yuan and Yuchen Zhang and Hwee Tou Ng},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=o9wco8bIVN}\n}", "github": "", "project": "", "reviewers": "Y4qz;fd3a;XoJu", "site": "https://openreview.net/forum?id=o9wco8bIVN", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;5", "excitement": "3;3;4", "reproducibility": "4;4;3", "correctness": "4;4;4", "rating_avg": 4.0, "confidence_avg": 4.333333333333333, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 4.0, "replies_avg": 10, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;", "linkedin": "hannan-cao-599363125/?originalSubdomain=sg;;;", "aff_unique_index": "0;1;0;1", "aff_unique_norm": "National University of Singapore;ByteDance", "aff_unique_dep": ";", "aff_unique_url": "https://www.nus.edu.sg;https://www.bytedance.com", "aff_unique_abbr": "NUS;ByteDance", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;1", "aff_country_unique": "Singapore;China" }, { "id": "oC5e8mAKAP", "title": "Weakly-Supervised Learning of Visual Relations in Multimodal Pretraining", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Recent work in vision-and-language pretraining has investigated supervised signals from object detection data to learn better, fine-grained multimodal representations. In this work, we take a step further and explore how we can tap into supervision from small-scale visual relation data. 
In particular, we propose two pretraining approaches to contextualise visual entities in a multimodal setup. With verbalised scene graphs, we transform visual relation triplets into structured captions, and treat them as additional image descriptions. With masked relation prediction, we further encourage relating entities from image regions with visually masked contexts. When applied to strong baselines pretrained on large amounts of Web data, zero-shot evaluations on both coarse-grained and fine-grained tasks show the efficacy of our methods in learning multimodal representations from weakly-supervised relations data.", "keywords": "vision-and-language;multimodal;pretraining;zero-shot;fine-grained", "primary_area": "", "supplementary_material": "", "author": "Emanuele Bugliarello;Aida Nematzadeh;Lisa Anne Hendricks", "authorids": "~Emanuele_Bugliarello1;~Aida_Nematzadeh1;~Lisa_Anne_Hendricks1", "gender": "M;;F", "homepage": "http://e-bug.github.io/;http://www.aidanematzadeh.me/;https://people.eecs.berkeley.edu/~lisa_anne/", "dblp": "241/9497;153/9556;154/6359", "google_scholar": "9yc1aXYAAAAJ;FWJZYMYAAAAJ;pvyI8GkAAAAJ", "or_profile": "~Emanuele_Bugliarello1;~Aida_Nematzadeh1;~Lisa_Anne_Hendricks1", "aff": "University of Copenhagen;Google Deepmind;Google DeepMind", "aff_domain": "ku.dk;deepmind.com;google.com", "position": "PhD student;Researcher;Researcher", "bibtex": "@inproceedings{\nbugliarello2023weaklysupervised,\ntitle={Weakly-Supervised Learning of Visual Relations in Multimodal Pretraining},\nauthor={Emanuele Bugliarello and Aida Nematzadeh and Lisa Anne Hendricks},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=oC5e8mAKAP}\n}", "github": "", "project": "", "reviewers": "y9T8;tnr9;VUQu;JtYm", "site": "https://openreview.net/forum?id=oC5e8mAKAP", "pdf_size": 0, "rating": "5;5;5;5", "confidence": "4;3;3;2", "excitement": "3;4;3;4", "reproducibility": "4;4;4;3", "correctness": "4;4;4;4", "rating_avg": 5.0, "confidence_avg": 3.0, "excitement_avg": 3.5, "reproducibility_avg": 3.75, "correctness_avg": 4.0, "replies_avg": 13, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-2999-7081;;", "linkedin": "emanuelebugliarello/;;", "aff_unique_index": "0;1;2", "aff_unique_norm": "University of Copenhagen;DeepMind;Google", "aff_unique_dep": ";DeepMind;Google DeepMind", "aff_unique_url": "https://www.ku.dk;https://deepmind.com;https://deepmind.com", "aff_unique_abbr": "UCPH;DeepMind;DeepMind", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1", "aff_country_unique": "Denmark;United Kingdom" }, { "id": "oEsYs3WRc3", "title": "Enhancing Chat Language Models by Scaling High-quality Instructional Conversations", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Fine-tuning on instruction data has been widely validated as an effective practice for implementing chat language models like ChatGPT. Scaling the diversity and quality of such data, although straightforward, stands a great chance of leading to improved performance.\nThis paper aims to push the upper bound of open-source models further. 
\nWe first provide a systematically designed, diverse, informative, large-scale dataset of instructional conversations, UltraChat, which does not involve human queries.\nOur objective is to capture the breadth of interactions between a human user and an AI assistant, and we employ a comprehensive framework to generate multi-turn conversations iteratively.\nUltraChat contains 1.5 million high-quality multi-turn dialogues and covers a wide range of topics and instructions. \nOur statistical analysis of UltraChat reveals its superiority in various key metrics, including scale, average length, diversity, coherence, etc., solidifying its position as a leading open-source dataset.\nBuilding upon UltraChat, we fine-tune a LLaMA model to create a powerful conversational model, UltraLM.\nOur evaluations indicate that UltraLM consistently outperforms other open-source models, including WizardLM and Vicuna, the previously recognized state-of-the-art open-source models.", "keywords": "Instructional Data;Language Models", "primary_area": "", "supplementary_material": "", "author": "Ning Ding;Yulin Chen;Bokai Xu;Yujia Qin;Shengding Hu;Zhiyuan Liu;Maosong Sun;Bowen Zhou", "authorids": "~Ning_Ding5;~Yulin_Chen1;~Bokai_Xu1;~Yujia_Qin1;~Shengding_Hu2;~Zhiyuan_Liu1;~Maosong_Sun1;~Bowen_Zhou4", "gender": "M;F;M;M;M;M;;M", "homepage": "https://www.stingning.cn/;;http://bokaixu.site;https://yujia-qin.github.io/;http://nlp.csai.tsinghua.edu.cn/~lzy;https://www.cs.tsinghua.edu.cn/csen/info/1312/4394.htm;;https://shengdinghu.github.io/", "dblp": ";;;126/2333;53/3245-1;95/3291-1;;268/5534", "google_scholar": "uZXQuYAAAAAJ;tAiXl18AAAAJ;;;dT0v5u0AAAAJ;https://scholar.google.com.tw/citations?user=zIgT0HMAAAAJ;https://scholar.google.com/citations?hl=zh-CN;ZfehPhAAAAAJ", "or_profile": "~Ning_Ding5;~Yulin_Chen1;~Bokai_Xu1;~Yujia_Qin1;~Zhiyuan_Liu1;~Maosong_Sun1;~Bowen_Zhou4;~shengding_hu1", "aff": "Tsinghua University;Tsinghua University;Chinese University of Hong Kong, Shenzhen;Tsinghua University;Tsinghua University;Tsinghua University;JD.com;Tsinghua University", "aff_domain": "tsinghua.edu.cn;tsinghua.edu.cn;link.cuhk.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;jd.com;mail.tsinghua.edu.cn", "position": "PhD student;MS student;Undergrad student;PhD student;Associate Professor;Full Professor;Vice President;PhD student", "bibtex": "@inproceedings{\nding2023enhancing,\ntitle={Enhancing Chat Language Models by Scaling High-quality Instructional Conversations},\nauthor={Ning Ding and Yulin Chen and Bokai Xu and Yujia Qin and Shengding Hu and Zhiyuan Liu and Maosong Sun and Bowen Zhou},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=oEsYs3WRc3}\n}", "github": "", "project": "", "reviewers": "EDtx;ztbK;Pncj", "site": "https://openreview.net/forum?id=oEsYs3WRc3", "pdf_size": 0, "rating": "5;5;5", "confidence": "5;2;4", "excitement": "4;4;4", "reproducibility": "3;3;4", "correctness": "4;3;4", "rating_avg": 5.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 4.0, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 8, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;0000-0002-7709-2543;;;", "linkedin": ";;;yujia-qin-672595181/;;;;", "aff_unique_index": "0;0;1;0;0;0;2;0", "aff_unique_norm": "Tsinghua University;Chinese University of Hong Kong;JD.com", "aff_unique_dep": ";;", "aff_unique_url": 
"https://www.tsinghua.edu.cn;https://www.cuhk.edu.cn;https://www.jd.com", "aff_unique_abbr": "THU;CUHK;JD", "aff_campus_unique_index": "1", "aff_campus_unique": ";Shenzhen", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "oEsuNpkA8d", "title": "Gold: A Global and Local-aware Denoising Framework for Commonsense Knowledge Graph Noise Detection", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Commonsense Knowledge Graphs (CSKGs) are crucial for commonsense reasoning, yet constructing them through human annotations can be costly. As a result, various automatic methods have been proposed to construct CSKG with larger semantic coverage. However, these unsupervised approaches introduce spurious noise that can lower the quality of the resulting CSKG, which cannot be tackled easily by existing denoising algorithms due to the unique characteristics of nodes and structures in CSKGs. To address this issue, we propose Gold (Global and Local-aware Denoising), a denoising framework for CSKGs that incorporates entity semantic information, global rules, and local structural information from the CSKG. Experiment results demonstrate that Gold outperforms all baseline methods in noise detection tasks on synthetic noisy CSKG benchmarks. Furthermore, we show that denoising a real-world CSKG is effective and even benefits the downstream zero-shot commonsense question-answering task. Our code and data are publicly available at https://github.com/HKUST-KnowComp/GOLD.", "keywords": "Knowledege Graph Denoising;Commonsense Reasoning;Question Answering", "primary_area": "", "supplementary_material": "", "author": "Zheye Deng;Weiqi Wang;Zhaowei Wang;Xin Liu;Yangqiu Song", "authorids": "~Zheye_Deng1;~Weiqi_Wang1;~Zhaowei_Wang2;~Xin_Liu9;~Yangqiu_Song1", "gender": "M;M;M;M;M", "homepage": ";https://mighty-weaver.github.io/;https://zhaowei-wang-nlp.github.io/;https://www.cse.ust.hk/~xliucr/;https://www.cse.ust.hk/~yqsong/", "dblp": "248/8937.html;51/5775-1;120/1278-3;76/1820-39.html;86/2159", "google_scholar": ";https://scholar.google.com/citations?hl=zh-CN;5dzojAsAAAAJ;https://scholar.google.com.hk/citations?user=WvC4upQAAAAJ;MdQZ-q8AAAAJ", "or_profile": "~Zheye_Deng1;~Weiqi_Wang1;~Zhaowei_Wang2;~Xin_Liu9;~Yangqiu_Song1", "aff": "Department of Computer Science and Engineering, Hong Kong University of Science and Technology;Hong Kong University of Science and Technology;Department of Computer Science and Engineering, Hong Kong University of Science and Technology;Hong Kong University of Science and Technology;Hong Kong University of Science and Technology", "aff_domain": "cse.ust.hk;ust.hk;cse.ust.hk;ust.hk;ust.hk", "position": "PhD student;PhD student;MS student;PhD student;Associate Professor", "bibtex": "@inproceedings{\ndeng2023gold,\ntitle={Gold: A Global and Local-aware Denoising Framework for Commonsense Knowledge Graph Noise Detection},\nauthor={Zheye Deng and Weiqi Wang and Zhaowei Wang and Xin Liu and Yangqiu Song},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=oEsuNpkA8d}\n}", "github": "", "project": "", "reviewers": "UStH;ys1w;68ji", "site": "https://openreview.net/forum?id=oEsuNpkA8d", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;2", "excitement": "3;3;3", "reproducibility": "3;3;4", "correctness": "2;3;4", "rating_avg": 3.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.0, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 
3.0, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-1617-9805;0000-0001-5539-8181;0000-0001-9610-9526;0000-0002-7818-6090", "linkedin": ";weiqi-wang-a49b5019a/;zhaowei-wang-571943221/;xin-liu-179830143;yqsong/", "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Hong Kong University of Science and Technology", "aff_unique_dep": "Department of Computer Science and Engineering", "aff_unique_url": "https://www.ust.hk", "aff_unique_abbr": "HKUST", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "oOKU31j9Q6", "title": "A Word Sense Distribution-based approach for Semantic Change Prediction", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Semantic Change Detection of words is an important task for various NLP applications that must make time-sensitive predictions.\nSome words are used over time in novel ways to express new meanings, and these new meanings establish themselves as novel senses of existing words.\nOn the other hand, Word Sense Disambiguation (WSD) methods associate ambiguous words with sense ids, depending on the context in which they occur.\nGiven this relationship between WSD and SCD, we explore the possibility of predicting whether a target word has its meaning changed between two corpora collected at different time steps, by comparing the distributions of senses of that word in each corpora.\nFor this purpose, we use pretrained static sense embeddings to automatically annotate each occurrence of the target word in a corpus with a sense id.\nNext, we compute the distribution of sense ids of a target word in a given corpus.\nFinally, we use different divergence or distance measures to quantify the semantic change of the target word across the two given corpora.\nOur experimental results on SemEval 2020 Task 1 dataset show that word sense distributions can be accurately used to predict semantic changes of words in English, German, Swedish and Latin.", "keywords": "Semantic Change Detection;Temporal Semantics;Sense Embeddings", "primary_area": "", "supplementary_material": "", "author": "Xiaohang Tang;Yi Zhou;Taichi Aida;Procheta Sen;Danushka Bollegala", "authorids": "~Xiaohang_Tang2;~Yi_Zhou14;~Taichi_Aida1;~Procheta_Sen1;~Danushka_Bollegala1", "gender": "M;F;M;F;M", "homepage": "https://xiaohang-tang.github.io/;https://aclanthology.org/people/y/yi-zhou/;https://sites.google.com/view/a1da;https://procheta.github.io/sprocheta/;https://danushka.net", "dblp": ";01/1901-19;268/1886.html;185/6249;https://dblp.uni-trier.de/pers/hd/b/Bollegala:Danushka", "google_scholar": "https://scholar.google.com/citations?hl=en;3BdddIMAAAAJ;https://scholar.google.co.jp/citations?user=YumEhloAAAAJ;hRKyQdoAAAAJ;https://scholar.google.co.uk/citations?user=kLqCYLMAAAAJ", "or_profile": "~Xiaohang_Tang2;~Yi_Zhou14;~Taichi_Aida1;~Procheta_Sen1;~Danushka_Bollegala1", "aff": "Xi'an Jiaotong-Liverpool University;Cardiff University;Tokyo Metropolitan University;University of Liverpool;University of Liverpool", "aff_domain": "xjtlu.edu.cn;cardiff.ac.uk;tmu.ac.jp;liverpool.ac.uk;liverpool.ac.uk", "position": "Undergrad student;Postdoc;PhD student;Lecturer;Professor", "bibtex": "@inproceedings{\ntang2023a,\ntitle={A Word Sense Distribution-based approach for Semantic Change Prediction},\nauthor={Xiaohang Tang and Yi Zhou and Taichi Aida and Procheta Sen and Danushka Bollegala},\nbooktitle={The 2023 
Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=oOKU31j9Q6}\n}", "github": "", "project": "", "reviewers": "dbcW;jdV6;LWUC", "site": "https://openreview.net/forum?id=oOKU31j9Q6", "pdf_size": 0, "rating": "2;2;2", "confidence": "5;5;3", "excitement": "3;1;4", "reproducibility": "4;4;4", "correctness": "3;1;4", "rating_avg": 2.0, "confidence_avg": 4.333333333333333, "excitement_avg": 2.6666666666666665, "reproducibility_avg": 4.0, "correctness_avg": 2.6666666666666665, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-2691-9280;0000-0001-7009-8515;;0000-0002-3814-5462;0000-0003-4476-7003", "linkedin": ";yi-zhou-867578210/;;procheta-sen-94086898/?originalSubdomain=uk;danushka-bollegala-6a636516/?originalSubdomain=uk", "aff_unique_index": "0;1;2;3;3", "aff_unique_norm": "Xi'an Jiao Tong-Liverpool University;Cardiff University;Tokyo Metropolitan University;University of Liverpool", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.xjtu.edu.cn/en;https://www.cardiff.ac.uk;https://www.tmuc.ac.jp;https://www.liverpool.ac.uk", "aff_unique_abbr": "XJTLU;Cardiff;TMU;Liv Uni", "aff_campus_unique_index": "0", "aff_campus_unique": "Xi'an;", "aff_country_unique_index": "0;1;2;1;1", "aff_country_unique": "China;United Kingdom;Japan" }, { "id": "oSYifZI06H", "title": "Generative Spoken Language Model based on continuous word-sized audio tokens", "track": "main", "status": "Long Main", "tldr": "", "abstract": "In NLP, text language models based on words or subwords are known to outperform their character-based counterparts. Yet, in the speech community, the standard input of spoken LMs are 20ms or 40ms-long discrete units (shorter than a phoneme). Taking inspiration from word-based LM, we introduce a Generative Spoken Language Model (GSLM) based on word-size continuous-valued audio tokens that can generate diverse and expressive language output. This is obtained by replacing lookup table for lexical types with a Lexical Embedding function, the cross entropy loss by a contrastive loss, and multinomial sampling by k-NN sampling. The resulting model is the first generative language model based on word-size continuous tokens. Its performance is on par with discrete unit GSLMs regarding generation quality as measured by automatic metrics and subjective human judgements. Moreover, it is five times more memory efficient thanks to its large 200ms units. 
In addition, the embeddings before and after the Lexical Embedder are phonetically and semantically interpretable.", "keywords": "spoken language models;speech generation;zerospeech;textless nlp", "primary_area": "", "supplementary_material": "", "author": "Robin Jonathan Algayres;Yossi Adi;Tu Anh Nguyen;Jade Copet;Gabriel Synnaeve;Beno\u00eet Sagot;Emmanuel Dupoux", "authorids": "~Robin_Jonathan_Algayres1;~Yossi_Adi1;~Tu_Anh_Nguyen1;~Jade_Copet1;~Gabriel_Synnaeve1;~Beno\u00eet_Sagot1;~Emmanuel_Dupoux1", "gender": "M;M;M;;M;M;M", "homepage": ";http://adiyoss.github.io/;https://tuanh208.github.io/;;;http://pauillac.inria.fr/~sagot/;http://www.lscp.net/persons/dupoux/", "dblp": "239/8581.html;171/0957.html;37/11121.html;;http://dblp.uni-trier.de/pers/hd/s/Synnaeve:Gabriel;66/1016;41/8160", "google_scholar": "Rc1SZTIAAAAJ;https://scholar.google.co.il/citations?user=4W-HuYYAAAAJ;TzZXAm4AAAAJ;GRMLwjAAAAAJ;wN9rBkcAAAAJ;https://scholar.google.fr/citations?user=HXUT9ZkAAAAJ;https://scholar.google.fr/citations?user=94c1abIAAAAJ", "or_profile": "~Robin_Jonathan_Algayres1;~Yossi_Adi1;~Tu_Anh_Nguyen1;~Jade_Copet1;~Gabriel_Synnaeve1;~Beno\u00eet_Sagot1;~Emmanuel_Dupoux1", "aff": "INRIA;Meta;Meta Facebook;Facebook AI Research;Meta Facebook;Inria;EHESS", "aff_domain": "inria.fr;meta.com;fb.com;facebook.com;fb.com;inria.fr;ehess.fr", "position": "PhD student;Research Scientist;PhD student;Research Engineering Manager;Research Scientist;Research Director;Full Professor", "bibtex": "@inproceedings{\nalgayres2023generative,\ntitle={Generative Spoken Language Model based on continuous word-sized audio tokens},\nauthor={Robin Jonathan Algayres and Yossi Adi and Tu Anh Nguyen and Jade Copet and Gabriel Synnaeve and Beno{\\^\\i}t Sagot and Emmanuel Dupoux},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=oSYifZI06H}\n}", "github": "", "project": "", "reviewers": "BKbZ;esLy;Uy5K", "site": "https://openreview.net/forum?id=oSYifZI06H", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;3;3", "excitement": "4;4;3", "reproducibility": "4;4;5", "correctness": "4;4;4", "rating_avg": 5.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.333333333333333, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0003-2237-3898;0000-0002-9623-042X;;;0000-0002-0107-8526;0000-0002-7814-2952", "linkedin": "robin-algayres/;yossi-adi-31a32858?trk=nav_responsive_tab_profile_pic;nguyentuanh208/;jadecopet/?locale=en_US;;beno\u00eet-sagot-4731735/;emmanuel-dupoux-18034055/", "aff_unique_index": "0;1;1;1;1;0;2", "aff_unique_norm": "INRIA;Meta;Ecole des Hautes Etudes en Sciences Sociales", "aff_unique_dep": ";Meta Platforms, Inc.;", "aff_unique_url": "https://www.inria.fr;https://meta.com;https://www.ehess.fr", "aff_unique_abbr": "INRIA;Meta;EHESS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1;1;0;0", "aff_country_unique": "France;United States" }, { "id": "oTtA9uIlR8", "title": "Detecting Syntactic Change with Pre-trained Transformer Models", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "We investigate the ability of Transformer-based language models to find syntactic differences between the English of the early 1800s and that of the late 1900s. 
First, we show that a fine-tuned BERT model can distinguish between text from these two periods using syntactic information only; to show this, we employ a strategy to hide semantic information from the text. Second, we make further use of fine-tuned BERT models to identify specific instances of syntactic change and specific words for which a new part of speech was introduced. To do this, we employ an automatic part-of-speech (POS) tagger and use it to train corpora-specific taggers based only on BERT representations pretrained on different corpora. Notably, our methods of identifying specific candidates for syntactic change avoid using any automatic POS tagger on old text, where its performance may be unreliable; instead, our methods only use untagged old text together with tagged modern text. We examine samples and distributional properties of the model output to validate automatically identified cases of syntactic change. Finally, we use our techniques to confirm the historical rise of the progressive construction, a known example of syntactic change.", "keywords": "BERT;Transformers;language change;syntax;syntactic change", "primary_area": "", "supplementary_material": "", "author": "Liwen Hou;David A. Smith", "authorids": "~Liwen_Hou1;~David_A._Smith1", "gender": ";M", "homepage": ";https://khoury.northeastern.edu/home/dasmith/", "dblp": ";45/3159", "google_scholar": ";", "or_profile": "~Liwen_Hou1;~David_A._Smith1", "aff": ";Northeastern University", "aff_domain": ";northeastern.edu", "position": ";Associate Professor", "bibtex": "@inproceedings{\nhou2023detecting,\ntitle={Detecting Syntactic Change with Pre-trained Transformer Models},\nauthor={Liwen Hou and David A. Smith},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=oTtA9uIlR8}\n}", "github": "", "project": "", "reviewers": "kxVY;wgbS;mmN7", "site": "https://openreview.net/forum?id=oTtA9uIlR8", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;3;3", "excitement": "3;4;3", "reproducibility": "4;3;3", "correctness": "3;4;3", "rating_avg": 3.0, "confidence_avg": 3.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";", "linkedin": ";", "aff_unique_index": "0", "aff_unique_norm": "Northeastern University", "aff_unique_dep": "", "aff_unique_url": "https://www.northeastern.edu", "aff_unique_abbr": "NEU", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "id": "oVAod8GRI9", "title": "Image Manipulation via Multi-Hop Instructions - A New Dataset and Weakly-Supervised Neuro-Symbolic Approach", "track": "main", "status": "Long Main", "tldr": "", "abstract": "We are interested in image manipulation via natural language text -- a task that is useful for multiple AI applications but requires complex reasoning over multi-modal spaces. We extend recently proposed Neuro Symbolic Concept Learning (NSCL), which has been quite effective for the task of Visual Question Answering (VQA), for the task of image manipulation. Our system referred to as NeuroSIM can perform complex multi-hop reasoning over multi-object scenes and only requires weak supervision in the form of annotated data for VQA. 
NeuroSIM parses an instruction into a symbolic program, based on a Domain Specific Language (DSL) comprising object attributes and manipulation operations, which guides its execution. We create a new dataset for the task, and extensive experiments demonstrate that NeuroSIM is highly competitive with or beats SOTA baselines that make use of supervised data for manipulation.", "keywords": "Neuro-Symbolic Reasoning;Natural Language Guided Image Manipulation;Visual Question Answering;Weakly Supervised Learning", "primary_area": "", "supplementary_material": "", "author": "Harman Singh;Poorva Garg;Mohit Gupta;Kevin Shah;Ashish Goswami;Satyam Modi;Arnab Kumar Mondal;Dinesh Khandelwal;Dinesh Garg;Parag Singla", "authorids": "~Harman_Singh1;~Poorva_Garg1;~Mohit_Gupta6;~Kevin_Shah1;~Ashish_Goswami1;~Satyam_Modi1;~Arnab_Kumar_Mondal2;~Dinesh_Khandelwal2;~Dinesh_Garg1;~Parag_Singla1", "gender": "M;;M;M;M;M;M;M;M;M", "homepage": ";;;;https://alphacoder01.github.io/;;;https://research.ibm.com/people/dinesh-khandelwal;https://researcher.watson.ibm.com/researcher/view.php?person=in-garg.dinesh;http://www.cse.iitd.ac.in/~parags", "dblp": "162/5054.html;;;;154/8362;362/7625;;177/0164;https://dblp.uni-trier.de/pers/g/Garg:Dinesh.html;14/167", "google_scholar": "BanlVLYAAAAJ;;;;https://scholar.google.com/citations?hl=en;;MZ8N49AAAAAJ;Pi-SqXwAAAAJ;https://scholar.google.com.tw/citations?user=YrU_ZDkAAAAJ;https://scholar.google.co.in/citations?user=V49BsgMAAAAJ", "or_profile": "~Harman_Singh1;~Poorva_Garg1;~Mohit_Gupta6;~Kevin_Shah1;~Ashish_Goswami1;~Satyam_Modi1;~Arnab_Kumar_Mondal2;~Dinesh_Khandelwal2;~Dinesh_Garg1;~Parag_Singla1", "aff": "Meta;;;Indian Institute of Technology Delhi;Indian Institute of Technology, Delhi;Indian Institute of Technology, Delhi;Fujitsu Research and Development Center Co. 
Ltd.;International Business Machines", "aff_domain": "fb.com;;;iitd.ac.in;iitd.ac.in;iitd.ac.in;fujitsu.com;ibm.com;;iitd.ac.in", "position": "AI Resident;;;Undergrad student;PhD student;Undergrad student;Researcher;Researcher;;Associate Professor", "bibtex": "@inproceedings{\nsingh2023image,\ntitle={Image Manipulation via Multi-Hop Instructions - A New Dataset and Weakly-Supervised Neuro-Symbolic Approach},\nauthor={Harman Singh and Poorva Garg and Mohit Gupta and Kevin Shah and Ashish Goswami and Satyam Modi and Arnab Kumar Mondal and Dinesh Khandelwal and Dinesh Garg and Parag Singla},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=oVAod8GRI9}\n}", "github": "", "project": "", "reviewers": "Q5BV;4Vv2;La5y", "site": "https://openreview.net/forum?id=oVAod8GRI9", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;5;2", "excitement": "4;3;4", "reproducibility": "3;4;4", "correctness": "3;3;3", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 10, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-3970-6276;;;;;;0000-0001-7297-374X;;;", "linkedin": "harman-singh-4243ab180/;;mohit-gupta-295b42133/;kevin-shah-5527a178/;ashish-goswami-131795188/;satyammodi;arnab-mondal-a4448a18/;dinesh-khandelwal-68689420/;dingarg/;", "aff_unique_index": "0;1;1;1;2;3;1", "aff_unique_norm": "Meta;Indian Institute of Technology Delhi;Fujitsu Research and Development Center;International Business Machines Corporation", "aff_unique_dep": "Meta Platforms, Inc.;;Research and Development;", "aff_unique_url": "https://meta.com;https://www.iitd.ac.in;https://www.fujitsu.com/global/;https://www.ibm.com", "aff_unique_abbr": "Meta;IIT Delhi;Fujitsu R&D;IBM", "aff_campus_unique_index": "1;1;1;1", "aff_campus_unique": ";Delhi", "aff_country_unique_index": "0;1;1;1;2;0;1", "aff_country_unique": "United States;India;Japan" }, { "id": "oVJXUvXT9b", "title": "ASPIRO: Any-shot Structured Parsing-error-Induced ReprOmpting for Consistent Data-to-Text Generation", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "We present ASPIRO, an approach for structured data verbalisation into short template sentences in zero to few-shot settings. Unlike previous methods, our approach prompts Large Language Models (LLMs) to directly produce entity-agnostic templates, rather than relying on LLMs to faithfully copy the given example entities, or validating/crafting the templates manually. We incorporate LLM re-prompting, triggered by algorithmic parsing checks, as well as the PARENT metric induced consistency validation to identify and rectify template generation problems in real-time. ASPIRO, compared to direct LLM output, averages 66% parsing error rate reduction in generated verbalisations of RDF triples on the DART dataset. 
Our best 5-shot text-davinci-003 setup, scoring BLEU of 50.62, METEOR of 45.16, BLEURT of 0.82, NUBIA of 0.87, and PARENT of 0.8962 on the Rel2Text dataset, competes effectively with recent fine-tuned pretrained language models.", "keywords": "Large Language Models;Data-to-Text;data disambiguation;structured data verbalisation;few-shot learning;multi-shot re-prompting", "primary_area": "", "supplementary_material": "", "author": "Martin Vejvar;Yasutaka Fujimoto", "authorids": "~Martin_Vejvar1;~Yasutaka_Fujimoto1", "gender": "M;", "homepage": ";http://www.fujilab.dnj.ac.jp/", "dblp": ";", "google_scholar": "Nwh1_osAAAAJ;https://scholar.google.co.jp/citations?user=rS98PEAAAAAJ", "or_profile": "~Martin_Vejvar1;~Yasutaka_Fujimoto1", "aff": "Yokohama National University, Tokyo Institute of Technology;Yokohama National University, Tokyo Institute of Technology", "aff_domain": "ynu.ac.jp;ynu.ac.jp", "position": "Researcher;Full Professor", "bibtex": "@inproceedings{\nvejvar2023aspiro,\ntitle={{ASPIRO}: Any-shot Structured Parsing-error-Induced ReprOmpting for Consistent Data-to-Text Generation},\nauthor={Martin Vejvar and Yasutaka Fujimoto},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=oVJXUvXT9b}\n}", "github": "", "project": "", "reviewers": "eDmS;ShqH;4Jj1", "site": "https://openreview.net/forum?id=oVJXUvXT9b", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;3;3", "excitement": "3;3;3", "reproducibility": "3;4;3", "correctness": "3;3;3", "rating_avg": 3.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.0, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.0, "replies_avg": 10, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0001-9033-3172;", "linkedin": "martin-vejvar-155686250/;", "aff_unique_index": "0;0", "aff_unique_norm": "Yokohama National University", "aff_unique_dep": "", "aff_unique_url": "https://www.yokohama-nu.ac.jp", "aff_unique_abbr": "YNU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Japan" }, { "id": "oYRlrDN6uj", "title": "Manifold-Preserving Transformers are Effective for Short-Long Range Encoding", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Multi-head self-attention-based Transformers have shown promise in different learning tasks. Albeit these models exhibit significant improvement in understanding short-term and long-term contexts from sequences, encoders of Transformers and their variants fail to preserve layer-wise contextual information. Transformers usually project tokens onto sparse manifolds and fail to preserve mathematical equivalence among the token representations. In this work, we propose TransJect, an encoder model that guarantees a theoretical bound for layer-wise distance preservation between a pair of tokens. We propose a simple alternative to dot-product attention to ensure Lipschitz continuity. This allows TransJect to learn injective mappings to transform token representations to different manifolds with similar topology and preserve Euclidean distance between every pair of tokens in subsequent layers. Evaluations across multiple benchmark short- and long-sequence classification tasks show maximum improvements of 6.8% and 5.9%, respectively, over the variants of Transformers. Additionally, TransJect displays 79% better performance than Transformer on the language modeling task. 
We further highlight the shortcomings of multi-head self-attention from the statistical physics viewpoint. Although multi-head self-attention was incepted to learn different abstraction levels within the networks, our empirical analyses suggest that different attention heads learn randomly and unorderly. In contrast, TransJect adapts a mixture of experts for regularization; these experts are more orderly and balanced and learn different sparse representations from the input sequences. TransJect exhibits very low entropy and can be efficiently scaled to larger depths.", "keywords": "Orthogonal attention;Lipschitz;Entropic Transformer", "primary_area": "", "supplementary_material": "", "author": "Ayan Sengupta;Md Shad Akhtar;Tanmoy Chakraborty", "authorids": "~Ayan_Sengupta1;~Md_Shad_Akhtar1;~Tanmoy_Chakraborty2", "gender": "M;;M", "homepage": "https://victor7246.github.io/;;http://tanmoychak.com", "dblp": ";184/8579.html;65/2136-2.html", "google_scholar": "90EGfboAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.co.in/citations?user=C5S9JnIAAAAJ", "or_profile": "~Ayan_Sengupta1;~Md_Shad_Akhtar1;~Tanmoy_Chakraborty2", "aff": "Indian Institute of Technology, Delhi;Indraprastha Institute of Information Technology, Delhi;Indian Institute of Technology, Delhi", "aff_domain": "iitd.ac.in;iiitd.ac.in;iitd.ac.in", "position": "PhD student;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nsengupta2023manifoldpreserving,\ntitle={Manifold-Preserving Transformers are Effective for Short-Long Range Encoding},\nauthor={Ayan Sengupta and Md Shad Akhtar and Tanmoy Chakraborty},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=oYRlrDN6uj}\n}", "github": "", "project": "", "reviewers": "Vm5s;9LuQ;p61L", "site": "https://openreview.net/forum?id=oYRlrDN6uj", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;3;4", "excitement": "3;4;3", "reproducibility": "4;3;3", "correctness": "4;4;3", "rating_avg": 3.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0002-0210-0369", "linkedin": ";;tanmoy-chakraborty-89553324/", "aff_unique_index": "0;1;0", "aff_unique_norm": "Indian Institute of Technology Delhi;Indraprastha Institute of Information Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.iitdelhi.ac.in;http://www.iiitd.ac.in", "aff_unique_abbr": "IIT Delhi;IIIT-D", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Delhi", "aff_country_unique_index": "0;0;0", "aff_country_unique": "India" }, { "id": "oYs7h2dE2e", "title": "CombLM: Adapting Black-Box Language Models through Small Fine-Tuned Models", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Methods for adapting language models (LMs) to new tasks and domains have traditionally assumed white-box access to the model, and work by modifying its parameters.\nHowever, this is incompatible with a recent trend in the field, where the highest quality models are only available as black-boxes through inference APIs. 
Even when the model weights are available, the computational cost of fine-tuning large LMs can be prohibitive for most practitioners.\nIn this work, we present a lightweight method for adapting large LMs to new domains and tasks, assuming no access to their weights or intermediate activations. Our approach fine-tunes a small white-box LM and combines it with the large black-box LM at the probability level through a small network, learned on a small validation set. We validate our approach by adapting a large LM (OPT-30B) to several domains and a downstream task (machine translation), observing improved performance in all cases, of up to 9%, while using a domain expert 23x smaller.", "keywords": "black-box;language models;large language models;adaptation;domain adaptation", "primary_area": "", "supplementary_material": "", "author": "Aitor Ormazabal;Mikel Artetxe;Eneko Agirre", "authorids": "~Aitor_Ormazabal1;~Mikel_Artetxe1;~Eneko_Agirre1", "gender": "M;M;M", "homepage": ";http://www.mikelartetxe.com;http://ixa.si.ehu.eus/eneko", "dblp": "243/3370;168/0354;a/EnekoAgirre", "google_scholar": "hh8hYmoAAAAJ;N5InzP8AAAAJ;https://scholar.google.es/citations?user=kSuqts0AAAAJ", "or_profile": "~Aitor_Ormazabal1;~Mikel_Artetxe1;~Eneko_Agirre1", "aff": "Reka AI;Facebook AI Research;University of the Basque Country (UPV/EHU)", "aff_domain": "reka.ai;fb.com;ehu.eus", "position": "Intern;Research Scientist;Full Professor", "bibtex": "@inproceedings{\normazabal2023comblm,\ntitle={Comb{LM}: Adapting Black-Box Language Models through Small Fine-Tuned Models},\nauthor={Aitor Ormazabal and Mikel Artetxe and Eneko Agirre},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=oYs7h2dE2e}\n}", "github": "", "project": "", "reviewers": "BRRy;32aC;ngzh", "site": "https://openreview.net/forum?id=oYs7h2dE2e", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;5;4", "excitement": "4;3;3", "reproducibility": "4;4;2", "correctness": "4;3;3", "rating_avg": 4.0, "confidence_avg": 4.333333333333333, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.3333333333333335, "replies_avg": 9, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": "https://linkedin.com/in/aitor-ormazabal-69495a1a9;artetxem;", "aff_unique_index": "0;1;2", "aff_unique_norm": "Reka AI;Meta;University of the Basque Country", "aff_unique_dep": ";Facebook AI Research;", "aff_unique_url": "https://www.reka.ai;https://research.facebook.com;https://www.ehu.eus/en", "aff_unique_abbr": "Reka AI;FAIR;UPV/EHU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1", "aff_country_unique": "United States;Spain" }, { "id": "oaNa4rNIpU", "title": "HistAlign: Improving Context Dependency in Language Generation by Aligning with History", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Language models (LMs) can generate hallucinations and incoherent outputs, which highlights their weak context dependency. Cache-LMs, which augment LMs with a memory of recent history, can increase context dependency and have shown remarkable performance in diverse language generation tasks. However, we find that even with training, the performance gain stemming from the cache component of current cache-LMs is suboptimal due to the misalignment between the current hidden states and those stored in the memory. 
In this work, we present HistAlign, a new training approach to ensure good cache alignment such that the model receives useful signals from the history. We first prove our concept on a simple and synthetic task where the memory is essential for correct predictions, and we show that the cache component of HistAlign is better aligned and improves overall performance. Next, we evaluate HistAlign on diverse downstream language generation tasks, including prompt continuation, abstractive summarization, and data-to-text. We demonstrate that HistAlign improves text coherence and faithfulness in open-ended and conditional generation settings respectively. HistAlign is also generalizable across different model families, showcasing its strength in improving context dependency of LMs in diverse scenarios.", "keywords": "generation;language models;summarization;data-to-text", "primary_area": "", "supplementary_material": "", "author": "David Wan;Shiyue Zhang;Mohit Bansal", "authorids": "~David_Wan1;~Shiyue_Zhang1;~Mohit_Bansal2", "gender": "M;F;M", "homepage": ";https://www.cs.unc.edu/~shiyue/;https://www.cs.unc.edu/~mbansal/", "dblp": "17/4695.html;186/8393;32/5243.html", "google_scholar": "oHznAAYAAAAJ;co9KUGQAAAAJ;DN8QtscAAAAJ", "or_profile": "~David_Wan1;~Shiyue_Zhang1;~Mohit_Bansal2", "aff": "Department of Computer Science, University of North Carolina at Chapel Hill;University of North Carolina, Chapel Hill;University of North Carolina at Chapel Hill", "aff_domain": "cs.unc.edu;unc.edu;unc.edu", "position": "PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nwan2023histalign,\ntitle={HistAlign: Improving Context Dependency in Language Generation by Aligning with History},\nauthor={David Wan and Shiyue Zhang and Mohit Bansal},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=oaNa4rNIpU}\n}", "github": "", "project": "", "reviewers": "GMge;gaqX;DVaq", "site": "https://openreview.net/forum?id=oaNa4rNIpU", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;3;4", "excitement": "4;4;4", "reproducibility": "5;4;4", "correctness": "4;4;4", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 4.0, "reproducibility_avg": 4.333333333333333, "correctness_avg": 4.0, "replies_avg": 10, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": ";;", "aff_unique_index": "0;1;1", "aff_unique_norm": "University of North Carolina at Chapel Hill;University of North Carolina", "aff_unique_dep": "Department of Computer Science;", "aff_unique_url": "https://www.unc.edu;https://www.unc.edu", "aff_unique_abbr": "UNC Chapel Hill;UNC", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Chapel Hill", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "odPKQiL2X8", "title": "Exploring All-In-One Knowledge Distillation Framework for Neural Machine Translation", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Conventional knowledge distillation(KD) approaches are commonly employed to compress neural machine translation(NMT) models. However, they only obtain one lightweight student each time. Consequently, we have to conduct KD multiple times when different students are required at the same time, which could be resource-intensive. Additionally, these students are individually optimized, and thus lack interactions with each other, leading to their potential not being fully exerted. 
In this work, we propose a novel All-In-One Knowledge Distillation(AIO-KD) framework for NMT, which generates multiple satisfactory students at once. Under AIO-KD, we first randomly extract fewer-layer subnetworks from the teacher as the sample students. Then, we jointly optimize the teacher and these students, where the students simultaneously learn the knowledge from the teacher and interact with other students via mutual learning. When utilized, we re-extract the candidate students, satisfying the specifications of various devices. Particularly, we adopt carefully-designed strategies for AIO-KD: 1) we dynamically detach gradients to prevent poorly-performed students from negatively affecting the teacher during the knowledge transfer, which could subsequently impact other students; 2) we design a two-stage mutual learning strategy, which alleviates the negative impacts of poorly-performed students on the early-stage student interactions. Extensive experiments and in-depth analyses on three benchmarks demonstrate the effectiveness and eco-friendliness of AIO-KD. Our source code is available at https://github.com/DeepLearnXMU/AIO-KD.", "keywords": "neural machine translation;efficient knowledge distillation;multi-model scenario", "primary_area": "", "supplementary_material": "", "author": "Zhongjian Miao;Wen Zhang;Jinsong Su;Xiang Li;Jian Luan;Yidong Chen;Bin Wang;Min zhang", "authorids": "~Zhongjian_Miao1;~Wen_Zhang6;~Jinsong_Su1;~Xiang_Li30;~Jian_Luan1;~Yidong_Chen2;~Bin_Wang13;~Min_zhang14", "gender": ";;M;M;M;M;M;M", "homepage": "http://baidu.com;;https://cdmc.xmu.edu.cn/info/1010/1054.htm;;;http://nlp.xmu.edu.cn/teachers/ydchen/index_en.html;;https://zhangmin-nlp-ai.github.io/", "dblp": ";43/2368-9;05/9013;;61/3233-1.html;11/1492;13/1898-4;83/5342-?", "google_scholar": ";V2FY0VoAAAAJ;;DMfYmIEAAAAJ;6Z8RUi4AAAAJ;;tDajnHEAAAAJ;https://scholar.google.com/citations?", "or_profile": "~Zhongjian_Miao1;~Wen_Zhang6;~Jinsong_Su1;~Xiang_Li30;~Jian_Luan1;~Yidong_Chen2;~Bin_Wang13;~Min_zhang14", "aff": "Xiamen University;Xiaomi AI Lab;Xiamen University;Xiaomi AI Lab;Xiaomi Corporation;Xiamen University;AI Lab, Xiaomi Inc.;Harbin Institute of Technology", "aff_domain": "xmu.edu.cn;xiaomi.com;xmu.edu.cn;xiaomi.com;xiaomi.com;xmu.edu.cn;xiaomi.com;hit.edu.cn", "position": "MS student;Engineer;Researcher;Researcher;Principal Researcher;Associate Professor;Principal Researcher;Full Professor", "bibtex": "@inproceedings{\nmiao2023exploring,\ntitle={Exploring All-In-One Knowledge Distillation Framework for Neural Machine Translation},\nauthor={Zhongjian Miao and Wen Zhang and Jinsong Su and Xiang Li and Jian Luan and Yidong Chen and Bin Wang and Min zhang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=odPKQiL2X8}\n}", "github": "", "project": "", "reviewers": "s6c5;TCJa;gH6v", "site": "https://openreview.net/forum?id=odPKQiL2X8", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;3;4", "excitement": "3;3;3", "reproducibility": "4;4;4", "correctness": "4;4;4", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.0, "reproducibility_avg": 4.0, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 8, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;;;0000-0002-3895-5510", "linkedin": ";;;;https://www.linkedin.cn/incareer/in/jian-luan-58b5a428;;;", "aff_unique_index": "0;1;0;1;1;0;2;3", "aff_unique_norm": "Xiamen University;Xiaomi Corporation;Xiaomi 
Inc.;Harbin Institute of Technology", "aff_unique_dep": ";Xiaomi AI Lab;AI Lab;", "aff_unique_url": "https://www.xmu.edu.cn;https://www.xiaomi.com;https://www.xiaomi.com;http://www.hit.edu.cn/", "aff_unique_abbr": "XMU;Xiaomi;Xiaomi;HIT", "aff_campus_unique_index": "1", "aff_campus_unique": ";Harbin", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "oeZiXoCHgq", "title": "ACT-SQL: In-Context Learning for Text-to-SQL with Automatically-Generated Chain-of-Thought", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Recently Large Language Models (LLMs) have been proven to have strong abilities in various domains and tasks. We study the problem of prompt designing in the text-to-SQL task and attempt to improve the LLMs' reasoning ability when generating SQL queries. Besides the trivial few-shot in-context learning setting, we design our chain-of-thought (CoT) prompt with a similar method to schema linking. We provide a method named ACT-SQL to automatically generate auto-CoT exemplars and thus the whole process doesn't need manual labeling. Our approach is cost-saving since we only use the LLMs' API call once when generating one SQL query. Furthermore, we extend our in-context learning method to the multi-turn text-to-SQL task. The experiment results show that the LLMs' performance can benefit from our ACT-SQL approach. Our approach achieves SOTA performance on the Spider dev set among existing in-context learning approaches.", "keywords": "text-to-SQL;large language models;in-context learning;chain of thought", "primary_area": "", "supplementary_material": "", "author": "Hanchong Zhang;Ruisheng Cao;Lu Chen;Hongshen Xu;Kai Yu", "authorids": "~Hanchong_Zhang1;~Ruisheng_Cao1;~Lu_Chen3;~Hongshen_Xu1;~Kai_Yu3", "gender": "M;M;M;M;M", "homepage": ";https://rhythmcao.github.io/;https://coai-sjtu.github.io;https://speechlab.sjtu.edu.cn/members/hongshen-xu;https://x-lance.sjtu.edu.cn/~kaiyu/", "dblp": "348/6967;244/9541;69/157-2;314/8140;197/1322-4", "google_scholar": "4xNsDNgAAAAJ;NdK881sAAAAJ;https://scholar.google.ca/citations?user=Fb3jWaYAAAAJ;;https://scholar.google.com/citations?hl=en", "or_profile": "~Hanchong_Zhang1;~Ruisheng_Cao1;~Lu_Chen3;~Hongshen_Xu1;~Kai_Yu3", "aff": "Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University", "aff_domain": "sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn", "position": "MS student;PhD student;Assistant Professor;PhD student;Full Professor", "bibtex": "@inproceedings{\nzhang2023actsql,\ntitle={{ACT}-{SQL}: In-Context Learning for Text-to-{SQL} with Automatically-Generated Chain-of-Thought},\nauthor={Hanchong Zhang and Ruisheng Cao and Lu Chen and Hongshen Xu and Kai Yu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=oeZiXoCHgq}\n}", "github": "", "project": "", "reviewers": "tJPB;sUqE;5VBy", "site": "https://openreview.net/forum?id=oeZiXoCHgq", "pdf_size": 0, "rating": "4;4;4", "confidence": "5;4;4", "excitement": "3;3;3", "reproducibility": "4;4;4", "correctness": "2;3;4", "rating_avg": 4.0, "confidence_avg": 4.333333333333333, "excitement_avg": 3.0, "reproducibility_avg": 4.0, "correctness_avg": 3.0, "replies_avg": 10, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0003-1152-4355;0000-0003-4635-4368;;0000-0002-6770-6564;0000-0002-7102-9826", "linkedin": ";;;;", 
"aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Shanghai Jiao Tong University", "aff_unique_dep": "", "aff_unique_url": "https://www.sjtu.edu.cn", "aff_unique_abbr": "SJTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "ogh9vskMDH", "title": "Open-Ended Instructable Embodied Agents with Memory-Augmented Large Language Models", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Pre-trained and frozen LLMs can effectively map simple scene re-arrangement instructions to programs over a robot's visuomotor functions through appropriate few-shot example prompting. To parse open-domain natural language and adapt to a user's idiosyncratic procedures, not known during prompt engineering time, fixed prompts fall short. In this paper, we introduce HELPER, an embodied agent equipped with an external memory of language-program pairs that parses free-form human-robot dialogue into action programs through retrieval-augmented LLM prompting: relevant memories are retrieved based on the current dialogue, instruction, correction or VLM description, and used as in-context prompt examples for LLM querying. The memory is expanded during deployment to include pairs of user's language and action plans, to assist future inferences and personalize them to the user's language and routines. HELPER sets a new state-of-the-art in the TEACh benchmark in both Execution from Dialog History (EDH) and Trajectory from Dialogue (TfD), with 1.7x improvement over the previous SOTA for TfD. Our models, code and video results can be found in our project's website: https://helper-agent-llm.github.io.", "keywords": "Task Planning;Embodied AI;LLMs;Robotics", "primary_area": "", "supplementary_material": "", "author": "Gabriel Herbert Sarch;Yue Wu;Michael J. Tarr;Katerina Fragkiadaki", "authorids": "~Gabriel_Herbert_Sarch1;~Yue_Wu17;~Michael_J._Tarr1;~Katerina_Fragkiadaki1", "gender": "M;M;F;M", "homepage": "https://gabesarch.me/;https://www.yuewu.ml;https://www.cs.cmu.edu/~katef/;https://tarrlab.org", "dblp": "280/0151;41/5979;21/8780;36/1880", "google_scholar": "9rYWAhsAAAAJ;LcrSIhgAAAAJ;FWp7728AAAAJ;O8ALPlkAAAAJ", "or_profile": "~Gabriel_Herbert_Sarch1;~Yue_Wu17;~Katerina_Fragkiadaki1;~Michael_Tarr1", "aff": "Carnegie Mellon University;Microsoft Research;Carnegie Mellon University;Carnegie Mellon University", "aff_domain": "cmu.edu;research.microsoft.com;cmu.edu;cmu.edu", "position": "PhD student;Intern;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nsarch2023openended,\ntitle={Open-Ended Instructable Embodied Agents with Memory-Augmented Large Language Models},\nauthor={Gabriel Herbert Sarch and Yue Wu and Michael J. 
Tarr and Katerina Fragkiadaki},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=ogh9vskMDH}\n}", "github": "", "project": "", "reviewers": "31Xh;hr3J;xAvR", "site": "https://openreview.net/forum?id=ogh9vskMDH", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;3", "excitement": "4;2;3", "reproducibility": "4;2;2", "correctness": "4;3;3", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.0, "reproducibility_avg": 2.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0000-0003-4724-1744", "linkedin": ";;;michael-tarr-ab078046/", "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Carnegie Mellon University;Microsoft", "aff_unique_dep": ";Microsoft Research", "aff_unique_url": "https://www.cmu.edu;https://www.microsoft.com/en-us/research", "aff_unique_abbr": "CMU;MSR", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "ojgwuBVokp", "title": "Random Entity Quantization for Parameter-Efficient Compositional Knowledge Graph Representation", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Representation Learning on Knowledge Graphs (KGs) is essential for downstream tasks. \nThe dominant approach, KG Embedding (KGE), represents entities with independent vectors and faces the scalability challenge. \nRecent studies propose an alternative way for parameter efficiency, which represents entities by composing entity-corresponding codewords matched from predefined small-scale codebooks. \nWe refer to the process of obtaining corresponding codewords of each entity as entity quantization, for which previous works have designed complicated strategies. 
\nSurprisingly, this paper shows that simple random entity quantization can achieve similar results to current strategies.\nWe analyze this phenomenon and reveal that entity codes, the quantization outcomes for expressing entities, have higher entropy at the code level and Jaccard distance at the codeword level under random entity quantization.\nTherefore, different entities become more easily distinguished, facilitating effective KG representation.\nThe above results show that current quantization strategies are not critical for KG representation, and there is still room for improvement in entity distinguishability beyond current strategies.", "keywords": "knowledge graph;knowledge representation", "primary_area": "", "supplementary_material": "", "author": "Jiaang Li;Quan Wang;Yi Liu;Licheng Zhang;Zhendong Mao", "authorids": "~Jiaang_Li1;~Quan_Wang7;~Yi_Liu14;~Licheng_Zhang1;~Zhendong_Mao1", "gender": "Not Specified;F;M;M;", "homepage": "https://github.com/JiaangL;;;;", "dblp": "340/3968-1;;;168/0818;", "google_scholar": ";l2yEbhAAAAAJ;n-RDhCQAAAAJ;FzMltfYAAAAJ;", "or_profile": "~Jiaang_Li1;~Quan_Wang7;~Yi_Liu14;~Licheng_Zhang1;~Zhendong_Mao1", "aff": "University of Science and Technology of China;Beijing University of Posts and Telecommunications;People's Daily Online;University of Science and Technology of China;", "aff_domain": "ustc.edu.cn;bupt.edu.cn;people.cn;ustc.edu;", "position": "MS student;Associate Professor;Researcher;PhD student;", "bibtex": "@inproceedings{\nli2023random,\ntitle={Random Entity Quantization for Parameter-Efficient Compositional Knowledge Graph Representation},\nauthor={Jiaang Li and Quan Wang and Yi Liu and Licheng Zhang and Zhendong Mao},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=ojgwuBVokp}\n}", "github": "", "project": "", "reviewers": "Y2GK;B12b;ummp", "site": "https://openreview.net/forum?id=ojgwuBVokp", "pdf_size": 0, "rating": "5;5;5", "confidence": "3;4;4", "excitement": "4;3;4", "reproducibility": "4;4;4", "correctness": "4;4;3", "rating_avg": 5.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.0, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0009-0001-2795-5478;0000-0001-8090-4883;", "linkedin": ";;;;", "aff_unique_index": "0;1;2;0", "aff_unique_norm": "University of Science and Technology of China;Beijing University of Posts and Telecommunications;People's Daily Online", "aff_unique_dep": ";;", "aff_unique_url": "http://www.ustc.edu.cn;http://www.bupt.edu.cn/;http://en.people.cn/", "aff_unique_abbr": "USTC;BUPT;", "aff_campus_unique_index": "1", "aff_campus_unique": ";Beijing", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "okV4KG4kMg", "title": "Can Language Models Laugh at YouTube Short-form Videos?", "track": "main", "status": "Long Main", "tldr": "", "abstract": "As short-form funny videos on social networks are gaining popularity, it becomes demanding for AI models to understand them for better communication with humans. Unfortunately, previous video humor datasets target specific domains such as speeches or sitcoms, and mostly focus on verbal cues.\nWe curate a user-generated dataset of 10K multimodal funny videos from YouTube, called ExFunTube. Using a video filtering pipeline with GPT-3.5, we verify both verbal and visual elements contributing to humor. 
After filtering, we annotate each video with timestamps and text explanations for funny moments. Our ExFunTube is unique over existing datasets in that our videos cover a wide range of domains with various types of humor that necessitate a multimodal understanding of the content.\nAlso, we develop a zero-shot video-to-text prompting to maximize video humor understanding of large language models (LLMs). With three different evaluation methods using automatic scores, rationale quality experiments, and human evaluations, we show that our prompting significantly improves LLMs' ability for humor explanation.", "keywords": "video;humor;explanation;youtube;short-form videos;funny", "primary_area": "", "supplementary_material": "", "author": "Dayoon Ko;Sangho Lee;Gunhee Kim", "authorids": "~Dayoon_Ko1;~Sangho_Lee1;~Gunhee_Kim1", "gender": "F;M;M", "homepage": "https://dayoon-ko.github.io/;https://sangho-vision.github.io/;http://vision.snu.ac.kr/gunhee/", "dblp": "359/3561;17/5702-8;45/115", "google_scholar": "W8q7tT4AAAAJ;Lq8MN6wAAAAJ;https://scholar.google.co.kr/citations?user=CiSdOV0AAAAJ", "or_profile": "~Dayoon_Ko1;~Sangho_Lee1;~Gunhee_Kim1", "aff": "Seoul National University;Seoul National University;Seoul National University", "aff_domain": "snu.ac.kr;snu.ac.kr;snu.ac.kr", "position": "MS student;PhD student;Full Professor", "bibtex": "@inproceedings{\nko2023can,\ntitle={Can Language Models Laugh at YouTube Short-form Videos?},\nauthor={Dayoon Ko and Sangho Lee and Gunhee Kim},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=okV4KG4kMg}\n}", "github": "", "project": "", "reviewers": "gSbC;k1U2;3agr", "site": "https://openreview.net/forum?id=okV4KG4kMg", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;4", "excitement": "4;4;2", "reproducibility": "4;4;2", "correctness": "5;3;3", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0002-9543-7453", "linkedin": "dayoon-ko-388642279;;", "aff_unique_index": "0;0;0", "aff_unique_norm": "Seoul National University", "aff_unique_dep": "", "aff_unique_url": "https://www.snu.ac.kr", "aff_unique_abbr": "SNU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "South Korea" }, { "id": "olEEp3Phda", "title": "Symbolization, Prompt, and Classification: A Framework for Implicit Speaker Identification in Novels", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Speaker identification in novel dialogues can be widely applied to various downstream tasks, such as producing multi-speaker audiobooks and converting novels into scripts.\nHowever, existing state-of-the-art methods are limited to handling explicit narrative patterns like \"Tom said, '...'\", unable to thoroughly understand long-range contexts and to deal with complex cases.\nTo this end, we propose a framework named SPC, which identifies implicit speakers in novels via symbolization, prompt, and classification.\nFirst, SPC symbolizes the mentions of candidate speakers to construct a unified label set.\nThen, by inserting a prompt we re-formulate speaker identification as a classification task to minimize the gap between the training objectives of speaker identification and the pre-training task.\nTwo 
auxiliary tasks are also introduced in SPC to enhance long-range context understanding.\nExperimental results show that SPC outperforms previous methods by a large margin of 4.8% accuracy on the web novel collection, which reduces 47% of speaker identification errors, and also outperforms the emerging ChatGPT.\nIn addition, SPC is more accurate in implicit speaker identification cases that require long-range context semantic understanding.", "keywords": "Speaker Identification;Audiobook Production;Prompting", "primary_area": "", "supplementary_material": "", "author": "Yue Chen;Tianwei He;Hongbin Zhou;Jia-Chen Gu;Heng Lu;Zhen-Hua Ling", "authorids": "~Yue_Chen6;~Tianwei_He1;~Hongbin_Zhou1;~Jia-Chen_Gu1;~Heng_Lu1;~Zhen-Hua_Ling1", "gender": "M;;M;M;M;M", "homepage": ";;https://github.com/HongbinZhou;https://jasonforjoy.github.io/;;http://staff.ustc.edu.cn/~zhling/", "dblp": ";;;93/3604.html;;70/5210", "google_scholar": ";;;https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=zh-CN;f8jRR3EAAAAJ", "or_profile": "~Yue_Chen6;~Tianwei_He1;~Hongbin_Zhou1;~Jia-Chen_Gu1;~Heng_Lu1;~Zhen-Hua_Ling1", "aff": "University of Science and Technology of China;;Ximalaya Inc.;University of Science and Technology of China;;University of Science and Technology of China", "aff_domain": "ustc.edu.cn;;ximalaya.com;ustc.edu.cn;;ustc.edu.cn", "position": "PhD student;;Researcher;Postdoc;;Professor", "bibtex": "@inproceedings{\nchen2023symbolization,\ntitle={Symbolization, Prompt, and Classification: A Framework for Implicit Speaker Identification in Novels},\nauthor={Yue Chen and Tianwei He and Hongbin Zhou and Jia-Chen Gu and Heng Lu and Zhen-Hua Ling},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=olEEp3Phda}\n}", "github": "", "project": "", "reviewers": "2pX6;eb8A;rtML;M73c", "site": "https://openreview.net/forum?id=olEEp3Phda", "pdf_size": 0, "rating": "3;3;3;3", "confidence": "2;3;5;2", "excitement": "4;3;2;3", "reproducibility": "4;4;4;3", "correctness": "3;3;3;3", "rating_avg": 3.0, "confidence_avg": 3.0, "excitement_avg": 3.0, "reproducibility_avg": 3.75, "correctness_avg": 3.0, "replies_avg": 14, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0009-0006-4554-8864;;;;", "linkedin": ";;;;;", "aff_unique_index": "0;1;0;0", "aff_unique_norm": "University of Science and Technology of China;Ximalaya Inc.", "aff_unique_dep": ";", "aff_unique_url": "http://www.ustc.edu.cn;https://www.ximalaya.com", "aff_unique_abbr": "USTC;Ximalaya", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "olzuxDCxMZ", "title": "Investigating Bias in Multilingual Language Models: Cross-Lingual Transfer of Debiasing Techniques", "track": "main", "status": "Short Main", "tldr": "", "abstract": "This paper investigates the transferability of debiasing techniques across different languages within multilingual models. We examine the applicability of these techniques in English, French, German, and Dutch. Using multilingual BERT (mBERT), we demonstrate that cross-lingual transfer of debiasing techniques is not only feasible but also yields promising results. Surprisingly, our findings reveal no performance disadvantages when applying these techniques to non-English languages. 
Using translations of the CrowS-Pairs dataset, our analysis identifies SentenceDebias as the best technique across different languages, reducing bias in mBERT by an average of 13%. We also find that debiasing techniques with additional pretraining exhibit enhanced cross-lingual effectiveness for the languages included in the analyses, particularly in lower-resource languages. These novel insights contribute to a deeper understanding of bias mitigation in multilingual language models and provide practical guidance for debiasing techniques in different language contexts.", "keywords": "bias mitigation;cross-lingual transferability;multilingual BERT", "primary_area": "", "supplementary_material": "", "author": "Manon Reusens;Philipp Borchert;Margot Mieskes;Jochen De Weerdt;Bart Baesens", "authorids": "~Manon_Reusens1;~Philipp_Borchert1;~Margot_Mieskes1;~Jochen_De_Weerdt1;~Bart_Baesens1", "gender": "F;M;F;M;M", "homepage": ";https://icma.ieseg.fr/philipp-borchert/;;http://www.jochendeweerdt.com/;https://bluecourses.com", "dblp": "320/3006;338/1017;49/2175;41/9119.html;43/4264", "google_scholar": "https://scholar.google.be/citations?hl=nl;efKKfygAAAAJ;https://scholar.google.de/citations?user=NSHuWowAAAAJ;26i8eZMAAAAJ;IC7ghFwAAAAJ", "or_profile": "~Manon_Reusens1;~Philipp_Borchert1;~Margot_Mieskes1;~Jochen_De_Weerdt1;~Bart_Baesens1", "aff": "KU Leuven;KU Leuven;University of Applied Sciences Darmstadt;KU Leuven;KU Leuven", "aff_domain": "kuleuven.be;kuleuven.be;h-da.de;kuleuven.be;kuleuven.be", "position": "PhD student;PhD student;Full Professor;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nreusens2023investigating,\ntitle={Investigating Bias in Multilingual Language Models: Cross-Lingual Transfer of Debiasing Techniques},\nauthor={Manon Reusens and Philipp Borchert and Margot Mieskes and Jochen De Weerdt and Bart Baesens},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=olzuxDCxMZ}\n}", "github": "", "project": "", "reviewers": "RwvA;4hPj;pZuG", "site": "https://openreview.net/forum?id=olzuxDCxMZ", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;3;4", "excitement": "4;3;3", "reproducibility": "5;3;4", "correctness": "3;2;3", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.0, "correctness_avg": 2.6666666666666665, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-5533-4281;0009-0006-2268-4804;0000-0001-6151-0504;0000-0002-5831-5668", "linkedin": ";;;;bart-baesens-403bb83/", "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "Katholieke Universiteit Leuven;University of Applied Sciences Darmstadt", "aff_unique_dep": ";", "aff_unique_url": "https://www.kuleuven.be;https://www.h-da.de/", "aff_unique_abbr": "KU Leuven;HDA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0", "aff_country_unique": "Belgium;Germany" }, { "id": "on3Wo4VODO", "title": "The Law and NLP: Bridging Disciplinary Disconnects", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "Legal practice is intrinsically rooted in the fabric of language, yet legal practitioners and scholars have been slow to adopt tools from natural language processing (NLP). At the same time, the legal system is experiencing an access to justice crisis, which could be partially alleviated with NLP. 
In this position paper, we argue that the slow uptake of NLP in legal practice is exacerbated by a disconnect between the needs of the legal community and the focus of NLP researchers. In a review of recent trends in the legal NLP literature, we find limited overlap between the legal NLP community and legal academia. Our interpretation is that some of the most popular legal NLP tasks fail to address the needs of legal practitioners. We discuss examples of legal NLP tasks that promise to bridge disciplinary disconnects and highlight interesting areas for legal NLP research that remain underexplored.", "keywords": "legal natural language processing;legal artificial intelligence;legal precedent retrieval;access to justice", "primary_area": "", "supplementary_material": "", "author": "Robert Mahari;Dominik Stammbach;Elliott Ash;Alex Pentland", "authorids": "~Robert_Mahari1;~Dominik_Stammbach1;~Elliott_Ash1;~Alex_Pentland1", "gender": "M;M;;M", "homepage": "https://robertmahari.com/;https://lawecon.ethz.ch/group/scientific-team/stammbach.html;https://elliottash.com;https://www.media.mit.edu/people/sandy/overview/", "dblp": ";242/4666;271/7737;p/AlexPentland", "google_scholar": "3qM8lPsAAAAJ;J6RHVgYAAAAJ;o5uDfHMAAAAJ;P4nfoKYAAAAJ", "or_profile": "~Robert_Mahari1;~Dominik_Stammbach1;~Elliott_Ash1;~Alex_Pentland1", "aff": "Massachusetts Institute of Technology;ETHZ - ETH Zurich;Swiss Federal Institute of Technology;Massachusetts Institute of Technology", "aff_domain": "mit.edu;ethz.ch;ethz.ch;mit.edu", "position": "PhD;PhD student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nmahari2023the,\ntitle={The Law and {NLP}: Bridging Disciplinary Disconnects},\nauthor={Robert Mahari and Dominik Stammbach and Elliott Ash and Alex Pentland},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=on3Wo4VODO}\n}", "github": "", "project": "", "reviewers": "aLn7;V1t3;6D6N", "site": "https://openreview.net/forum?id=on3Wo4VODO", "pdf_size": 0, "rating": "3;3;3", "confidence": "5;5;1", "excitement": "3;4;3", "reproducibility": "", "correctness": "2;5;3", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0003-2372-2746;0000-0003-1631-3020;0000-0002-6817-7529;", "linkedin": "robert-mahari-874310157/;;;", "aff_unique_index": "0;1;2;0", "aff_unique_norm": "Massachusetts Institute of Technology;ETH Zurich;Swiss Federal Institute of Technology", "aff_unique_dep": ";;", "aff_unique_url": "https://web.mit.edu;https://www.ethz.ch;https://www.ethz.ch", "aff_unique_abbr": "MIT;ETHZ;ETH Zurich", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;0", "aff_country_unique": "United States;Switzerland" }, { "id": "onr6HrKxn0", "title": "DEPN: Detecting and Editing Privacy Neurons in Pretrained Language Models", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Pretrained language models have learned a vast amount of human knowledge from large-scale corpora, but their powerful memorization capability also brings the risk of data leakage. Some risks may only be discovered after the model training is completed, such as the model memorizing a specific phone number and frequently outputting it. 
In such cases, model developers need to eliminate specific data influences from the model to mitigate legal and ethical penalties. To effectively mitigate these risks, people often have to spend a significant amount of time and computational costs to retrain new models instead of finding ways to cure the 'sick' models. Therefore, we propose a method to locate and erase risky neurons in order to eliminate the impact of privacy data in the model. We use a new method based on integrated gradients to locate neurons associated with privacy texts, and then erase these neurons by setting their activation values to zero. Furthermore, we propose a risky neuron aggregation method to eliminate the influence of privacy data in the model in batches. Experimental results show that our method can effectively and quickly eliminate the impact of privacy data without affecting the model's performance. Additionally, we demonstrate the relationship between model memorization and neurons through experiments, further illustrating the robustness of our method.", "keywords": "Privacy Protection;Language Model", "primary_area": "", "supplementary_material": "", "author": "Xinwei Wu;Junzhuo Li;Minghui Xu;Weilong Dong;Shuangzhi Wu;Chao Bian;Deyi Xiong", "authorids": "~Xinwei_Wu1;~Junzhuo_Li1;~Minghui_Xu2;~Weilong_Dong1;~Shuangzhi_Wu2;~Chao_Bian2;~Deyi_Xiong2", "gender": "M;M;M;M;M;M;M", "homepage": ";https://junzhuoli.github.io/;;https://github.com/willowdong;;https://scholar.google.com.hk/citations?user=DXiTKJsAAAAJ&hl=zh-CN;https://dyxiong.github.io", "dblp": ";297/9738;;;136/8695.html;;55/6548", "google_scholar": "https://scholar.google.com.hk/citations?user=zkbVg-IAAAAJ;;;;68mtRggAAAAJ;;QPLO3myO5PkC", "or_profile": "~Xinwei_Wu1;~Junzhuo_Li1;~Minghui_Xu2;~Weilong_Dong1;~Shuangzhi_Wu2;~Chao_Bian2;~Deyi_Xiong2", "aff": "Tianjin University;Tianjin University;Tianjin University;Tianjin University;;Tsinghua University;Tianjin University", "aff_domain": "tju.edu.cn;tju.edu.cn;tju.edu.cn;tju.edu.cn;;tsinghua.edu.cn;tju.edu.cn", "position": "PhD student;MS student;MS student;postgraduate;;PhD student;Full Professor", "bibtex": "@inproceedings{\nwu2023depn,\ntitle={{DEPN}: Detecting and Editing Privacy Neurons in Pretrained Language Models},\nauthor={Xinwei Wu and Junzhuo Li and Minghui Xu and Weilong Dong and Shuangzhi Wu and Chao Bian and Deyi Xiong},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=onr6HrKxn0}\n}", "github": "", "project": "", "reviewers": "iqTb;dXCH;xo7t", "site": "https://openreview.net/forum?id=onr6HrKxn0", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;5;2", "excitement": "4;4;4", "reproducibility": "4;4;4", "correctness": "4;4;4", "rating_avg": 5.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 4.0, "reproducibility_avg": 4.0, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0009-0001-2167-128X;;;;;;0000-0002-2353-5038", "linkedin": ";;minghui-xu-9431371b2/;;;;", "aff_unique_index": "0;0;0;0;1;0", "aff_unique_norm": "Tianjin University;Tsinghua University", "aff_unique_dep": ";", "aff_unique_url": "http://www.tju.edu.cn;https://www.tsinghua.edu.cn", "aff_unique_abbr": "TJU;THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "oqqmjw1BD1", "title": "Bridging the Gap between Synthetic and Authentic Images for Multimodal 
Machine Translation", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Multimodal machine translation (MMT) simultaneously takes the source sentence and a relevant image as input for translation. Since there is no paired image available for the input sentence in most cases, recent studies suggest utilizing powerful text-to-image generation models to provide image inputs. Nevertheless, synthetic images generated by these models often follow different distributions compared to authentic images. Consequently, using authentic images for training and synthetic images for inference can introduce a distribution shift, resulting in performance degradation during inference. To tackle this challenge, in this paper, we feed synthetic and authentic images to the MMT model, respectively. Then we minimize the gap between the synthetic and authentic images by drawing close the input image representations of the Transformer Encoder and the output distributions of the Transformer Decoder. Therefore, we mitigate the distribution disparity introduced by the synthetic images during inference, thereby freeing the authentic images from the inference process. Experimental results show that our approach achieves state-of-the-art performance on the Multi30K En-De and En-Fr datasets, while remaining independent of authentic images during inference.", "keywords": "Multimodal Machine Translation;Text-to-image Generation", "primary_area": "", "supplementary_material": "", "author": "Wenyu Guo;Qingkai Fang;Dong Yu;Yang Feng", "authorids": "~Wenyu_Guo1;~Qingkai_Fang1;~Dong_Yu7;~Yang_Feng4", "gender": "F;M;M;", "homepage": "https://ieeexplore.ieee.org/author/37089549477;https://fangqingkai.github.io/;;http://people.ucas.edu.cn/~yangfeng?language=en", "dblp": "74/6226;301/3107;;07/6095-4.html", "google_scholar": ";n2lRntoAAAAJ;;https://scholar.google.com/citations?hl=en", "or_profile": "~Wenyu_Guo1;~Qingkai_Fang1;~Dong_Yu7;~Yang_Feng4", "aff": "Beijing Language and Culture University;Institute of Computing Technology, Chinese Academy of Sciences;Beijing Language and Culture University;Institute of Computing Technology, Chinese Academy of Sciences", "aff_domain": "blcu.edu.cn;ict.ac.cn;blcu.edu.cn;ict.ac.cn", "position": "MS student;PhD student;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nguo2023bridging,\ntitle={Bridging the Gap between Synthetic and Authentic Images for Multimodal Machine Translation},\nauthor={Wenyu Guo and Qingkai Fang and Dong Yu and Yang Feng},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=oqqmjw1BD1}\n}", "github": "", "project": "", "reviewers": "ZYWp;Ft2o;JtfV", "site": "https://openreview.net/forum?id=oqqmjw1BD1", "pdf_size": 0, "rating": "3;3;3", "confidence": "2;4;4", "excitement": "4;3;3", "reproducibility": "3;4;2", "correctness": "4;3;3", "rating_avg": 3.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.0, "correctness_avg": 3.3333333333333335, "replies_avg": 10, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0001-8575-591X;;", "linkedin": ";;;", "aff_unique_index": "0;1;0;1", "aff_unique_norm": "Beijing Language and Culture University;Chinese Academy of Sciences", "aff_unique_dep": ";Institute of Computing Technology", "aff_unique_url": "http://www.blcu.edu.cn;http://www.ict.ac.cn", "aff_unique_abbr": "BLCU;CAS", "aff_campus_unique_index": "", 
"aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "orSVYeobMr", "title": "RoAST: Robustifying Language Models via Adversarial Perturbation with Selective Training", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Fine-tuning pre-trained language models (LMs) has become the de facto standard in many NLP tasks. Nevertheless, fine-tuned LMs are still prone to robustness issues, such as adversarial robustness and model calibration. Several perspectives of robustness for LMs have been studied independently, but lacking a unified consideration in multiple perspectives. In this paper, we propose Robustifying LMs via Adversarial perturbation with Selective Training (RoAST), a simple yet effective fine-tuning technique to enhance the multi-perspective robustness of LMs in a unified way. RoAST effectively incorporates two important sources for the model robustness, robustness on the perturbed inputs and generalizable knowledge in pre-trained LMs. To be specific, RoAST introduces adversarial perturbation during fine-tuning while the model parameters are selectively updated upon their relative importance to minimize unnecessary deviation. Under a unified evaluation of fine-tuned LMs by incorporating four representative perspectives of model robustness, we demonstrate the effectiveness of RoAST compared to state-of-the-art fine-tuning methods on six different types of LMs, which indicates its usefulness in practice.", "keywords": "Language model robustness;adversarial training", "primary_area": "", "supplementary_material": "", "author": "Jaehyung Kim;Yuning Mao;Rui Hou;Hanchao Yu;Davis Liang;Pascale Fung;Qifan Wang;Fuli Feng;Lifu Huang;Madian Khabsa", "authorids": "~Jaehyung_Kim1;~Yuning_Mao1;~Rui_Hou3;~Hanchao_Yu1;~Davis_Liang1;~Pascale_Fung1;~Qifan_Wang2;~Fuli_Feng1;~Lifu_Huang1;~Madian_Khabsa1", "gender": "M;;M;M;M;F;M;M;M;M", "homepage": "https://sites.google.com/view/jaehyungkim;https://morningmoni.github.io/;;https://www.linkedin.com/in/hanchao-yu-9a9381a7/;https://www.davisliang.com;http://pascale.home.ece.ust.hk/;https://wqfcr.github.io/;https://fulifeng.github.io/;https://wilburone.github.io/;https://www.madiankhabsa.com", "dblp": "02/7206-1;178/3692;;69/9936;206/6843;29/4187;33/8610;183/9198;127/0072;87/11087", "google_scholar": "https://scholar.google.co.kr/citations?user=6OYOsGsAAAAJ;steJe6IAAAAJ;;vBkncqgAAAAJ;9lh2gH8AAAAJ;;LrSyLosAAAAJ;https://scholar.google.com.sg/citations?user=QePM4u8AAAAJ;76IEGtYAAAAJ;V9JYPP0AAAAJ", "or_profile": "~Jaehyung_Kim1;~Yuning_Mao1;~Rui_Hou3;~Hanchao_Yu1;~Davis_Liang1;~Pascale_Fung1;~Qifan_Wang2;~Fuli_Feng1;~Lifu_Huang1;~Madian_Khabsa1", "aff": "Korea Advanced Institute of Science & Technology;Meta;Meta Inc. 
;Meta Facebook;Meta ;HKUST;Meta AI;University of Science and Technology of China;Virginia Tech;Meta", "aff_domain": "kaist.ac.kr;meta.com;meta.inc;fb.com;meta.com;ece.ust.hk;fb.com;ustc.edu.cn;vt.edu;meta.com", "position": "PhD student;Researcher;Research Scientist;Researcher;Researcher;Full Professor;Principal Researcher;Full Professor;Assistant Professor;Researcher", "bibtex": "@inproceedings{\nkim2023roast,\ntitle={Ro{AST}: Robustifying Language Models via Adversarial Perturbation with Selective Training},\nauthor={Jaehyung Kim and Yuning Mao and Rui Hou and Hanchao Yu and Davis Liang and Pascale Fung and Qifan Wang and Fuli Feng and Lifu Huang and Madian Khabsa},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=orSVYeobMr}\n}", "github": "", "project": "", "reviewers": "Gq2X;C6Wu;o3bN", "site": "https://openreview.net/forum?id=orSVYeobMr", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;5;3", "excitement": "3;2;3", "reproducibility": "4;3;3", "correctness": "4;2;4", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 2.6666666666666665, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 10, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0009-0000-4407-7796;;;0000-0002-7570-5756;0000-0002-5828-9842;;", "linkedin": ";morningmoni/;rayhou/;hanchao-yu-9a9381a7/;;;;;;", "aff_unique_index": "0;1;1;1;1;2;1;3;4;1", "aff_unique_norm": "Korea Advanced Institute of Science and Technology;Meta;Hong Kong University of Science and Technology;University of Science and Technology of China;Virginia Tech", "aff_unique_dep": ";Meta Platforms, Inc.;;;", "aff_unique_url": "https://www.kaist.ac.kr;https://meta.com;https://www.ust.hk;http://www.ustc.edu.cn;https://www.vt.edu", "aff_unique_abbr": "KAIST;Meta;HKUST;USTC;VT", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;1;1;1;1;2;1;2;1;1", "aff_country_unique": "South Korea;United States;China" }, { "id": "orefzVRWqV", "title": "PsyAttention: Psychological Attention Model for Personality Detection", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Work on personality detection has tended to incorporate psychological features from different personality models, such as BigFive and MBTI. There are more than 900 psychological features, each of which is helpful for personality detection. However, when used in combination, the application of different calculation standards among these features may result in interference between features calculated using distinct systems, thereby introducing noise and reducing performance. This paper adapts different psychological models in the proposed PsyAttention for personality detection, which can effectively encode psychological features, reducing their number by 85%. 
In experiments on the BigFive and MBTI models, PysAttention achieved average accuracy of 65.66% and 86.30%, respectively, outperforming state-of-the-art methods, indicating that it is effective at encoding psychological features.", "keywords": "personality detection; BigFive; PsyAttention; psychological features", "primary_area": "", "supplementary_material": "", "author": "Baohua Zhang;Yongyi Huang;Wenyao Cui;Zhang Huaping;Jianyun Shang", "authorids": "~Baohua_Zhang1;~Yongyi_Huang1;~Wenyao_Cui1;~Zhang_Huaping1;~Jianyun_Shang1", "gender": "M;;M;M;F", "homepage": "http://www.nlpir.org/wordpress/2018/12/27/baohua-zhang-master-of-bit/;https://www.nlpir.org;http://www.nlpir.org/wordpress/;http://www.nlpir.org/;http://cs.bit.edu.cn/szdw/jsml/fjs/sjy/index.htm", "dblp": "94/7727-2.html;311/7407;362/5912;87/4933;", "google_scholar": ";;;P6d8akUAAAAJ;", "or_profile": "~Baohua_Zhang1;~Yongyi_Huang1;~Wenyao_Cui1;~Zhang_Huaping1;~Jianyun_Shang1", "aff": "Beijing Institute of Technology;Beijing Institute of Technology;Beijing Institute of Technology;;", "aff_domain": "bit.edu.cn;bit.edu.cn;bit.edu.cn;;", "position": "PhD student;PhD student;MS student;;", "bibtex": "@inproceedings{\nzhang2023psyattention,\ntitle={PsyAttention: Psychological Attention Model for Personality Detection},\nauthor={Baohua Zhang and Yongyi Huang and Wenyao Cui and Zhang Huaping and Jianyun Shang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=orefzVRWqV}\n}", "github": "", "project": "", "reviewers": "QLop;y6Lk;imtg", "site": "https://openreview.net/forum?id=orefzVRWqV", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;5;2", "excitement": "3;4;3", "reproducibility": "3;3;4", "correctness": "3;3;4", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-5486-9524;0009-0004-6339-231X;0000-0002-2810-3824;0000-0002-0137-4069;", "linkedin": ";;;;", "aff_unique_index": "0;0;0", "aff_unique_norm": "Beijing Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "http://www.bit.edu.cn/", "aff_unique_abbr": "BIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "oseYM8qxW4", "title": "Critic-Driven Decoding for Mitigating Hallucinations in Data-to-text Generation", "track": "main", "status": "Short Main", "tldr": "", "abstract": "Hallucination of text ungrounded in the input is a well-known problem in neural data-to-text generation. Many methods have been proposed to mitigate it, but they typically require altering model architecture or collecting additional data, and thus cannot be easily applied to an existing model. In this paper, we explore a new way to mitigate hallucinations by combining the probabilistic output of a generator language model (LM) with the output of a special \u201ctext critic\u201d classifier, which guides the generation by assessing the match between the input data and the text generated so far. Our method does not need any changes to the underlying LM's architecture or training procedure and can thus be combined with any model and decoding operating on word probabilities. 
The critic does not need any additional training data, using the base LM's training data and synthetic negative examples. Our experimental results show that our method improves over the baseline on the WebNLG and OpenDialKG benchmarks.", "keywords": "data-to-text generation;hallucinations;decoding approaches;natural language genereation", "primary_area": "", "supplementary_material": "", "author": "Mateusz Lango;Ondrej Dusek", "authorids": "~Mateusz_Lango1;~Ondrej_Dusek1", "gender": ";M", "homepage": ";https://ufal.mff.cuni.cz/ondrej-dusek", "dblp": ";126/8739", "google_scholar": ";https://scholar.google.cz/citations?user=PI7rRV0AAAAJ", "or_profile": "~Mateusz_Lango1;~Ondrej_Dusek1", "aff": ";Charles University, Prague", "aff_domain": ";cuni.cz", "position": ";Assistant Professor", "bibtex": "@inproceedings{\nlango2023criticdriven,\ntitle={Critic-Driven Decoding for Mitigating Hallucinations in Data-to-text Generation},\nauthor={Mateusz Lango and Ondrej Dusek},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=oseYM8qxW4}\n}", "github": "", "project": "", "reviewers": "eK7W;GpK3;QRL1", "site": "https://openreview.net/forum?id=oseYM8qxW4", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;3", "excitement": "3;4;2", "reproducibility": "5;5;4", "correctness": "4;4;4", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.0, "reproducibility_avg": 4.666666666666667, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-1415-1702", "linkedin": ";tuetschek/", "aff_unique_index": "0", "aff_unique_norm": "Charles University", "aff_unique_dep": "", "aff_unique_url": "https://www.cuni.cz", "aff_unique_abbr": "Charles University", "aff_campus_unique_index": "0", "aff_campus_unique": "Prague", "aff_country_unique_index": "0", "aff_country_unique": "Czech Republic" }, { "id": "osox1GoFLS", "title": "Disentangling Extraction and Reasoning in Multi-hop Spatial Reasoning", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Spatial reasoning over text is challenging as the models not only need to extract the direct spatial information from the text but also reason over those and infer implicit spatial relations. Recent studies highlight the struggles even large language models encounter when it comes to performing spatial reasoning over text. In this paper, we explore the potential benefits of disentangling the processes of information extraction and reasoning in models to address this challenge. To explore this, we design various models that disentangle extraction and reasoning(either symbolic or neural) and compare them with state-of-the-art(SOTA) baselines with no explicit design for these parts. 
Our experimental results consistently demonstrate the efficacy of disentangling, showcasing its ability to enhance models' generalizability within realistic data domains.", "keywords": "Spatial Reasoning;Spatial Role Labeling;Disentangling Extraction and Reasoning;Pretrained Language Models;Large Language Models", "primary_area": "", "supplementary_material": "", "author": "Roshanak Mirzaee;Parisa Kordjamshidi", "authorids": "~Roshanak_Mirzaee1;~Parisa_Kordjamshidi1", "gender": "F;F", "homepage": ";http://www.cse.msu.edu/~kordjams/", "dblp": ";73/3423", "google_scholar": ";https://scholar.google.com.tw/citations?user=Ugo3NGgAAAAJ", "or_profile": "~Roshanak_Mirzaee1;~Parisa_Kordjamshidi1", "aff": "Michigan State University;Michigan State University", "aff_domain": "msu.edu;msu.edu", "position": "PhD student;Associate Professor", "bibtex": "@inproceedings{\nmirzaee2023disentangling,\ntitle={Disentangling Extraction and Reasoning in Multi-hop Spatial Reasoning},\nauthor={Roshanak Mirzaee and Parisa Kordjamshidi},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=osox1GoFLS}\n}", "github": "", "project": "", "reviewers": "6kdH;WRQX;K1u1", "site": "https://openreview.net/forum?id=osox1GoFLS", "pdf_size": 0, "rating": "3;3;3", "confidence": "5;4;4", "excitement": "3;4;3", "reproducibility": "4;4;3", "correctness": "3;3;3", "rating_avg": 3.0, "confidence_avg": 4.333333333333333, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";", "linkedin": "roshanak-mirzaee-011604b1;", "aff_unique_index": "0;0", "aff_unique_norm": "Michigan State University", "aff_unique_dep": "", "aff_unique_url": "https://www.msu.edu", "aff_unique_abbr": "MSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "oueo4cEgSJ", "title": "Hierarchical Pretraining on Multimodal Electronic Health Records", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Pretraining has proven to be a powerful technique in natural language processing (NLP), exhibiting remarkable success in various NLP downstream tasks. However, in the medical domain, existing pretrained models on electronic health records (EHR) fail to capture the hierarchical nature of EHR data, limiting their generalization capability across diverse downstream tasks using a single pretrained model.\nTo tackle this challenge, this paper introduces a novel, general, and unified pretraining framework called MedHMP, specifically designed for hierarchically multimodal EHR data. The effectiveness of the proposed MedHMP is demonstrated through experimental results on eight downstream tasks spanning three levels. 
Comparisons against eighteen baselines further highlight the efficacy of our approach.", "keywords": "Clinical Text;Multimodal Learning;Pretraining;Electronic Health Records", "primary_area": "", "supplementary_material": "", "author": "Xiaochen Wang;Junyu Luo;Jiaqi Wang;Ziyi Yin;Suhan Cui;Yuan Zhong;Yaqing Wang;Fenglong Ma", "authorids": "~Xiaochen_Wang2;~Junyu_Luo3;~Jiaqi_Wang4;~Ziyi_Yin1;~Suhan_Cui1;~Yuan_Zhong4;~Yaqing_Wang1;~Fenglong_Ma1", "gender": "M;;;M;M;Non-Binary;M;M", "homepage": ";;;https://ericyinyzy.github.io/;;;https://yaqingwang.github.io/;https://fenglong-ma.github.io/", "dblp": "19/30-2.html;198/0850.html;;358/6428;294/0930;20/8000-2;147/1393;85/10856", "google_scholar": "PXpHePgAAAAJ;pmLGdM0AAAAJ;;wvbK37AAAAAJ;BYh25MsAAAAJ;https://scholar.google.com/citations?hl=en;_Rfg2CAAAAAJ;DLJIxNMAAAAJ", "or_profile": "~Xiaochen_Wang2;~Junyu_Luo3;~Jiaqi_Wang4;~Ziyi_Yin1;~Suhan_Cui1;~Yuan_Zhong4;~Yaqing_Wang1;~Fenglong_Ma1", "aff": "Pennsylvania State University;Pennsylvania State University;;Pennsylvania State University;Pennsylvania State University;Pennsylvania State University;Research, Google;Pennsylvania State University", "aff_domain": "psu.edu;psu.edu;;psu.edu;psu.edu;psu.edu;research.google.com;psu.edu", "position": "PhD student;PhD student;;PhD student;PhD student;PhD student;Research Scientist;Assistant Professor", "bibtex": "@inproceedings{\nwang2023hierarchical,\ntitle={Hierarchical Pretraining on Multimodal Electronic Health Records},\nauthor={Xiaochen Wang and Junyu Luo and Jiaqi Wang and Ziyi Yin and Suhan Cui and Yuan Zhong and Yaqing Wang and Fenglong Ma},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=oueo4cEgSJ}\n}", "github": "", "project": "", "reviewers": "B8GM;qKr6;P82s;TZeS", "site": "https://openreview.net/forum?id=oueo4cEgSJ", "pdf_size": 0, "rating": "5;5;5;5", "confidence": "4;4;4;4", "excitement": "4;4;4;3", "reproducibility": "4;4;4;4", "correctness": "4;4;4;4", "rating_avg": 5.0, "confidence_avg": 4.0, "excitement_avg": 3.75, "reproducibility_avg": 4.0, "correctness_avg": 4.0, "replies_avg": 14, "authors#_avg": 8, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0009-0001-7699-3016;0000-0002-4897-7051;;0009-0002-3502-3205;;0009-0009-4427-5667;;0000-0002-4999-0303", "linkedin": "xiaochen-wang-1860691b4/;;;%E6%A2%93%E8%AF%91-%E6%AE%B7-ab816a249/?locale=en_US&trk=eml-email_network_conversations_01-header-0-profile_glimmer;%E8%8B%8F%E6%99%97-%E5%B4%94-aa067818b/;yuan-zhong-5137a4138/;;fenglong-ma-69805832/", "aff_unique_index": "0;0;0;0;0;1;0", "aff_unique_norm": "Pennsylvania State University;Google", "aff_unique_dep": ";Google Research", "aff_unique_url": "https://www.psu.edu;https://research.google", "aff_unique_abbr": "PSU;Google", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "ouiQX2XWYc", "title": "Watermarking LLMs with Weight Quantization", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Abuse of large language models reveals high risks as large language models are being deployed at an astonishing speed. It is important to protect the model weights to avoid malicious usage that violates licenses of open-source large language models. 
This paper proposes a novel watermarking strategy that plants watermarks in the quantization process of large language models without pre-defined triggers during inference. The watermark works when the model is used in the fp32 mode and remains hidden when the model is quantized to int8, in this way, the users can only inference the model without further supervised fine-tuning of the model. We successfully plant the watermark into open-source large language model weights including GPT-Neo and LLaMA. We hope our proposed method can provide a potential direction for protecting model weights in the era of large language model applications.", "keywords": "watermarking;LLM;model quantization", "primary_area": "", "supplementary_material": "", "author": "Linyang Li;Botian Jiang;Pengyu Wang;Ke Ren;Hang Yan;Xipeng Qiu", "authorids": "~Linyang_Li1;~Botian_Jiang1;~Pengyu_Wang2;~Ke_Ren2;~Hang_Yan2;~Xipeng_Qiu1", "gender": "M;M;M;M;;M", "homepage": "https://github.com/LinyangLee;;;https://github.com/renke999;;https://xpqiu.github.io/", "dblp": "228/8051;;14/3832-6;;;69/1395", "google_scholar": "T6eEqcMAAAAJ;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.co.jp/citations?user=NGniJS0AAAAJ;https://scholar.google.com/citations?view_op=list_works;;Pq4Yp_kAAAAJ", "or_profile": "~Linyang_Li1;~Botian_Jiang1;~Pengyu_Wang2;~Ke_Ren2;~Hang_Yan2;~Xipeng_Qiu1", "aff": "Fudan University;Fudan University;Fudan University;Fudan University;;Fudan University", "aff_domain": "fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;;fudan.edu.cn", "position": "PhD student;MS student;PhD student;MS student;;Full Professor", "bibtex": "@inproceedings{\nli2023watermarking,\ntitle={Watermarking {LLM}s with Weight Quantization},\nauthor={Linyang Li and Botian Jiang and Pengyu Wang and Ke Ren and Hang Yan and Xipeng Qiu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=ouiQX2XWYc}\n}", "github": "", "project": "", "reviewers": "HMaJ;noHj;faok", "site": "https://openreview.net/forum?id=ouiQX2XWYc", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;4", "excitement": "3;5;4", "reproducibility": "4;4;3", "correctness": "3;3;2", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 4.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 2.6666666666666665, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;0000-0001-7163-5247", "linkedin": ";%E5%8D%9A%E5%A4%A9-%E5%A7%9C-01a120227?lipi=urn%3Ali%3Apage%3Ad_flagship3_profile_view_base_contact_details%3BDQvpyioVTMKEM8AgLhbJKQ%3D%3D;;;;", "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Fudan University", "aff_unique_dep": "", "aff_unique_url": "https://www.fudan.edu.cn", "aff_unique_abbr": "Fudan", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "ovkb6woHvT", "title": "GLEN: General-Purpose Event Detection for Thousands of Types", "track": "main", "status": "Long Main", "tldr": "", "abstract": "The progress of event extraction research has been hindered by the absence of wide-coverage, large-scale datasets. \nTo make event extraction systems more accessible, we build a general-purpose event detection dataset GLEN, which covers 205K event mentions with 3,465 different types, making it more than 20x larger in ontology than today's largest event dataset. 
GLEN is created by utilizing the DWD Overlay, which provides a mapping between Wikidata Qnodes and PropBank rolesets. This enables us to use the abundant existing annotation for PropBank as distant supervision.\nIn addition, we also propose a new multi-stage event detection model specifically designed to handle the large ontology size in GLEN. We show that our model exhibits superior performance compared to a range of baselines including InstructGPT.\nFinally, we perform error analysis and show that label noise is still the largest challenge for improving performance for this new dataset.", "keywords": "event extraction; event detection; dataset", "primary_area": "", "supplementary_material": "", "author": "Sha Li;Qiusi Zhan;Kathryn Conger;Martha Palmer;Heng Ji;Jiawei Han", "authorids": "~Sha_Li1;~Qiusi_Zhan1;~Kathryn_Conger1;~Martha_Palmer1;~Heng_Ji3;~Jiawei_Han1", "gender": "F;F;F;F;F;M", "homepage": ";https://zqs1943.github.io/;;https://www.colorado.edu/faculty/palmer-martha/;http://blender.cs.illinois.edu/hengji.html;http://hanj.cs.illinois.edu/", "dblp": ";321/4704.html;;p/MarthaStonePalmer.html;;h/JiaweiHan.html", "google_scholar": "OIo8J2YAAAAJ;XaYJrgoAAAAJ;;pxc_-XYAAAAJ;z7GCqT4AAAAJ;https://scholar.google.com.tw/citations?user=Kv9AbjMAAAAJ", "or_profile": "~Sha_Li1;~Qiusi_Zhan1;~Kathryn_Conger1;~Martha_Palmer1;~Heng_Ji3;~Jiawei_Han1", "aff": "University of Illinois, Urbana Champaign;University of Illinois Urbana-Champaign;University of Colorado at Boulder;University of Colorado at Boulder;University of Illinois, Urbana-Champaign;University of Illinois at Urbana-Champaign (UIUC)", "aff_domain": "illinois.edu;illinois.edu;colorado.edu;colorado.edu;uiuc.edu;illinois.edu", "position": "PhD student;PhD student;PhD student;Researcher;Full Professor;Full Professor", "bibtex": "@inproceedings{\nli2023glen,\ntitle={{GLEN}: General-Purpose Event Detection for Thousands of Types},\nauthor={Sha Li and Qiusi Zhan and Kathryn Conger and Martha Palmer and Heng Ji and Jiawei Han},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=ovkb6woHvT}\n}", "github": "", "project": "", "reviewers": "ohVy;rTom;7vXC", "site": "https://openreview.net/forum?id=ovkb6woHvT", "pdf_size": 0, "rating": "5;5;5", "confidence": "5;3;4", "excitement": "5;4;4", "reproducibility": "5;5;4", "correctness": "5;4;4", "rating_avg": 5.0, "confidence_avg": 4.0, "excitement_avg": 4.333333333333333, "reproducibility_avg": 4.666666666666667, "correctness_avg": 4.333333333333333, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0000-0001-9864-6974;;0000-0002-3629-2696", "linkedin": ";https://linkedin.com/in/zhan-qiusi-56265a1b6;;https://www.linkedin.com/feed/?trk=homepage-basic_signin-form_submit;;", "aff_unique_index": "0;0;1;1;2;0", "aff_unique_norm": "University of Illinois Urbana-Champaign;University of Colorado;University of Illinois", "aff_unique_dep": ";;", "aff_unique_url": "https://illinois.edu;https://www.colorado.edu;https://illinois.edu", "aff_unique_abbr": "UIUC;CU;UIUC", "aff_campus_unique_index": "0;0;1;1;0;0", "aff_campus_unique": "Urbana-Champaign;Boulder", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "owc65ImkyU", "title": "Plan, Verify and Switch: Integrated Reasoning with Diverse X-of-Thoughts", "track": "main", "status": "Long Main", "tldr": "", "abstract": "As large language models (LLMs) have shown effectiveness 
with different prompting methods, such as Chain of Thought, Program of Thought, we find that these methods have formed a great complementarity to each other on math reasoning tasks. In this work, we propose XoT, an integrated problem solving framework by prompting LLMs with diverse reasoning thoughts. For each question, XoT always begins with selecting the most suitable method then executes each method iteratively. Within each iteration, XoT actively checks the validity of the generated answer and incorporates the feedback from external executors, allowing it to dynamically switch among different prompting methods. Through extensive experiments on 10 popular math reasoning datasets, we demonstrate the effectiveness of our proposed approach and thoroughly analyze the strengths of each module. Moreover, empirical results suggest that our framework is orthogonal to recent work that makes improvements on single reasoning methods and can further generalise to logical reasoning domain. By allowing method switching, XoT provides a fresh perspective on the collaborative integration of diverse reasoning thoughts in a unified framework.", "keywords": "Math Reasoning;Chain-of-Thought", "primary_area": "", "supplementary_material": "", "author": "Tengxiao Liu;Qipeng Guo;Yuqing Yang;Xiangkun Hu;Yue Zhang;Xipeng Qiu;Zheng Zhang", "authorids": "~Tengxiao_Liu1;~Qipeng_Guo1;~Yuqing_Yang2;~Xiangkun_Hu1;~Yue_Zhang7;~Xipeng_Qiu1;~Zheng_Zhang1", "gender": ";M;F;M;M;M;M", "homepage": "https://tengxiaoliu.github.io;;https://ayyyq.github.io/;;http://frcchang.github.io;https://xpqiu.github.io/;https://shanghai.nyu.edu/academics/faculty/directory/zheng-zhang", "dblp": "165/9039;172/1046;91/9064-4;224/5990;47/722-4;69/1395;", "google_scholar": ";k3mPGKgAAAAJ;https://scholar.google.com/citations?hl=en;_-0MpawAAAAJ;;Pq4Yp_kAAAAJ;https://scholar.google.com.hk/citations?user=k0KiE4wAAAAJ", "or_profile": "~Tengxiao_Liu1;~Qipeng_Guo1;~Yuqing_Yang2;~Xiangkun_Hu1;~Yue_Zhang7;~Xipeng_Qiu1;~Zheng_Zhang1", "aff": "Amazon;Amazon;Fudan University;Amazon;Westlake University;Fudan University;Amazon", "aff_domain": "amazon.com;amazon.com;fudan.edu.cn;amazon.com;westlake.edu.cn;fudan.edu.cn;amazon.com", "position": "Intern;Researcher;MS student;Applied Scientist;Full Professor;Full Professor;Senior Principal Scientist", "bibtex": "@inproceedings{\nliu2023plan,\ntitle={Plan, Verify and Switch: Integrated Reasoning with Diverse X-of-Thoughts},\nauthor={Tengxiao Liu and Qipeng Guo and Yuqing Yang and Xiangkun Hu and Yue Zhang and Xipeng Qiu and Zheng Zhang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=owc65ImkyU}\n}", "github": "", "project": "", "reviewers": "wiK8;hMke;JpnC", "site": "https://openreview.net/forum?id=owc65ImkyU", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;5;3", "excitement": "3;3;3", "reproducibility": "3;3;4", "correctness": "3;4;4", "rating_avg": 5.0, "confidence_avg": 4.0, "excitement_avg": 3.0, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-3339-9607;;;;0000-0002-5214-2268;0000-0001-7163-5247;", "linkedin": ";;;;;;", "aff_unique_index": "0;0;1;0;2;1;0", "aff_unique_norm": "Amazon;Fudan University;Westlake University", "aff_unique_dep": "Amazon.com, Inc.;;", "aff_unique_url": "https://www.amazon.com;https://www.fudan.edu.cn;https://www.westlake.edu.cn", 
"aff_unique_abbr": "Amazon;Fudan;WU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;1;1;0", "aff_country_unique": "United States;China" }, { "id": "oxZKOzePQX", "title": "SWEET - Weakly Supervised Person Name Extraction for Fighting Human Trafficking", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "In this work, we propose a weak supervision pipeline SWEET: Supervise Weakly for Entity Extraction to fight Trafficking for extracting person names from noisy escort advertisements. Our method combines the simplicity of rule-matching (through antirules, i.e., negated rules) and the generalizability of large language models fine-tuned on benchmark, domain-specific and synthetic datasets, treating them as weak labels.\nOne of the major challenges in this domain is limited labeled data. SWEET addresses this by obtaining multiple weak labels through labeling functions and effectively aggregating them. SWEET outperforms the previous supervised SOTA method for this task by 9% F1 score on domain data and better generalizes to common benchmark datasets. Furthermore, we also release HTGEN, a synthetically generated dataset of escort advertisements (built using ChatGPT) to facilitate further research within the community.", "keywords": "Information Extraction;Large Language Models;Generation;NLP Applications;Resources and Evaluation", "primary_area": "", "supplementary_material": "", "author": "Javin Liu;Hao Yu;Vidya Sujaya;Pratheeksha Nair;Kellin Pelrine;Reihaneh Rabbany", "authorids": "~Javin_Liu1;~Hao_Yu15;~Vidya_Sujaya1;~Pratheeksha_Nair2;~Kellin_Pelrine1;~Reihaneh_Rabbany1", "gender": "M;;F;F;;F", "homepage": "https://mila.quebec/en/person/javin-liu/;;https://github.com/vidyasujaya;https://nair-p.github.io/;https://kellinpelrine.github.io/;http://www.reirab.com/", "dblp": ";;;233/1231;281/0602;94/9024", "google_scholar": ";;;;_s2HT_0AAAAJ;https://scholar.google.ca/citations?user=Foh_c-QAAAAJ", "or_profile": "~Javin_Liu1;~Hao_Yu15;~Vidya_Sujaya1;~Pratheeksha_Nair2;~Kellin_Pelrine1;~Reihaneh_Rabbany1", "aff": "Mila - Quebec AI Institute ;;McGill University, McGill University;McGill University;McGill University;Montreal Institute for Learning Algorithms, University of Montreal, University of Montreal", "aff_domain": "mila.quebec;;mail.mcgill.ca;mcgill.ca;mcgill.ca;mila.umontreal.ca", "position": "Intern;;Undergrad student;PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nliu2023sweet,\ntitle={{SWEET} - Weakly Supervised Person Name Extraction for Fighting Human Trafficking},\nauthor={Javin Liu and Hao Yu and Vidya Sujaya and Pratheeksha Nair and Kellin Pelrine and Reihaneh Rabbany},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=oxZKOzePQX}\n}", "github": "", "project": "", "reviewers": "6zce;pn8R;5iZ8", "site": "https://openreview.net/forum?id=oxZKOzePQX", "pdf_size": 0, "rating": "3;3;3", "confidence": "5;4;4", "excitement": "3;3;4", "reproducibility": "3;4;2", "correctness": "3;3;3", "rating_avg": 3.0, "confidence_avg": 4.333333333333333, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.0, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;", "linkedin": ";;;pnair96/;kellin-pelrine/;", "aff_unique_index": "0;1;1;1;2", "aff_unique_norm": "Quebec AI Institute;McGill University;University of Montreal", "aff_unique_dep": 
"AI Institute;;Montreal Institute for Learning Algorithms", "aff_unique_url": "https://mila.quebec;https://www.mcgill.ca;https://www.umontreal.ca", "aff_unique_abbr": "Mila;McGill;UM", "aff_campus_unique_index": "1", "aff_campus_unique": ";Montreal", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "Canada" }, { "id": "p0GyMJugcE", "title": "Once is Enough: A Light-Weight Cross-Attention for Fast Sentence Pair Modeling", "track": "main", "status": "Short Main", "tldr": "", "abstract": "Transformer-based models have achieved great success on sentence pair modeling tasks, such as answer selection and natural language inference (NLI). These models generally perform cross-attention over input pairs, leading to prohibitive computational cost. Recent studies propose dual-encoder and late interaction architectures for faster computation. However, the balance between the expressive of cross-attention and computation speedup still needs better coordinated. To this end, this paper introduces a novel paradigm TopicAns for efficient sentence pair modeling. TopicAns involves a lightweight cross-attention mechanism. It conducts query encoding only once while modeling the query-candidate interaction in parallel. Extensive experiments conducted on four tasks demonstrate that our TopicAnscan speed up sentence pairing by over 113x while achieving comparable performance as the more expensive cross-attention models.", "keywords": "information retrieval;text matching;embedding", "primary_area": "", "supplementary_material": "", "author": "Yuanhang Yang;Shiyi Qi;Chuanyi Liu;Qifan Wang;Cuiyun Gao;Zenglin Xu", "authorids": "~Yuanhang_Yang1;~Shiyi_Qi1;~Chuanyi_Liu1;~Qifan_Wang2;~Cuiyun_Gao1;~Zenglin_Xu2", "gender": "M;M;M;M;F;M", "homepage": ";https://github.com/yikouchunzhen;http://ids.hitsz.edu.cn/team_details.jsp?urltype=news.NewsContentUrl&wbtreeid=1185&wbnewsid=1020;https://wqfcr.github.io/;https://cuiyungao.github.io/;https://faculty.fudan.edu.cn/xuzenglin/en/index.htm", "dblp": "219/1699;321/1594;;33/8610;;68/1538", "google_scholar": "5GUZrRkAAAAJ;;Aec7FSQAAAAJ;LrSyLosAAAAJ;9I2hTmQAAAAJ;gF0H9nEAAAAJ", "or_profile": "~Yuanhang_Yang1;~Shiyi_Qi1;~Chuanyi_Liu1;~Qifan_Wang2;~Cuiyun_Gao1;~Zenglin_Xu1", "aff": "Harbin Institute of Technology (Shenzhen);Harbin Institute of Technology, shenzhen;Harbin Institute of Technology;Meta AI;;Harbin Institute of Technology Shenzhen", "aff_domain": "hit.edu.cn;hit.edu.cn;hit.edu.cn;fb.com;;hit.edu.cn", "position": "MS student;MS student;Full Professor;Principal Researcher;;Full Professor", "bibtex": "@inproceedings{\nyang2023once,\ntitle={Once is Enough: A Light-Weight Cross-Attention for Fast Sentence Pair Modeling},\nauthor={Yuanhang Yang and Shiyi Qi and Chuanyi Liu and Qifan Wang and Cuiyun Gao and Zenglin Xu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=p0GyMJugcE}\n}", "github": "", "project": "", "reviewers": "go6q;TknS;7S85", "site": "https://openreview.net/forum?id=p0GyMJugcE", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;4;4", "excitement": "3;4;3", "reproducibility": "4;5;4", "correctness": "3;4;3", "rating_avg": 5.0, "confidence_avg": 4.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.333333333333333, "correctness_avg": 3.3333333333333335, "replies_avg": 10, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-8060-1273;;0000-0002-7570-5756;;0000-0001-5550-6461", "linkedin": ";;;;;", 
"aff_unique_index": "0;0;0;1;0", "aff_unique_norm": "Harbin Institute of Technology;Meta", "aff_unique_dep": ";Meta AI", "aff_unique_url": "http://en.hhit.edu.cn/;https://meta.com", "aff_unique_abbr": "HIT;Meta", "aff_campus_unique_index": "0;0;1;0", "aff_campus_unique": "Shenzhen;Harbin;", "aff_country_unique_index": "0;0;0;1;0", "aff_country_unique": "China;United States" }, { "id": "p2P1Q4FpEB", "title": "A Framework for Vision-Language Warm-up Tasks in Multimodal Dialogue Models", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Most research on multimodal open-domain dialogue agents has focused on pretraining and multi-task learning using additional rich datasets beyond a given target dataset. However, methods for exploiting these additional datasets can be quite limited in real-world settings, creating a need for more efficient methods for constructing agents based solely on the target dataset. To address these issues, we present a new learning strategy called vision-language warm-up tasks for multimodal dialogue models (VLAW-MDM). This strategy does not require the use of large pretraining or multi-task datasets but rather relies solely on learning from target data. Moreover, our proposed approach automatically generate captions for images and incorporate them into the model's input to improve the contextualization of visual information. Using this novel approach, we empirically demonstrate that our learning strategy is effective for limited data and relatively small models. The result show that our method achieved comparable and in some cases superior performance compared to existing state-of-the-art models on various evaluation metrics.", "keywords": "multimodal agent;multimodal dialogue agent;multi-turn dialogue agent;warm-up task", "primary_area": "", "supplementary_material": "", "author": "Jaewook Lee;Seongsik Park;Seong-Heum Park;Hongjin KIM;Harksoo Kim", "authorids": "~Jaewook_Lee8;~Seongsik_Park3;~Seong-Heum_Park1;~Hongjin_KIM1;~Harksoo_Kim2", "gender": "M;M;M;M;M", "homepage": ";http://nlp.konkuk.ac.kr/;http://nlp.konkuk.ac.kr/;;http://nlp.konkuk.ac.kr/", "dblp": ";93/11156;;;http://dblp.uni-trier.de/pers/hy/k/Kim:Harksoo", "google_scholar": "https://scholar.google.com/citations?hl=ko;;;KP0iikIAAAAJ;D2wbG84AAAAJ", "or_profile": "~Jaewook_Lee8;~Seongsik_Park3;~Seong-Heum_Park1;~Hongjin_KIM1;~Harksoo_Kim2", "aff": "Konkuk University;Konkuk University;Konkuk University;Konkuk University;Konkuk University", "aff_domain": "konkuk.ac.kr;konkuk.ac.kr;konkuk.ac.kr;konkuk.ac.kr;konkuk.ac.kr", "position": "MS student;PhD student;MS student;PhD student;Full Professor", "bibtex": "@inproceedings{\nlee2023a,\ntitle={A Framework for Vision-Language Warm-up Tasks in Multimodal Dialogue Models},\nauthor={Jaewook Lee and Seongsik Park and Seong-Heum Park and Hongjin KIM and Harksoo Kim},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=p2P1Q4FpEB}\n}", "github": "", "project": "", "reviewers": "qMM1;twFK;v7if;CkdE", "site": "https://openreview.net/forum?id=p2P1Q4FpEB", "pdf_size": 0, "rating": "3;3;3;3", "confidence": "3;3;3;3", "excitement": "3;3;3;3", "reproducibility": "4;3;4;1", "correctness": "3;3;3;3", "rating_avg": 3.0, "confidence_avg": 3.0, "excitement_avg": 3.0, "reproducibility_avg": 3.0, "correctness_avg": 3.0, "replies_avg": 14, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": 
";0000-0003-0462-2408;;0000-0002-3492-2543;0000-0002-8286-7198", "linkedin": "jaewook-lee-1075652a3/;;;;", "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Konkuk University", "aff_unique_dep": "", "aff_unique_url": "http://www.konkuk.edu", "aff_unique_abbr": "KU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "South Korea" }, { "id": "pFTBsdZ1UM", "title": "Indicative Summarization of Long Discussions", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Online forums encourage the exchange and discussion of different stances on many topics. Not only do they provide an opportunity to present one's own arguments, but may also gather a broad cross-section of others' arguments. However, the resulting long discussions are difficult to overview. This paper presents a novel unsupervised approach using large language models (LLMs) to generating indicative summaries for long discussions that basically serve as tables of contents. Our approach first clusters argument sentences, generates cluster labels as abstractive summaries, and classifies the generated cluster labels into argumentation frames resulting in a two-level summary. Based on an extensively optimized prompt engineering approach, we evaluate 19 LLMs for generative cluster labeling and frame classification. To evaluate the usefulness of our indicative summaries, we conduct a purpose-driven user study via a new visual interface called **Discussion Explorer**: It shows that our proposed indicative summaries serve as a convenient navigation tool to explore long discussions.", "keywords": "Summarization;Computational Argumentation;Large Language Models;Social Media Discussions", "primary_area": "", "supplementary_material": "", "author": "Shahbaz Syed;Dominik Schwabe;Khalid Al Khatib;Martin Potthast", "authorids": "~Shahbaz_Syed1;~Dominik_Schwabe1;~Khalid_Al_Khatib1;~Martin_Potthast1", "gender": ";;M;M", "homepage": ";;https://khalid-alkhatib.github.io/;http://www.temir.org", "dblp": ";;31/8936;87/6573", "google_scholar": ";U08YZEEAAAAJ;https://scholar.google.com/citations?hl=en;a0W8R-cAAAAJ", "or_profile": "~Shahbaz_Syed1;~Dominik_Schwabe1;~Khalid_Al_Khatib1;~Martin_Potthast1", "aff": ";Universit\u00e4t Leipzig;University of Groningen;Leipzig University and ScaDS.AI", "aff_domain": ";uni-leipzig.de;rug.nl;uni-leipzig.de", "position": ";MS student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nsyed2023indicative,\ntitle={Indicative Summarization of Long Discussions},\nauthor={Shahbaz Syed and Dominik Schwabe and Khalid Al Khatib and Martin Potthast},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=pFTBsdZ1UM}\n}", "github": "", "project": "", "reviewers": "oFS8;UYXG;RYVk", "site": "https://openreview.net/forum?id=pFTBsdZ1UM", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;4", "excitement": "3;3;4", "reproducibility": "3;3;4", "correctness": "4;3;4", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0009-0006-7255-5349;0000-0003-2451-0665", "linkedin": ";;khalid-alkhatib/;potthast", "aff_unique_index": "0;1;2", "aff_unique_norm": "University of Leipzig;University of Groningen;Leipzig University", "aff_unique_dep": ";;", 
"aff_unique_url": "https://www.uni-leipzig.de;https://www.rug.nl;https://www.uni-leipzig.de", "aff_unique_abbr": "Uni Leipzig;RUG;Uni Leipzig", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0", "aff_country_unique": "Germany;Netherlands" }, { "id": "pGlnFVmI4x", "title": "Boosting Summarization with Normalizing Flows and Aggressive Training", "track": "main", "status": "Long Main", "tldr": "", "abstract": "This paper presents FlowSUM, a normalizing flows-based variational encoder-decoder framework for Transformer-based summarization. Our approach tackles two primary challenges in variational summarization: insufficient semantic information in latent representations and posterior collapse during training. To address these challenges, we employ normalizing flows to enable flexible latent posterior modeling, and we propose a controlled alternate aggressive training (CAAT) strategy with an improved gate mechanism. Experimental results show that FlowSUM significantly enhances the quality of generated summaries and unleashes the potential for knowledge distillation with minimal impact on inference time. Furthermore, we investigate the issue of posterior collapse in normalizing flows and analyze how the summary quality is affected by the training strategy, gate initialization, and the type and number of normalizing flows used, offering valuable insights for future research.", "keywords": "summarization;normalizing flows;posterior collapse;aggressive training", "primary_area": "", "supplementary_material": "", "author": "Yu Yang;Xiaotong Shen", "authorids": "~Yu_Yang8;~Xiaotong_Shen1", "gender": "F;M", "homepage": "https://yuyangyy.com;http://users.stat.umn.edu/~xshen/", "dblp": ";", "google_scholar": ";KLC90IoAAAAJ", "or_profile": "~Yu_Yang8;~Xiaotong_Shen1", "aff": "University of Minnesota, Minneapolis;University of Minnesota - Twin Cities", "aff_domain": "umn.edu;umn.edu", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\nyang2023boosting,\ntitle={Boosting Summarization with Normalizing Flows and Aggressive Training},\nauthor={Yu Yang and Xiaotong Shen},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=pGlnFVmI4x}\n}", "github": "", "project": "", "reviewers": "BWR7;FZCB;6vj4", "site": "https://openreview.net/forum?id=pGlnFVmI4x", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;3;2", "excitement": "3;4;4", "reproducibility": "4;3;3", "correctness": "3;4;4", "rating_avg": 4.0, "confidence_avg": 3.0, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0001-7355-6702;", "linkedin": "yuyangstat/;", "aff_unique_index": "0;0", "aff_unique_norm": "University of Minnesota", "aff_unique_dep": "", "aff_unique_url": "https://www.minnesota.edu", "aff_unique_abbr": "UMN", "aff_campus_unique_index": "0;1", "aff_campus_unique": "Minneapolis;Twin Cities", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "pHrNmdzX2C", "title": "FinGPT: Large Generative Models for a Small Language", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Large language models (LLMs) excel in many tasks in NLP and beyond, but most open models have very limited coverage of smaller languages and LLM work tends to focus on languages where nearly unlimited 
data is available for pretraining. In this work, we study the challenges of creating LLMs for Finnish, a language spoken by less than 0.1\\% of the world population. We compile an extensive dataset of Finnish combining web crawls, news, social media and eBooks. We pursue two approaches to pretrain models: 1) we train seven monolingual models from scratch (186M to 13B parameters) dubbed FinGPT, 2) we continue the pretraining of the multilingual BLOOM model on a mix of its original training data and Finnish, resulting in a 176 billion parameter model we call BLUUMI.\nFor model evaluation, we introduce FIN-bench, a version of BIG-bench with Finnish tasks. We also assess other model qualities such as toxicity and bias. Our models and tools are openly available at \\url{https://turkunlp.org/gpt3-finnish}.", "keywords": "large language models;gpt;causal model;finnish;transformers;monolingual language models;BLOOM", "primary_area": "", "supplementary_material": "", "author": "Risto Luukkonen;Ville Komulainen;Jouni Luoma;Anni Eskelinen;Jenna Kanerva;Hanna-Mari Kristiina Kupari;Filip Ginter;Veronika Laippala;Niklas Muennighoff;Aleksandra Piktus;Thomas Wang;Nouamane Tazi;Teven Le Scao;Thomas Wolf;Osma Suominen;Samuli Sairanen;Mikko Merioksa;Jyrki Heinonen;Aija Vahtola;Samuel Antao;Sampo Pyysalo", "authorids": "~Risto_Luukkonen1;~Ville_Komulainen1;~Jouni_Luoma1;~Anni_Eskelinen1;~Jenna_Kanerva1;~Hanna-Mari_Kristiina_Kupari1;~Filip_Ginter2;~Veronika_Laippala1;~Niklas_Muennighoff1;~Aleksandra_Piktus1;~Thomas_Wang1;~Nouamane_Tazi1;~Teven_Le_Scao1;~Thomas_Wolf1;~Osma_Suominen1;~Samuli_Sairanen1;~Mikko_Merioksa1;~Jyrki_Heinonen1;~Aija_Vahtola1;~Samuel_Antao1;~Sampo_Pyysalo2", "gender": "M;;M;F;;F;M;;M;F;;;;M;;;Not Specified;;;;M", "homepage": ";;;https://turkunlp.org/;;https://www.utu.fi/fi/ihmiset/hanna-mari-kupari;https://turkunlp.org;https://turkunlp.org/;https://muennighoff.github.io/;;;;;https://thomwolf.io;;https://fi.linkedin.com/in/samuli-sairanen-707041155;https://www.helsinki.fi/fi/tutustu-meihin/ihmiset/henkilohaku/mikko-merioksa-9378302;;;;", "dblp": ";;51/2835;;149/0691;;;;281/6745;241/7090;;;;;;;;;;;", "google_scholar": ";;https://scholar.google.com/citations?hl=en;;;;8XsfOy8AAAAJ;;Me0IoRMAAAAJ;bXvehs4AAAAJ;;q2bZs1IAAAAJ;;D2H5EFEAAAAJ;;;;;;APuhgJgAAAAJ;GUHpTS0AAAAJ", "or_profile": "~Risto_Luukkonen1;~Ville_Komulainen1;~Jouni_Luoma1;~Anni_Eskelinen1;~Jenna_Kanerva1;~Hanna-Mari_Kristiina_Kupari1;~Filip_Ginter2;~Veronika_Laippala1;~Niklas_Muennighoff1;~Aleksandra_Piktus1;~Thomas_Wang1;~Nouamane_Tazi1;~Teven_Le_Scao1;~Thomas_Wolf1;~Osma_Suominen1;~Samuli_Sairanen1;~Mikko_Merioksa1;~Jyrki_Heinonen1;~Aija_Vahtola1;~Samuel_Antao1;~Sampo_Pyysalo2", "aff": "University of Turku;;University of Turku;University of Turku;University of Turku;University of Turku;University of Turku;University of Turku;Hugging Face;Hugging Face;Hugging Face;Hugging Face;;Hugging Face;;University of Helsinki;University of Helsinki;;;;University of Turku", "aff_domain": "utu.fi;;utu.fi;utu.fi;utu.fi;utu.fi;utu.fi;utu.fi;gmail.com;huggingface.co;huggingface.co;huggingface.co;;huggingface.co;;helsinki.fi;helsinki.fi;;;;utu.fi", "position": "MS student;;PhD student;MS student;Postdoc;PhD student;Full Professor;Full Professor;Researcher;Researcher;Researcher;Researcher;;Researcher;;Researcher;MS student;;;;Principal Researcher", "bibtex": "@inproceedings{\nluukkonen2023fingpt,\ntitle={Fin{GPT}: Large Generative Models for a Small Language},\nauthor={Risto Luukkonen and Ville Komulainen and Jouni Luoma and Anni Eskelinen and Jenna 
Kanerva and Hanna-Mari Kristiina Kupari and Filip Ginter and Veronika Laippala and Niklas Muennighoff and Aleksandra Piktus and Thomas Wang and Nouamane Tazi and Teven Le Scao and Thomas Wolf and Osma Suominen and Samuli Sairanen and Mikko Merioksa and Jyrki Heinonen and Aija Vahtola and Samuel Antao and Sampo Pyysalo},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=pHrNmdzX2C}\n}", "github": "", "project": "", "reviewers": "yW8g;ddtQ;Y78r", "site": "https://openreview.net/forum?id=pHrNmdzX2C", "pdf_size": 0, "rating": "5;5;5", "confidence": "3;2;4", "excitement": "3;4;4", "reproducibility": "4;0;3", "correctness": "4;4;4", "rating_avg": 5.0, "confidence_avg": 3.0, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 2.3333333333333335, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 21, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0001-9286-1868;;0000-0003-4580-5366;;0000-0002-5484-6103;;;;;;0000-0002-7052-3048;;;;;;;;", "linkedin": "risto-luukkonen/;;jouniluoma/;;;;;;niklasmuennighoff/;;thomas-w-394479109/;nouamanetazi/;;;osmasuominen/;;;;aija-vahtola-485020a3/;samuelfantao/;", "aff_unique_index": "0;0;0;0;0;0;0;1;1;1;1;1;2;2;0", "aff_unique_norm": "University of Turku;Hugging Face;University of Helsinki", "aff_unique_dep": ";;", "aff_unique_url": "https://www.utu.fi;https://huggingface.co;https://www.helsinki.fi", "aff_unique_abbr": "UTU;Hugging Face;UH", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;1;1;1;1;1;0;0;0", "aff_country_unique": "Finland;United States" }, { "id": "pHwLbEkB0J", "title": "Cross-lingual Prompting: Improving Zero-shot Chain-of-Thought Reasoning across Languages", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Chain-of-thought (CoT) is capable of eliciting models to explicitly generate reasoning paths, thus promoting reasoning accuracy and attracting increasing attention. Specifically, zero-shot CoT achieves remarkable improvements in a wide range of reasoning tasks by simply instructing the LLM with the prompt \"Let's think step by step!\". Despite the success of zero-shot CoT, the existing zero-shot prompting techniques remain limited to a single language, making it challenging to generalize to other languages and hindering global development. In this work, we introduce cross-lingual prompting (CLP), aiming to improve zero-shot CoT reasoning across languages. Specifically, CLP consists of two main components: (1) cross-lingual alignment prompting and (2) task-specific solver prompting. The cross-lingual alignment prompting is responsible for aligning representations across different languages, whereas the task-specific solver prompting is used to generate the final chain of thoughts and results for the reasoning task. In addition, we further introduce cross-lingual self-consistent prompting (CLSP) to ensemble different reasoning paths across languages. Our experimental evaluations on several benchmarks demonstrate that CLP and CLSP significantly outperform the existing prompting methods and achieve state-of-the-art performance. 
We hope this work will inspire further breakthroughs in cross-lingual CoT.", "keywords": "Chain-of-Thought; Cross-lingual Prompting; Cross-lingual self-consistency Prompting", "primary_area": "", "supplementary_material": "", "author": "Libo Qin;Qiguang Chen;Fuxuan Wei;Shijue Huang;Wanxiang Che", "authorids": "~Libo_Qin1;~Qiguang_Chen1;~Fuxuan_Wei1;~Shijue_Huang1;~Wanxiang_Che1", "gender": ";M;M;M;M", "homepage": ";https://scholar.google.com/citations?user=8j8AfF0AAAAJ;;;http://ir.hit.edu.cn/~car/", "dblp": ";292/9953;;302/4692;https://dblp.uni-trier.de/pers/hd/c/Che:Wanxiang", "google_scholar": ";8j8AfF0AAAAJ;bkeyeAIAAAAJ;https://scholar.google.com.hk/citations?user=C0Xc3dYAAAAJ;SVlQ6IEAAAAJ", "or_profile": "~Libo_Qin1;~Qiguang_Chen1;~Fuxuan_Wei1;~Shijue_Huang1;~Wanxiang_Che1", "aff": ";Harbin Institute of Technology;Harbin Institute of Technology;Harbin Institute of Technology, Shenzhen;Harbin Institute of Technology", "aff_domain": ";hit.edu.cn;hit.edu.cn;hit.edu.cn;hit.edu.cn", "position": ";PhD student;MS student;MS student;Full Professor", "bibtex": "@inproceedings{\nqin2023crosslingual,\ntitle={Cross-lingual Prompting: Improving Zero-shot Chain-of-Thought Reasoning across Languages},\nauthor={Libo Qin and Qiguang Chen and Fuxuan Wei and Shijue Huang and Wanxiang Che},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=pHwLbEkB0J}\n}", "github": "", "project": "", "reviewers": "wcFZ;prAk;mrxR", "site": "https://openreview.net/forum?id=pHwLbEkB0J", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;4;3", "excitement": "3;3;4", "reproducibility": "3;5;3", "correctness": "3;4;4", "rating_avg": 5.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0002-3373-4698;;", "linkedin": ";;;;", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Harbin Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "http://www.hit.edu.cn/", "aff_unique_abbr": "HIT", "aff_campus_unique_index": "0;0;1;0", "aff_campus_unique": "Harbin;Shenzhen", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "pJwlMI7AYm", "title": "NERetrieve: Dataset for Next Generation Named Entity Recognition and Retrieval", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Recognizing entities in texts is a central need in many information-seeking scenarios, and indeed, Named Entity Recognition (NER) is arguably one of the most successful examples of a widely adopted NLP task and corresponding NLP technology. Recent advances in large language models (LLMs) appear to provide effective solutions (also) for NER tasks that were traditionally handled with dedicated models, often matching or surpassing the abilities of the dedicated models. Should NER be considered a solved problem? We argue to the contrary: the capabilities provided by LLMs are not the end of NER research, but rather an exciting beginning. They allow taking NER to the next level, tackling increasingly more useful, and increasingly more challenging, variants. We present three variants of the NER task, together with a dataset to support them. The first is a move towards more fine-grained---and intersectional---entity types. 
The second is a move towards zero-shot recognition and extraction of these fine-grained types based on entity-type labels. The third, and most challenging, is the move from the recognition setup to a novel retrieval setup, where the query is a zero-shot entity type, and the expected result is all the sentences from a large, pre-indexed corpus that contain entities of these types, and their corresponding spans. We show that all of these are far from being solved. We provide a large, silver-annotated corpus of 4 million paragraphs covering 500 entity types, to facilitate research towards all of these three goals.", "keywords": "zero shot ner;retrieval;exhaustive search", "primary_area": "", "supplementary_material": "", "author": "Uri Katz;Matan Vetzler;Amir David Nissan Cohen;Yoav Goldberg", "authorids": "~Uri_Katz1;~Matan_Vetzler2;~Amir_David_Nissan_Cohen1;~Yoav_Goldberg1", "gender": "M;M;M;M", "homepage": "https://katzurik.github.io/;https://www.linkedin.com/in/matanvetzler/;http://www.cs.technion.ac.il/~amirc/;https://www.cs.biu.ac.il/~yogo", "dblp": "94/11232;;;68/5296", "google_scholar": "DkQ5W4wAAAAJ;https://scholar.google.com/citations?hl=en;KRkQizcAAAAJ;https://scholar.google.co.il/citations?user=0rskDKgAAAAJ", "or_profile": "~Uri_Katz1;~Matan_Vetzler2;~Amir_David_Nissan_Cohen1;~Yoav_Goldberg1", "aff": "Bar-Ilan University;Bar-Ilan University;Bar Ilan University;Allen Institute for Artificial Intelligence", "aff_domain": "biu.ac.il;biu.ac.il;biu.ac.il;allenai.org", "position": "PhD student;PhD student;PhD student;Principal Researcher", "bibtex": "@inproceedings{\nkatz2023neretrieve,\ntitle={{NER}etrieve: Dataset for Next Generation Named Entity Recognition and Retrieval},\nauthor={Uri Katz and Matan Vetzler and Amir David Nissan Cohen and Yoav Goldberg},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=pJwlMI7AYm}\n}", "github": "", "project": "", "reviewers": "EbDr;jvxe;Yuir", "site": "https://openreview.net/forum?id=pJwlMI7AYm", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;3;4", "excitement": "4;3;3", "reproducibility": "4;2;0", "correctness": "3;3;3", "rating_avg": 3.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 2.0, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;", "linkedin": ";;;", "aff_unique_index": "0;0;0;1", "aff_unique_norm": "Bar-Ilan University;Allen Institute for Artificial Intelligence", "aff_unique_dep": ";", "aff_unique_url": "https://www.biu.ac.il;https://allenai.org", "aff_unique_abbr": "BIU;AI2", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1", "aff_country_unique": "Israel;United States" }, { "id": "pMCRGmB7Rv", "title": "BioPlanner: Automatic Evaluation of LLMs on Protocol Planning in Biology", "track": "main", "status": "Long Main", "tldr": "", "abstract": "The ability to automatically generate accurate protocols for scientific experiments would represent a major step towards the automation of science. Large Language Models (LLMs) have impressive capabilities on a wide range of tasks, such as question answering and the generation of coherent text and code. However, LLMs can struggle with multi-step problems and long-term planning, which are crucial for designing scientific experiments. 
Moreover, evaluation of the accuracy of scientific protocols is challenging, because experiments can be described correctly in many different ways, require expert knowledge to evaluate, and cannot usually be executed automatically. Here we present an automatic evaluation framework for the task of planning experimental protocols, and we introduce BioProt: a dataset of biology protocols with corresponding pseudocode representations. To measure performance on generating scientific protocols, we use an LLM to convert a natural language protocol into pseudocode, and then evaluate an LLM's ability to reconstruct the pseudocode from a high-level description and a list of admissible pseudocode functions. We evaluate GPT-3 and GPT-4 on this task and explore their robustness. We externally validate the utility of pseudocode representations of text by generating accurate novel protocols using retrieved pseudocode, and we run a generated protocol successfully in our biological laboratory. Our framework is extensible to the evaluation and improvement of language model", "keywords": "LLMs;GPT-4;Science;Biology;Evaluation", "primary_area": "", "supplementary_material": "", "author": "Odhran ODonoghue;Aleksandar Shtedritski;John Ginger;Ralph Abboud;Ali Essam Ghareeb;Samuel G Rodriques", "authorids": "~Odhran_ODonoghue1;~Aleksandar_Shtedritski1;~John_Ginger1;~Ralph_Abboud1;~Ali_Essam_Ghareeb1;~Samuel_G_Rodriques1", "gender": ";M;M;M;M;M", "homepage": "https://www.bdi.ox.ac.uk/Team/odhran-odonoghue;;https://johnginger.co.uk;https://www.ralphabboud.com;;", "dblp": ";284/9612;;226/4657;;", "google_scholar": ";cGnonsQAAAAJ;;KBiHfLQAAAAJ;dlWmbncAAAAJ;yGKwWGEAAAAJ", "or_profile": "~Odhran_ODonoghue1;~Aleksandar_Shtedritski1;~John_Ginger1;~Ralph_Abboud1;~Ali_Essam_Ghareeb1;~Samuel_G_Rodriques1", "aff": "University of Oxford;University of Oxford;;Schmidt Futures;University College London, University of London;The Francis Crick Institute", "aff_domain": "ox.ac.uk;oxford.ac.uk;;schmidtfutures.com;ucl.ac.uk;crick.ac.uk", "position": "PhD student;PhD student;;Researcher;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nodonoghue2023bioplanner,\ntitle={BioPlanner: Automatic Evaluation of {LLM}s on Protocol Planning in Biology},\nauthor={Odhran ODonoghue and Aleksandar Shtedritski and John Ginger and Ralph Abboud and Ali Essam Ghareeb and Samuel G Rodriques},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=pMCRGmB7Rv}\n}", "github": "", "project": "", "reviewers": "3v5p;9Mgr;Lo15", "site": "https://openreview.net/forum?id=pMCRGmB7Rv", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;3;2", "excitement": "3;5;3", "reproducibility": "4;4;3", "correctness": "3;5;3", "rating_avg": 4.0, "confidence_avg": 3.0, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;0000-0002-8552-3139;", "linkedin": ";;;;;samuel-g-rodriques-080a9b22/", "aff_unique_index": "0;0;1;2;3", "aff_unique_norm": "University of Oxford;Schmidt Futures;University College London;Francis Crick Institute", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.ox.ac.uk;https://www.schmidtfutures.com;https://www.ucl.ac.uk;https://www.crick.ac.uk", "aff_unique_abbr": "Oxford;Schmidt Futures;UCL;Crick", "aff_campus_unique_index": "", "aff_campus_unique": "", 
"aff_country_unique_index": "0;0;1;0;0", "aff_country_unique": "United Kingdom;United States" }, { "id": "pO7YD7PADN", "title": "Understanding the Effect of Model Compression on Social Bias in Large Language Models", "track": "main", "status": "Short Main", "tldr": "", "abstract": "Large Language Models (LLMs) trained with self-supervision on vast corpora of web text fit to the social biases of that text. Without intervention, these social biases persist in the model's predictions in downstream tasks, leading to representational harm.\nMany strategies have been proposed to mitigate the effects of inappropriate social biases learned during pretraining. \nSimultaneously, methods for model compression have become increasingly popular to reduce the computational burden of LLMs. Despite the popularity and need for both approaches, little work has been done to explore the interplay between these two.\nWe perform a carefully controlled study of the impact of model compression via quantization and knowledge distillation on measures of social bias in LLMs.\nLonger pretraining and larger models led to higher social bias, and quantization showed a regularizer effect with its best trade-off around 20\\% of the original pretraining time.", "keywords": "social bias;large language models;model compression;quantization;knowledge distillation", "primary_area": "", "supplementary_material": "", "author": "Gustavo Gon\u00e7alves;Emma Strubell", "authorids": "~Gustavo_Gon\u00e7alves1;~Emma_Strubell1", "gender": "M;Non-Binary", "homepage": ";http://strubell.github.io", "dblp": "218/0861;153/2253", "google_scholar": ";UCDMtM0AAAAJ", "or_profile": "~Gustavo_Gon\u00e7alves1;~Emma_Strubell1", "aff": "Universidade NOVA de Lisboa;Allen Institute for Artificial Intelligence", "aff_domain": "unl.pt;allenai.org", "position": "PhD student;Visiting Researcher", "bibtex": "@inproceedings{\ngon{\\c{c}}alves2023understanding,\ntitle={Understanding the Effect of Model Compression on Social Bias in Large Language Models},\nauthor={Gustavo Gon{\\c{c}}alves and Emma Strubell},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=pO7YD7PADN}\n}", "github": "", "project": "", "reviewers": "KtY1;nH78;az8p", "site": "https://openreview.net/forum?id=pO7YD7PADN", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;3;3", "excitement": "3;3;3", "reproducibility": "4;4;3", "correctness": "3;4;3", "rating_avg": 4.0, "confidence_avg": 3.0, "excitement_avg": 3.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";", "linkedin": ";", "aff_unique_index": "0;1", "aff_unique_norm": "NOVA University of Lisbon;Allen Institute for Artificial Intelligence", "aff_unique_dep": ";", "aff_unique_url": "https://www.unl.pt;https://allenai.org", "aff_unique_abbr": "UNL;AI2", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "Portugal;United States" }, { "id": "pPiJykFn0K", "title": "Harnessing the power of LLMs: Evaluating human-AI text co-creation through the lens of news headline generation", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "To explore how humans can best leverage LLMs for writing and how interacting with these models affects feelings of ownership and trust in the writing process, we compared common human-AI interaction types (e.g., 
guiding system, selecting from system outputs, post-editing outputs) in the context of LLM-assisted news headline generation. While LLMs alone can generate satisfactory news headlines, on average, human control is needed to fix undesirable model outputs. Of the interaction methods, guiding and selecting model output added the most benefit with the lowest cost (in time and effort). Further, AI assistance did not harm participants\u2019 perception of control compared to freeform editing.", "keywords": "human-centered NLP;large language model;human-AI collaboration;text summarization", "primary_area": "", "supplementary_material": "", "author": "Zijian Ding;Alison Smith-Renner;Wenjuan Zhang;Joel R. Tetreault;Alejandro Jaimes", "authorids": "~Zijian_Ding2;~Alison_Smith-Renner1;~Wenjuan_Zhang1;~Joel_R._Tetreault2;~Alejandro_Jaimes2", "gender": "M;F;;M;", "homepage": "https://jason-ding.com/;https://alisonmsmith.github.io/;;http://www.alexjaimes.com;https://www.cs.rochester.edu/~tetreaul/academic.html", "dblp": ";;;45/956;40/4518", "google_scholar": "nHgFeKcAAAAJ;z9vKkYsAAAAJ;;-iy0DxMAAAAJ;Fn52EXUAAAAJ", "or_profile": "~Zijian_Ding2;~Alison_Smith-Renner1;~Wenjuan_Zhang1;~Alejandro_Jaimes2;~Joel_R_Tetreault1", "aff": "University of Maryland, College Park;Dataminr;;Dataminr;Dataminr", "aff_domain": "umd.edu;dataminr.com;;dataminr.com;dataminr.com", "position": "PhD student;Researcher;;Principal Researcher;Principal Researcher", "bibtex": "@inproceedings{\nding2023harnessing,\ntitle={Harnessing the power of {LLM}s: Evaluating human-{AI} text co-creation through the lens of news headline generation},\nauthor={Zijian Ding and Alison Smith-Renner and Wenjuan Zhang and Joel R. Tetreault and Alejandro Jaimes},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=pPiJykFn0K}\n}", "github": "", "project": "", "reviewers": "DQTz;Nsef;YiDy", "site": "https://openreview.net/forum?id=pPiJykFn0K", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;4", "excitement": "3;3;3", "reproducibility": "3;3;0", "correctness": "3;4;3", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 3.0, "reproducibility_avg": 2.0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;", "linkedin": "zijian-ding/;;isabel-wenjuan-zhang-3aa32367/;alexjaimes/;joel-tetreault-67234512", "aff_unique_index": "0;1;1;1", "aff_unique_norm": "University of Maryland;Dataminr", "aff_unique_dep": ";", "aff_unique_url": "https://www/umd.edu;https://www.dataminr.com", "aff_unique_abbr": "UMD;Dataminr", "aff_campus_unique_index": "0", "aff_campus_unique": "College Park;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "pQFgViJp77", "title": "The Skipped Beat: A Study of Sociopragmatic Understanding in LLMs for 64 Languages", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Instruction tuned large language models (LLMs), such as ChatGPT, demonstrate remarkable performance in a wide range of tasks. Despite numerous recent studies that examine the performance of instruction-tuned LLMs on various NLP benchmarks, there remains a lack of comprehensive investigation into their ability to understand cross-lingual sociopragmatic meaning (SM), i.e., meaning embedded within social and interactive contexts. This deficiency arises partly from SM not being adequately represented in any of the existing benchmarks. 
To address this gap, we present SPARROW, an extensive multilingual benchmark specifically designed for SM understanding. SPARROW comprises 169 datasets covering 13 task types across six primary categories (e.g., anti-social language detection, emotion recognition). SPARROW datasets encompass 64 different languages originating from 12 language families representing 16 writing scripts. We evaluate the performance of various multilingual pretrained language models (e.g., mT5) and instruction-tuned LLMs (e.g., BLOOMZ, ChatGPT) on SPARROW through fine-tuning, zero-shot, and/or few-shot learning. Our comprehensive analysis reveals that existing open-source instruction tuned LLMs still struggle to understand SM across various languages, performing close to a random baseline in some cases. We also find that although ChatGPT outperforms many LLMs, it still falls behind task-specific finetuned models with a gap of 12.19 SPARROW score. Our benchmark is available at: https://github.com/UBC-NLP/SPARROW", "keywords": "sociopragmatics;benchmark;large language models;social media;ChatGPT;multilinguality", "primary_area": "", "supplementary_material": "", "author": "Chiyu Zhang;Khai Duy Doan;Qisheng Liao;Muhammad Abdul-Mageed", "authorids": "~Chiyu_Zhang1;~Khai_Duy_Doan1;~Qisheng_Liao1;~Muhammad_Abdul-Mageed2", "gender": "M;M;M;", "homepage": "https://chiyuzhang94.github.io/;;https://qishengl.github.io/;", "dblp": ";;;", "google_scholar": "https://scholar.google.ca/citations?user=oEGK73YAAAAJ;https://scholar.google.com/citations?hl=en;;", "or_profile": "~Chiyu_Zhang1;~Khai_Duy_Doan1;~Qisheng_Liao1;~Muhammad_Abdul-Mageed2", "aff": "Mohamed bin Zayed University of Artificial Intelligence;Mohamed bin Zayed University of Artificial Intelligence;Mohamed bin Zayed University of Artificial Intelligence;", "aff_domain": "mbzuai.ac.ae;mbzuai.ac.ae;mbzuai.ac.ae;", "position": "Researcher;MS student;MS student;", "bibtex": "@inproceedings{\nzhang2023the,\ntitle={The Skipped Beat: A Study of Sociopragmatic Understanding in {LLM}s for 64 Languages},\nauthor={Chiyu Zhang and Khai Duy Doan and Qisheng Liao and Muhammad Abdul-Mageed},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=pQFgViJp77}\n}", "github": "", "project": "", "reviewers": "GKU9;B91m;fC9A", "site": "https://openreview.net/forum?id=pQFgViJp77", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;4;4", "excitement": "4;4;4", "reproducibility": "4;4;4", "correctness": "4;5;4", "rating_avg": 5.0, "confidence_avg": 4.0, "excitement_avg": 4.0, "reproducibility_avg": 4.0, "correctness_avg": 4.333333333333333, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-5981-352X;0009-0004-9601-944X;;", "linkedin": "chiyuzhang94/;khaidoan25;;", "aff_unique_index": "0;0;0", "aff_unique_norm": "Mohamed bin Zayed University of Artificial Intelligence", "aff_unique_dep": "", "aff_unique_url": "https://mbzuai.ac.ae", "aff_unique_abbr": "MBZUAI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United Arab Emirates" }, { "id": "pW6xXXnCQu", "title": "PsyCoT: Psychological Questionnaire as Powerful Chain-of-Thought for Personality Detection", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Recent advances in large language models (LLMs), such as ChatGPT, have showcased remarkable zero-shot performance across various NLP tasks. 
However, the potential of LLMs in personality detection, which involves identifying an individual's personality from their written texts, remains largely unexplored. Drawing inspiration from Psychological Questionnaires, which are carefully designed by psychologists to evaluate individual personality traits through a series of targeted items, we argue that these items can be regarded as a collection of well-structured chain-of-thought (CoT) processes. By incorporating these processes, LLMs can enhance their capabilities to make more reasonable inferences on personality from textual input. In light of this, we propose a novel personality detection method, called PsyCoT, which mimics the way individuals complete psychological questionnaires in a multi-turn dialogue manner. In particular, we employ a LLM as an AI assistant with a specialization in text analysis. We prompt the assistant to rate individual items at each turn and leverage the historical rating results to derive a conclusive personality preference. Our experiments demonstrate that PsyCoT significantly improves the performance and robustness of GPT-3.5 in personality detection, achieving an average F1 score improvement of 4.23/10.63 points on two benchmark datasets compared to the standard prompting method. Our code is available at \\url{https://github.com/TaoYang225/PsyCoT.", "keywords": "personality detection;psychological questionnaire;chain-of-thought", "primary_area": "", "supplementary_material": "", "author": "Tao Yang;Tianyuan Shi;Fanqi Wan;Xiaojun Quan;Qifan Wang;Bingzhe Wu;Jiaxiang Wu", "authorids": "~Tao_Yang13;~Tianyuan_Shi1;~Fanqi_Wan1;~Xiaojun_Quan1;~Qifan_Wang2;~Bingzhe_Wu1;~Jiaxiang_Wu1", "gender": "M;M;M;M;M;M;M", "homepage": "https://taoyang225.github.io/;https://www.sysu.edu.cn/;https://fanqiwan.github.io/;https://sites.google.com/site/xiaojunquan/;https://wqfcr.github.io/;;", "dblp": ";341/4890;347/8267;90/5936;33/8610;207/4843;119/6799-1.html", "google_scholar": "i3to2x8AAAAJ;;AeS1tmEAAAAJ;dRpg4t8AAAAJ;LrSyLosAAAAJ;_3hgtf8AAAAJ;https://scholar.google.com.hk/citations?user=puazh38AAAAJ", "or_profile": "~Tao_Yang13;~Tianyuan_Shi1;~Fanqi_Wan1;~Xiaojun_Quan1;~Qifan_Wang2;~Bingzhe_Wu1;~Jiaxiang_Wu1", "aff": "SUN YAT-SEN UNIVERSITY;SUN YAT-SEN UNIVERSITY;SUN YAT-SEN UNIVERSITY;SUN YAT-SEN UNIVERSITY;Meta AI;Tencent AI Lab;Tencent AI Lab", "aff_domain": "sysu.edu.cn;sysu.edu.cn;sysu.edu.cn;sysu.edu.cn;fb.com;tencent.com;tencent.com", "position": "PhD student;PhD student;MS student;Full Professor;Principal Researcher;Researcher;Researcher", "bibtex": "@inproceedings{\nyang2023psycot,\ntitle={PsyCoT: Psychological Questionnaire as Powerful Chain-of-Thought for Personality Detection},\nauthor={Tao Yang and Tianyuan Shi and Fanqi Wan and Xiaojun Quan and Qifan Wang and Bingzhe Wu and Jiaxiang Wu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=pW6xXXnCQu}\n}", "github": "", "project": "", "reviewers": "3hiz;6BCv;QAsf", "site": "https://openreview.net/forum?id=pW6xXXnCQu", "pdf_size": 0, "rating": "4;4;4", "confidence": "5;4;3", "excitement": "4;3;4", "reproducibility": "5;4;4", "correctness": "3;3;3", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.333333333333333, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;0000-0002-7570-5756;;", "linkedin": ";;fanqiwan/;;;;", "aff_unique_index": 
"0;0;0;0;1;2;2", "aff_unique_norm": "Sun Yat-sen University;Meta;Tencent", "aff_unique_dep": ";Meta AI;Tencent AI Lab", "aff_unique_url": "http://www.sysu.edu.cn;https://meta.com;https://ai.tencent.com", "aff_unique_abbr": "SYSU;Meta;Tencent AI Lab", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;1;0;0", "aff_country_unique": "China;United States" }, { "id": "pYRCUypbuq", "title": "Did You Mean...? Confidence-based Trade-offs in Semantic Parsing", "track": "main", "status": "Short Main", "tldr": "", "abstract": "We illustrate how a calibrated model can help balance common trade-offs in task-oriented parsing. \nIn a simulated annotator-in-the-loop experiment, we show that well-calibrated confidence scores allow us to balance cost with annotator load, improving accuracy with a small number of interactions. \nWe then examine how confidence scores can help optimize the trade-off between usability and safety. \nWe show that confidence-based thresholding can substantially reduce the number of incorrect low-confidence programs executed; however, this comes at a cost to usability. \nWe propose the DidYouMean system which better balances usability and safety by rephrasing low-confidence inputs.", "keywords": "calibration;semantic parsing;safety;paraphrasing", "primary_area": "", "supplementary_material": "", "author": "Elias Stengel-Eskin;Benjamin Van Durme", "authorids": "~Elias_Stengel-Eskin1;~Benjamin_Van_Durme2", "gender": "M;", "homepage": "https://esteng.github.io;", "dblp": "212/6138;", "google_scholar": "gr_ZVSQAAAAJ;", "or_profile": "~Elias_Stengel-Eskin1;~Benjamin_Van_Durme2", "aff": "Johns Hopkins University;", "aff_domain": "jhu.edu;", "position": "PhD student;", "bibtex": "@inproceedings{\nstengel-eskin2023did,\ntitle={Did You Mean...? Confidence-based Trade-offs in Semantic Parsing},\nauthor={Elias Stengel-Eskin and Benjamin Van Durme},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=pYRCUypbuq}\n}", "github": "", "project": "", "reviewers": "k4cA;q8g5;DWn7;11rs", "site": "https://openreview.net/forum?id=pYRCUypbuq", "pdf_size": 0, "rating": "4;4;4;4", "confidence": "2;4;3;1", "excitement": "3;4;3;3", "reproducibility": "2;4;3;3", "correctness": "4;4;3;3", "rating_avg": 4.0, "confidence_avg": 2.5, "excitement_avg": 3.25, "reproducibility_avg": 3.0, "correctness_avg": 3.5, "replies_avg": 6, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-6689-505X;", "linkedin": ";", "aff_unique_index": "0", "aff_unique_norm": "Johns Hopkins University", "aff_unique_dep": "", "aff_unique_url": "https://www.jhu.edu", "aff_unique_abbr": "JHU", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "id": "paUJOst3OE", "title": "MAPO: Boosting Large Language Model Performance with Model-Adaptive Prompt Optimization", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Prompt engineering, as an efficient and effective way to leverage Large Language Models (LLM), has drawn a lot of attention from the research community. 
\nThe existing research primarily emphasizes the importance of adapting prompts to specific tasks, rather than specific LLMs.\nHowever, a good prompt is not solely defined by its wording, but also binds to the nature of the LLM in question.\nIn this work, we first quantitatively demonstrate that different prompts should be adapted to different LLMs to enhance their capabilities across various downstream tasks in NLP. Then we novelly propose a model-adaptive prompt optimizer (MAPO) method that optimizes the original prompts for each specific LLM in downstream tasks. Extensive experiments indicate that the proposed method can effectively refine prompts for an LLM, leading to significant improvements over various downstream tasks.", "keywords": "Prompts Optimization;Large Language Models;Reinforcement Learning", "primary_area": "", "supplementary_material": "", "author": "Yuyan Chen;Zhihao Wen;Ge Fan;Zhengyu Chen;Wei Wu;Dayiheng Liu;Zhixu Li;Bang Liu;Yanghua Xiao", "authorids": "~Yuyan_Chen1;~Zhihao_Wen1;~Ge_Fan1;~Zhengyu_Chen3;~Wei_Wu1;~Dayiheng_Liu1;~Zhixu_Li2;~Bang_Liu1;~Yanghua_Xiao1", "gender": "F;Not Specified;M;;M;M;M;M;", "homepage": "https://scholar.google.com.hk/citations?user=LNSE_VcAAAAJ&hl=zh-CN;;http://fange.pro/;;https://sites.google.com/view/wei-wu-homepage;https://dayihengliu.github.io/;http://demigroup.cn/staff/ZhiXuLi;http://www-labs.iro.umontreal.ca/~liubang/;", "dblp": "96/11155.html;292/8251;30/6877;;95/6985-14;https://dblp.uni-trier.de/pers/hd/l/Liu:Dayiheng;38/3988;;96/999", "google_scholar": "https://scholar.google.com.hk/citations?user=LNSE_VcAAAAJ;pf79pdQAAAAJ;pD4HWA0AAAAJ;;https://scholar.google.co.jp/citations?hl=en;pPLQrX4AAAAJ;https://scholar.google.com.hk/citations?hl=zh-CN;lmfAnP4AAAAJ;https://scholar.google.com/citations?hl=zh-CN", "or_profile": "~Yuyan_Chen1;~Zhihao_Wen1;~Ge_Fan1;~Zhengyu_Chen3;~Wei_Wu1;~Dayiheng_Liu1;~Zhixu_Li2;~Bang_Liu1;~Yanghua_Xiao1", "aff": "Fudan University;Singapore Management University;Tencent Inc.;;Ant Research;Alibaba Group;Fudan University;University of Montreal;Fudan University", "aff_domain": "fudan.edu.cn;smu.edu.sg;tencent.com;;antgroup.com;alibaba-inc.com;fudan.edu.cn;umontreal.ca;fudan.edu.cn", "position": "PhD student;PhD student;Researcher;;Researcher;Researcher;Researcher;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nchen2023mapo,\ntitle={{MAPO}: Boosting Large Language Model Performance with Model-Adaptive Prompt Optimization},\nauthor={Yuyan Chen and Zhihao Wen and Ge Fan and Zhengyu Chen and Wei Wu and Dayiheng Liu and Zhixu Li and Bang Liu and Yanghua Xiao},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=paUJOst3OE}\n}", "github": "", "project": "", "reviewers": "DfN7;Wer2;9pBE", "site": "https://openreview.net/forum?id=paUJOst3OE", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;3;3", "excitement": "3;3;3", "reproducibility": "4;3;4", "correctness": "3;3;4", "rating_avg": 3.0, "confidence_avg": 3.0, "excitement_avg": 3.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 9, "authors#_avg": 9, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-4381-486X;0000-0002-7688-5381;0000-0001-5653-1626;;0000-0001-6079-7697;0000-0002-8755-8941;0000-0003-2355-288X;0000-0002-9483-8984;0000-0001-8403-9591", "linkedin": ";zhihao-wen-5b9a02201/;;;;;;bang-liu-12b66789/?originalSubdomain=ca;", "aff_unique_index": "0;1;2;3;4;0;5;0", 
"aff_unique_norm": "Fudan University;Singapore Management University;Tencent;Ant Research;Alibaba Group;University of Montreal", "aff_unique_dep": ";;Tencent;;;", "aff_unique_url": "https://www.fudan.edu.cn;https://www.smu.edu.sg;https://www.tencent.com;https://www.antgroup.com;https://www.alibaba.com;https://wwwumontreal.ca", "aff_unique_abbr": "Fudan;SMU;Tencent;Ant Research;Alibaba;UM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;0;0;2;0", "aff_country_unique": "China;Singapore;Canada" }, { "id": "pfeod9GPAw", "title": "Extractive Summarization via ChatGPT for Faithful Summary Generation", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "Extractive summarization is a crucial task in natural language processing that aims to condense long documents into shorter versions by directly extracting sentences. The recent introduction of large language models has attracted significant interest in the NLP community due to its remarkable performance on a wide range of downstream tasks. This paper first presents a thorough evaluation of ChatGPT's performance on extractive summarization and compares it with traditional fine-tuning methods on various benchmark datasets. Our experimental analysis reveals that ChatGPT exhibits inferior extractive summarization performance in terms of ROUGE scores compared to existing supervised systems, while achieving higher performance based on LLM-based evaluation metrics. In addition, we explore the effectiveness of in-context learning and chain-of-thought reasoning for enhancing its performance. Furthermore, we find that applying an extract-then-generate pipeline with ChatGPT yields significant performance improvements over abstractive baselines in terms of summary faithfulness. 
These observations highlight potential directions for enhancing ChatGPT's capabilities in faithful summarization using two-stage approaches.", "keywords": "summarization;large language model;faithfulness", "primary_area": "", "supplementary_material": "", "author": "Haopeng Zhang;Xiao Liu;Jiawei Zhang", "authorids": "~Haopeng_Zhang3;~Xiao_Liu22;~Jiawei_Zhang3", "gender": "M;M;", "homepage": "https://hpzhang94.github.io/;https://haroldliuj.github.io;http://jiaweizhang.net/", "dblp": "256/5136;82/1364-34;10/239-1", "google_scholar": "https://scholar.google.com/citations?hl=en;E97kG9IAAAAJ;7AkZSJsAAAAJ", "or_profile": "~Haopeng_Zhang3;~Xiao_Liu22;~Jiawei_Zhang3", "aff": "University of California, Davis;University of California, Davis;University of California, Davis", "aff_domain": "ucdavis.edu;ucdavis.edu;ucdavis.edu", "position": "PhD student;PhD student;Associate Professor", "bibtex": "@inproceedings{\nzhang2023extractive,\ntitle={Extractive Summarization via Chat{GPT} for Faithful Summary Generation},\nauthor={Haopeng Zhang and Xiao Liu and Jiawei Zhang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=pfeod9GPAw}\n}", "github": "", "project": "", "reviewers": "hwbk;jH6Y;6bTC;dGKd", "site": "https://openreview.net/forum?id=pfeod9GPAw", "pdf_size": 0, "rating": "3;3;3;3", "confidence": "4;3;3;4", "excitement": "4;3;2;3", "reproducibility": "2;4;4;5", "correctness": "3;3;2;3", "rating_avg": 3.0, "confidence_avg": 3.5, "excitement_avg": 3.0, "reproducibility_avg": 3.75, "correctness_avg": 2.75, "replies_avg": 13, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0002-2111-7617", "linkedin": ";%E9%AA%81-%E5%88%98-2777101a1/;", "aff_unique_index": "0;0;0", "aff_unique_norm": "University of California, Davis", "aff_unique_dep": "", "aff_unique_url": "https://www.ucdavis.edu", "aff_unique_abbr": "UC Davis", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Davis", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "pgEIr2HY2E", "title": "Improving Summarization with Human Edits", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Recent work has shown the promise of learning with human feedback paradigms to produce human-determined high-quality text. Existing works use human feedback to train large language models (LLMs) in general domain abstractive summarization and have obtained summary quality exceeding traditional likelihood training. In this paper, we focus on a less explored form of human feedback -- Human Edits. We propose Sequence Alignment (un)Likelihood Training (SALT), a novel technique to use both the human-edited and model-generated data together in the training loop. In addition, we demonstrate simulating Human Edits with ground truth summaries coming from existing training data -- Imitation edits, along with the model-generated summaries obtained after the training, to reduce the need for expensive human-edit data. In our experiments, we extend human feedback exploration from general domain summarization to medical domain summarization. Our results demonstrate the effectiveness of SALT in improving the summary quality with Human and Imitation Edits. Through additional experiments, we show that SALT outperforms the conventional RLHF method (designed for human preferences) -- DPO, when applied to human-edit data. 
We hope the evidence in our paper prompts researchers to explore, collect, and better use different human feedback approaches scalably.", "keywords": "Human-aligned AI; Unlikelihood Training; Human Edits; Imitation Edits; Human Feedback", "primary_area": "", "supplementary_material": "", "author": "Zonghai Yao;Benjamin J Schloss;Sai P Selvaraj", "authorids": "~Zonghai_Yao1;~Benjamin_J_Schloss1;~Sai_P_Selvaraj1", "gender": "M;M;M", "homepage": "https://www.linkedin.com/in/zonghaiyao/;;http://saiprabhakar.github.io", "dblp": "276/5864;;183/0945", "google_scholar": "oVYZ904AAAAJ;gTuG8BsAAAAJ;avFhCGUAAAAJ", "or_profile": "~Zonghai_Yao1;~Benjamin_J_Schloss1;~Sai_Prabhakar_Pandi_Selvaraj1", "aff": "University of Massachusetts at Amherst;Abridge AI;Abridge AI Inc", "aff_domain": "umass.edu;abridge.com;abridge.com", "position": "PhD student;Researcher;Researcher", "bibtex": "@inproceedings{\nyao2023improving,\ntitle={Improving Summarization with Human Edits},\nauthor={Zonghai Yao and Benjamin J Schloss and Sai P Selvaraj},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=pgEIr2HY2E}\n}", "github": "", "project": "", "reviewers": "sCPx;88MQ;ZppW;6R2L", "site": "https://openreview.net/forum?id=pgEIr2HY2E", "pdf_size": 0, "rating": "5;5;5;5", "confidence": "4;4;2;4", "excitement": "4;4;3;3", "reproducibility": "4;2;2;4", "correctness": "3;3;3;4", "rating_avg": 5.0, "confidence_avg": 3.5, "excitement_avg": 3.5, "reproducibility_avg": 3.0, "correctness_avg": 3.25, "replies_avg": 14, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-5707-8410;;0000-0003-2934-6147", "linkedin": "zonghaiyao/;benjamin-schloss-25625847/;sai-prabhakar-regrefree/", "aff_unique_index": "0;1;2", "aff_unique_norm": "University of Massachusetts Amherst;Abridge AI;Abridge AI Inc", "aff_unique_dep": ";;", "aff_unique_url": "https://www.umass.edu;https://www.abridge.ai;", "aff_unique_abbr": "UMass Amherst;Abridge AI;", "aff_campus_unique_index": "0", "aff_campus_unique": "Amherst;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "phJtMADSdy", "title": "Multi-User MultiWOZ: Task-Oriented Dialogues among Multiple Users", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "While most task-oriented dialogues assume conversations between the agent and one user at a time, dialogue systems are increasingly expected to communicate with multiple users simultaneously who make decisions collaboratively. To facilitate development of such systems, we release the Multi-User MultiWOZ dataset: task-oriented dialogues among two users and one agent. To collect this dataset, each user utterance from MultiWOZ 2.2 was replaced with a small chat between two users that is semantically and pragmatically consistent with the original user utterance, thus resulting in the same dialogue state and system response. These dialogues reflect interesting dynamics of collaborative decision-making in task-oriented scenarios, e.g., social chatter and deliberation. Supported by this data, we propose the novel task of multi-user contextual query rewriting: to rewrite a task-oriented chat between two users as a concise task-oriented query that retains only task-relevant information and that is directly consumable by the dialogue system. 
We demonstrate that in multi-user dialogues, using predicted rewrites substantially improves dialogue state tracking without modifying existing dialogue systems that are trained for single-user dialogues. Further, this method surpasses training a medium-sized model directly on multi-user dialogues and generalizes to unseen domains.", "keywords": "Multiparty Dialogues;Contextual Query Rewriting;Dialogue Summarization;Dialogue Systems", "primary_area": "", "supplementary_material": "", "author": "Yohan Jo;Xinyan Zhao;Arijit Biswas;Nikoletta Basiou;Vincent Auvray;Nikolaos Malandrakis;Angeliki Metallinou;Alexandros Potamianos", "authorids": "~Yohan_Jo1;~Xinyan_Zhao2;~Arijit_Biswas1;~Nikoletta_Basiou1;~Vincent_Auvray3;~Nikolaos_Malandrakis1;~Angeliki_Metallinou1;~Alexandros_Potamianos2", "gender": ";M;M;F;;M;F;M", "homepage": "https://yohanjo.github.io/;;;;;;;https://slp-ntua.github.io/potam/", "dblp": "40/8877;;16/5085;;15/6769.html;;87/4411;17/2202.html", "google_scholar": "xp3LGRQAAAAJ;https://scholar.google.com/citations?hl=en;Hu2Ht28AAAAJ;;KMaW_KwAAAAJ;W6ec5ZsAAAAJ;;pBQViyUAAAAJ", "or_profile": "~Yohan_Jo1;~Xinyan_Zhao2;~Arijit_Biswas1;~Nikoletta_Basiou1;~Vincent_Auvray3;~Nikolaos_Malandrakis1;~Angeliki_Metallinou1;~Alexandros_Potamianos2", "aff": "Amazon;Amazon;Amazon;;Amazon;Amazon;;National Technical University of Athens", "aff_domain": "amazon.com;amazon.com;amazon.com;;amazon.com;amazon.com;;ntua.gr", "position": "Applied Scientist;Researcher;Researcher;;Applied Scientist;Researcher;;Associate Professor", "bibtex": "@inproceedings{\njo2023multiuser,\ntitle={Multi-User Multi{WOZ}: Task-Oriented Dialogues among Multiple Users},\nauthor={Yohan Jo and Xinyan Zhao and Arijit Biswas and Nikoletta Basiou and Vincent Auvray and Nikolaos Malandrakis and Angeliki Metallinou and Alexandros Potamianos},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=phJtMADSdy}\n}", "github": "", "project": "", "reviewers": "CX5P;6huk;bSQy", "site": "https://openreview.net/forum?id=phJtMADSdy", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;3", "excitement": "3;3;3", "reproducibility": "4;5;4", "correctness": "4;3;3", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.0, "reproducibility_avg": 4.333333333333333, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 8, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0001-5937-0688;;;;;;", "linkedin": ";;arijit-biswas-46690236/;nikolettabasiou;auvray/;;;apotam/", "aff_unique_index": "0;0;0;0;0;1", "aff_unique_norm": "Amazon;National Technical University of Athens", "aff_unique_dep": "Amazon.com, Inc.;", "aff_unique_url": "https://www.amazon.com;https://www.ntua.gr", "aff_unique_abbr": "Amazon;NTUA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;1", "aff_country_unique": "United States;Greece" }, { "id": "pi764D1Xrx", "title": "Ask Language Model to Clean Your Noisy Translation Data", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Transformer models have demonstrated remarkable performance in neural machine translation (NMT). However, their vulnerability to noisy input poses a significant challenge in practical implementation, where generating clean output from noisy input is crucial. The MTNT dataset is widely used as a benchmark for evaluating the robustness of NMT models against noisy input. 
Nevertheless, its utility is limited due to the presence of noise in both the source and target sentences. To address this limitation, we focus on cleaning the noise from the target sentences in MTNT, making it more suitable as a benchmark for noise evaluation. Leveraging the capabilities of large language models (LLMs), we observe their impressive abilities in noise removal. For example, they can remove emojis while considering their semantic meaning. Additionally, we show that LLM can effectively rephrase slang, jargon, and profanities. The resulting datasets, called C-MTNT, exhibit significantly less noise in the target sentences while preserving the semantic integrity of the original sentences. Our human and GPT-4 evaluations also lead to a consistent conclusion that LLM performs well on this task. Lastly, experiments on C-MTNT showcased its effectiveness in evaluating the robustness of NMT models, highlighting the potential of advanced language models for data cleaning and emphasizing C-MTNT as a valuable resource.", "keywords": "Machine Translation;Large Language Models;Data Generation;Noise", "primary_area": "", "supplementary_material": "", "author": "Quinten Bolding;Baohao Liao;Brandon James Denis;Jun Luo;Christof Monz", "authorids": "~Quinten_Bolding1;~Baohao_Liao1;~Brandon_James_Denis1;~Jun_Luo10;~Christof_Monz1", "gender": "M;M;;;M", "homepage": ";https://baohaoliao.github.io/;;;https://staff.fnwi.uva.nl/c.monz/", "dblp": ";234/4096;;;m/ChristofMonz", "google_scholar": ";Fbys5c8AAAAJ;;;0r3PWLQAAAAJ", "or_profile": "~Quinten_Bolding1;~Baohao_Liao1;~Brandon_James_Denis1;~Jun_Luo10;~Christof_Monz1", "aff": "University of Amsterdam;University of Amsterdam;Huawei Technologies Ltd.;Huawei Technologies Ltd.;University of Amsterdam, University of Amsterdam", "aff_domain": "uva.nl;uva.nl;huawei.com;huawei.com;ivi.uva.nl", "position": "MS student;PhD student;Researcher;Researcher;Full Professor", "bibtex": "@inproceedings{\nbolding2023ask,\ntitle={Ask Language Model to Clean Your Noisy Translation Data},\nauthor={Quinten Bolding and Baohao Liao and Brandon James Denis and Jun Luo and Christof Monz},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=pi764D1Xrx}\n}", "github": "", "project": "", "reviewers": "6pw7;Z2kB;pKJv", "site": "https://openreview.net/forum?id=pi764D1Xrx", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;3;3", "excitement": "3;2;3", "reproducibility": "4;4;3", "correctness": "3;3;3", "rating_avg": 3.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 2.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0001-8335-4573;;;", "linkedin": "quinten-bolding-89a6a123a/;baohaoliao;https://linkedin.com/in/brandon-denis;junluo;", "aff_unique_index": "0;0;1;1;0", "aff_unique_norm": "University of Amsterdam;Huawei", "aff_unique_dep": ";Huawei Technologies", "aff_unique_url": "https://www.uva.nl;https://www.huawei.com", "aff_unique_abbr": "UvA;Huawei", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;1;0", "aff_country_unique": "Netherlands;China" }, { "id": "piC2Dm47U1", "title": "Novel Relation Detection: Discovering Unknown Relation Types via Multi-Strategy Self-Supervised Learning", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Conventional approaches to relation 
extraction can only recognize predefined relation types. In the real world, new or out-of-scope relation types may keep challenging the deployed models. In this paper, we formalize such a challenging problem as Novel Relation Detection (NRD), which aims to discover potential new relation types based on training samples of known relations. To this end, we construct two NRD datasets and exhaustively investigate a variety of out-of-scope detection methods. We further propose an effective NRD method that utilizes multi-strategy self-supervised learning to handle the problem of shallow semantic similarity in the NRD task. Experimental results demonstrate the effectiveness of our method, which significantly outperforms previous state-of-the-art methods on both datasets.", "keywords": "Relation Detection;Out-of-Scope Detection;Self-Supervised Learning", "primary_area": "", "supplementary_material": "", "author": "Qingbin Liu;Yin Kung;Yanchao Hao;Dianbo Sui;Siyuan Cheng;Xi Chen;Ningyu Zhang;Jiaoyan Chen", "authorids": "~Qingbin_Liu1;~Yin_Kung1;~Yanchao_Hao1;~Dianbo_Sui1;~Siyuan_Cheng2;~Xi_Chen21;~Ningyu_Zhang1;~Jiaoyan_Chen1", "gender": "M;F;M;M;M;M;M;M", "homepage": "https://scholar.google.com.hk/citations?user=FGxyOtYAAAAJ&hl=zh-CN;https://github.com/Dandelion0417;;;https://github.com/cheng-simian;;https://person.zju.edu.cn/en/ningyu;https://chenjiaoyan.github.io/", "dblp": "137/6023.html;;190/1825;254/8270;;;139/4181-1.html;56/8110-1", "google_scholar": "https://scholar.google.com.hk/citations?user=FGxyOtYAAAAJ;R_RmroAAAAAJ;;yi639zEAAAAJ;;https://scholar.google.com/citations?hl=zh-CN;xQDOPvsAAAAJ;https://scholar.google.ch/citations?user=5Cy4z8wAAAAJ", "or_profile": "~Qingbin_Liu1;~Yin_Kung1;~Yanchao_Hao1;~Dianbo_Sui1;~Siyuan_Cheng2;~Xi_Chen21;~Ningyu_Zhang1;~Jiaoyan_Chen1", "aff": "Tencent;Peking University;Tencent PCG;Harbin Institute of Technology;Zhejiang University;Tencent Content and Platform Group;Zhejiang University;University of Oxford", "aff_domain": "tencent.com;pku.edu;tencent.com;hit.edu.cn;zju.edu.cn;tencent.com;zju.edu.cn;cs.ox.ac.uk", "position": "Researcher;MS student;Principal Researcher;Lecturer;MS student;Researcher;Associate Professor;Senior Researcher", "bibtex": "@inproceedings{\nliu2023novel,\ntitle={Novel Relation Detection: Discovering Unknown Relation Types via Multi-Strategy Self-Supervised Learning},\nauthor={Qingbin Liu and Yin Kung and Yanchao Hao and Dianbo Sui and Siyuan Cheng and Xi Chen and Ningyu Zhang and Jiaoyan Chen},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=piC2Dm47U1}\n}", "github": "", "project": "", "reviewers": "wbgR;a4GX;cVH9;tVzT", "site": "https://openreview.net/forum?id=piC2Dm47U1", "pdf_size": 0, "rating": "3;3;3;3", "confidence": "4;4;4;3", "excitement": "3;4;3;4", "reproducibility": "4;4;4;4", "correctness": "4;3;3;3", "rating_avg": 3.0, "confidence_avg": 3.75, "excitement_avg": 3.5, "reproducibility_avg": 4.0, "correctness_avg": 3.25, "replies_avg": 14, "authors#_avg": 8, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;;0000-0002-1970-0678;0000-0003-4643-6750", "linkedin": ";;;;;;ningyuzhang/;", "aff_unique_index": "0;1;0;2;3;0;3;4", "aff_unique_norm": "Tencent;Peking University;Harbin Institute of Technology;Zhejiang University;University of Oxford", "aff_unique_dep": "Tencent Holdings Limited;;;;", "aff_unique_url": 
"https://www.tencent.com;http://www.pku.edu.cn;http://www.hit.edu.cn/;https://www.zju.edu.cn;https://www.ox.ac.uk", "aff_unique_abbr": "Tencent;Peking U;HIT;ZJU;Oxford", "aff_campus_unique_index": "1", "aff_campus_unique": ";Harbin", "aff_country_unique_index": "0;0;0;0;0;0;0;1", "aff_country_unique": "China;United Kingdom" }, { "id": "pk0OZZkYMP", "title": "Analyzing Modular Approaches for Visual Question Decomposition", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Modular neural networks without additional training have recently been shown to surpass end-to-end neural networks on challenging vision\u2013language tasks. The latest such methods simultaneously introduce LLM-based code generation to build programs and a number of skill-specific, task-oriented modules to execute them. In this paper, we focus on ViperGPT and ask where its additional performance comes from and how much is due to the (state-of-art, end-to-end) BLIP-2 model it subsumes vs. additional symbolic components. To do so, we conduct a controlled study (comparing end-to-end, modular, and prompting-based methods across several VQA benchmarks). We find that ViperGPT's reported gains over BLIP-2 can be attributed to its selection of task-specific modules, and when we run ViperGPT using a more task-agnostic selection of modules, these gains go away. ViperGPT retains much of its performance if we make prominent alterations to its selection of modules: e.g. removing or retaining only BLIP-2. We also compare ViperGPT against a prompting-based decomposition strategy and find that, on some benchmarks, modular approaches significantly benefit by representing subtasks with natural language, instead of code. Our code is fully available at https://github.com/brown-palm/visual-question-decomposition.", "keywords": "Vision\u2013language;visual question answering;modular;neuro-symbolic;prompting;question decomposition", "primary_area": "", "supplementary_material": "", "author": "Apoorv Khandelwal;Ellie Pavlick;Chen Sun", "authorids": "~Apoorv_Khandelwal1;~Ellie_Pavlick1;~Chen_Sun1", "gender": "M;F;M", "homepage": "http://apoorvkh.com;http://cs.brown.edu/people/epavlick/;https://chensun.me", "dblp": "126/9502-1;141/4059;01/6072-2", "google_scholar": "gwUMjlsAAAAJ;sFyrSa8AAAAJ;vQa7heEAAAAJ", "or_profile": "~Apoorv_Khandelwal1;~Ellie_Pavlick1;~Chen_Sun1", "aff": "Brown University;Brown University;Google", "aff_domain": "brown.edu;brown.edu;google.com", "position": "PhD student;Assistant Professor;Research Scientist", "bibtex": "@inproceedings{\nkhandelwal2023analyzing,\ntitle={Analyzing Modular Approaches for Visual Question Decomposition},\nauthor={Apoorv Khandelwal and Ellie Pavlick and Chen Sun},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=pk0OZZkYMP}\n}", "github": "", "project": "", "reviewers": "oZRE;a58D;9Ggp", "site": "https://openreview.net/forum?id=pk0OZZkYMP", "pdf_size": 0, "rating": "5;5;5", "confidence": "2;4;4", "excitement": "3;4;4", "reproducibility": "3;4;4", "correctness": "3;3;4", "rating_avg": 5.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": ";;", "aff_unique_index": "0;0;1", "aff_unique_norm": "Brown University;Google", "aff_unique_dep": ";Google", "aff_unique_url": 
"https://www.brown.edu;https://www.google.com", "aff_unique_abbr": "Brown;Google", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "pkZcvEYZEm", "title": "NAIL: Lexical Retrieval Indices with Efficient Non-Autoregressive Decoders", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Neural document rerankers are extremely effective in terms of accuracy. However, the best models require dedicated hardware for serving, which is costly and often not feasible. To avoid this servingtime requirement, we present a method of capturing up to 86% of the gains of a Transformer cross-attention model with a lexicalized scoring function that only requires 10\u22126% of the Transformer\u2019s FLOPs per document and can be served using commodity CPUs. When combined with a BM25 retriever, this approach matches the quality of a state-of-the art dual encoder retriever, that still requires an accelerator for query encoding. We introduce nail (Non-Autoregressive Indexing with Language models) as a model architecture that is compatible with recent encoder-decoder and decoder-only large language models, such as T5, GPT-3 and PaLM. This model architecture can leverage existing pre-trained checkpoints and can be fine-tuned for efficiently constructing document representations that do not require neural processing of queries.", "keywords": "information retrieval;text retrieval;large language models;non-autoregressive decoders", "primary_area": "", "supplementary_material": "", "author": "Livio Baldini Soares;Daniel Gillick;Jeremy R. Cole;Tom Kwiatkowski", "authorids": "~Livio_Baldini_Soares2;~Daniel_Gillick1;~Jeremy_R._Cole1;~Tom_Kwiatkowski1", "gender": "M;M;M;M", "homepage": ";https://jrc436.github.io;https://research.google.com/pubs/105075.html;https://liviosoares.github.io/", "dblp": "73/7157;189/4976;33/9012;178/3562", "google_scholar": "LCeRsUcAAAAJ;WCzWsG0AAAAJ;https://scholar.google.no/citations?user=MpZ6dTEAAAAJ;C3s1jqIAAAAJ", "or_profile": "~Daniel_Gillick1;~Jeremy_R._Cole1;~Tom_Kwiatkowski1;~Livio_Baldini_Soares1", "aff": ";Google DeepMind;;Google Deepmind", "aff_domain": ";google.com;;google.com", "position": ";Researcher;;Software Engineer", "bibtex": "@inproceedings{\nsoares2023nail,\ntitle={{NAIL}: Lexical Retrieval Indices with Efficient Non-Autoregressive Decoders},\nauthor={Livio Baldini Soares and Daniel Gillick and Jeremy R. 
Cole and Tom Kwiatkowski},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=pkZcvEYZEm}\n}", "github": "", "project": "", "reviewers": "w6rA;Esnj;oAAC", "site": "https://openreview.net/forum?id=pkZcvEYZEm", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;4;4", "excitement": "4;4;3", "reproducibility": "4;4;4", "correctness": "4;4;3", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.0, "correctness_avg": 3.6666666666666665, "replies_avg": 10, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0001-7147-5888;;", "linkedin": ";jeremy-cole;;", "aff_unique_index": "0;1", "aff_unique_norm": "Google;DeepMind", "aff_unique_dep": "Google DeepMind;DeepMind", "aff_unique_url": "https://deepmind.com;https://deepmind.com", "aff_unique_abbr": "DeepMind;DeepMind", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United Kingdom" }, { "id": "pnnab961TD", "title": "Towards Informative Open-ended Text Generation with Dynamic Knowledge Triples", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Pretrained language models (PLMs), especially large language models (LLMs) demonstrate impressive capabilities in open-ended text generation. While our statistical results show that LLMs often suffer from over-concentrated information, where the generated texts overly focus on the given prompt and fail to provide sufficient background and detailed information as humans do. To address this issue, we propose a dynamic knowledge-guided informative open-ended text generation approach, that utilizes a knowledge graph to help the model generate more contextually related entities and detailed facts. Specifically, we first employ a local knowledge filter to extract relevant knowledge from the comprehensive knowledge graph for a given topic sentence. Then we introduce a dynamic knowledge selector to predict the entity to be mentioned in the subsequent sentence. Finally, we utilize a knowledge-enhanced text generator to produce a more informative output. To evaluate the effectiveness of our approach, we evaluate the proposed approach in two scenarios: fine-tuning for small PLMs and prompt tuning for LLMs. 
Experimental results show that our approach could generate more informative texts than baselines.", "keywords": "Informativeness;Knowledge graph;open-ended text generation", "primary_area": "", "supplementary_material": "", "author": "Zixuan Ren;Yang Zhao;Chengqing Zong", "authorids": "~Zixuan_Ren1;~Yang_Zhao26;~Chengqing_Zong1", "gender": "Not Specified;M;M", "homepage": "https://github.com/PINE4PPLE;http://www.nlpr.ia.ac.cn/cip/english/zong.htm;https://yzhaoiacas.netlify.app/", "dblp": ";38/6093;", "google_scholar": ";l8lvKOQAAAAJ;https://scholar.google.com.hk/citations?hl=zh-CN", "or_profile": "~Zixuan_Ren1;~Chengqing_Zong1;~Zhao_Yang1", "aff": "Institute of Automation, Chinese Academy of Sciences;Institute of Automation, Chinese Academy of Sciences;Institute of Automation, Chinese Academy of Sciences", "aff_domain": "ia.ac.cn;ia.ac.cn;ia.ac.cn", "position": "MS student;Researcher;Associate Professor", "bibtex": "@inproceedings{\nren2023towards,\ntitle={Towards Informative Open-ended Text Generation with Dynamic Knowledge Triples},\nauthor={Zixuan Ren and Yang Zhao and Chengqing Zong},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=pnnab961TD}\n}", "github": "", "project": "", "reviewers": "GfBH;yWwV;GVGV", "site": "https://openreview.net/forum?id=pnnab961TD", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;3;4", "excitement": "4;3;2", "reproducibility": "4;3;3", "correctness": "4;3;3", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.0, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.3333333333333335, "replies_avg": 10, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": ";;", "aff_unique_index": "0;0;0", "aff_unique_norm": "Chinese Academy of Sciences", "aff_unique_dep": "Institute of Automation", "aff_unique_url": "http://www.ia.cas.cn", "aff_unique_abbr": "CAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "ppaIkXurvg", "title": "The Troubling Emergence of Hallucination in Large Language Models - An Extensive Definition, Quantification, and Prescriptive Remediations", "track": "main", "status": "Long Main", "tldr": "", "abstract": "The recent advancements in Large Language Models (LLMs) have garnered widespread acclaim for their remarkable emerging capabilities. However, the issue of hallucination has parallelly emerged as a by-product, posing significant concerns. While some recent endeavors have been made to identify and mitigate different types of hallucination, there has been a limited emphasis on the nuanced categorization of hallucination and associated mitigation methods. To address this gap, we offer a fine-grained discourse on profiling hallucination based on its degree, orientation, and category, along with offering strategies for alleviation. As such, we define two overarching orientations of hallucination: (i) factual mirage (FM) and (ii) silver lining (SL). To provide a more comprehensive understanding, both orientations are further sub-categorized into intrinsic and extrinsic, with three degrees of severity - (i) mild, (ii) moderate, and (iii) alarming. We also meticulously categorize hallucination into six types: (i) acronym ambiguity, (ii) numeric nuisance, (iii) generated golem, (iv) virtual voice, (v) geographic erratum, and (vi) time wrap. 
Furthermore, we curate HallucInation eLiciTation (HILT), a publicly available dataset comprising of 75,000 samples generated using 15 contemporary LLMs along with human annotations for the aforementioned categories. Finally, to establish a method for quantifying and to offer a comparative spectrum that allows us to evaluate and rank LLMs based on their vulnerability to producing hallucinations, we propose Hallucination Vulnerability Index (HVI). Amidst the extensive deliberations on policy-making for regulating AI development, it is of utmost importance to assess and measure which LLM is more vulnerable towards hallucination. We firmly believe that HVI holds significant value as a tool for the wider NLP community, with the potential to serve as a rubric in AI-related policy-making. In conclusion, we propose two solution strategies for mitigating hallucinations.", "keywords": "large language models;hallucination;dataset;mitigation", "primary_area": "", "supplementary_material": "", "author": "Vipula Rawte;SWAGATA CHAKRABORTY;Agnibh Pathak;ANUBHAV SARKAR;S.M Towhidul Islam Tonmoy;Aman Chadha;Amit P. Sheth;Amitava Das", "authorids": "~Vipula_Rawte1;~SWAGATA_CHAKRABORTY1;~Agnibh_Pathak1;~ANUBHAV_SARKAR1;~S.M_Towhidul_Islam_Tonmoy1;~Aman_Chadha1;~Amit_P._Sheth1;~Amitava_Das3", "gender": "Not Specified;F;M;M;M;M;M;M", "homepage": "https://vr25.github.io/;;;;;https://aman.ai;http://aiisc.ai/amit;https://amitavadas.com/", "dblp": "203/0222;;;;;55/10360;s/AmitPSheth;", "google_scholar": "https://scholar.google.co.in/citations?user=cJdK7lUAAAAJ;https://scholar.google.co.in/citations?view_op=list_works;;iZQM0hQAAAAJ;3lmZN3gAAAAJ;gPGQuBQAAAAJ;https://scholar.google.com/citations?hl=en;", "or_profile": "~Vipula_Rawte1;~SWAGATA_CHAKRABORTY1;~Agnibh_Pathak1;~ANUBHAV_SARKAR1;~S.M_Towhidul_Islam_Tonmoy1;~Aman_Chadha1;~Amit_P._Sheth1;~Amitava_Das3", "aff": "University of South Carolina;Christ University;Christ University;Christ University;University of South Carolina;Amazon Web Services;University of South Carolina;University of South Carolina", "aff_domain": "sc.edu;christuniversity.in;christuniversity.in;christuniversity.in;sc.edu;amazon.com;sc.edu;uofsc.edu", "position": "Researcher;MS student;MS student;MS student;Intern;GenAI Science Manager;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nrawte2023the,\ntitle={The Troubling Emergence of Hallucination in Large Language Models - An Extensive Definition, Quantification, and Prescriptive Remediations},\nauthor={Vipula Rawte and SWAGATA CHAKRABORTY and Agnibh Pathak and ANUBHAV SARKAR and S.M Towhidul Islam Tonmoy and Aman Chadha and Amit P. 
Sheth and Amitava Das},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=ppaIkXurvg}\n}", "github": "", "project": "", "reviewers": "L4uh;rKxV;3m5D", "site": "https://openreview.net/forum?id=ppaIkXurvg", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;5;4", "excitement": "4;2;3", "reproducibility": "4;4;2", "correctness": "3;3;4", "rating_avg": 5.0, "confidence_avg": 4.333333333333333, "excitement_avg": 3.0, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 8, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-4355-1393;0000-0002-0216-7408;;0000-0003-3422-4232;0009-0000-6076-7068;0000-0001-6621-9003;0000-0002-0021-5293;", "linkedin": "vipula-rawte/;swagata-chakraborty-3bab06193/;agnibh-pathak-7752bb249/;anubhav-sarkar/;towhidultonmoy/;https://linkedin.aman.ai/;amitsheth/;", "aff_unique_index": "0;1;1;1;0;2;0;0", "aff_unique_norm": "University of South Carolina;Christ University;Amazon", "aff_unique_dep": ";;Amazon Web Services", "aff_unique_url": "https://www.sc.edu;https://www.christuniversity.in;https://aws.amazon.com", "aff_unique_abbr": "USC;CU;AWS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1;0;0;0;0", "aff_country_unique": "United States;India" }, { "id": "ppb7gyhc7k", "title": "Learning Retrieval Augmentation for Personalized Dialogue Generation", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Personalized dialogue generation, focusing on generating highly tailored responses by leveraging persona profiles and dialogue context, has gained significant attention in conversational AI applications. However, persona profiles, a prevalent setting in current personalized dialogue datasets, typically composed of merely four to five sentences, may not offer comprehensive descriptions of the persona about the agent, posing a challenge to generate truly personalized dialogues. To handle this problem, we propose $\\textbf{L}$earning Retrieval $\\textbf{A}$ugmentation for $\\textbf{P}$ersonalized $\\textbf{D}$ial$\\textbf{O}$gue $\\textbf{G}$eneration ($\\textbf{LAPDOG}$), which studies the potential of leveraging external knowledge for persona dialogue generation. Specifically, the proposed LAPDOG model consists of a story retriever and a dialogue generator. The story retriever uses a given persona profile as queries to retrieve relevant information from the story document, which serves as a supplementary context to augment the persona profile. The dialogue generator utilizes both the dialogue history and the augmented persona profile to generate personalized responses. For optimization, we adopt a joint training framework that collaboratively learns the story retriever and dialogue generator, where the story retriever is optimized towards desired ultimate metrics (e.g., BLEU) to retrieve content for the dialogue generator to generate personalized responses. Experiments conducted on the CONVAI2 dataset with ROCStory as a supplementary data source show that the proposed LAPDOG method substantially outperforms the baselines, indicating the effectiveness of the proposed method. 
The LAPDOG model code is publicly available for further exploration.", "keywords": "Personalized Dialogue Generation;Retrieval-augmented Dialogue Generation;Persona-Based Dialogue Generation", "primary_area": "", "supplementary_material": "", "author": "Qiushi Huang;Shuai Fu;Xubo Liu;Wenwu Wang;Tom Ko;Yu Zhang;Lilian Tang", "authorids": "~Qiushi_Huang1;~Shuai_Fu1;~Xubo_Liu1;~Wenwu_Wang1;~Tom_Ko2;~Yu_Zhang3;~Lilian_Tang1", "gender": "M;M;M;M;M;M;F", "homepage": ";;https://liuxubo717.github.io/;http://personal.ee.surrey.ac.uk/Personal/W.Wang/;https://tomkocse.github.io/;http://cse.sustech.edu.cn/faculty/~zhangy/;https://www.surrey.ac.uk/people/h-lilian-tang", "dblp": "204/2933;;235/1970/;https://dblp.org/pers/hd/w/Wang:Wenwu;96/8762;50/671-6;", "google_scholar": "F_yGB9sAAAAJ;https://scholar.google.com.hk/citations?user=QXoLj2oAAAAJ;-OlNYSgAAAAJ;https://scholar.google.co.uk/citations?user=JQFnV5IAAAAJ;26-lhTQAAAAJ;https://scholar.google.com.hk/citations?user=jaRS5w4AAAAJ;", "or_profile": "~Qiushi_Huang1;~Shuai_Fu1;~Xubo_Liu1;~Wenwu_Wang1;~Tom_Ko2;~Yu_Zhang3;~Lilian_Tang1", "aff": "University of Surrey;Southern University of Science and Technology;University of Surrey;University of Surrey;ByteDance AI Lab;Southern University of Science and Technology;University of Surrey", "aff_domain": "surrey.ac.uk;sustech.edu.cn;surrey.ac.uk;surrey.ac.uk;bytedance.com;sustc.edu.cn;surrey.ac.uk", "position": "PhD student;Research Assistant;PhD student;Full Professor;Researcher;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nhuang2023learning,\ntitle={Learning Retrieval Augmentation for Personalized Dialogue Generation},\nauthor={Qiushi Huang and Shuai Fu and Xubo Liu and Wenwu Wang and Tom Ko and Yu Zhang and Lilian Tang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=ppb7gyhc7k}\n}", "github": "", "project": "", "reviewers": "Afr1;d41o;jXtS", "site": "https://openreview.net/forum?id=ppb7gyhc7k", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;4;3", "excitement": "4;4;3", "reproducibility": "4;3;3", "correctness": "3;4;4", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.6666666666666665, "replies_avg": 10, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;;", "linkedin": ";;;https://uk.linkedin.com/in/wenwu;;;", "aff_unique_index": "0;1;0;0;2;1;0", "aff_unique_norm": "University of Surrey;Southern University of Science and Technology;ByteDance", "aff_unique_dep": ";;AI Lab", "aff_unique_url": "https://www.surrey.ac.uk;https://www.sustech.edu.cn;https://www.bytedance.com", "aff_unique_abbr": "Surrey;SUSTech;ByteDance", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;1;1;0", "aff_country_unique": "United Kingdom;China" }, { "id": "psv7operF8", "title": "Adaptive Textual Label Noise Learning based on Pre-trained Models", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "The label noise in real-world scenarios is unpredictable and can even be a mixture of different types of noise. To meet this challenge, we develop an adaptive textual label noise learning framework based on pre-trained models, which consists of an adaptive warm-up stage and a hybrid training stage. 
Specifically, an early stopping method, relying solely on the training set, is designed to dynamically terminate the warm-up process based on the model's fit level to different noise scenarios. The hybrid training stage incorporates several generalization strategies to gradually correct mislabeled instances, thereby making better use of noisy data. Experiments on multiple datasets demonstrate that our approach performs comparably or even surpasses the state-of-the-art methods in various noise scenarios, including scenarios with the mixture of multiple types of noise.", "keywords": "learning with noisy labels;label noise learning;pre-trained models;text classification", "primary_area": "", "supplementary_material": "", "author": "Shaohuan Cheng;Wenyu Chen;fu Mingsheng;Xuanting Xie;Hong Qu", "authorids": "~Shaohuan_Cheng1;~Wenyu_Chen3;~fu_Mingsheng1;~Xuanting_Xie1;~Hong_Qu1", "gender": "F;M;M;M;M", "homepage": ";;;https://github.com/everfor2019;https://www.scse.uestc.edu.cn/info/1081/11251.htm", "dblp": "304/3487;55/6538;;;", "google_scholar": ";;;;", "or_profile": "~Shaohuan_Cheng1;~Wenyu_Chen3;~fu_Mingsheng1;~Xuanting_Xie1;~Hong_Qu1", "aff": "University of Electronic Science and Technology of China;University of Electronic Science and Technology of China;University of Electronic Science and Technology of China;University of Electronic Science and Technology of China;University of Electronic Science and Technology of China", "aff_domain": "uestc.edu.cn;uestc.edu.cn;uestc.edu.cn;uestc.edu.cn;uestc.edu.cn", "position": "PhD student;Full Professor;Associate Professor;MS student;Full Professor", "bibtex": "@inproceedings{\ncheng2023adaptive,\ntitle={Adaptive Textual Label Noise Learning based on Pre-trained Models},\nauthor={Shaohuan Cheng and Wenyu Chen and fu Mingsheng and Xuanting Xie and Hong Qu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=psv7operF8}\n}", "github": "", "project": "", "reviewers": "keZc;fkV9;SWmB", "site": "https://openreview.net/forum?id=psv7operF8", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;3;3", "excitement": "3;2;3", "reproducibility": "3;3;3", "correctness": "3;2;3", "rating_avg": 3.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 2.6666666666666665, "reproducibility_avg": 3.0, "correctness_avg": 2.6666666666666665, "replies_avg": 10, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-8197-6632;0000-0002-9933-8014;0000-0002-9257-126X;;", "linkedin": ";;;;", "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "University of Electronic Science and Technology of China", "aff_unique_dep": "", "aff_unique_url": "https://www.uestc.edu.cn", "aff_unique_abbr": "UESTC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "ptcFuwr4YD", "title": "Can you Summarize my learnings? Towards Perspective-based Educational Dialogue Summarization", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "The steady increase in the utilization of Virtual Tutors (VT) over recent years has allowed for a more efficient, personalized, and interactive AI-based learning experiences. A vital aspect in these educational chatbots is summarizing the conversations between the VT and the students, as it is critical in consolidating learning points and monitoring progress. 
However, the approach to summarization should be tailored according to the perspective. Summarization from the VTs perspective should emphasize on its teaching efficiency and potential improvements. Conversely, student-oriented summaries should distill learning points, track progress, and suggest scope for improvements. Based on this hypothesis, in this work, we propose a new task of Multi-modal Perspective based Dialogue Summarization (MM-PerSumm), demonstrated in an educational setting. Towards this aim, we introduce a novel dataset, CIMA-Summ that summarizes educational dialogues from three unique perspectives: the Student, the Tutor, and a Generic viewpoint. In addition, we propose an Image and Perspective-guided Dialogue Summarization (IP-Summ) model which is a Seq2Seq language model incorporating (i) multi-modal learning from images and (ii) a perspective-based encoder that constructs a dialogue graph capturing the intentions and actions of both the VT and the student, enabling the summarization of a dialogue from diverse perspectives. Lastly, we conduct detailed analyses of our model's performance, highlighting the aspects that could lead to optimal modeling of IP-Summ.", "keywords": "Summarization;Text-generation;AI4Education", "primary_area": "", "supplementary_material": "", "author": "Raghav Jain;Tulika Saha;Jhagrut Lalwani;Sriparna Saha", "authorids": "~Raghav_Jain1;~Tulika_Saha1;~Jhagrut_Lalwani1;~Sriparna_Saha1", "gender": "M;F;M;F", "homepage": ";https://sahatulika15.github.io/index.html;https://jhagrutlalwani.netlify.app/;http://www.iitp.ac.in/~sriparna", "dblp": ";230/8625.html;;27/1664-1", "google_scholar": ";https://scholar.google.co.in/citations?user=_GJugiIAAAAJ;;https://scholar.google.co.in/citations?user=Fj7jA_AAAAAJ", "or_profile": "~Raghav_Jain1;~Tulika_Saha1;~Jhagrut_Lalwani1;~Sriparna_Saha1", "aff": "Indian Institute of Technology, Patna.;University of Liverpool;Veermata Jijabai Technological Institute;Indian Institute of Technology Patna, India", "aff_domain": "iitp.ac.in;liverpool.ac.uk;vjti.ac.in;iitp.ac.in", "position": "Researcher;Lecturer;Undergrad student;Associate Professor", "bibtex": "@inproceedings{\njain2023can,\ntitle={Can you Summarize my learnings? 
Towards Perspective-based Educational Dialogue Summarization},\nauthor={Raghav Jain and Tulika Saha and Jhagrut Lalwani and Sriparna Saha},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=ptcFuwr4YD}\n}", "github": "", "project": "", "reviewers": "q3T3;gdPt;uLLc", "site": "https://openreview.net/forum?id=ptcFuwr4YD", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;4;4", "excitement": "2;4;3", "reproducibility": "4;2;3", "correctness": "4;3;3", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.0, "reproducibility_avg": 3.0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;", "linkedin": "raghav-jain-3a8076214;tulika-saha-2547a4187/;jhagrut-lalwani/;sriparna-saha-1a1338161/", "aff_unique_index": "0;1;2;0", "aff_unique_norm": "Indian Institute of Technology Patna;University of Liverpool;Veermata Jijabai Technological Institute", "aff_unique_dep": ";;", "aff_unique_url": "https://www.iitp.ac.in;https://www.liverpool.ac.uk;http://www.vjti.ac.in", "aff_unique_abbr": "IIT Patna;Liv Uni;VJTI", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Patna;", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "India;United Kingdom" }, { "id": "puLH3BEl93", "title": "Improving Zero-shot Reader by Reducing Distractions from Irrelevant Documents in Open-Domain Question Answering", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "Large language models (LLMs) enable zero-shot approaches in open-domain question answering (ODQA), yet with limited advancements as the reader is compared to the retriever. This study aims at the feasibility of a zero-shot reader that addresses the challenges of computational cost and the need for labeled data. We find that LLMs are distracted due to irrelevant documents in the retrieved set and the overconfidence of the generated answers when they are exploited as zero-shot readers. To tackle these problems, we mitigate the impact of such documents via Distraction-aware Answer Selection (DAS) with a negation-based instruction and score adjustment for proper answer selection. Experimental results show that our approach successfully handles distraction across diverse scenarios, enhancing the performance of zero-shot readers. Furthermore, unlike supervised readers struggling with unseen data, zero-shot readers demonstrate outstanding transferability without any training.", "keywords": "Open-Domain Question Answering;Large Language Model;Prompt Engineering", "primary_area": "", "supplementary_material": "", "author": "Sukmin Cho;Jeongyeon Seo;Soyeong Jeong;Jong C. 
Park", "authorids": "~Sukmin_Cho1;~Jeongyeon_Seo1;~Soyeong_Jeong1;~Jong_C._Park2", "gender": "M;;F;M", "homepage": "http://nlpcl.kaist.ac.kr/home/;http://nlpcl.kaist.ac.kr/home/;https://starsuzi.github.io/;http://nlpcl.kaist.ac.kr/prof", "dblp": "316/9906;;164/0452;73/5376", "google_scholar": "https://scholar.google.co.kr/citations?user=YuV8kEoAAAAJ;;0wnquCEAAAAJ;XP5heVgAAAAJ", "or_profile": "~Sukmin_Cho1;~Jeongyeon_Seo1;~Soyeong_Jeong1;~Jong_C._Park2", "aff": "Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology;Korea Advanced Institute of Science & Technology", "aff_domain": "kaist.ac.kr;kaist.edu;kaist.ac.kr;kaist.ac.kr", "position": "PhD student;MS student;PhD student;Full Professor", "bibtex": "@inproceedings{\ncho2023improving,\ntitle={Improving Zero-shot Reader by Reducing Distractions from Irrelevant Documents in Open-Domain Question Answering},\nauthor={Sukmin Cho and Jeongyeon Seo and Soyeong Jeong and Jong C. Park},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=puLH3BEl93}\n}", "github": "", "project": "", "reviewers": "Y5qm;ubV7;CcX8", "site": "https://openreview.net/forum?id=puLH3BEl93", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;3;3", "excitement": "3;2;2", "reproducibility": "4;4;3", "correctness": "4;4;4", "rating_avg": 3.0, "confidence_avg": 3.0, "excitement_avg": 2.3333333333333335, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0000-0002-8859-5111", "linkedin": ";;soyeong-jeong-900155141;", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.kaist.ac.kr", "aff_unique_abbr": "KAIST", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "South Korea" }, { "id": "puMfaHb1hY", "title": "G-Eval: NLG Evaluation using Gpt-4 with Better Human Alignment", "track": "main", "status": "Long Main", "tldr": "", "abstract": "The quality of texts generated by natural language generation (NLG) systems is hard to measure automatically. \nConventional reference-based metrics, such as BLEU and ROUGE, have been shown to have relatively low correlation with human judgments, especially for tasks that require creativity and diversity. \nRecent studies suggest using large language models (LLMs) as reference-free metrics for NLG evaluation, which have the benefit of being applicable to new tasks that lack human references.\nHowever, these LLM-based evaluators still have lower human correspondence than medium-size neural evaluators. \nIn this work, we present G-Eval, a framework of using large language models with chain-of-thoughts (CoT) and a form-filling paradigm, to assess the quality of NLG outputs. We experiment with two generation tasks, text summarization and dialogue generation. 
\nWe show that G-Eval with GPT-4 as the backbone model achieves a Spearman correlation of $0.514$ with human on summarization task, outperforming all previous methods by a large margin.\nWe also propose analysis on the behavior of LLM-based evaluators, and highlight the potential concern of LLM-based evaluators having a bias towards the LLM-generated texts.", "keywords": "generation evaluation", "primary_area": "", "supplementary_material": "", "author": "Yang Liu;Dan Iter;Yichong Xu;Shuohang Wang;Ruochen Xu;Chenguang Zhu", "authorids": "~Yang_Liu50;~Dan_Iter1;~Yichong_Xu1;~Shuohang_Wang1;~Ruochen_Xu2;~Chenguang_Zhu1", "gender": "M;Not Specified;M;M;M;M", "homepage": "https://nlp-yang.github.io/;https://daniter-cu.github.io/;http://xycking.wixsite.com/yichongxu;;https://xrc10.github.io/;", "dblp": ";63/10689.html;154/6421;173/5469.html;188/3515;48/7536-1.html", "google_scholar": "HxTr-CtMdrsC;bg8RrSkAAAAJ;sYza2XwAAAAJ;mN-IO6wAAAAJ;HTp5S00AAAAJ;1b2kKWoAAAAJ", "or_profile": "~Yang_Liu50;~Dan_Iter1;~Yichong_Xu1;~Shuohang_Wang1;~Ruochen_Xu2;~Chenguang_Zhu1", "aff": "Microsoft;Microsoft;Microsoft;Microsoft;Microsoft Research;Zoom", "aff_domain": "microsoft.com;microsoft.com;microsoft.com;microsoft.com;research.microsoft.com;zoom.us", "position": "Researcher;Researcher;Senior Researcher;Researcher;Researcher;Principal Researcher", "bibtex": "@inproceedings{\nliu2023geval,\ntitle={G-Eval: {NLG} Evaluation using Gpt-4 with Better Human Alignment},\nauthor={Yang Liu and Dan Iter and Yichong Xu and Shuohang Wang and Ruochen Xu and Chenguang Zhu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=puMfaHb1hY}\n}", "github": "", "project": "", "reviewers": "qnnB;EnEs;LBER", "site": "https://openreview.net/forum?id=puMfaHb1hY", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;3;5", "excitement": "4;3;3", "reproducibility": "4;4;1", "correctness": "4;4;2", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;", "linkedin": ";daniter;;;ruochenx/;", "aff_unique_index": "0;0;0;0;0;1", "aff_unique_norm": "Microsoft;Zoom Video Communications Inc.", "aff_unique_dep": "Microsoft Corporation;", "aff_unique_url": "https://www.microsoft.com;https://zoom.us", "aff_unique_abbr": "Microsoft;Zoom", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "pvEkYbUPVW", "title": "Measuring Faithful and Plausible Visual Grounding in VQA", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Metrics for Visual Grounding (VG) in Visual Question Answering (VQA) systems primarily aim to measure a system's reliance on relevant parts of the image when inferring an answer to the given question. Lack of VG has been a common problem among state-of-the-art VQA systems and can manifest in over-reliance on irrelevant image parts or a disregard for the visual modality entirely. Although inference capabilities of VQA models are often illustrated by a few qualitative illustrations, most systems are not quantitatively assessed for their VG properties. 
\nWe believe, an easily calculated criterion for meaningfully measuring a system's VG can help remedy this shortcoming, as well as add another valuable dimension to model evaluations and analysis. \nTo this end, we propose a new VG metric that captures if a model a) identifies question-relevant objects in the scene, and b) actually relies on the information contained in the relevant objects when producing its answer, i.e., if its visual grounding is both \"faithful\" and \"plausible\". Our metric, called Faithful \\& Plausible Visual Grounding (FPVG), is straightforward to determine for most VQA model designs.\n\nWe give a detailed description of FPVG and evaluate several reference systems spanning various VQA architectures. Code to support the metric calculations on the GQA data set is available on GitHub.", "keywords": "Visual Grounding;Visual Question Answering", "primary_area": "", "supplementary_material": "", "author": "Daniel Reich;Felix Putze;Tanja Schultz", "authorids": "~Daniel_Reich2;~Felix_Putze1;~Tanja_Schultz3", "gender": "M;M;F", "homepage": ";;http://csl.uni-bremen.de", "dblp": ";;s/TanjaSchultz", "google_scholar": ";qBUfBtoAAAAJ;https://scholar.google.de/citations?user=CupDmmcAAAAJ", "or_profile": "~Daniel_Reich2;~Felix_Putze1;~Tanja_Schultz3", "aff": "Universit\u00e4t Bremen;Universit\u00e4t Bremen;Universit\u00e4t Bremen", "aff_domain": "uni-bremen.de;uni-bremen.de;uni-bremen.de", "position": "PhD student;Postdoc;Full Professor", "bibtex": "@inproceedings{\nreich2023measuring,\ntitle={Measuring Faithful and Plausible Visual Grounding in {VQA}},\nauthor={Daniel Reich and Felix Putze and Tanja Schultz},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=pvEkYbUPVW}\n}", "github": "", "project": "", "reviewers": "bzSZ;9Aq7;fV5W", "site": "https://openreview.net/forum?id=pvEkYbUPVW", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;4;3", "excitement": "2;4;4", "reproducibility": "4;4;4", "correctness": "2;4;4", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0002-9809-7028", "linkedin": "mrdanielreich/;;tanjaschultz", "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Bremen", "aff_unique_dep": "", "aff_unique_url": "https://www.uni-bremen.de", "aff_unique_abbr": "Uni Bremen", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Germany" }, { "id": "pxscU6TidP", "title": "AutoPlan: Automatic Planning of Interactive Decision-Making Tasks With Large Language Models", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Recent large language models (LLMs) are promising for making decisions in grounded environments. However, LLMs frequently fail in complex decision-making tasks due to the misalignment between the pre-trained knowledge in LLMs and the actual rules in the environment. Existing methods require either costly gradient computation or lengthy in-context demonstrations. In this paper, we propose AutoPlan, an approach to guide LLM-based agents to accomplish interactive decision-making tasks. AutoPlan augments the LLM prompt with a task-solving plan and optimizes it through iterative experience collection and reflection. 
Our experiments show that AutoPlan, though using no in-context demonstrations, achieves success rates on par with the baselines using human-written demonstrations on ALFWorld and even outperforms them by 8% on HotpotQA. The code is available at https://github.com/owaski/AutoPlan.", "keywords": "Planning;Decision Making;Large Language Model", "primary_area": "", "supplementary_material": "", "author": "Siqi Ouyang;Lei Li", "authorids": "~Siqi_Ouyang2;~Lei_Li11", "gender": "M;M", "homepage": "https://owaski.github.io/;https://www.cs.cmu.edu/~leili", "dblp": "224/0162;13/7007-5.html", "google_scholar": "https://scholar.google.com/citations?hl=en;BYXqAlwAAAAJ", "or_profile": "~Siqi_Ouyang2;~Lei_Li11", "aff": "UC Santa Barbara;Computer Science Department, UC Santa Barbara", "aff_domain": "ucsb.edu;cs.ucsb.edu", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\nouyang2023autoplan,\ntitle={AutoPlan: Automatic Planning of Interactive Decision-Making Tasks With Large Language Models},\nauthor={Siqi Ouyang and Lei Li},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=pxscU6TidP}\n}", "github": "", "project": "", "reviewers": "kvA8;gLwg;hFiy", "site": "https://openreview.net/forum?id=pxscU6TidP", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;3", "excitement": "3;3;3", "reproducibility": "4;3;3", "correctness": "3;3;3", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.0, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.0, "replies_avg": 10, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0003-3095-9776", "linkedin": ";", "aff_unique_index": "0;0", "aff_unique_norm": "University of California, Santa Barbara", "aff_unique_dep": "", "aff_unique_url": "https://www.ucsb.edu", "aff_unique_abbr": "UCSB", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Santa Barbara", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "pyjppDCsq7", "title": "Influence Scores at Scale for Efficient Language Data Sampling", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Modern ML systems ingest data aggregated from diverse sources, such as synthetic, human-annotated, and live customer traffic. Understanding \\textit{which} examples are important to the performance of a learning algorithm is crucial for efficient model training. Recently, a growing body of literature has given rise to various \u201cinfluence scores,\u201d which use training artifacts such as model confidence or checkpointed gradients to identify important subsets of data. However, these methods have primarily been developed in computer vision settings, and it remains unclear how well they generalize to language-based tasks using pretrained models.\n\nIn this paper, we explore the applicability of influence scores in language classification tasks. We evaluate a diverse subset of these scores on the SNLI dataset by quantifying accuracy changes in response to pruning training data through random and influence-score-based sampling. We then stress-test one of the scores \u2013 \"variance of gradients\" (VoG) from Agarwal and Hooker (2022) \u2013 in an NLU model stack that was exposed to dynamic user speech patterns in a voice assistant type of setting. 
Our experiments demonstrate that in many cases, encoder-based language models can be fine-tuned on roughly 50% of the original data without degradation in performance metrics. Along the way, we summarize lessons learned from applying out-of-the-box implementations of influence scores, quantify the effects of noisy and class-imbalanced data, and offer recommendations on score-based sampling for better accuracy and training efficiency.", "keywords": "data effiency;data sampling;difficulty metrics;influence scores;pruning", "primary_area": "", "supplementary_material": "", "author": "Nikhil Anand;Joshua Tan;Maria Minakova", "authorids": "~Nikhil_Anand2;~Joshua_Tan1;~Maria_Minakova1", "gender": "M;M;F", "homepage": "https://nikhilanand91.github.io/;https://joshktan.com;", "dblp": ";;", "google_scholar": "https://scholar.google.com/citations?hl=en;01YOPpAAAAAJ;", "or_profile": "~Nikhil_Anand2;~Joshua_Tan1;~Maria_Minakova1", "aff": "Amazon;Amazon;Amazon", "aff_domain": "amazon.com;amazon.com;amazon.com", "position": "Researcher;Researcher;Principal Researcher", "bibtex": "@inproceedings{\nanand2023influence,\ntitle={Influence Scores at Scale for Efficient Language Data Sampling},\nauthor={Nikhil Anand and Joshua Tan and Maria Minakova},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=pyjppDCsq7}\n}", "github": "", "project": "", "reviewers": "CqZv;ABYD;g8MK", "site": "https://openreview.net/forum?id=pyjppDCsq7", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;4;4", "excitement": "3;3;3", "reproducibility": "4;3;3", "correctness": "4;4;4", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.0, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": "nikhil-anand-782aa5177/;;mariaminakova/", "aff_unique_index": "0;0;0", "aff_unique_norm": "Amazon", "aff_unique_dep": "Amazon.com, Inc.", "aff_unique_url": "https://www.amazon.com", "aff_unique_abbr": "Amazon", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "q09vTY1Cqh", "title": "RepoCoder: Repository-Level Code Completion Through Iterative Retrieval and Generation", "track": "main", "status": "Long Main", "tldr": "", "abstract": "The task of repository-level code completion is to continue writing the unfinished code based on a broader context of the repository. While for automated code completion tools, it is difficult to utilize the useful information scattered in different files. We propose RepoCoder, a simple, generic, and effective framework to address the challenge. It streamlines the repository-level code completion process by incorporating a similarity-based retriever and a pre-trained code language model in an iterative retrieval-generation pipeline. RepoCoder makes effective utilization of repository-level information for code completion and has the ability to generate code at various levels of granularity. Moreover, we propose a new benchmark RepoBench, which consists of the latest and high-quality real-world repositories covering line, API invocation, and function body completion scenarios. 
Experimental results indicate that RepoCoder significantly improves the In-File completion baseline by over 10% in all settings and consistently outperforms the vanilla retrieval-augmented code completion approach. Furthermore, we validate the effectiveness of RepoCoder through comprehensive analysis, providing valuable insights for future research. Our source code and benchmark will be publicly available after the paper review.", "keywords": "code completion;large pre-trained language model;code repository;retrieval augmented generation", "primary_area": "", "supplementary_material": "", "author": "Fengji Zhang;Bei Chen;Yue Zhang;Jacky Keung;Jin Liu;Daoguang Zan;Yi Mao;Jian-Guang Lou;Weizhu Chen", "authorids": "~Fengji_Zhang1;~Bei_Chen3;~Yue_Zhang11;~Jacky_Keung1;~Jin_Liu9;~Daoguang_Zan1;~Yi_Mao1;~Jian-Guang_Lou1;~Weizhu_Chen1", "gender": "M;F;F;;M;M;;M;M", "homepage": "https://github.com/zfj1998;http://ml.cs.tsinghua.edu.cn/~beichen/;https://www.linkedin.com/in/yue-zhang-46584419a/;;http://cs.whu.edu.cn/teacherinfo.aspx?id=214;;;https://www.microsoft.com/en-us/research/people/jlou/;https://www.microsoft.com/en-us/research/people/wzchen/", "dblp": "287/8086;;;;;305/5798;;37/1917;79/2536", "google_scholar": "plXSJ7IAAAAJ;Po65v_MAAAAJ;;;;https://scholar.google.com/citations?hl=zh-CN;;alDxINIAAAAJ;LG_E-4EAAAAJ", "or_profile": "~Fengji_Zhang1;~Bei_Chen3;~Yue_Zhang11;~Jacky_Keung1;~Jin_Liu9;~Daoguang_Zan1;~Yi_Mao1;~Jian-Guang_Lou1;~Weizhu_Chen1", "aff": "City University of Hong Kong;Microsoft;;;Wuhan University;Institute of Software, Chinese Academy of Sciences;;Microsoft Research Asia;Microsoft GenAI", "aff_domain": "cityu.edu.hk;microsoft.com;;;whu.edu.cn;ucas.ac.cn;;microsoft.com;microsoft.com", "position": "PhD student;Researcher;;;Full Professor;PhD student;;Principal Researcher;Vice President", "bibtex": "@inproceedings{\nzhang2023repocoder,\ntitle={RepoCoder: Repository-Level Code Completion Through Iterative Retrieval and Generation},\nauthor={Fengji Zhang and Bei Chen and Yue Zhang and Jacky Keung and Jin Liu and Daoguang Zan and Yi Mao and Jian-Guang Lou and Weizhu Chen},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=q09vTY1Cqh}\n}", "github": "", "project": "", "reviewers": "rrM2;ReXb;LvEZ", "site": "https://openreview.net/forum?id=q09vTY1Cqh", "pdf_size": 0, "rating": "4;4;4", "confidence": "5;5;4", "excitement": "4;4;3", "reproducibility": "4;5;3", "correctness": "4;4;4", "rating_avg": 4.0, "confidence_avg": 4.666666666666667, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.0, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 9, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;0000-0003-0359-0248;0009-0009-4269-8543;;;", "linkedin": ";;;;;;;;", "aff_unique_index": "0;1;2;3;1;1", "aff_unique_norm": "City University of Hong Kong;Microsoft;Wuhan University;Chinese Academy of Sciences", "aff_unique_dep": ";Microsoft Corporation;;Institute of Software", "aff_unique_url": "https://www.cityu.edu.hk;https://www.microsoft.com;http://www.whu.edu.cn/;http://www.ios.ac.cn", "aff_unique_abbr": "CityU;Microsoft;WHU;CAS", "aff_campus_unique_index": "0;2", "aff_campus_unique": "Hong Kong SAR;;Asia", "aff_country_unique_index": "0;1;0;0;0;1", "aff_country_unique": "China;United States" }, { "id": "q0c1JTukWE", "title": "On Surgical Fine-tuning for Language Encoders", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "Fine-tuning all the 
layers of a pre-trained neural language encoder (either using all the parameters or using parameter-efficient methods) is often the de-facto way of adapting it to a new task. We show evidence that for different downstream language tasks, fine-tuning only a subset of layers is sufficient to obtain performance that is close to and often better than fine-tuning all the layers in the language encoder. We propose an efficient metric based on the diagonal of the Fisher information matrix (FIM score), to select the candidate layers for selective fine-tuning. We show, empirically on GLUE and SuperGLUE tasks and across distinct language encoders, that this metric can effectively select layers leading to a strong downstream performance. Our work highlights that task-specific information corresponding to a given downstream task is often localized within a few layers, and tuning only those is sufficient for strong performance. Additionally, we demonstrate the robustness of the FIM score to rank layers in a manner that remains constant during the optimization process.", "keywords": "Efficient Fine Tuning;Language Models;Language Encoders;Linguistics;Optimization;Distributional Shifts;Temporal Shifts", "primary_area": "", "supplementary_material": "", "author": "Abhilasha Lodha;Gayatri Vyankatesh Belapurkar;Saloni Chalkapurkar;Yuanming Tao;Reshmi Ghosh;Samyadeep Basu;Dmitrii M Petrov;Soundararajan Srinivasan", "authorids": "~Abhilasha_Lodha1;~Gayatri_Vyankatesh_Belapurkar1;~Saloni_Chalkapurkar1;~Yuanming_Tao1;~Reshmi_Ghosh1;~Samyadeep_Basu1;~Dmitrii_M_Petrov1;~Soundararajan_Srinivasan1", "gender": "F;F;;M;F;M;M;M", "homepage": "https://abhilashalodha.github.io/;;;;https://reshmighosh.github.io;https://samyadeepbasu.github.io/;https://lodurality.github.io/;", "dblp": ";278/0492;;296/1506;324/2458;250/9138;;02/5955", "google_scholar": "https://scholar.google.com/citations?hl=en;;;https://scholar.google.com/citations?hl=en;ui8JeF5lKNMC;6aRwDecAAAAJ;ztQNTloAAAAJ;https://scholar.google.com/scholar?hl=en", "or_profile": "~Abhilasha_Lodha1;~Gayatri_Vyankatesh_Belapurkar1;~Saloni_Chalkapurkar1;~Yuanming_Tao1;~Reshmi_Ghosh1;~Samyadeep_Basu1;~Dmitrii_M_Petrov1;~Soundararajan_Srinivasan1", "aff": "University of Massachusetts at Amherst;University of Massachusetts at Amherst;University of Massachusetts at Amherst;University of Massachusetts at Amherst;Microsoft;University of Maryland, College Park;Department of Computer Science, University of Massachusetts at Amherst;Microsoft", "aff_domain": "umass.edu;umass.edu;umass.edu;umass.edu;microsoft.com;umd.edu;cs.umass.edu;microsoft.com", "position": "MS student;MS student;MS student;MS student;Researcher;PhD student;PhD student;Researcher", "bibtex": "@inproceedings{\nlodha2023on,\ntitle={On Surgical Fine-tuning for Language Encoders},\nauthor={Abhilasha Lodha and Gayatri Vyankatesh Belapurkar and Saloni Chalkapurkar and Yuanming Tao and Reshmi Ghosh and Samyadeep Basu and Dmitrii M Petrov and Soundararajan Srinivasan},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=q0c1JTukWE}\n}", "github": "", "project": "", "reviewers": "2Coy;BW1p;rfrx", "site": "https://openreview.net/forum?id=q0c1JTukWE", "pdf_size": 0, "rating": "2;2;2", "confidence": "4;3;3", "excitement": "2;3;3", "reproducibility": "3;3;4", "correctness": "2;2;3", "rating_avg": 2.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 2.6666666666666665, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 
2.3333333333333335, "replies_avg": 10, "authors#_avg": 8, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;;;", "linkedin": "AbhilashaLodha/;gayatribelapurkar/;saloni-chalkapurkar/;yuanmingtao/;reshmi-ghosh/;;;soundararajansrinivasan/", "aff_unique_index": "0;0;0;0;1;2;0;1", "aff_unique_norm": "University of Massachusetts Amherst;Microsoft;University of Maryland", "aff_unique_dep": ";Microsoft Corporation;", "aff_unique_url": "https://www.umass.edu;https://www.microsoft.com;https://www/umd.edu", "aff_unique_abbr": "UMass Amherst;Microsoft;UMD", "aff_campus_unique_index": "0;0;0;0;2;0", "aff_campus_unique": "Amherst;;College Park", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "q4oWkMHkQx", "title": "Task-Level Thinking Steps Help Large Language Models for Challenging Classification Task", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Large language models (LLMs) have shown incredible performance on many tasks such as dialogue generation, commonsense reasoning and question answering. In-context learning (ICL) is an important paradigm for adapting LLMs to the downstream tasks by prompting few demonstrations. However, the distribution of demonstrations can severely affect the performance, especially for challenging classification tasks. In this paper, we propose the concept of task-level thinking steps that can eliminate bias introduced by demonstrations. Further, to help LLMs distinguish confusing classes, we design a progressive revision framework, which can improve the thinking steps by correcting hard demonstrations. Experimental results prove the superiority of our proposed method, achieving best performance on three kinds of challenging classification tasks in the zero-shot and few-shot settings. 
Besides, with task-level thinking steps, automatically generated chain-of-thoughts (CoTs) bring more competitive performance.", "keywords": "Large Language Models;Prompt Engineering;Text Classification;Chain of Thought", "primary_area": "", "supplementary_material": "", "author": "chunhui du;Jidong Tian;Haoran Liao;Jindou Chen;Hao HE;Yaohui Jin", "authorids": "~chunhui_du1;~Jidong_Tian1;~Haoran_Liao2;~Jindou_Chen1;~Hao_HE4;~Yaohui_Jin2", "gender": ";M;;M;M;M", "homepage": ";;;https://github.com/Golden-Bean;;http://front.sjtu.edu.cn/~jinyh/", "dblp": ";230/4307.html;;;18/813-7.html;27/7040", "google_scholar": ";0iq39EUAAAAJ;;;;H_7_oVcAAAAJ", "or_profile": "~chunhui_du1;~Jidong_Tian1;~Haoran_Liao2;~Jindou_Chen1;~Hao_HE4;~Yaohui_Jin2", "aff": ";Shanghai Jiaotong University;;Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University", "aff_domain": ";sjtu.edu.cn;;sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn", "position": ";PhD student;;PhD student;Associate Professor;Full Professor", "bibtex": "@inproceedings{\ndu2023tasklevel,\ntitle={Task-Level Thinking Steps Help Large Language Models for Challenging Classification Task},\nauthor={chunhui du and Jidong Tian and Haoran Liao and Jindou Chen and Hao HE and Yaohui Jin},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=q4oWkMHkQx}\n}", "github": "", "project": "", "reviewers": "bGCR;ZJta;J9Vu", "site": "https://openreview.net/forum?id=q4oWkMHkQx", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;3;4", "excitement": "4;3;3", "reproducibility": "2;4;4", "correctness": "3;4;4", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;0000-0002-4851-7012;0000-0001-6158-6277", "linkedin": ";;;;;yaohui-jin-bab58511/", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Shanghai Jiao Tong University", "aff_unique_dep": "", "aff_unique_url": "https://www.sjtu.edu.cn", "aff_unique_abbr": "SJTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "q7IvUsjEkb", "title": "Dynamic Voting for Efficient Reasoning in Large Language Models", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Multi-path voting methods like Self-consistency have been used to mitigate reasoning errors in large language models caused by factual errors and illusion generation.\nHowever, these methods require excessive computing resources as they generate numerous reasoning paths for each problem.\nAnd our experiments show that on the arithmetic reasoning task, SVAMP, half of the problems fail to obtain noticeable accuracy gains when voting with more than three paths.\nIn this paper, we propose a novel multi-path voting technique called Dynamic Voting, which effectively reduces the number of reasoning paths during multi-path voting while preserving accuracies by applying early exiting for problems that large language models can confidently solve.\nExperimental evaluations on arithmetic, commonsense, and symbolic reasoning tasks under few-shot and zero-shot settings demonstrate that Dynamic Voting achieves comparable accuracies employing significantly fewer reasoning paths.\nNotably, one of our Dynamic Voting strategies outperforms 
Self-consistency using only 24.7\\% of the number of paths on the LetterConcat task in the few-shot setting.\nFurthermore, Dynamic Voting showcases strong robustness in threshold selection.\nIt also demonstrates excellent generalizability when combined with other voting techniques, different models, and diverse prompts.", "keywords": "large language models;multi-path voting;computational resource;early exiting", "primary_area": "", "supplementary_material": "", "author": "Mingfeng Xue;Dayiheng Liu;Wenqiang Lei;Xingzhang Ren;Baosong Yang;Jun Xie;Yidan Zhang;Dezhong Peng;Jiancheng Lv", "authorids": "~Mingfeng_Xue1;~Dayiheng_Liu1;~Wenqiang_Lei1;~Xingzhang_Ren1;~Baosong_Yang1;~Jun_Xie9;~Yidan_Zhang2;~Dezhong_Peng1;~Jiancheng_Lv2", "gender": "M;M;M;M;M;F;M;M;Not Specified", "homepage": ";https://dayihengliu.github.io/;https://sites.google.com/view/wenqianghome/home;;https://baosongyang.site/;;https://cs.scu.edu.cn/info/1249/10284.htm;https://cs.scu.edu.cn/info/1303/13767.htm;", "dblp": ";https://dblp.uni-trier.de/pers/hd/l/Liu:Dayiheng;167/9604;218/6803.html;203/8245;;;;", "google_scholar": ";pPLQrX4AAAAJ;https://scholar.google.com.hk/citations?user=qexdxuEAAAAJ;3YzSsyIAAAAJ;https://scholar.google.com.tw/citations?user=fXsHJXkAAAAJ;;0gupif8AAAAJ;https://scholar.google.com/citations?hl=zh-CN;YjuM2GsAAAAJ", "or_profile": "~Mingfeng_Xue1;~Dayiheng_Liu1;~Wenqiang_Lei1;~Xingzhang_Ren1;~Baosong_Yang1;~Yidan_Zhang2;~Dezhong_Peng1;~Jiancheng_Lv2;~jun_xie5", "aff": "Sichuan University;Alibaba Group;Sichuan University;;Alibaba Group;Sichuan University;Sichuan University;Sichuan University;Alibaba DAMO Academy", "aff_domain": "scu.edu.cn;alibaba-inc.com;scu.edu.cn;;alibaba-inc.com;scu.edu.cn;scu.edu.cn;scu.edu.cn;alibaba-inc.com", "position": "PhD student;Researcher;Full Professor;;Researcher;PhD student;Full Professor;Full Professor;Principal Researcher", "bibtex": "@inproceedings{\nxue2023dynamic,\ntitle={Dynamic Voting for Efficient Reasoning in Large Language Models},\nauthor={Mingfeng Xue and Dayiheng Liu and Wenqiang Lei and Xingzhang Ren and Baosong Yang and Jun Xie and Yidan Zhang and Dezhong Peng and Jiancheng Lv},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=q7IvUsjEkb}\n}", "github": "", "project": "", "reviewers": "nuFX;NY3h;TXTM", "site": "https://openreview.net/forum?id=q7IvUsjEkb", "pdf_size": 0, "rating": "3;3;3", "confidence": "5;3;4", "excitement": "3;2;3", "reproducibility": "4;3;3", "correctness": "4;2;3", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 2.6666666666666665, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 9, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-8755-8941;;;;0000-0002-0440-2117;;;", "linkedin": ";;;;;;;;", "aff_unique_index": "0;1;0;1;0;0;0;1", "aff_unique_norm": "Sichuan University;Alibaba Group", "aff_unique_dep": ";", "aff_unique_url": "https://www.scu.edu.cn;https://www.alibaba.com", "aff_unique_abbr": "SCU;Alibaba", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "q88QSsc75T", "title": "Simultaneous Machine Translation with Tailored Reference", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Simultaneous machine translation (SiMT) generates translation while reading the whole source sentence. 
However, existing SiMT models are typically trained using the same reference disregarding the varying amounts of available source information at different latency. Training the model with ground-truth at low latency may introduce forced anticipations, whereas utilizing reference consistent with the source word order at high latency results in performance degradation. Consequently, it is crucial to train the SiMT model with appropriate reference that avoids forced anticipations during training while maintaining high quality. In this paper, we propose a novel method that provides tailored reference for the SiMT models trained at different latency by rephrasing the ground-truth. Specifically, we introduce the tailor, induced by reinforcement learning, to modify ground-truth to the tailored reference. The SiMT model is trained with the tailored reference and jointly optimized with the tailor to enhance performance. Importantly, our method is applicable to a wide range of current SiMT approaches. Experiments on three translation tasks demonstrate that our method achieves state-of-the-art performance in both fixed and adaptive policies.", "keywords": "Simultaneous Machine Translation;Machine Translation", "primary_area": "", "supplementary_material": "", "author": "Shoutao Guo;Shaolei Zhang;Yang Feng", "authorids": "~Shoutao_Guo1;~Shaolei_Zhang1;~Yang_Feng4", "gender": "M;M;", "homepage": ";https://zhangshaolei1998.github.io/;http://people.ucas.edu.cn/~yangfeng?language=en", "dblp": "331/5767;;07/6095-4.html", "google_scholar": ";https://scholar.google.com.hk/citations?user=gWwAWo4AAAAJ;https://scholar.google.com/citations?hl=en", "or_profile": "~Shoutao_Guo1;~Shaolei_Zhang1;~Yang_Feng4", "aff": "Institute of computing technology, Chinese Academy of Sciences;Key Laboratory of Intelligent Information Processing Institute of Computing Technology, Chinese Academy of Sciences;Institute of Computing Technology, Chinese Academy of Sciences", "aff_domain": "ict.ac.cn;ict.ac.cn;ict.ac.cn", "position": "PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nguo2023simultaneous,\ntitle={Simultaneous Machine Translation with Tailored Reference},\nauthor={Shoutao Guo and Shaolei Zhang and Yang Feng},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=q88QSsc75T}\n}", "github": "", "project": "", "reviewers": "DPEY;XdDD;aHPk;Boo4;AVCF;WzVC;NbPY", "site": "https://openreview.net/forum?id=q88QSsc75T", "pdf_size": 0, "rating": "2;2;2;2;2;2;2", "confidence": "4;4;2;4;4;2;3", "excitement": "4;4;4;3;2;4;4", "reproducibility": "3;3;3;3;2;4;3", "correctness": "4;4;4;3;2;4;4", "rating_avg": 2.0, "confidence_avg": 3.2857142857142856, "excitement_avg": 3.5714285714285716, "reproducibility_avg": 3.0, "correctness_avg": 3.5714285714285716, "replies_avg": 21, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-7254-9380;", "linkedin": ";;", "aff_unique_index": "0;0;0", "aff_unique_norm": "Chinese Academy of Sciences", "aff_unique_dep": "Institute of Computing Technology", "aff_unique_url": "http://www.ict.ac.cn", "aff_unique_abbr": "CAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "q8aTDcIXnO", "title": "TempTabQA: Temporal Question Answering for Semi-Structured Tables", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Semi-structured data, such as Infobox tables, 
often include temporal information about entities, either implicitly or explicitly. Can current\nNLP systems reason about such information in semi-structured tables? To tackle this question, we introduce the task of temporal\nquestion answering on semi-structured tables. We present a dataset, TEMPTABQA, which comprises 11,454 question-answer pairs extracted from 1,208 Wikipedia Infobox tables spanning more than 90 distinct domains. Using this dataset, we evaluate several state-of-the-art models for temporal reasoning. We observe that even the top-performing LLMs lag behind human performance by more than 13.5 F1 points. Given these results, our dataset has the potential to serve as a challenging benchmark to improve the temporal reasoning capabilities of NLP models.", "keywords": "Semi-structured;Temporal Reasoning;Inference;Question Answering;New Resource;New Dataset;Wikipedia InfoBox", "primary_area": "", "supplementary_material": "", "author": "Vivek Gupta;Pranshu Kandoi;Mahek Bhavesh Vora;Shuo Zhang;Yujie He;Ridho Reinanda;Vivek Srikumar", "authorids": "~Vivek_Gupta2;~Pranshu_Kandoi1;~Mahek_Bhavesh_Vora1;~Shuo_Zhang1;~Yujie_He1;~Ridho_Reinanda1;~Vivek_Srikumar1", "gender": "M;;F;M;M;;", "homepage": "https://vgupta123.github.io;https://www.linkedin.com/in/pranshu-kandoi;;https://imsure318.github.io/;;;https://svivek.com", "dblp": "71/5332-1;;;83/3714-6;;133/8798;37/44", "google_scholar": "https://scholar.google.co.in/citations?user=Bs5H0S4AAAAJ;;;https://scholar.google.com/citations?hl=en;FbeAZGgAAAAJ;;TsTUfOIAAAAJ", "or_profile": "~Vivek_Gupta2;~Pranshu_Kandoi1;~Mahek_Bhavesh_Vora1;~Shuo_Zhang1;~Yujie_He1;~Ridho_Reinanda1;~Vivek_Srikumar1", "aff": "University of Utah, United States;Indian Institute of Technology, Guwahati;Indian Institute of Technology, Guwahati;;Bloomberg L.P.;;University of Utah", "aff_domain": "cs.utah.edu;iitg.ac.in;iitg.ac.in;;bloomberg.net;;utah.edu", "position": "PhD student;Undergrad student;Undergrad student;;Researcher;;Associate Professor", "bibtex": "@inproceedings{\ngupta2023temptabqa,\ntitle={TempTab{QA}: Temporal Question Answering for Semi-Structured Tables},\nauthor={Vivek Gupta and Pranshu Kandoi and Mahek Bhavesh Vora and Shuo Zhang and Yujie He and Ridho Reinanda and Vivek Srikumar},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=q8aTDcIXnO}\n}", "github": "", "project": "", "reviewers": "XpmN;uQYi;tNdZ", "site": "https://openreview.net/forum?id=q8aTDcIXnO", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;4;5", "excitement": "4;3;4", "reproducibility": "5;4;4", "correctness": "3;5;4", "rating_avg": 5.0, "confidence_avg": 4.333333333333333, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.333333333333333, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0000-0003-3179-4125;;;", "linkedin": "keviv9/;;https://linkedin.com/in/mahekvora;shuo-zhang-58a1a9b1/?originalSubdomain=no;yujiehe/;;", "aff_unique_index": "0;1;1;2;0", "aff_unique_norm": "University of Utah;Indian Institute of Technology Guwahati;Bloomberg", "aff_unique_dep": ";;", "aff_unique_url": "https://www.utah.edu;https://www.iitg.ac.in;https://www.bloomberg.com", "aff_unique_abbr": "Utah;IIT Guwahati;Bloomberg", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Guwahati", "aff_country_unique_index": "0;1;1;0;0", "aff_country_unique": "United States;India" }, { "id": "qDspFDJEHP", "title": 
"Three Questions Concerning the Use of Large Language Models to Facilitate Mathematics Learning", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "Due to the remarkable language understanding and generation abilities of large language models (LLMs), their use in educational applications has been explored. However, little work has been done on investigating the pedagogical ability of LLMs in helping students to learn mathematics. In this position paper, we discuss the challenges associated with employing LLMs to enhance students' mathematical problem-solving skills by providing adaptive feedback. Apart from generating the wrong reasoning processes, LLMs can misinterpret the meaning of the question, and also exhibit difficulty in understanding the given questions' rationales when attempting to correct students' answers. Three research questions are formulated.", "keywords": "Mathematics Learning;Adaptive Feedback;Large Language Models", "primary_area": "", "supplementary_material": "", "author": "An-Zi Yen;Wei-Ling Hsu", "authorids": "~An-Zi_Yen1;~Wei-Ling_Hsu2", "gender": "F;M", "homepage": "https://azyen0522.github.io/;https://github.com/Hsu0208", "dblp": "204/3583;", "google_scholar": "https://scholar.google.com/citations?hl=zh-TW;", "or_profile": "~An-Zi_Yen1;~WEI-LING_HSU1", "aff": "Department of Computer Science, National Yang Ming Chiao Tung University;Department of Computer Science, National Yang Ming Chiao Tung University", "aff_domain": "nycu.edu.tw;cs.nycu.edu.tw", "position": "Assistant Professor;MS student", "bibtex": "@inproceedings{\nyen2023three,\ntitle={Three Questions Concerning the Use of Large Language Models to Facilitate Mathematics Learning},\nauthor={An-Zi Yen and Wei-Ling Hsu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=qDspFDJEHP}\n}", "github": "", "project": "", "reviewers": "pKga;RNa9;PZHc", "site": "https://openreview.net/forum?id=qDspFDJEHP", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;4;3", "excitement": "3;2;4", "reproducibility": "3;5;3", "correctness": "3;3;4", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";", "linkedin": ";", "aff_unique_index": "0;0", "aff_unique_norm": "National Yang Ming Chiao Tung University", "aff_unique_dep": "Department of Computer Science", "aff_unique_url": "https://www.nctu.edu.tw", "aff_unique_abbr": "NYCU", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Taiwan", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "id": "qE5vtBMbCJ", "title": "LEXTREME: A Multi-Lingual and Multi-Task Benchmark for the Legal Domain", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Lately, propelled by phenomenal advances around the transformer architecture, the legal NLP field has enjoyed spectacular growth. To measure progress, well-curated and challenging benchmarks are crucial. Previous efforts have produced numerous benchmarks for general NLP models, typically based on news or Wikipedia. However, these may not fit specific domains such as law, with its unique lexicons and intricate sentence structures. 
Even though there is a rising need to build NLP systems for languages other than English, many benchmarks are available only in English and no multilingual benchmark exists in the legal NLP field. We survey the legal NLP literature and select 11 datasets covering 24 languages, creating LEXTREME. To fairly compare models, we propose two aggregate scores, i.e., dataset aggregate score and language aggregate score. Our results show that even the best baseline only achieves modest results, and also ChatGPT struggles with many tasks. This indicates that LEXTREME remains a challenging task with ample room for improvement. To facilitate easy use for researchers and practitioners, we release LEXTREME on huggingface along with a public leaderboard and the necessary code to evaluate models. We also provide a public Weights and Biases project containing all runs for transparency.", "keywords": "legal;nlp;benchmark;multilingual;multitask", "primary_area": "", "supplementary_material": "", "author": "Joel Niklaus;Veton Matoshi;Pooja Rani;Andrea Galassi;Matthias St\u00fcrmer;Ilias Chalkidis", "authorids": "~Joel_Niklaus1;~Veton_Matoshi1;~Pooja_Rani1;~Andrea_Galassi1;~Matthias_St\u00fcrmer1;~Ilias_Chalkidis1", "gender": "M;M;F;M;M;M", "homepage": "https://niklaus.ai;;https://poojaruhal.github.io;http://ai.unibo.it/people/A.Galassi;https://www.bfh.ch/de/matthias-stuermer;https://iliaschalkidis.github.io", "dblp": "232/4545;;153/4246-1.html;208/4245.html;33/4494.html;199/8161", "google_scholar": "qJ8iricAAAAJ;;2B8GcJ4AAAAJ;https://scholar.google.it/citations?user=OnzdCscAAAAJ;QtfXdRoAAAAJ;BrtAqz8AAAAJ", "or_profile": "~Joel_Niklaus1;~Veton_Matoshi1;~Pooja_Rani1;~Andrea_Galassi1;~Matthias_St\u00fcrmer1;~Ilias_Chalkidis1", "aff": "Stanford University;BFH - Bern University of Applied Sciences;;University of Bologna;Universit\u00e4t Bern;Copenhagen University", "aff_domain": "stanford.edu;bfh.ch;;unibo.it;unibe.ch;ku.dk", "position": "Researcher;Researcher;;Postdoc;Lecturer;Postdoc", "bibtex": "@inproceedings{\nniklaus2023lextreme,\ntitle={{LEXTREME}: A Multi-Lingual and Multi-Task Benchmark for the Legal Domain},\nauthor={Joel Niklaus and Veton Matoshi and Pooja Rani and Andrea Galassi and Matthias St{\\\"u}rmer and Ilias Chalkidis},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=qE5vtBMbCJ}\n}", "github": "", "project": "", "reviewers": "CdWt;RRMe;wXJc", "site": "https://openreview.net/forum?id=qE5vtBMbCJ", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;4", "excitement": "4;3;3", "reproducibility": "5;5;4", "correctness": "4;3;3", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.666666666666667, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-2779-1653;;0000-0001-5127-4042;0000-0001-9711-7042;0000-0001-9038-4041;0000-0002-0706-7772", "linkedin": "joelniklaus/;veton-matoshi-186a3093/;;a-galassi/;matthiasstuermer/;", "aff_unique_index": "0;1;2;3;4", "aff_unique_norm": "Stanford University;Bern University of Applied Sciences;University of Bologna;University of Bern;University of Copenhagen", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.stanford.edu;https://www.bfh.ch;https://www.unibo.it;https://www.unibe.ch;https://www.ku.dk", "aff_unique_abbr": "Stanford;BFH;Unibo;UniBE;UCPH", "aff_campus_unique_index": "0;1", "aff_campus_unique": 
"Stanford;Bern;", "aff_country_unique_index": "0;1;2;1;3", "aff_country_unique": "United States;Switzerland;Italy;Denmark" }, { "id": "qGr17uesSx", "title": "SimCKP: Simple Contrastive Learning of Keyphrase Representations", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Keyphrase generation (KG) aims to generate a set of summarizing words or phrases given a source document, while keyphrase extraction (KE) aims to identify them from the text. Because the search space is much smaller in KE, it is often combined with KG to predict keyphrases that may or may not exist in the corresponding document. However, current unified approaches adopt sequence labeling and maximization-based generation that primarily operate at a token level, falling short in observing and scoring keyphrases as a whole. In this work, we propose SimCKP, a simple contrastive learning framework that consists of two stages: 1) An extractor-generator that extracts keyphrases by learning context-aware phrase-level representations in a contrastive manner while also generating keyphrases that do not appear in the document; 2) A reranker that adapts scores for each generated phrase by likewise aligning their representations with the corresponding document. Experimental results on multiple benchmark datasets demonstrate the effectiveness of our proposed approach, which outperforms the state-of-the-art models by a significant margin.", "keywords": "keyphrase prediction;contrastive learning;reranking", "primary_area": "", "supplementary_material": "", "author": "Minseok Choi;Chaeheon Gwak;Seho Kim;Si hyeong Kim;Jaegul Choo", "authorids": "~Minseok_Choi2;~Chaeheon_Gwak1;~Seho_Kim1;~Si_hyeong_Kim1;~Jaegul_Choo1", "gender": "M;M;M;M;M", "homepage": ";;;;https://sites.google.com/site/jaegulchoo/", "dblp": "39/429;;25/10880;;07/2074", "google_scholar": "-KO4vkAAAAAJ;;https://scholar.google.co.kr/citations?hl=ko;;GHJYsLEAAAAJ", "or_profile": "~Minseok_Choi2;~Chaeheon_Gwak1;~Seho_Kim1;~Si_hyeong_Kim1;~Jaegul_Choo1", "aff": "Korea Advanced Institute of Science & Technology;NAVER WEBTOON;Naver Webtoon;;Korea Advanced Institute of Science & Technology", "aff_domain": "kaist.ac.kr;webtoonscorp.com;webtoonscorp.com;;kaist.ac.kr", "position": "PhD student;Researcher;Researcher;;Associate Professor", "bibtex": "@inproceedings{\nchoi2023simckp,\ntitle={Sim{CKP}: Simple Contrastive Learning of Keyphrase Representations},\nauthor={Minseok Choi and Chaeheon Gwak and Seho Kim and Si hyeong Kim and Jaegul Choo},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=qGr17uesSx}\n}", "github": "", "project": "", "reviewers": "h8yT;8xSg;TNRt;zjaV", "site": "https://openreview.net/forum?id=qGr17uesSx", "pdf_size": 0, "rating": "3;3;3;3", "confidence": "4;4;5;5", "excitement": "4;3;3;3", "reproducibility": "3;2;2;2", "correctness": "4;3;2;3", "rating_avg": 3.0, "confidence_avg": 4.5, "excitement_avg": 3.25, "reproducibility_avg": 2.25, "correctness_avg": 3.0, "replies_avg": 14, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0009-0001-5560-7585;;", "linkedin": "brightjade/;inf1si;;si-hyeong-kim-796042176/;", "aff_unique_index": "0;1;1;0", "aff_unique_norm": "Korea Advanced Institute of Science and Technology;NAVER Corporation", "aff_unique_dep": ";WEBTOON", "aff_unique_url": "https://www.kaist.ac.kr;https://www.webtoons.com", "aff_unique_abbr": "KAIST;NAVER", "aff_campus_unique_index": "", 
"aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "South Korea" }, { "id": "qJqJXpysnh", "title": "Handshape-Aware Sign Language Recognition: Extended Datasets and Exploration of Handshape-Inclusive Methods", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "The majority of existing work on sign language recognition encodes signed videos without explicitly acknowledging the phonological attributes of signs. Given that handshape is a vital parameter in sign languages, we explore the potential of handshape-aware sign language recognition. We augment the PHOENIX14T dataset with gloss-level handshape labels, resulting in the new PHOENIX14T-HS dataset. Two unique methods are proposed for handshape-inclusive sign language recognition: a single-encoder network and a dual-encoder network, complemented by a training strategy that simultaneously optimizes both the CTC loss and frame-level cross-entropy loss. The proposed methodology consistently outperforms the baseline performance. The dataset and code can be accessed at: www.anonymous.com.", "keywords": "Sign language recognition;handshape", "primary_area": "", "supplementary_material": "", "author": "Xuan Zhang;Kevin Duh", "authorids": "~Xuan_Zhang8;~Kevin_Duh1", "gender": "F;M", "homepage": "https://www.cs.jhu.edu/~xzhan138/;https://cs.jhu.edu/~kevinduh/", "dblp": ";58/3217", "google_scholar": "QUbIShAAAAAJ;M3BSiiQAAAAJ", "or_profile": "~Xuan_Zhang8;~Kevin_Duh1", "aff": "Johns Hopkins University;Johns Hopkins University", "aff_domain": "jhu.edu;jhu.edu", "position": "PhD student;Assistant Research Professor", "bibtex": "@inproceedings{\nzhang2023handshapeaware,\ntitle={Handshape-Aware Sign Language Recognition: Extended Datasets and Exploration of Handshape-Inclusive Methods},\nauthor={Xuan Zhang and Kevin Duh},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=qJqJXpysnh}\n}", "github": "", "project": "", "reviewers": "NMb4;TyCu;uuuR;ZC2W", "site": "https://openreview.net/forum?id=qJqJXpysnh", "pdf_size": 0, "rating": "3;3;3;3", "confidence": "3;3;2;3", "excitement": "2;4;3;2", "reproducibility": "2;3;2;3", "correctness": "4;3;3;3", "rating_avg": 3.0, "confidence_avg": 2.75, "excitement_avg": 2.75, "reproducibility_avg": 2.5, "correctness_avg": 3.25, "replies_avg": 14, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0001-5460-1176;", "linkedin": "xuan-zhang-19940216/;", "aff_unique_index": "0;0", "aff_unique_norm": "Johns Hopkins University", "aff_unique_dep": "", "aff_unique_url": "https://www.jhu.edu", "aff_unique_abbr": "JHU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "qMSG8S7zh0", "title": "On Uncertainty Calibration and Selective Generation in Probabilistic Neural Summarization: A Benchmark Study", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "Modern deep models for summarization attains impressive benchmark performance, but they are prone to generating miscalibrated predictive uncertainty. This means that they assign high confidence to low-quality predictions, leading to compromised reliability and trustworthiness in real-world applications. Probabilistic deep learning methods are common solutions to the miscalibration problem. 
However, their relative effectiveness in complex autoregressive summarization tasks is not well understood. In this work, we thoroughly investigate different state-of-the-art probabilistic methods' effectiveness in improving the uncertainty quality of the neural summarization models, across three large-scale benchmarks with varying difficulty using our newly introduced evaluation protocol. We show that the probabilistic methods consistently improve the model's generation and uncertainty quality, leading to improved selective generation performance (i.e., abstaining from low-quality summaries) in practice. We also reveal notable failure patterns of probabilistic methods widely adopted in the NLP community (e.g., Deep Ensemble and Monte Carlo Dropout), highlighting the importance of choosing an appropriate method for the data setting.", "keywords": "Uncertainty;calibration;ensembles;generation;summarisation", "primary_area": "", "supplementary_material": "", "author": "Polina Zablotskaia;Du Phan;Joshua Maynez;Shashi Narayan;Jie Ren;Jeremiah Zhe Liu", "authorids": "~Polina_Zablotskaia1;~Du_Phan1;~Joshua_Maynez1;~Shashi_Narayan1;~Jie_Ren2;~Jeremiah_Zhe_Liu1", "gender": "F;M;M;M;F;M", "homepage": ";https://fehiepsi.github.io/;;https://sites.google.com/corp/view/shashinarayan/;;", "dblp": "188/6903;251/5646;220/3863;74/8458;;199/2301", "google_scholar": "Lfd5sYsAAAAJ;CeC9PtYAAAAJ;ZOYd-0oAAAAJ;prEcE9IAAAAJ;https://scholar.google.com/citations?hl=en;9jrmcG4AAAAJ", "or_profile": "~Polina_Zablotskaia1;~Du_Phan1;~Joshua_Maynez1;~Shashi_Narayan1;~Jie_Ren2;~Jeremiah_Zhe_Liu1", "aff": "Google;Google;Google;Google;Google;Google DeepMind", "aff_domain": "google.com;google.com;google.com;google.com;google.com;google.com", "position": "Researcher;Researcher;Researcher;Research Scientist;Research Scientist;Research Scientist", "bibtex": "@inproceedings{\nzablotskaia2023on,\ntitle={On Uncertainty Calibration and Selective Generation in Probabilistic Neural Summarization: A Benchmark Study},\nauthor={Polina Zablotskaia and Du Phan and Joshua Maynez and Shashi Narayan and Jie Ren and Jeremiah Zhe Liu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=qMSG8S7zh0}\n}", "github": "", "project": "", "reviewers": "pqaw;Mqji;o7C1;X5Gn", "site": "https://openreview.net/forum?id=qMSG8S7zh0", "pdf_size": 0, "rating": "3;3;3;3", "confidence": "4;3;4;3", "excitement": "4;4;3;4", "reproducibility": "3;4;3;4", "correctness": "4;4;3;4", "rating_avg": 3.0, "confidence_avg": 3.5, "excitement_avg": 3.75, "reproducibility_avg": 3.5, "correctness_avg": 3.75, "replies_avg": 14, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;", "linkedin": "https://www.linkedin.com/mwlite/in/polina-zablotskaia-8a7644a2;phandu/;;;;", "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google", "aff_unique_url": "https://www.google.com", "aff_unique_abbr": "Google", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;0;0;0;0;1", "aff_country_unique": "United States;United Kingdom" }, { "id": "qOOQW9DcpF", "title": "In-context Learning for Few-shot Multimodal Named Entity Recognition", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Thanks in part to the availability of copious annotated resources for some entity categories, existing studies have achieved superior performance in multimodal named entity recognition 
(MNER). However, in the real-world scenario, it is infeasible to enumerate all entity categories in advance. Therefore, in this paper, we formulate a new few-shot multimodal named entity recognition (FewMNER) task, which aims to effectively locate and identify named entities for a text-image pair only using a small number of labeled examples. Further, we explore the merit of in-context learning (ICL) and propose a novel framework to deal with FewMNER, where three points are taken into account: i.e., converting visual modality, selecting useful examples, and designing an effective task demonstration. Specifically, we first employ an image caption model to convert images into textual descriptions, enabling large language models to absorb information from visual modality. Then, we use the ranking of the sum of similarity rankings from both text and image modalities to select k-nearest examples, which form a demonstration context. Finally, we utilize the MNER definition and the meaning of each entity category as effective instruction. Extensive experimental results demonstrate that our framework outperforms baselines under several few-shot settings.", "keywords": "In-context Learning;Few-shot Multimodal Named Entity Recognition", "primary_area": "", "supplementary_material": "", "author": "Chenran Cai;Qianlong Wang;Bin Liang;Bing Qin;Min Yang;Kam-Fai Wong;Ruifeng Xu", "authorids": "~Chenran_Cai1;~Qianlong_Wang2;~Bin_Liang6;~Bing_Qin2;~Min_Yang6;~Kam-Fai_Wong2;~Ruifeng_Xu1", "gender": "M;M;M;;F;M;M", "homepage": ";;https://binliang-nlp.github.io/;http://ir.hit.edu.cn/~qinb;https://minyang.me/;http://www.se.cuhk.edu.hk/~kfwong;http://faculty.hitsz.edu.cn/xuruifeng", "dblp": "280/6646.html;;71/6053-4;86/5934.html;02/1640-7;w/KamFaiWong;93/5407-1", "google_scholar": "PIFIPKMAAAAJ;;djpQeLEAAAAJ;LKnCub0AAAAJ;_wop6KgAAAAJ;;mObXnNIAAAAJ", "or_profile": "~Chenran_Cai1;~Qianlong_Wang2;~Bin_Liang6;~Bing_Qin2;~Min_Yang6;~Kam-Fai_Wong2;~Ruifeng_Xu1", "aff": "Harbin Institute of Technology;Harbin Institute of Technology;The Chinese University of Hong Kong;Harbin Institute of Technology;Shenzhen Institutes of Advanced Technology, Chinese Academy of Sciences, Chinese Academy of Sciences;The Chinese University of Hong Kong;Harbin Institute of Technology", "aff_domain": "hit.edu.cn;hit.edu.cn;cuhk.edu.hk;hit.edu.cn;siat.ac.cn;cuhk.edu.hk;hit.edu.cn", "position": "MS student;PhD student;Postdoc;Full Professor;Associate Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\ncai2023incontext,\ntitle={In-context Learning for Few-shot Multimodal Named Entity Recognition},\nauthor={Chenran Cai and Qianlong Wang and Bin Liang and Bing Qin and Min Yang and Kam-Fai Wong and Ruifeng Xu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=qOOQW9DcpF}\n}", "github": "", "project": "", "reviewers": "HAWy;BB5Z;DFxJ", "site": "https://openreview.net/forum?id=qOOQW9DcpF", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;3;4", "excitement": "2;3;4", "reproducibility": "5;3;5", "correctness": "2;4;3", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.0, "reproducibility_avg": 4.333333333333333, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-3394-680X;0000-0002-3011-0580;0000-0001-7234-1347;0000-0002-2543-5604;;0000-0002-9427-5659;0000-0002-4009-5679", "linkedin": ";;;;;;", "aff_unique_index": 
"0;0;1;0;2;1;0", "aff_unique_norm": "Harbin Institute of Technology;Chinese University of Hong Kong;Chinese Academy of Sciences", "aff_unique_dep": ";;Shenzhen Institutes of Advanced Technology", "aff_unique_url": "http://www.hit.edu.cn/;https://www.cuhk.edu.hk;http://www.cas.cn", "aff_unique_abbr": "HIT;CUHK;CAS", "aff_campus_unique_index": "0;0;1;0;2;1;0", "aff_campus_unique": "Harbin;Hong Kong SAR;Shenzhen", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "qPIV6XQizX", "title": "A Reference-free Segmentation Quality Index (SegReFree)", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Topic segmentation, in the context of natural language processing, is the process of finding boundaries in a sequence of sentences that separate groups of adjacent sentences at shifts in semantic meaning. Currently, assessing the quality of a segmentation is done by comparing segmentation boundaries selected by a human or algorithm to those selected by a known good reference. This means that it is not possible to quantify the quality of a segmentation without a human annotator, which can be costly and time consuming. This work seeks to improve assessment of segmentation by proposing a reference-free segmentation quality index (SegReFree). The metric takes advantage of the fact that segmentation at a sentence level generally seeks to identify segment boundaries at semantic boundaries within the text. The proposed metric uses a modified cluster validity metric with semantic embeddings of the sentences to determine the quality of the segmentation. Multiple segmentation data sets are used to compare our proposed metric with existing reference-based segmentation metrics by progressively degrading the reference segmentation while computing all possible metrics; through this process, a strong correlation with existing segmentation metrics is shown. 
A Python library implementing the metric is released under the GNU General Public License and the repository is available at \\url{https://github.com/evan-person/reference_free_segmentation_metric}.", "keywords": "segmentation;metrics", "primary_area": "", "supplementary_material": "", "author": "Evan Lucas;Dylan Kangas;Timothy Havens", "authorids": "~Evan_Lucas1;~Dylan_Kangas1;~Timothy_Havens1", "gender": ";M;M", "homepage": ";https://mtu.edu;http://timhavens.com", "dblp": ";;77/3805", "google_scholar": ";;JHGQqLcAAAAJ", "or_profile": "~Evan_Lucas1;~Dylan_Kangas1;~Timothy_Havens1", "aff": "Michigan Technological University;Michigan Technological University;Michigan Technological University", "aff_domain": "mtu.edu;mtu.edu;mtu.edu", "position": "PhD student;Undergrad student;Full Professor", "bibtex": "@inproceedings{\nlucas2023a,\ntitle={A Reference-free Segmentation Quality Index (SegReFree)},\nauthor={Evan Lucas and Dylan Kangas and Timothy Havens},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=qPIV6XQizX}\n}", "github": "", "project": "", "reviewers": "8kNW;Cxez;ZTcw", "site": "https://openreview.net/forum?id=qPIV6XQizX", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;3", "excitement": "4;3;3", "reproducibility": "4;5;5", "correctness": "4;3;4", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.666666666666667, "correctness_avg": 3.6666666666666665, "replies_avg": 9, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0002-5746-3749", "linkedin": ";;", "aff_unique_index": "0;0;0", "aff_unique_norm": "Michigan Technological University", "aff_unique_dep": "", "aff_unique_url": "https://www.mtu.edu", "aff_unique_abbr": "MTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "qPfQq8c3kv", "title": "High-quality argumentative information in low resources approaches improve counter-narrative generation", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "It has been shown that high quality fine-tuning boosts the performance of language models, even if the size of the fine-tuning is small.\nIn this work we show how highly targeted fine-tuning improves the task of hate speech counter-narrative generation in user-generated text, even for very small sizes of training (1722 counter-narratives for English and 355 for Spanish). Providing a small subset of examples focusing on single argumentative strategies, together with the argumentative analysis relevant to that strategy, yields counter-narratives that are as satisfactory as providing the whole set of counter-narratives.\n\nWe also show that a good base model is required for the fine-tuning to have a positive impact. 
Indeed, for Spanish, the counter-narratives obtained without fine-tuning are mostly unacceptable, and, while fine-tuning improves their overall quality, the performance still remains quite unsatisfactory.", "keywords": "counter-narratives;argument mining;finetuning", "primary_area": "", "supplementary_material": "", "author": "Dami\u00e1n Furman;Pablo Torres;Jos\u00e9 Rodr\u00edguez;Diego Letzen;Maria Vanina Martinez;Laura Alonso Alemany", "authorids": "~Dami\u00e1n_Furman1;~Pablo_Torres2;~Jos\u00e9_Rodr\u00edguez2;~Diego_Letzen1;~Maria_Vanina_Martinez1;~Laura_Alonso_Alemany2", "gender": "M;;;;F;F", "homepage": "https://github.com/DamiFur;;;;https://mvmartinez.dc.uba.ar/;https://www.cs.famaf.unc.edu.ar/~laura/", "dblp": ";;;;01/5870;78/6966", "google_scholar": "g5rf2e8AAAAJ;;;y_N2YvgAAAAJ;ZUKUJDwAAAAJ;https://scholar.google.es/citations?user=ZhbejRkAAAAJ", "or_profile": "~Dami\u00e1n_Furman1;~Pablo_Torres2;~Jos\u00e9_Rodr\u00edguez2;~Diego_Letzen1;~Maria_Vanina_Martinez1;~Laura_Alonso_Alemany2", "aff": "Universidad de Buenos Aires;;;Universidad Nacional de C\u00f3rdoba;Computer Science Department, University of Buenos Aires;Universidad Nacional de C\u00f3rdoba", "aff_domain": "uba.ar;;;unc.edu.ar;dc.uba.ar;unc.edu.ar", "position": "PhD student;;;Full Professor;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nfurman2023highquality,\ntitle={High-quality argumentative information in low resources approaches improve counter-narrative generation},\nauthor={Dami{\\'a}n Furman and Pablo Torres and Jos{\\'e} Rodr{\\'\\i}guez and Diego Letzen and Maria Vanina Martinez and Laura Alonso Alemany},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=qPfQq8c3kv}\n}", "github": "", "project": "", "reviewers": "GdEP;8o9t;4FEa", "site": "https://openreview.net/forum?id=qPfQq8c3kv", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;3;4", "excitement": "3;3;4", "reproducibility": "2;3;4", "correctness": "3;2;4", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.0, "correctness_avg": 3.0, "replies_avg": 10, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;0000-0003-2819-4735;0000-0001-6283-6266", "linkedin": ";;;;;laura-alonso-alemany-1125235/?originalSubdomain=ar", "aff_unique_index": "0;1;2;1", "aff_unique_norm": "Universidad de Buenos Aires;Universidad Nacional de C\u00f3rdoba;University of Buenos Aires", "aff_unique_dep": ";;Computer Science Department", "aff_unique_url": "https://www.uba.ar;https://www.unc.edu.ar;https://www.db.uba.ar/", "aff_unique_abbr": "UBA;UNC;UBA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Argentina" }, { "id": "qRbhKhqp0b", "title": "The Past, Present and Better Future of Feedback Learning in Large Language Models for Subjective Human Preferences and Values", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Human feedback is increasingly used to steer the behaviours of Large Language Models (LLMs). However, it is unclear how to collect and incorporate feedback in a way that is efficient, effective and unbiased, especially for highly subjective human preferences and values. In this paper, we survey existing approaches for learning from human feedback, drawing on 95 papers primarily from the ACL and arXiv repositories. 
First, we summarise the past, pre-LLM trends for integrating human feedback into language models. Second, we give an overview of present techniques and practices, as well as the motivations for using feedback; conceptual frameworks for defining values and preferences; and how feedback is collected and from whom. Finally, we encourage a better future of feedback learning in LLMs by raising five unresolved conceptual and practical challenges.", "keywords": "Large language models;survey;review;human feedback;human preference;human values;alignment", "primary_area": "", "supplementary_material": "", "author": "Hannah Rose Kirk;Andrew Michael Bean;Bertie Vidgen;Paul Rottger;Scott A. Hale", "authorids": "~Hannah_Rose_Kirk1;~Andrew_Michael_Bean1;~Bertie_Vidgen1;~Paul_Rottger1;~Scott_A._Hale1", "gender": "F;M;M;Not Specified;M", "homepage": "https://www.hannahrosekirk.com/;https://www.turing.ac.uk/people/researchers/bertie-vidgen;https://paulrottger.com/;http://scott.hale.us;https://www.am-bean.github.io", "dblp": "284/9434;;282/4243;32/10840;244/9323", "google_scholar": "Fha8ldEAAAAJ;https://scholar.google.co.uk/citations?user=yRhnVoIAAAAJ;7rpmd9cAAAAJ;PBJL9ZEAAAAJ;https://scholar.google.com.mx/citations?hl=en", "or_profile": "~Hannah_Rose_Kirk1;~Bertie_Vidgen1;~Paul_Rottger1;~Scott_A._Hale1;~Andrew_Bean1", "aff": "Alan Turing Institute;University of Oxford;University of Oxford;Alan Turing Institute;University of Oxford", "aff_domain": "turing.ac.uk;ox.ac.uk;ox.ac.uk;turing.ac.uk;ox.ac.uk", "position": "Researcher;Visiting researcher;PhD student;Fellow;PhD student", "bibtex": "@inproceedings{\nkirk2023the,\ntitle={The Past, Present and Better Future of Feedback Learning in Large Language Models for Subjective Human Preferences and Values},\nauthor={Hannah Rose Kirk and Andrew Michael Bean and Bertie Vidgen and Paul Rottger and Scott A. Hale},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=qRbhKhqp0b}\n}", "github": "", "project": "", "reviewers": "wQJD;NxJU;TDYC", "site": "https://openreview.net/forum?id=qRbhKhqp0b", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;4;3", "excitement": "4;4;4", "reproducibility": "0;4;0", "correctness": "4;4;4", "rating_avg": 5.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 4.0, "reproducibility_avg": 1.3333333333333333, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-7419-5993;;0009-0008-7115-6893;0000-0002-6894-4951;0000-0001-8439-5975", "linkedin": "hannah-rose-kirk;bertie-vidgen-001/;paul-rottger/;https://linkedin.com/in/computermacgyver;", "aff_unique_index": "0;1;1;0;1", "aff_unique_norm": "Alan Turing Institute;University of Oxford", "aff_unique_dep": ";", "aff_unique_url": "https://www.turing.ac.uk;https://www.ox.ac.uk", "aff_unique_abbr": "ATI;Oxford", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United Kingdom" }, { "id": "qRg3AxBDnN", "title": "Learning the Visualness of Text Using Large Vision-Language Models", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Visual text evokes an image in a person's mind, while non-visual text fails to do so. A method to automatically detect visualness in text will enable text-to-image retrieval and generation models to augment text with relevant images. 
This is particularly challenging with long-form text as text-to-image generation and retrieval models are often triggered for text that is designed to be explicitly visual in nature, whereas long-form text could contain many non-visual sentences. To this end, we curate a dataset of 3,620 English sentences and their visualness scores provided by multiple human annotators. We also propose a fine-tuning strategy that adapts large vision-language models like CLIP by modifying the model's contrastive learning objective to map text identified as non-visual to a common NULL image while matching visual text to their corresponding images in the document. We evaluate the proposed approach on its ability to (i) classify visual and non-visual text accurately, and (ii) attend over words that are identified as visual in psycholinguistic studies. Empirical evaluation indicates that our approach performs better than several heuristics and baseline models for the proposed task. Furthermore, to highlight the importance of modeling the visualness of text, we conduct qualitative analyses of text-to-image generation systems like DALL-E.", "keywords": "text visualness;vision-language models;multimodal learning", "primary_area": "", "supplementary_material": "", "author": "Gaurav Verma;Ryan A. Rossi;Christopher Tensmeyer;Jiuxiang Gu;Ani Nenkova", "authorids": "~Gaurav_Verma1;~Ryan_A._Rossi2;~Christopher_Tensmeyer1;~Jiuxiang_Gu2;~Ani_Nenkova1", "gender": "M;;M;M;", "homepage": "https://gaurav22verma.github.io/;;https://research.adobe.com/person/chris-tensmeyer/;http://gujiuxiang.com;", "dblp": ";;;173/4935.html;", "google_scholar": "qlwNRV0AAAAJ;;RjfiYWkAAAAJ;https://scholar.google.com.sg/citations?user=zPxKV9EAAAAJ;", "or_profile": "~Gaurav_Verma1;~Ryan_A._Rossi2;~Christopher_Tensmeyer1;~Jiuxiang_Gu2;~Ani_Nenkova1", "aff": "Microsoft Research;;Adobe Systems;Adobe Systems;", "aff_domain": "microsoft.com;;adobe.com;adobe.com;", "position": "Intern;;Research Scientist;Researcher;", "bibtex": "@inproceedings{\nverma2023learning,\ntitle={Learning the Visualness of Text Using Large Vision-Language Models},\nauthor={Gaurav Verma and Ryan A. 
Rossi and Christopher Tensmeyer and Jiuxiang Gu and Ani Nenkova},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=qRg3AxBDnN}\n}", "github": "", "project": "", "reviewers": "3qDT;dbaf;B9nx;4r5P", "site": "https://openreview.net/forum?id=qRg3AxBDnN", "pdf_size": 0, "rating": "4;4;4;4", "confidence": "4;3;3;3", "excitement": "3;4;4;3", "reproducibility": "4;3;4;4", "correctness": "4;3;3;3", "rating_avg": 4.0, "confidence_avg": 3.25, "excitement_avg": 3.5, "reproducibility_avg": 3.75, "correctness_avg": 3.25, "replies_avg": 14, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0001-6182-9857;;;;", "linkedin": "gaurav22verma/;;;;", "aff_unique_index": "0;1;1", "aff_unique_norm": "Microsoft;Adobe", "aff_unique_dep": "Microsoft Research;Adobe Systems Incorporated", "aff_unique_url": "https://www.microsoft.com/en-us/research;https://www.adobe.com", "aff_unique_abbr": "MSR;Adobe", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "qS1ip2dGH0", "title": "The Shifted and The Overlooked: A Task-oriented Investigation of User-GPT Interactions", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Recent progress in Large Language Models (LLMs) has produced models that exhibit remarkable performance across a variety of NLP tasks. However, it remains unclear whether the existing focus of NLP research accurately captures the genuine requirements of human users. This paper provides a comprehensive analysis of the divergence between academic research in NLP and the needs of real-world NLP applications via a large-scale collection of user-GPT conversations. We analyze a large-scale collection of real user queries to GPT. We compare these queries against existing NLP benchmark tasks and identify a significant gap between the tasks that users frequently request from LLMs and the tasks that are commonly studied in academic research. For example, we find that tasks such as \"design\" and \"planning\" are prevalent in user interactions but largely neglected or different from traditional NLP benchmarks. 
We investigate these overlooked tasks, dissect the practical challenges, and provide insights toward a roadmap to make LLMs better aligned with user needs.", "keywords": "Large language models;Real-world NLP applications;User-GPT interaction analysis", "primary_area": "", "supplementary_material": "", "author": "Siru Ouyang;Shuohang Wang;Yang Liu;Ming Zhong;Yizhu Jiao;Dan Iter;Reid Pryzant;Chenguang Zhu;Heng Ji;Jiawei Han", "authorids": "~Siru_Ouyang1;~Shuohang_Wang1;~Yang_Liu50;~Ming_Zhong2;~Yizhu_Jiao1;~Dan_Iter1;~Reid_Pryzant1;~Chenguang_Zhu1;~Heng_Ji3;~Jiawei_Han1", "gender": "F;M;M;M;F;Not Specified;;M;F;M", "homepage": "https://ozyyshr.github.io;;https://nlp-yang.github.io/;https://maszhongming.github.io/;https://yzjiao.github.io/;https://daniter-cu.github.io/;;;http://blender.cs.illinois.edu/hengji.html;http://hanj.cs.illinois.edu/", "dblp": "https://dblp.org/search/pid/api?q=author:Siru_Ouyang:;173/5469.html;;;https://dblp.uni-trier.de/pid/250/9757;63/10689.html;205/3986;48/7536-1.html;;h/JiaweiHan.html", "google_scholar": "fetoihAAAAAJ;mN-IO6wAAAAJ;HxTr-CtMdrsC;mnifqeUAAAAJ;sHgBvMgAAAAJ;bg8RrSkAAAAJ;FkufKDgAAAAJ;1b2kKWoAAAAJ;z7GCqT4AAAAJ;https://scholar.google.com.tw/citations?user=Kv9AbjMAAAAJ", "or_profile": "~Siru_Ouyang1;~Shuohang_Wang1;~Yang_Liu50;~Ming_Zhong2;~Yizhu_Jiao1;~Dan_Iter1;~Reid_Pryzant1;~Chenguang_Zhu1;~Heng_Ji3;~Jiawei_Han1", "aff": "University of Illinois Urbana-Champaign Champaign;Microsoft;Microsoft;University of Illinois Urbana Champaign;UIUC;Microsoft;Microsoft Research;Zoom;University of Illinois, Urbana-Champaign;University of Illinois at Urbana-Champaign (UIUC)", "aff_domain": "illinois.edu;microsoft.com;microsoft.com;illinois.edu;illinois.edu;microsoft.com;research.microsoft.com;zoom.us;uiuc.edu;illinois.edu", "position": "PhD student;Researcher;Researcher;PhD student;PhD student;Researcher;Researcher;Principal Researcher;Full Professor;Full Professor", "bibtex": "@inproceedings{\nouyang2023the,\ntitle={The Shifted and The Overlooked: A Task-oriented Investigation of User-{GPT} Interactions},\nauthor={Siru Ouyang and Shuohang Wang and Yang Liu and Ming Zhong and Yizhu Jiao and Dan Iter and Reid Pryzant and Chenguang Zhu and Heng Ji and Jiawei Han},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=qS1ip2dGH0}\n}", "github": "", "project": "", "reviewers": "XsxS;q1hb;XWHq", "site": "https://openreview.net/forum?id=qS1ip2dGH0", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;4;4", "excitement": "3;3;5", "reproducibility": "3;3;4", "correctness": "3;3;5", "rating_avg": 5.0, "confidence_avg": 4.0, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 10, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0009-0001-1331-424X;;;;;;;;;0000-0002-3629-2696", "linkedin": ";;;;;daniter;;;;", "aff_unique_index": "0;1;1;0;0;1;1;2;3;0", "aff_unique_norm": "University of Illinois Urbana-Champaign;Microsoft;Zoom Video Communications Inc.;University of Illinois", "aff_unique_dep": ";Microsoft Corporation;;", "aff_unique_url": "https://illinois.edu;https://www.microsoft.com;https://zoom.us;https://illinois.edu", "aff_unique_abbr": "UIUC;Microsoft;Zoom;UIUC", "aff_campus_unique_index": "0;2;2;2;2", "aff_campus_unique": "Champaign;;Urbana-Champaign", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "qT4bw58Yl2", 
"title": "MProto: Multi-Prototype Network with Denoised Optimal Transport for Distantly Supervised Named Entity Recognition", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Distantly supervised named entity recognition (DS-NER) aims to locate entity mentions and classify their types with only knowledge bases or gazetteers and unlabeled corpus. However, distant annotations are noisy and degrade the performance of NER models. In this paper, we propose a noise-robust prototype network named MProto for the DS-NER task. Different from previous prototype-based NER methods, MProto represents each entity type with multiple prototypes to characterize the intra-class variance among entity representations. To optimize the classifier, each token should be assigned an appropriate ground-truth prototype and we consider such token-prototype assignment as an optimal transport (OT) problem. Furthermore, to mitigate the noise from incomplete labeling, we propose a novel denoised optimal transport (DOT) algorithm. \nSpecifically, we utilize the assignment result between *Other* class tokens and all prototypes to distinguish unlabeled entity tokens from true negatives.\nExperiments on several DS-NER benchmarks demonstrate that our MProto achieves state-of-the-art performance. The source code is now available on Github.", "keywords": "Information Extraction;Named Entity Recognition;Distant Supervision", "primary_area": "", "supplementary_material": "", "author": "Shuhui Wu;Yongliang Shen;Zeqi Tan;Wenqi Ren;Jietian Guo;Shiliang Pu;Weiming Lu", "authorids": "~Shuhui_Wu1;~Yongliang_Shen1;~Zeqi_Tan1;~Wenqi_Ren3;~Jietian_Guo1;~Shiliang_Pu1;~Weiming_Lu1", "gender": "M;M;M;;M;M;", "homepage": "https://github.com/XiPotatonium;;;;http://www.hikvision.com;;", "dblp": "219/7204;221/5612-1.html;200/9648.html;;;155/3173;", "google_scholar": ";UT3NzFAAAAAJ;https://scholar.google.com/citations?hl=zh-CN;;;https://scholar.google.com.hk/citations?user=NWR_wpoAAAAJ;", "or_profile": "~Shuhui_Wu1;~Yongliang_Shen1;~Zeqi_Tan1;~Wenqi_Ren3;~Jietian_Guo1;~Shiliang_Pu1;~Weiming_Lu1", "aff": "Zhejiang University;;University of Hong Kong;;Hikvision Research Institute;;", "aff_domain": "zju.edu.cn;;hku.hk;;hikvision.com;;", "position": "MS student;;Intern;;Researcher;;", "bibtex": "@inproceedings{\nwu2023mproto,\ntitle={{MP}roto: Multi-Prototype Network with Denoised Optimal Transport for Distantly Supervised Named Entity Recognition},\nauthor={Shuhui Wu and Yongliang Shen and Zeqi Tan and Wenqi Ren and Jietian Guo and Shiliang Pu and Weiming Lu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=qT4bw58Yl2}\n}", "github": "", "project": "", "reviewers": "orz4;7sbu;vkaK", "site": "https://openreview.net/forum?id=qT4bw58Yl2", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;3;3", "excitement": "4;4;3", "reproducibility": "3;4;4", "correctness": "4;4;3", "rating_avg": 4.0, "confidence_avg": 3.0, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;;", "linkedin": ";;;;;;", "aff_unique_index": "0;1;2", "aff_unique_norm": "Zhejiang University;University of Hong Kong;Hikvision Research Institute", "aff_unique_dep": ";;", "aff_unique_url": "https://www.zju.edu.cn;https://www.hku.hk;https://www.hikvision.com/cn/", "aff_unique_abbr": "ZJU;HKU;Hikvision", 
"aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "qWbCkbBN1P", "title": "Reducing Spurious Correlations in Aspect-based Sentiment Analysis with Explanation from Large Language Models", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Recently, aspect-based sentiment analysis (ABSA) models have yielded promising results. However, they are susceptible to learning spurious correlations between certain words of the input text and output labels while modeling the sentiment feature of the aspect. This spurious correlation will potentially undermine the performance of ABSA models. One direct solution for this problem is to make the model see and learn an explanation of sentiment expression rather than certain words. Motivated by this, we exploit explanations for the sentiment polarity of each aspect from large language models (LLMs) to reduce spurious correlations in ABSA. First, we formulate a prompt template that wraps the sentence, an aspect, and the sentiment label. This template is utilized to prompt LLMs to generate an appropriate explanation that states the sentiment cause. Then, we propose two straightforward yet effective methods to leverage the explanation for preventing the learning of spurious correlations. We conducted extensive comparative experiments on five datasets by integrating them with some representative ABSA models. Results show that our methods can achieve performance gains and enhance the performance and generalization ability of ABSA models.", "keywords": "aspect-based sentiment analysis;spurious correlations;large language models", "primary_area": "", "supplementary_material": "", "author": "Qianlong Wang;Keyang Ding;Bin Liang;Min Yang;Ruifeng Xu", "authorids": "~Qianlong_Wang2;~Keyang_Ding1;~Bin_Liang6;~Min_Yang6;~Ruifeng_Xu1", "gender": "M;M;M;F;M", "homepage": ";;https://binliang-nlp.github.io/;https://minyang.me/;http://faculty.hitsz.edu.cn/xuruifeng", "dblp": ";;71/6053-4;02/1640-7;93/5407-1", "google_scholar": ";NYJp1AUAAAAJ;djpQeLEAAAAJ;_wop6KgAAAAJ;mObXnNIAAAAJ", "or_profile": "~Qianlong_Wang2;~Keyang_Ding1;~Bin_Liang6;~Min_Yang6;~Ruifeng_Xu1", "aff": "Harbin Institute of Technology;Harbin Institute of Technology;The Chinese University of Hong Kong;Shenzhen Institutes of Advanced Technology, Chinese Academy of Sciences, Chinese Academy of Sciences;Harbin Institute of Technology", "aff_domain": "hit.edu.cn;hit.edu.cn;cuhk.edu.hk;siat.ac.cn;hit.edu.cn", "position": "PhD student;PhD student;Postdoc;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nwang2023reducing,\ntitle={Reducing Spurious Correlations in Aspect-based Sentiment Analysis with Explanation from Large Language Models},\nauthor={Qianlong Wang and Keyang Ding and Bin Liang and Min Yang and Ruifeng Xu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=qWbCkbBN1P}\n}", "github": "", "project": "", "reviewers": "BR1K;vdgb;NFXw", "site": "https://openreview.net/forum?id=qWbCkbBN1P", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;5;4", "excitement": "3;4;2", "reproducibility": "4;3;3", "correctness": "4;4;3", "rating_avg": 3.0, "confidence_avg": 4.333333333333333, "excitement_avg": 3.0, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.6666666666666665, "replies_avg": 10, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": 
"0000-0002-3011-0580;;0000-0001-7234-1347;;0000-0002-4009-5679", "linkedin": ";;;;", "aff_unique_index": "0;0;1;2;0", "aff_unique_norm": "Harbin Institute of Technology;Chinese University of Hong Kong;Chinese Academy of Sciences", "aff_unique_dep": ";;Shenzhen Institutes of Advanced Technology", "aff_unique_url": "http://www.hit.edu.cn/;https://www.cuhk.edu.hk;http://www.cas.cn", "aff_unique_abbr": "HIT;CUHK;CAS", "aff_campus_unique_index": "0;0;1;2;0", "aff_campus_unique": "Harbin;Hong Kong SAR;Shenzhen", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "qZwsO1Qi3V", "title": "Syntactic Substitutability as Unsupervised Dependency Syntax", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Syntax is a latent hierarchical structure which underpins the robust and compositional nature of human language. In this work, we explore the hypothesis that syntactic dependencies can be represented in language model attention distributions and propose a new method to induce these structures theory-agnostically. Instead of modeling syntactic relations as defined by annotation schemata, we model a more general property implicit in the definition of dependency relations, syntactic substitutability. This property captures the fact that words at either end of a dependency can be substituted with words from the same category. Substitutions can be used to generate a set of syntactically invariant sentences whose representations are then used for parsing. We show that increasing the number of substitutions used improves parsing accuracy on natural data. On long-distance subject-verb agreement constructions, our method achieves 79.5% recall compared to 8.9% using a previous method. Our method also provides improvements when transferred to a different parsing setup, demonstrating that it generalizes.", "keywords": "unsupervised dependency parsing;syntax;syntactic probing;linguistically informed", "primary_area": "", "supplementary_material": "", "author": "Jasper Jian;Siva Reddy", "authorids": "~Jasper_Jian1;~Siva_Reddy1", "gender": "M;M", "homepage": "https://sites.google.com/view/jasperjian/;http://sivareddy.in", "dblp": "334/7860;64/8153", "google_scholar": "onaUVsAAAAAJ;", "or_profile": "~Jasper_Jian1;~Siva_Reddy1", "aff": "McGill University;Mila, McGill University", "aff_domain": "mail.mcgill.ca;mila.quebec", "position": "Undergrad student;Assistant Professor", "bibtex": "@inproceedings{\njian2023syntactic,\ntitle={Syntactic Substitutability as Unsupervised Dependency Syntax},\nauthor={Jasper Jian and Siva Reddy},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=qZwsO1Qi3V}\n}", "github": "", "project": "", "reviewers": "z2Sw;5gCw;cSPA", "site": "https://openreview.net/forum?id=qZwsO1Qi3V", "pdf_size": 0, "rating": "5;5;5", "confidence": "3;5;4", "excitement": "4;4;4", "reproducibility": "4;5;4", "correctness": "4;4;4", "rating_avg": 5.0, "confidence_avg": 4.0, "excitement_avg": 4.0, "reproducibility_avg": 4.333333333333333, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";", "linkedin": ";", "aff_unique_index": "0;0", "aff_unique_norm": "McGill University", "aff_unique_dep": "", "aff_unique_url": "https://www.mcgill.ca", "aff_unique_abbr": "McGill", "aff_campus_unique_index": "1", "aff_campus_unique": ";Montreal", "aff_country_unique_index": "0;0", "aff_country_unique": "Canada" }, { 
"id": "qae0FlfrG6", "title": "Does the Correctness of Factual Knowledge Matter for Factual Knowledge-Enhanced Pre-trained Language Models?", "track": "main", "status": "Long Main", "tldr": "", "abstract": "In recent years, the injection of factual knowledge has been observed to have a significant positive correlation to the downstream task performance of pre-trained language models.\nHowever, existing work neither demonstrates that pre-trained models successfully learn the injected factual knowledge nor proves that there is a causal relation between injected factual knowledge and downstream performance improvements.\nIn this paper, we introduce a counterfactual-based analysis framework to explore the causal effects of factual knowledge injection on the performance of language models within pretrain-finetune paradigm.\nInstead of directly probing the language model or exhaustively enumerating potential confounding factors, we analyze this issue by perturbing the factual knowledge sources at different scales and comparing the performance of pre-trained language models before and after the perturbation. \nSurprisingly, throughout our experiments, we find that although the knowledge seems to be successfully injected, the correctness of injected knowledge only has a very limited effect on the models' downstream performance.\nThis finding strongly challenges previous assumptions that the injected factual knowledge is the key for language models to achieve performance improvements on downstream tasks in pretrain-finetune paradigm.", "keywords": "factual knowledge;language model;knowledge-enhanced", "primary_area": "", "supplementary_material": "", "author": "Boxi Cao;Qiaoyu Tang;Hongyu Lin;Xianpei Han;Le Sun", "authorids": "~Boxi_Cao1;~Qiaoyu_Tang1;~Hongyu_Lin1;~Xianpei_Han1;~Le_Sun1", "gender": "M;M;M;M;M", "homepage": "https://c-box.github.io;;http://linhongyu.top/;http://www.icip.org.cn/team/homepage/;http://www.icip.org.cn/team/sunle/", "dblp": "295/9057;347/9053;;57/2368;78/5897-1", "google_scholar": "https://scholar.google.com.hk/citations?hl=zh-CN;;mu5lLakAAAAJ;pA88bm4AAAAJ;6bFNhtwAAAAJ", "or_profile": "~Boxi_Cao1;~Qiaoyu_Tang1;~Hongyu_Lin1;~Xianpei_Han1;~Le_Sun1", "aff": "Institute of Software, Chinese Academy of Sciences;Institute of Software, Chinese Academy of Sciences;Institute of Software, Chinese Academy of Sciences;Institute of Software, CAS;Institute of Software, Chinese Academy of Sciences", "aff_domain": "iscas.ac.cn;iscas.ac.cn;iscas.ac.cn;iscas.ac.cn;iscas.ac.cn", "position": "PhD student;PhD student;Associate Professor;Professor;Full Professor", "bibtex": "@inproceedings{\ncao2023does,\ntitle={Does the Correctness of Factual Knowledge Matter for Factual Knowledge-Enhanced Pre-trained Language Models?},\nauthor={Boxi Cao and Qiaoyu Tang and Hongyu Lin and Xianpei Han and Le Sun},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=qae0FlfrG6}\n}", "github": "", "project": "", "reviewers": "jAz2;tpiB;3qik", "site": "https://openreview.net/forum?id=qae0FlfrG6", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;1;2", "excitement": "4;4;4", "reproducibility": "4;4;4", "correctness": "4;3;2", "rating_avg": 4.0, "confidence_avg": 2.3333333333333335, "excitement_avg": 4.0, "reproducibility_avg": 4.0, "correctness_avg": 3.0, "replies_avg": 10, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;", "linkedin": ";;;;", "aff_unique_index": "0;0;0;0;0", 
"aff_unique_norm": "Chinese Academy of Sciences", "aff_unique_dep": "Institute of Software", "aff_unique_url": "http://www.ios.ac.cn", "aff_unique_abbr": "CAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "qegD54EWAl", "title": "Hiding in Plain Sight: Tweets with Hate Speech Masked by Homoglyphs", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "To avoid detection by current NLP monitoring applications, progenitors of hate speech often replace one or more letters in offensive words with homoglyphs, visually similar Unicode characters. Harvesting real-world hate speech containing homoglyphs is challenging due to the vast replacement possibilities. We developed a character substitution scraping method and assembled the Offensive Tweets with Homoglyphs (OTH) Dataset (N=90,788) with more than 1.5 million occurrences of 1,281 non-Latin characters (emojis excluded). In an annotated sample (n=700), 40.14% of the tweets were found to contain hate speech. We assessed the performance of seven transformer-based hate speech detection models and found that they performed poorly in a zero-shot setting (F1 scores between 0.04 and 0.52) but normalizing the data dramatically improved detection (F1 scores between 0.59 and 0.71). Training the models using the annotated data further boosted performance (highest micro-averaged F1 score=0.88, using five-fold cross validation). This study indicates that a dataset containing homoglyphs known and unknown to the scraping script can be collected, and that neural models can be trained to recognize camouflaged real-world hate speech.", "keywords": "Homoglyphs;Dataset;Hate Speech Detection;Twitter", "primary_area": "", "supplementary_material": "", "author": "Portia Cooper;Mihai Surdeanu;Eduardo Blanco", "authorids": "~Portia_Cooper1;~Mihai_Surdeanu1;~Eduardo_Blanco1", "gender": "F;;M", "homepage": ";http://surdeanu.info/mihai/;https://eduardoblanco.github.io/", "dblp": ";18/3479;32/369-2", "google_scholar": ";https://scholar.google.com/citations?hl=en;AqGa3-MAAAAJ", "or_profile": "~Portia_Cooper1;~Mihai_Surdeanu1;~Eduardo_Blanco1", "aff": "University of Arizona;University of Arizona;University of Arizona", "aff_domain": "arizona.edu;arizona.edu;arizona.edu", "position": "Undergrad student;Full Professor;Associate Professor", "bibtex": "@inproceedings{\ncooper2023hiding,\ntitle={Hiding in Plain Sight: Tweets with Hate Speech Masked by Homoglyphs},\nauthor={Portia Cooper and Mihai Surdeanu and Eduardo Blanco},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=qegD54EWAl}\n}", "github": "", "project": "", "reviewers": "BrA1;GyN8;KfRS", "site": "https://openreview.net/forum?id=qegD54EWAl", "pdf_size": 0, "rating": "3;3;3", "confidence": "5;4;4", "excitement": "3;3;3", "reproducibility": "3;4;5", "correctness": "3;2;4", "rating_avg": 3.0, "confidence_avg": 4.333333333333333, "excitement_avg": 3.0, "reproducibility_avg": 4.0, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0001-9049-6027;;", "linkedin": "portia-cooper;;", "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Arizona", "aff_unique_dep": "", "aff_unique_url": "https://www.arizona.edu", "aff_unique_abbr": "UA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", 
"aff_country_unique": "United States" }, { "id": "qhwYFIrSm7", "title": "A Diachronic Analysis of Paradigm Shifts in NLP Research: When, How, and Why?", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Understanding the fundamental concepts and trends in a scientific field is crucial for keeping abreast of its continuous advancement. \nIn this study, we propose a systematic framework for analyzing the evolution of research topics in a scientific field using causal discovery and inference techniques. We define three variables to encompass diverse facets of the evolution of research topics within NLP and utilize a causal discovery algorithm to unveil the causal connections among these variables using observational data. Subsequently, we leverage this structure to measure the intensity of these relationships. By conducting extensive experiments on the ACL Anthology corpus, we demonstrate that our framework effectively uncovers evolutionary trends and the underlying causes for a wide range of NLP research topics. Specifically, we show that tasks and methods are primary drivers of research in NLP, with datasets following, while metrics have minimal impact.", "keywords": "Scholarly Document Processing;NLP Scientometrics", "primary_area": "", "supplementary_material": "", "author": "Aniket Pramanick;Yufang Hou;Saif M. Mohammad;Iryna Gurevych", "authorids": "~Aniket_Pramanick1;~Yufang_Hou2;~Saif_M._Mohammad1;~Iryna_Gurevych1", "gender": ";F;M;", "homepage": ";https://yufanghou.github.io/;http://saifmohammad.com;", "dblp": ";;58/380;", "google_scholar": ";-fBym-EAAAAJ;zJHymXh9EVwC;", "or_profile": "~Aniket_Pramanick1;~Yufang_Hou2;~Saif_M._Mohammad1;~Iryna_Gurevych1", "aff": ";IBM Research Ireland;National Research Council Canada;", "aff_domain": ";ibm.com;nrc-cnrc.gc.ca;", "position": ";Principal Researcher;Researcher;", "bibtex": "@inproceedings{\npramanick2023a,\ntitle={A Diachronic Analysis of Paradigm Shifts in {NLP} Research: When, How, and Why?},\nauthor={Aniket Pramanick and Yufang Hou and Saif M. 
Mohammad and Iryna Gurevych},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=qhwYFIrSm7}\n}", "github": "", "project": "", "reviewers": "Lzm4;2oor;s1DD", "site": "https://openreview.net/forum?id=qhwYFIrSm7", "pdf_size": 0, "rating": "5;5;5", "confidence": "3;3;4", "excitement": "3;4;4", "reproducibility": "3;4;3", "correctness": "3;4;4", "rating_avg": 5.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0003-2716-7516;", "linkedin": ";;;", "aff_unique_index": "0;1", "aff_unique_norm": "IBM;National Research Council Canada", "aff_unique_dep": "Research;", "aff_unique_url": "https://www.ibm.com/research;https://www.nrc-cnrc.gc.ca", "aff_unique_abbr": "IBM;NRC-CNRC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "Ireland;Canada" }, { "id": "qiV0mvkVyq", "title": "PROSE: A Pronoun Omission Solution for Chinese-English Spoken Language Translation", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Neural Machine Translation (NMT) systems encounter a significant challenge when translating a pro-drop ('pronoun-dropping') language (e.g., Chinese) to a non-pro-drop one (e.g., English), since the pro-drop phenomenon demands NMT systems to recover omitted pronouns. This unique and crucial task, however, lacks sufficient datasets for benchmarking. To bridge this gap, we introduce PROSE, a new benchmark featured in diverse pro-drop instances for document-level Chinese-English spoken language translation. Furthermore, we conduct an in-depth investigation of the pro-drop phenomenon in spoken Chinese on this dataset, reconfirming that pro-drop reduces the performance of NMT systems in Chinese-English translation. To alleviate the negative impact introduced by pro-drop, we propose Mention-Aware Semantic Augmentation, a novel approach that leverages the semantic embedding of dropped pronouns to augment training pairs. 
Results from the experiments on four Chinese-English translation corpora show that our proposed method outperforms existing methods regarding omitted pronoun retrieval and overall translation quality.", "keywords": "Chinese-English Spoken Language Translation;Zero-Pronoun;Mention-Aware Semantic Augmentation", "primary_area": "", "supplementary_material": "", "author": "Ke Wang;Xiutian Zhao;Yanghui Li;Wei Peng", "authorids": "~Ke_Wang2;~Xiutian_Zhao1;~Yanghui_Li1;~Wei_Peng6", "gender": "M;M;M;M", "homepage": ";https://xiutian.github.io;;https://www.rmit.edu.au/profiles/p/wei-peng3", "dblp": "https://dblp.uni-trier.de/pid/181/2613.html;362/7856;;", "google_scholar": "https://scholar.google.com/citations?hl=en;HfOmKncAAAAJ;;", "or_profile": "~Ke_Wang2;~Xiutian_Zhao1;~Yanghui_Li1;~Wei_Peng6", "aff": "Huawei Technologies Ltd.;;;Huawei Technologies Ltd.", "aff_domain": "huawei.com;;;huawei.com", "position": "Researcher;;;Principal Researcher", "bibtex": "@inproceedings{\nwang2023prose,\ntitle={{PROSE}: A Pronoun Omission Solution for Chinese-English Spoken Language Translation},\nauthor={Ke Wang and Xiutian Zhao and Yanghui Li and Wei Peng},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=qiV0mvkVyq}\n}", "github": "", "project": "", "reviewers": "NDb2;x45u;DfAP", "site": "https://openreview.net/forum?id=qiV0mvkVyq", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;3", "excitement": "3;4;3", "reproducibility": "4;4;3", "correctness": "4;4;3", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0003-2300-0743;;0000-0002-5262-7775;", "linkedin": ";;;wei-peng-phd-in-ai-4515ba22/?originalSubdomain=au", "aff_unique_index": "0;0", "aff_unique_norm": "Huawei", "aff_unique_dep": "Huawei Technologies", "aff_unique_url": "https://www.huawei.com", "aff_unique_abbr": "Huawei", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "id": "qlCtkvgQJH", "title": "LogiCoT: Logical Chain-of-Thought Instruction Tuning", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Generative Pre-trained Transformer 4 (GPT-4) demonstrates impressive chain-of-thought reasoning ability. \nRecent work on self-instruction tuning, such as Alpaca, has focused on enhancing the general proficiency of models. These instructions enable the model to achieve performance comparable to GPT-3.5 on general tasks like open-domain text generation and paraphrasing. However, they fall short of helping the model handle complex reasoning tasks.\nTo bridge the gap, this paper presents LogiCoT, a new instruction-tuning dataset for Logical Chain-of-Thought reasoning with GPT-4. We elaborate on the process of harvesting instructions for prompting GPT-4 to generate chain-of-thought rationales. 
LogiCoT serves as an instruction set for teaching models of logical reasoning and elicits general reasoning skills.", "keywords": "instruction tuning;chain-of-thought;large language model;logical reasoning;GPT-4", "primary_area": "", "supplementary_material": "", "author": "Hanmeng Liu;Zhiyang Teng;Leyang Cui;Chaoli Zhang;Qiji Zhou;Yue Zhang", "authorids": "~Hanmeng_Liu1;~Zhiyang_Teng1;~Leyang_Cui1;~Chaoli_Zhang1;~Qiji_Zhou1;~Yue_Zhang7", "gender": "M;M;M;F;M;M", "homepage": "https://liuhanmeng.github.io;https://zeeeyang.github.io;https://github.com/Nealcly;;;http://frcchang.github.io", "dblp": "269/4615;136/8660;247/6181;156/0429;268/1339;47/722-4", "google_scholar": "vjmL_9UAAAAJ;9wOJrf8AAAAJ;6YVwZgkAAAAJ;2bL2FJ0AAAAJ;bKaielcAAAAJ;", "or_profile": "~Hanmeng_Liu1;~Zhiyang_Teng1;~Leyang_Cui1;~Chaoli_Zhang1;~Qiji_Zhou1;~Yue_Zhang7", "aff": "Westlake University;Nanyang Technological University;Tencent AI Lab;Alibaba Group;Westlake University;Westlake University", "aff_domain": "westlake.edu;ntu.edu.sg;tencent.com;alibaba-inc.com;westlake.edu.cn;westlake.edu.cn", "position": "PhD student;Researcher;Researcher;Researcher;Postdoc;Full Professor", "bibtex": "@inproceedings{\nliu2023logicot,\ntitle={LogiCoT: Logical Chain-of-Thought Instruction Tuning},\nauthor={Hanmeng Liu and Zhiyang Teng and Leyang Cui and Chaoli Zhang and Qiji Zhou and Yue Zhang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=qlCtkvgQJH}\n}", "github": "", "project": "", "reviewers": "mPz1;Gyqk;D4Qn", "site": "https://openreview.net/forum?id=qlCtkvgQJH", "pdf_size": 0, "rating": "3;3;3", "confidence": "5;4;3", "excitement": "3;3;4", "reproducibility": "4;4;4", "correctness": "3;3;3", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.0, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0003-1320-9973;;;;;0000-0002-5214-2268", "linkedin": ";;;;;", "aff_unique_index": "0;1;2;3;0;0", "aff_unique_norm": "Westlake University;Nanyang Technological University;Tencent;Alibaba Group", "aff_unique_dep": ";;Tencent AI Lab;", "aff_unique_url": "https://www.westlake.edu.cn;https://www.ntu.edu.sg;https://ai.tencent.com;https://www.alibaba.com", "aff_unique_abbr": "WU;NTU;Tencent AI Lab;Alibaba", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;0;0", "aff_country_unique": "China;Singapore" }, { "id": "qlwXv0oHJD", "title": "Towards Noise-Tolerant Speech-Referring Video Object Segmentation: Bridging Speech and Text", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Linguistic communication is prevalent in Human-Computer Interaction (HCI). Speech (spoken language) serves as a convenient yet potentially ambiguous form due to noise and accents, exposing a gap compared to text. In this study, we investigate the prominent HCI task, Referring Video Object Segmentation (R-VOS), which aims to segment and track objects using linguistic references. While text input is well-investigated, speech input is under-explored. Our objective is to bridge the gap between speech and text, enabling the adaptation of existing text-input R-VOS models to accommodate noisy speech input effectively. 
Specifically, we propose a method to align the semantic spaces between speech and text by incorporating two key modules: 1) Noise-Aware Semantic Adjustment (NSA) for clear semantics extraction from noisy speech; and 2) Semantic Jitter Suppression (SJS) enabling R-VOS models to tolerate noisy queries. Comprehensive experiments conducted on the challenging AVOS benchmarks reveal that our proposed method outperforms state-of-the-art approaches.", "keywords": "Noisy Speech;Speech-Referring Video Object Segmentation", "primary_area": "", "supplementary_material": "", "author": "Xiang Li;Jinglu Wang;Xiaohao Xu;Muqiao Yang;Fan Yang;Yizhou Zhao;Rita Singh;Bhiksha Raj", "authorids": "~Xiang_Li35;~Jinglu_Wang3;~Xiaohao_Xu1;~Muqiao_Yang1;~Fan_Yang46;~Yizhou_Zhao2;~Rita_Singh1;~Bhiksha_Raj1", "gender": ";;;M;;;F;M", "homepage": ";;;https://muqiaoy.github.io;;;http://mlsp.cs.cmu.edu/people/rsingh/index.html;https://www.cs.cmu.edu/directory/bhikshar/", "dblp": ";;;239/6073;;;;60/3996", "google_scholar": ";;;https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=zh-CN;nVKRaf4AAAAJ;;", "or_profile": "~Xiang_Li35;~Jinglu_Wang3;~Xiaohao_Xu1;~Muqiao_Yang1;~Fan_Yang46;~Yizhou_Zhao2;~Rita_Singh1;~Bhiksha_Raj1", "aff": ";;;Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University;School of Computer Science, Carnegie Mellon University;Mohamed bin Zayed University of Artificial Intelligence", "aff_domain": ";;;andrew.cmu.edu;andrew.cmu.edu;andrew.cmu.edu;cs.cmu.edu;mbzuai.ac.ae", "position": ";;;PhD student;MS student;MS student;Research Professor;Full Professor", "bibtex": "@inproceedings{\nli2023towards,\ntitle={Towards Noise-Tolerant Speech-Referring Video Object Segmentation: Bridging Speech and Text},\nauthor={Xiang Li and Jinglu Wang and Xiaohao Xu and Muqiao Yang and Fan Yang and Yizhou Zhao and Rita Singh and Bhiksha Raj},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=qlwXv0oHJD}\n}", "github": "", "project": "", "reviewers": "zoVN;A74t;uQBw", "site": "https://openreview.net/forum?id=qlwXv0oHJD", "pdf_size": 0, "rating": "5;5;5", "confidence": "3;4;4", "excitement": "3;4;4", "reproducibility": "4;3;4", "correctness": "4;4;4", "rating_avg": 5.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 8, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0000-0001-6273-0138;;0000-0002-2975-0783;;", "linkedin": ";;;muqiaoy/;;;;", "aff_unique_index": "0;0;0;0;1", "aff_unique_norm": "Carnegie Mellon University;Mohamed bin Zayed University of Artificial Intelligence", "aff_unique_dep": ";", "aff_unique_url": "https://www.cmu.edu;https://mbzuai.ac.ae", "aff_unique_abbr": "CMU;MBZUAI", "aff_campus_unique_index": "1", "aff_campus_unique": ";Pittsburgh", "aff_country_unique_index": "0;0;0;0;1", "aff_country_unique": "United States;United Arab Emirates" }, { "id": "qnHB2SMQLA", "title": "Take a Closer Look at Multilinguality! Improve Multilingual Pre-Training Using Monolingual Corpora Only", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Recent studies have revealed the remarkable cross-lingual capability of multilingual pre-trained language models (mPLMs), even when pre-trained without parallel corpora (mono-mPLMs). 
Intuitively, semantic alignments may be the reason behind such capability but remain under-explored. In this work, we investigate the alignment properties from the token perspective in mono-mPLMs and find that the alignments correspond to the geometric similarity of embedding space across different languages. Nevertheless, mono-mPLMs tend to damage this geometric similarity at the higher layers due to the lack of cross-lingual interactions, thus limiting their cross-lingual transfer capabilities. To address this issue, we introduce token-level and semantic-level code-switched masked language modeling, employing the self-induced token alignments to explicitly improve cross-lingual interactions over layers of mono-mPLMs without relying on parallel sentences. We evaluate our method on various natural language understanding tasks and unsupervised machine translation tasks. The results demonstrate that our methods outperform the strong baselines and achieve comparable performance with mPLMs trained with parallel corpora.", "keywords": "multilinguality;mutlilingual pre-training", "primary_area": "", "supplementary_material": "", "author": "Jinliang Lu;Yu Lu;Jiajun Zhang", "authorids": "~Jinliang_Lu1;~Yu_Lu8;~Jiajun_Zhang1", "gender": "M;F;M", "homepage": "https://jinlianglu96.github.io/about/;;http://www.nlpr.ia.ac.cn/cip/jjzhang.htm", "dblp": "249/9047;;71/6950-1.html", "google_scholar": "https://scholar.google.com/citations?hl=en;;93zngeYAAAAJ", "or_profile": "~Jinliang_Lu1;~Yu_Lu8;~Jiajun_Zhang1", "aff": "Institute of automation, Chinese Academy of Sciences;Institute of automation, Chinese academy of science;Institute of Automation, Chinese Academy of Sciences", "aff_domain": "ia.ac.cn;nlpr.ia.ac.cn;ia.ac.cn", "position": "PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nlu2023take,\ntitle={Take a Closer Look at Multilinguality! Improve Multilingual Pre-Training Using Monolingual Corpora Only},\nauthor={Jinliang Lu and Yu Lu and Jiajun Zhang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=qnHB2SMQLA}\n}", "github": "", "project": "", "reviewers": "R5qn;Swjy;Jsy5", "site": "https://openreview.net/forum?id=qnHB2SMQLA", "pdf_size": 0, "rating": "3;3;3", "confidence": "5;5;3", "excitement": "3;3;4", "reproducibility": "3;4;4", "correctness": "3;4;4", "rating_avg": 3.0, "confidence_avg": 4.333333333333333, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-5395-2385;0000-0001-9108-2619;", "linkedin": ";;", "aff_unique_index": "0;0;0", "aff_unique_norm": "Chinese Academy of Sciences", "aff_unique_dep": "Institute of Automation", "aff_unique_url": "http://www.ia.cas.cn", "aff_unique_abbr": "CAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "qnO9IRNA9d", "title": "Instructed Language Models with Retrievers Are Powerful Entity Linkers", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Generative approaches powered by large language models (LLMs) have demonstrated emergent abilities in tasks that require complex reasoning abilities. 
Yet the generative nature still makes the generated content suffer from hallucinations, thus unsuitable for entity-centric tasks like entity linking (EL) requiring precise entity predictions over a large knowledge base. \nWe present Instructed Generative Entity Linker (INSGENEL), the first approach that enables causal language models to perform entity linking over knowledge bases. \nSeveral methods of equipping language models with EL ability were proposed in this work, including (i) a sequence-to-sequence training EL objective with instruction-tuning, (ii) a novel generative EL framework based on a light-weight potential mention retriever that frees the model from heavy and non-parallelizable decoding, achieving 4$\\times$ speedup without compromise on linking metrics.\nINSGENEL outperforms previous generative alternatives with +6.8 F1 points gain on average, also with a huge advantage in training data efficiency and training compute consumption. In addition, our skillfully-engineered in-context learning (ICL) framework for EL still lags behind INSGENEL significantly, reaffirming that the EL task remains a persistent hurdle for general LLMs.", "keywords": "Knowledge Grounding;Entity Linking;Generative Model;Large Language Model", "primary_area": "", "supplementary_material": "", "author": "Zilin Xiao;MING GONG;Jie Wu;Xingyao Zhang;Linjun Shou;Daxin Jiang", "authorids": "~Zilin_Xiao1;~MING_GONG2;~Jie_Wu15;~Xingyao_Zhang2;~Linjun_Shou1;~Daxin_Jiang2", "gender": "M;;M;;M;M", "homepage": "https://zilin.me/;;http://tobeadded.com;;https://www.microsoft.com/en-us/research/people/lisho/;https://www.microsoft.com/en-us/research/people/djiang/", "dblp": "330/7498;;;;;77/5094", "google_scholar": "IHDbVRoAAAAJ;;;;Tj0DLa0AAAAJ;N-wAHCoAAAAJ", "or_profile": "~Zilin_Xiao1;~MING_GONG2;~Jie_Wu15;~Xingyao_Zhang2;~Linjun_Shou1;~Daxin_Jiang2", "aff": "Microsoft;;Microsoft;Microsoft;Microsoft;Microsoft", "aff_domain": "microsoft.com;;microsoft.com;microsoft.com;microsoft.com;microsoft.com", "position": "Intern;;Researcher;Researcher;Researcher;Researcher/Scientist", "bibtex": "@inproceedings{\nxiao2023instructed,\ntitle={Instructed Language Models with Retrievers Are Powerful Entity Linkers},\nauthor={Zilin Xiao and MING GONG and Jie Wu and Xingyao Zhang and Linjun Shou and Daxin Jiang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=qnO9IRNA9d}\n}", "github": "", "project": "", "reviewers": "zVxi;7xon;hkkb", "site": "https://openreview.net/forum?id=qnO9IRNA9d", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;3;4", "excitement": "3;4;4", "reproducibility": "4;4;4", "correctness": "4;3;4", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.0, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;", "linkedin": ";;jiewu-ecnu/;xingyao-zhang-138474268/?originalSubdomain=hk;;", "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Microsoft", "aff_unique_dep": "Microsoft Corporation", "aff_unique_url": "https://www.microsoft.com", "aff_unique_abbr": "Microsoft", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "qo17ZiVnH2", "title": "Filling the Image Information Gap for VQA: Prompting Large Language Models to Proactively Ask Questions", "track": "main", 
"status": "Long Findings", "tldr": "", "abstract": "Large Language Models (LLMs) demonstrate impressive reasoning ability and the maintenance of world knowledge not only in natural language tasks, but also in some vision-language tasks such as open-domain knowledge-based visual question answering (OK-VQA). As images are invisible to LLMs, researchers convert images to text to engage LLMs into the visual question reasoning procedure. This leads to discrepancies between images and their textual representations presented to LLMs, which consequently impedes final reasoning performance. \nTo fill the information gap and better leverage the reasoning capability, we design a framework that enables LLMs to proactively ask relevant questions to unveil more details in the image, along with filters for refining the generated information. We validate our idea on OK-VQA and A-OKVQA. Our method continuously boosts the performance of baselines methods by an average gain of 2.15\\% on OK-VQA, and achieves consistent improvements across different LLMs.", "keywords": "visual question answering;knowledge reasoning;in-context learning", "primary_area": "", "supplementary_material": "", "author": "Ziyue Wang;Chi Chen;Peng Li;Yang Liu", "authorids": "~Ziyue_Wang4;~Chi_Chen1;~Peng_Li2;~Yang_Liu19", "gender": "F;;M;M", "homepage": ";https://nlp.csai.tsinghua.edu.cn/;http://www.lpeng.net/;http://nlp.csai.tsinghua.edu.cn/~ly/", "dblp": "137/0610-2;21/1794-5;83/6353-30;51/3710-5", "google_scholar": "https://scholar.google.com/citations?hl=en;2jpvQ90AAAAJ;hgYzkOQAAAAJ;https://scholar.google.com.hk/citations?user=lVhoKNcAAAAJ", "or_profile": "~Ziyue_Wang4;~Chi_Chen1;~Peng_Li2;~Yang_Liu19", "aff": "Tsinghua University;Tsinghua University;Tsinghua University;Tsinghua University", "aff_domain": "tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn", "position": "PhD student;PhD student;Associate Professor;Professor", "bibtex": "@inproceedings{\nwang2023filling,\ntitle={Filling the Image Information Gap for {VQA}: Prompting Large Language Models to Proactively Ask Questions},\nauthor={Ziyue Wang and Chi Chen and Peng Li and Yang Liu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=qo17ZiVnH2}\n}", "github": "", "project": "", "reviewers": "cxzi;Ka9D;QtV8", "site": "https://openreview.net/forum?id=qo17ZiVnH2", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;4", "excitement": "2;4;3", "reproducibility": "3;3;4", "correctness": "3;4;3", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 3.0, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0009-0004-1433-0681;;0000-0003-1374-5979;0000-0002-3087-242X", "linkedin": ";;;", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Tsinghua University", "aff_unique_dep": "", "aff_unique_url": "https://www.tsinghua.edu.cn", "aff_unique_abbr": "THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "qq6ctdUwCX", "title": "Loose lips sink ships: Mitigating Length Bias in Reinforcement Learning from Human Feedback", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Reinforcement learning from human feedback serves as a crucial bridge, aligning large language models with human and societal values.\nThis alignment requires a vast 
corpus of human feedback to learn a reward model, which is subsequently used to finetune language models.\nHowever, we have identified that the reward model often finds shortcuts to bypass its intended objectives, misleadingly assuming that humans prefer longer responses.\nThe emergence of length bias often induces the model to favor longer outputs, yet it doesn't equate to an increase in helpful information within these outputs.\nIn this paper, we propose an innovative solution, applying the Product-of-Experts (PoE) technique to separate reward modeling from the influence of sequence length.\nIn our framework, the main expert concentrates on understanding human intents, while the biased expert targets the identification and capture of length bias. \nTo further enhance the learning of bias, we introduce perturbations into the bias-focused expert, disrupting the flow of semantic information.\nExperimental results validate the effectiveness of our approach, indicating that language model performance is improved, irrespective of sequence length.", "keywords": "Alignment;Large Language Model;Debias", "primary_area": "", "supplementary_material": "", "author": "Wei Shen;Rui Zheng;Wenyu Zhan;Jun Zhao;Shihan Dou;Tao Gui;Qi Zhang;Xuanjing Huang", "authorids": "~Wei_Shen12;~Rui_Zheng1;~Wenyu_Zhan1;~Jun_Zhao5;~Shihan_Dou1;~Tao_Gui1;~Qi_Zhang8;~Xuanjing_Huang1", "gender": ";M;M;M;M;M;F;M", "homepage": "http://github.com/fakerbaby;https://github.com/ruizheng20;;;;http://qizhang.info;https://xuanjing-huang.github.io/;https://shihandou.com/", "dblp": ";;;;135/6973;52/323-1;05/6735-1;282/6213", "google_scholar": "-DlGT8IAAAAJ;https://scholar.google.com.hk/citations?user=7Z0V_SoAAAAJ;;https://scholar.google.com/citations?hl=zh-CN;;XfqR3yYAAAAJ;RGsMgZA4H78C;BM5WHiYAAAAJ", "or_profile": "~Wei_Shen12;~Rui_Zheng1;~Wenyu_Zhan1;~Jun_Zhao5;~Tao_Gui1;~Qi_Zhang8;~Xuanjing_Huang1;~Dou_Shi_Han1", "aff": "Fudan University;Fudan University;Fudan University;Fudan University;Fudan University;Fudan University;Fudan University;Fudan University", "aff_domain": "fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn", "position": "MS student;PhD student;MS student;PhD student;Assistant Professor;Full Professor;Full Professor;MS student", "bibtex": "@inproceedings{\nshen2023loose,\ntitle={Loose lips sink ships: Mitigating Length Bias in Reinforcement Learning from Human Feedback},\nauthor={Wei Shen and Rui Zheng and Wenyu Zhan and Jun Zhao and Shihan Dou and Tao Gui and Qi Zhang and Xuanjing Huang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=qq6ctdUwCX}\n}", "github": "", "project": "", "reviewers": "kTmE;r6wY;K33q", "site": "https://openreview.net/forum?id=qq6ctdUwCX", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;3;3", "excitement": "3;4;3", "reproducibility": "3;3;3", "correctness": "3;4;3", "rating_avg": 3.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.0, "correctness_avg": 3.3333333333333335, "replies_avg": 10, "authors#_avg": 8, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;;0000-0001-9197-9426;", "linkedin": ";;;;;;;", "aff_unique_index": "0;0;0;0;0;0;0;0", "aff_unique_norm": "Fudan University", "aff_unique_dep": "", "aff_unique_url": "https://www.fudan.edu.cn", "aff_unique_abbr": "Fudan", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": 
"0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "qtZI5YDe5d", "title": "UReader: Universal OCR-free Visually-situated Language Understanding with Multimodal Large Language Model", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Text is ubiquitous in our visual world, conveying crucial information, such as in documents, websites, and everyday photographs. In this work, we propose UReader, a first exploration of universal OCR-free visually-situated language understanding based on the Multimodal Large Language Model (MLLM). By leveraging the shallow text recognition ability of the MLLM, we only finetuned 1.2% parameters and the training cost is much lower than previous work following domain-specific pretraining and finetuning paradigms. Concretely, UReader is jointly finetuned on a wide range of Visually-situated Language Understanding tasks via a unified instruction format. To enhance the visual text and semantic understanding, we further apply two auxiliary tasks with the same format, namely text reading and key points generation tasks. We design a shape-adaptive cropping module before the encoder-decoder architecture of MLLM to leverage the frozen low-resolution vision encoder for processing high-resolution images. Without downstream finetuning, our single model achieves state-of-the-art ocr-free performance in 8 out of 10 visually-situated language understanding tasks, across 5 domains: documents, tables, charts, natural images, and webpage screenshots. Codes and instruction-tuning datasets will be released.", "keywords": "OCR-free;visually-situated language understanding;multimodal large language model", "primary_area": "", "supplementary_material": "", "author": "Jiabo Ye;Anwen Hu;Haiyang Xu;Qinghao Ye;Ming Yan;Guohai Xu;Chenliang Li;Junfeng Tian;Qi Qian;Ji Zhang;Qin Jin;Liang He;Xin Alex Lin;Fei Huang", "authorids": "~Jiabo_Ye1;~Anwen_Hu1;~Haiyang_Xu1;~Qinghao_Ye1;~Ming_Yan2;~Guohai_Xu1;~Chenliang_Li2;~Junfeng_Tian1;~Qi_Qian1;~Ji_Zhang3;~Qin_Jin1;~Liang_He2;~Xin_Alex_Lin1;~Fei_Huang1", "gender": "M;M;M;;M;;M;M;;;F;;M;", "homepage": "https://github.com/LukeForeverYoung;;;;;;;;http://qi-qian.com;;https://www.jin-qin.com/index.html;;https://faculty.ecnu.edu.cn/_s16/lx2_6212/main.psp;", "dblp": "304/1336;249/1182.html;;254/3247;51/5332-4.html;205/7621;52/9457;93/1076;05/2084-1;86/1953-11;47/2670;;50/3323-1.html;", "google_scholar": ";FqvDzH8AAAAJ;qZYvce8AAAAJ;ZYOhaGwAAAAJ;uIUfGxYAAAAJ;bS8Ku4MAAAAJ;3P2ZMKcAAAAJ;hUMfP1UAAAAJ;Rp_40_gAAAAJ;cgnuJDUAAAAJ;8UkYbCMAAAAJ;;;", "or_profile": "~Jiabo_Ye1;~Anwen_Hu1;~Haiyang_Xu1;~Qinghao_Ye1;~Ming_Yan2;~Guohai_Xu1;~Chenliang_Li2;~Junfeng_Tian1;~Qi_Qian1;~Ji_Zhang3;~Qin_Jin1;~Liang_He2;~Xin_Alex_Lin1;~Fei_Huang1", "aff": "East China Normal University;Renmin University of China;Alibaba Group;Alibaba Group;Alibaba Group;Alibaba Group;Alibaba Group;Alibaba Group;Alibaba Group;Alibaba Group;Renmin University of China;;East China Normal University;", "aff_domain": "ecnu.edu.cn;ruc.edu.cn;alibaba-inc.com;alibaba-inc.com;alibaba-inc.com;alibaba-inc.com;alibaba-inc.com;alibaba-inc.com;alibaba-inc.com;alibaba-inc.com;ruc.edu.cn;;ecnu.edu.cn;", "position": "PhD student;PhD student;Researcher;Researcher;Instructor;Algorithm Engineer;Researcher;Researcher;Researcher;Senior Staff Engineer;Professor;;Full Professor;", "bibtex": "@inproceedings{\nye2023ureader,\ntitle={{UR}eader: Universal {OCR}-free Visually-situated Language Understanding with Multimodal Large Language Model},\nauthor={Jiabo Ye and Anwen Hu and Haiyang Xu and Qinghao 
Ye and Ming Yan and Guohai Xu and Chenliang Li and Junfeng Tian and Qi Qian and Ji Zhang and Qin Jin and Liang He and Xin Alex Lin and Fei Huang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=qtZI5YDe5d}\n}", "github": "", "project": "", "reviewers": "4B6Y;4gdp;v2FU", "site": "https://openreview.net/forum?id=qtZI5YDe5d", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;3;3", "excitement": "4;2;4", "reproducibility": "3;4;4", "correctness": "4;2;5", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 14, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;0000-0003-4959-8878;;;0000-0003-1759-3255;;;0000-0001-6486-6020;;;", "linkedin": ";;;;;;;;;;qinjin/;;;", "aff_unique_index": "0;1;2;2;2;2;2;2;2;2;1;0", "aff_unique_norm": "East China Normal University;Renmin University of China;Alibaba Group", "aff_unique_dep": ";;", "aff_unique_url": "http://www.ecnu.edu.cn;http://www.ruc.edu.cn;https://www.alibaba.com", "aff_unique_abbr": "ECNU;RUC;Alibaba", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "qvftjm8DNC", "title": "The PEACE-Reviews dataset: Modeling Cognitive Appraisals in Emotion Text Analysis", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Cognitive appraisal plays a pivotal role in deciphering emotions. Recent studies have delved into its significance, yet the interplay between various forms of cognitive appraisal and specific emotions, such as joy and anger, remains an area of exploration in consumption contexts. Our research introduces the PEACE-Reviews dataset, a unique compilation of annotated autobiographical accounts where individuals detail their emotional and appraisal experiences during interactions with personally significant products or services. Focusing on the inherent variability in consumer experiences, this dataset offers an in-depth analysis of participants' psychological traits, their evaluative feedback on purchases, and the resultant emotions. Notably, the PEACE-Reviews dataset encompasses emotion, cognition, individual traits, and demographic data. 
We also introduce preliminary models that predict certain features based on the autobiographical narratives.", "keywords": "cognitive appraisals;emotions;language modeling;computational social science", "primary_area": "", "supplementary_material": "", "author": "Gerard Christopher Yeo;Kokil Jaidka", "authorids": "~Gerard_Christopher_Yeo1;~Kokil_Jaidka1", "gender": "M;F", "homepage": ";https://kokiljaidka.wordpress.com", "dblp": ";62/8212", "google_scholar": ";UvGgJREAAAAJ", "or_profile": "~Gerard_Christopher_Yeo1;~Kokil_Jaidka1", "aff": "National University of Singapore;National University of Singapore", "aff_domain": "nus.edu;nus.edu.sg", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\nyeo2023the,\ntitle={The {PEACE}-Reviews dataset: Modeling Cognitive Appraisals in Emotion Text Analysis},\nauthor={Gerard Christopher Yeo and Kokil Jaidka},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=qvftjm8DNC}\n}", "github": "", "project": "", "reviewers": "Miqh;rMKV;5opM", "site": "https://openreview.net/forum?id=qvftjm8DNC", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;3;5", "excitement": "3;4;4", "reproducibility": "4;4;4", "correctness": "3;3;3", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.0, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-3243-8643;0000-0002-8127-1157", "linkedin": ";", "aff_unique_index": "0;0", "aff_unique_norm": "National University of Singapore", "aff_unique_dep": "", "aff_unique_url": "https://www.nus.edu.sg", "aff_unique_abbr": "NUS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Singapore" }, { "id": "qyvabTsnWg", "title": "Document-level Relationship Extraction by Bidirectional Constraints of Beta Rules", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Document-level Relation Extraction (DocRE) aims to extract relations among entity pairs in documents. Some works introduce logic constraints into DocRE, addressing the issues of opacity and weak logic in original DocRE models. However, they only focus on forward logic constraints and the rules mined in these works often suffer from pseudo rules with high standard-confidence but low support. In this paper, we propose Bidirectional Constraints of Beta Rules (BCBR), a novel logic constraint framework. BCBR first introduces a new rule miner which models rules by beta contribution. Then forward and reverse logic constraints are constructed based on beta rules. Finally, BCBR reconstructs the rule consistency loss by bidirectional constraints to regulate the output of the DocRE model. Experiments show that BCBR outperforms original DocRE models in terms of relation extraction performance ($\\sim$2.7 F1 score) and logical consistency ($\\sim$3.1 logic score). 
Furthermore, BCBR consistently outperforms two other logic constraint frameworks.", "keywords": "Document-level Relation Extraction;Logical Consistency;Beta Distribution;Bidirectional Constraints", "primary_area": "", "supplementary_material": "", "author": "Yichun Liu;Zizhong Zhu;Xiaowang Zhang;Zhiyong Feng;Daoqi Chen;Yaxin Li", "authorids": "~Yichun_Liu2;~Zizhong_Zhu1;~Xiaowang_Zhang2;~Zhiyong_Feng1;~Daoqi_Chen1;~Yaxin_Li5", "gender": "M;M;M;M;;M", "homepage": "https://github.com/15709441910;http://cic.tju.edu.cn/faculty/zhangxiaowang/index.html;http://cic.tju.edu.cn/faculty/zyfeng/index.html;;;", "dblp": ";https://dblp.uni-trier.de/pid/54/1153;https://dblp.uni-trier.de/pid/48/195-2;;;", "google_scholar": ";5pVypA8AAAAJ;https://scholar.google.com/citations?hl=zh-CN;;;", "or_profile": "~Zizhong_Zhu1;~Xiaowang_Zhang2;~Zhiyong_Feng1;~Daoqi_Chen1;~Yaxin_Li5;~Liu_Yichun1", "aff": "Tianjin University;Tianjin University, China;Tianjin University;Tianjin University;Tianjin University;Tianjin University", "aff_domain": "tju.edu.cn;tju.edu.cn;tju.edu.cn;tju.edu.cn;tju.edu.cn;tju.edu.cn", "position": "PhD student;Full Professor;Full Professor;MS student;MS student;MS student", "bibtex": "@inproceedings{\nliu2023documentlevel,\ntitle={Document-level Relationship Extraction by Bidirectional Constraints of Beta Rules},\nauthor={Yichun Liu and Zizhong Zhu and Xiaowang Zhang and Zhiyong Feng and Daoqi Chen and Yaxin Li},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=qyvabTsnWg}\n}", "github": "", "project": "", "reviewers": "4D7h;rFoA;akdh", "site": "https://openreview.net/forum?id=qyvabTsnWg", "pdf_size": 0, "rating": "5;5;5", "confidence": "3;4;3", "excitement": "4;4;4", "reproducibility": "5;3;3", "correctness": "4;4;4", "rating_avg": 5.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 4.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 4.0, "replies_avg": 10, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-3931-3886;0000-0001-8158-7453;0009-0009-0646-5566;0009-0003-2082-1885;0009-0005-9327-5242", "linkedin": ";;;;;", "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "Tianjin University", "aff_unique_dep": "", "aff_unique_url": "http://www.tju.edu.cn", "aff_unique_abbr": "TJU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "qzYtTabDPY", "title": "Revisiting the Optimality of Word Lengths", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Zipf (1935) posited that wordforms are optimized to minimize utterances' communicative costs. Under the assumption that cost is given by an utterance's length, he supported this claim by showing that words' lengths are inversely correlated with their frequencies. Communicative cost, however, can be operationalized in different ways. Piantadosi et al. (2011) claim that cost should be measured as the distance between an utterance's information rate and channel capacity, which we dub the channel capacity hypothesis (CCH) here. Following this logic, they then proposed that a word's length should be proportional to the expected value of its surprisal (negative log-probability in context). In this work, we show that Piantadosi et al.'s derivation does not minimize CCH's cost, but rather a lower bound, which we term CCH-lower. 
We propose a novel derivation, suggesting an improved way to minimize CCH's cost. Under this method, we find that a language's word lengths should instead be proportional to the surprisal's expectation plus its variance-to-mean ratio. Experimentally, we compare these three communicative cost functions: Zipf's, CCH-lower , and CCH. Across 13 languages and several experimental settings, we find that length is better predicted by frequency than either of the other hypotheses. In fact, when surprisal's expectation, or expectation plus variance-to-mean ratio, is estimated using better language models, it leads to worse word length predictions. We take these results as evidence that Zipf's longstanding hypothesis holds.", "keywords": "word length;uniform information density;zipf;law of abbreviation", "primary_area": "", "supplementary_material": "", "author": "Tiago Pimentel;Clara Meister;Ethan Wilcox;Kyle Mahowald;Ryan Cotterell", "authorids": "~Tiago_Pimentel1;~Clara_Meister1;~Ethan_Wilcox1;~Kyle_Mahowald1;~Ryan_Cotterell1", "gender": "M;;M;F;Not Specified", "homepage": "https://tpimentelms.github.io/;https://wilcoxeg.github.io/;https://mahowak.github.io;https://cimeister.github.io/;https://rycolab.io/", "dblp": "203/8292;227/3505;38/11196;245/7485.html;146/4361.html", "google_scholar": "XjZ8NRsAAAAJ;5jzLBBwAAAAJ;XUmFLVUAAAAJ;quJhNH8AAAAJ;DexOqtoAAAAJ", "or_profile": "~Tiago_Pimentel1;~Ethan_Wilcox1;~Kyle_Mahowald1;~Clara_Isabel_Meister1;~Ryan_D_Cotterell1", "aff": "University of Cambridge;Georgetown University;The University of Texas at Austin;Swiss Federal Institute of Technology;Swiss Federal Institute of Technology", "aff_domain": "cam.ac.uk;georgetown.edu;utexas.edu;ethz.ch;ethz.ch", "position": "PhD student;Assistant Professor;Assistant Professor;PhD student;Assistant Professor", "bibtex": "@inproceedings{\npimentel2023revisiting,\ntitle={Revisiting the Optimality of Word Lengths},\nauthor={Tiago Pimentel and Clara Meister and Ethan Wilcox and Kyle Mahowald and Ryan Cotterell},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=qzYtTabDPY}\n}", "github": "", "project": "", "reviewers": "Vf5C;Wi23;zBRX", "site": "https://openreview.net/forum?id=qzYtTabDPY", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;3;4", "excitement": "5;4;5", "reproducibility": "4;4;5", "correctness": "4;4;5", "rating_avg": 5.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 4.666666666666667, "reproducibility_avg": 4.333333333333333, "correctness_avg": 4.333333333333333, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-5128-9890;;0000-0002-3775-4426;", "linkedin": ";;;;", "aff_unique_index": "0;1;2;3;3", "aff_unique_norm": "University of Cambridge;Georgetown University;University of Texas at Austin;Swiss Federal Institute of Technology", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.cam.ac.uk;https://www.georgetown.edu;https://www.utexas.edu;https://www.ethz.ch", "aff_unique_abbr": "Cambridge;GU;UT Austin;ETH Zurich", "aff_campus_unique_index": "0;2", "aff_campus_unique": "Cambridge;;Austin", "aff_country_unique_index": "0;1;1;2;2", "aff_country_unique": "United Kingdom;United States;Switzerland" }, { "id": "r2z3qPltxs", "title": "Counter Turing Test (CT2): AI-Generated Text Detection is Not as Easy as You May Think - Introducing AI Detectability Index (ADI)", "track": "main", "status": "Long Main", "tldr": "", "abstract": "With the 
rise of prolific ChatGPT, the risk and consequences of AI-generated text has increased alarmingly. This triggered a series of events, including an open letter, signed by thousands of researchers and tech leaders in March 2023, demanding a six-month moratorium on the training of AI systems more sophisticated than GPT-4. To address the inevitable question of ownership attribution for AI-generated artifacts, the US Copyright Office released a statement stating that \"if the content is traditional elements of authorship produced by a machine, the work lacks human authorship and the office will not register it for copyright\". Furthermore, both the US and the EU governments have recently drafted their initial proposals regarding the regulatory framework for AI. Given this cynosural spotlight on generative AI, AI-generated text detection (AGTD) has emerged as a topic that has already received immediate attention in research, with some initial methods having been proposed, soon followed by the emergence of techniques to bypass detection. \n \nThis paper introduces the Counter Turing Test (CT2), a benchmark consisting of techniques aiming to offer a comprehensive evaluation of the robustness of existing AGTD techniques. Our empirical findings unequivocally highlight the fragility of the proposed AGTD methods under scrutiny. Amidst the extensive deliberations on policy-making for regulating AI development, it is of utmost importance to assess the detectability of content generated by LLMs. Thus, to establish a quantifiable spectrum facilitating the evaluation and ranking of LLMs according to their detectability levels, we propose the AI Detectability Index (ADI). We conduct a thorough examination of 15 contemporary LLMs, empirically demonstrating that larger LLMs tend to have a lower ADI, indicating they are less detectable compared to smaller LLMs. We firmly believe that ADI holds significant value as a tool for the wider NLP community, with the potential to serve as a rubric in AI-related policy-making.", "keywords": "AI-generated text detection", "primary_area": "", "supplementary_material": "", "author": "Megha Chakraborty;S.M Towhidul Islam Tonmoy;S M Mehedi Zaman;Shreya Gautam;Tanay Kumar;Krish Sharma;Niyar R Barman;Chandan Gupta;Vinija Jain;Aman Chadha;Amit P. 
Sheth;Amitava Das", "authorids": "~Megha_Chakraborty1;~S.M_Towhidul_Islam_Tonmoy1;~S_M_Mehedi_Zaman1;~Shreya_Gautam1;~Tanay_Kumar1;~Krish_Sharma1;~Niyar_R_Barman1;~Chandan_Gupta1;~Vinija_Jain1;~Aman_Chadha1;~Amit_P._Sheth1;~Amitava_Das3", "gender": "F;M;M;F;M;M;M;M;F;M;M;M", "homepage": ";;https://mehedizamane.github.io;;;;https://niyarrbarman.github.io/;;http://vinija.ai;https://aman.ai;http://aiisc.ai/amit;https://amitavadas.com/", "dblp": ";;;156/1607;337/3666;;;;298/1294;55/10360;s/AmitPSheth;", "google_scholar": "Jqq0mHoAAAAJ;3lmZN3gAAAAJ;8zqmyMMAAAAJ;eaRErNwAAAAJ;https://scholar.google.com/citations?hl=en;nEIRii0AAAAJ;;https://scholar.google.com/citations?hl=en;oYaD1NcAAAAJ;gPGQuBQAAAAJ;https://scholar.google.com/citations?hl=en;", "or_profile": "~Megha_Chakraborty1;~S.M_Towhidul_Islam_Tonmoy1;~S_M_Mehedi_Zaman1;~Shreya_Gautam1;~Tanay_Kumar1;~Krish_Sharma1;~Niyar_R_Barman1;~Chandan_Gupta1;~Vinija_Jain1;~Aman_Chadha1;~Amit_P._Sheth1;~Amitava_Das3", "aff": "University of South Carolina, Columbia;University of South Carolina;Islamic University of Technology;Birla Institute of Technology, Mesra;Birla Institute of Technology, Mesra;University of South Carolina;National Institute of Technology Silchar;;Stanford University;Amazon Web Services;University of South Carolina;University of South Carolina", "aff_domain": "uofsc.edu;sc.edu;iutoic-dhaka.edu;bitmesra.ac.in;bitmesra.ac.in;sc.edu;nits.ac.in;;stanford.edu;amazon.com;sc.edu;uofsc.edu", "position": "PhD student;Intern;Undergrad student;Undergrad student;Undergrad student;Intern;Undergrad student;;Researcher;GenAI Science Manager;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nchakraborty2023counter,\ntitle={Counter Turing Test ({CT}2): {AI}-Generated Text Detection is Not as Easy as You May Think - Introducing {AI} Detectability Index ({ADI})},\nauthor={Megha Chakraborty and S.M Towhidul Islam Tonmoy and S M Mehedi Zaman and Shreya Gautam and Tanay Kumar and Krish Sharma and Niyar R Barman and Chandan Gupta and Vinija Jain and Aman Chadha and Amit P. 
Sheth and Amitava Das},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=r2z3qPltxs}\n}", "github": "", "project": "", "reviewers": "6tjm;weMu;WEM6", "site": "https://openreview.net/forum?id=r2z3qPltxs", "pdf_size": 0, "rating": "5;5;5", "confidence": "3;4;3", "excitement": "4;5;5", "reproducibility": "4;4;5", "correctness": "4;5;5", "rating_avg": 5.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 4.666666666666667, "reproducibility_avg": 4.333333333333333, "correctness_avg": 4.666666666666667, "replies_avg": 11, "authors#_avg": 12, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0009-0000-6076-7068;0000-0001-7841-3962;0009-0003-6444-9376;;0009-0007-7001-7480;;0000-0002-2371-3586;;0000-0001-6621-9003;0000-0002-0021-5293;", "linkedin": "megha-chakraborty-9a324b165/;towhidultonmoy/;mehedizamane/;shreyagautamm/;tanay-kumar-60762517b/;krish-sharma-1074b6224/;niyar/;chandan-gupta-18aa5a184/;vinija/;https://linkedin.aman.ai/;amitsheth/;", "aff_unique_index": "0;0;1;2;2;0;3;4;5;0;0", "aff_unique_norm": "University of South Carolina;Islamic University of Technology;Birla Institute of Technology;National Institute of Technology;Stanford University;Amazon", "aff_unique_dep": ";;;;;Amazon Web Services", "aff_unique_url": "https://www.sc.edu;https://www.iut-dhaka.edu.bd;https://www.bitmesra.ac.in;https://www.nits.ac.in;https://www.stanford.edu;https://aws.amazon.com", "aff_unique_abbr": "USC;IUT;BIT Mesra;NIT Silchar;Stanford;AWS", "aff_campus_unique_index": "0;2;2;3;4", "aff_campus_unique": "Columbia;;Mesra;Silchar;Stanford", "aff_country_unique_index": "0;0;1;2;2;0;2;0;0;0;0", "aff_country_unique": "United States;Bangladesh;India" }, { "id": "r3utB5u4zP", "title": "Generating and Evaluating Tests for K-12 Students with Language Model Simulations: A Case Study on Sentence Reading Efficiency", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Developing an educational test can be expensive and time-consuming, as each item must be written by experts and then evaluated by collecting hundreds of student responses. Moreover, many tests require multiple distinct sets of questions administered throughout the school year to closely monitor students' progress, known as parallel tests. In this study, we focus on tests of silent sentence reading efficiency, used to assess students\u2019 reading ability over time. To generate high-quality parallel tests, we propose to fine-tune large language models (LLMs) to simulate how previous students would have responded to unseen items. With these simulated responses, we can estimate each item's difficulty and ambiguity. We first use GPT-4 to generate new test items following a list of expert-developed rules and then apply a fine-tuned LLM to filter the items based on criteria from psychological measurements. We also propose an optimal-transport-inspired technique for generating parallel tests and show the generated tests closely correspond to the original test's difficulty and reliability based on crowdworker responses. 
Our evaluation of a generated test with 234 students from grades 2 to 8 produces test scores highly correlated (r=0.93) to those of a standard test form written by human experts and evaluated across thousands of K-12 students.", "keywords": "language model;student test generation;psychometrics", "primary_area": "", "supplementary_material": "", "author": "Eric Zelikman;Wanjing Anya Ma;Jasmine Elizabeth Tran;Diyi Yang;Jason D Yeatman;Nick Haber", "authorids": "~Eric_Zelikman1;~Wanjing_Anya_Ma1;~Jasmine_Elizabeth_Tran1;~Diyi_Yang2;~Jason_D_Yeatman1;~Nick_Haber1", "gender": "M;F;F;;M;F", "homepage": "https://zelikman.me;;https://cs.stanford.edu/~diyiy/;;https://profiles.stanford.edu/jason-yeatman;", "dblp": "217/2378;;70/11145;179/4983;;", "google_scholar": "V5B8dSUAAAAJ;;j9jhYqQAAAAJ;euNCoVYAAAAJ;;nAWSAbQAAAAJ", "or_profile": "~Eric_Zelikman1;~Jasmine_Elizabeth_Tran1;~Diyi_Yang2;~Nick_Haber1;~Jason_Yeatman1;~Wanjing_Ma1", "aff": "Google;Stanford University;Stanford University;Stanford University;Stanford University;Stanford University", "aff_domain": "google.com;stanford.edu;stanford.edu;stanford.edu;stanford.edu;stanford.edu", "position": "Research Intern;Research Staff;Assistant Professor;Assistant Professor;Associate Professor;PhD student", "bibtex": "@inproceedings{\nzelikman2023generating,\ntitle={Generating and Evaluating Tests for K-12 Students with Language Model Simulations: A Case Study on Sentence Reading Efficiency},\nauthor={Eric Zelikman and Wanjing Anya Ma and Jasmine Elizabeth Tran and Diyi Yang and Jason D Yeatman and Nick Haber},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=r3utB5u4zP}\n}", "github": "", "project": "", "reviewers": "CGAU;dxrP;qWhv", "site": "https://openreview.net/forum?id=r3utB5u4zP", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;4;3", "excitement": "3;4;3", "reproducibility": "3;4;4", "correctness": "4;4;4", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 4.0, "replies_avg": 10, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0000-0001-8804-7804;;0000-0001-5761-8707", "linkedin": "ericzelikman/;JasmineET;;;;", "aff_unique_index": "0;1;1;1;1;1", "aff_unique_norm": "Google;Stanford University", "aff_unique_dep": "Google;", "aff_unique_url": "https://www.google.com;https://www.stanford.edu", "aff_unique_abbr": "Google;Stanford", "aff_campus_unique_index": "0;1;1;1;1;1", "aff_campus_unique": "Mountain View;Stanford", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "r65IWQmsHF", "title": "Understanding HTML with Large Language Models", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Large language models (LLMs) have shown exceptional performance on a variety of natural language tasks. Yet, their capabilities for HTML understanding \u2013 i.e., parsing the raw HTML of a webpage, with applications to automation of web-based tasks, crawling, and browser-assisted retrieval \u2013 have not been fully explored. We contribute HTML understanding models (fine-tuned LLMs) and an in-depth analysis of their capabilities under three tasks: (i) Semantic Classification of HTML elements, (ii) Description Generation for HTML inputs, and (iii) Autonomous Web Navigation of HTML pages. 
While previous work has developed dedicated architectures and training procedures for HTML understanding, we show that LLMs pretrained on standard natural language corpora transfer remarkably well to HTML understanding tasks. For instance, when fine-tuned on data from the MiniWoB benchmark, LLMs successfully complete 50% more tasks using 192x less data compared to the previous best supervised model. We create and open-source a large-scale HTML dataset distilled and auto-labeled from CommonCrawl", "keywords": "html understanding;web navigation;large language models;semantic classification;description generation", "primary_area": "", "supplementary_material": "", "author": "Izzeddin Gur;Ofir Nachum;Yingjie Miao;Mustafa Safdari;Austin V Huang;Aakanksha Chowdhery;Sharan Narang;Noah Fiedel;Aleksandra Faust", "authorids": "~Izzeddin_Gur1;~Ofir_Nachum1;~Yingjie_Miao1;~Mustafa_Safdari1;~Austin_V_Huang1;~Aakanksha_Chowdhery1;~Sharan_Narang1;~Noah_Fiedel1;~Aleksandra_Faust1", "gender": ";M;;M;;;M;;F", "homepage": ";https://scholar.google.com/citations?user=C-ZlBWMAAAAJ&hl=en;;;https://github.com/austinvhuang;http://www.achowdhery.com;;;http://www.afaust.info", "dblp": "188/9027;;22/10043;05/7184;;;;204/3399;135/8420", "google_scholar": "qS_ugJAAAAAJ;C-ZlBWMAAAAJ;ScqM05wAAAAJ;;;7KDSCpQAAAAJ;CWOixywAAAAJ;;RK72t68AAAAJ", "or_profile": "~Izzeddin_Gur1;~Ofir_Nachum1;~Yingjie_Miao1;~Mustafa_Safdari1;~Austin_V_Huang1;~Aakanksha_Chowdhery1;~Sharan_Narang1;~Noah_Fiedel1;~Aleksandra_Faust1", "aff": "Google;OpenAI;Google DeepMind;Research, Google;;Google;Meta;Google;Google Brain", "aff_domain": "google.com;openai.com;google.com;research.google.com;;google.com;meta.com;google.com;google.com", "position": "Research Scientist;Researcher;Software Engineer;Researcher;;Researcher;Researcher;Director, Research & Engineering;Principal Researcher", "bibtex": "@inproceedings{\ngur2023understanding,\ntitle={Understanding {HTML} with Large Language Models},\nauthor={Izzeddin Gur and Ofir Nachum and Yingjie Miao and Mustafa Safdari and Austin V Huang and Aakanksha Chowdhery and Sharan Narang and Noah Fiedel and Aleksandra Faust},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=r65IWQmsHF}\n}", "github": "", "project": "", "reviewers": "LsyH;NHjz;X9Ao", "site": "https://openreview.net/forum?id=r65IWQmsHF", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;4", "excitement": "4;4;3", "reproducibility": "4;4;4", "correctness": "3;4;3", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 9, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0009-0002-1604-8685;;;;;0000-0002-3268-8685", "linkedin": ";;yingjiemiao/;mustafasafdari/;austin-huang-74a75422/;;;;aleksandrafaust", "aff_unique_index": "0;1;0;0;0;2;0;0", "aff_unique_norm": "Google;OpenAI;Meta", "aff_unique_dep": "Google;;Meta Platforms, Inc.", "aff_unique_url": "https://www.google.com;https://openai.com;https://meta.com", "aff_unique_abbr": "Google;OpenAI;Meta", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;0;1;0;0;0;0;0", "aff_country_unique": "United States;United Kingdom" }, { "id": "rBfVlElyVW", "title": "MAGNIFICo: Evaluating the In-Context Learning Ability of Large Language Models to Generalize to Novel Interpretations", "track": "main", "status": "Long Main", 
"tldr": "", "abstract": "Humans possess a remarkable ability to assign novel interpretations to linguistic expressions, enabling them to learn new words and understand community-specific connotations. However, Large Language Models (LLMs) have a knowledge cutoff and are costly to finetune repeatedly. Therefore, it is crucial for LLMs to learn novel interpretations in-context. In this paper, we systematically analyse the ability of LLMs to acquire novel interpretations using in-context learning. To facilitate our study, we introduce MAGNIFICo, an evaluation suite implemented within a text-to-SQL semantic parsing framework that incorporates diverse tokens and prompt settings to simulate real-world complexity. Experimental results on MAGNIFICo demonstrate that LLMs exhibit a surprisingly robust capacity for comprehending novel interpretations from natural language descriptions as well as from discussions within long conversations. Nevertheless, our findings also highlight the need for further improvements, particularly when interpreting unfamiliar words or when composing multiple novel interpretations simultaneously in the same example. Additionally, our analysis uncovers the semantic predispositions in LLMs and reveals the impact of recency bias for information presented in long contexts.", "keywords": "Benchmarking LLMs;Compositional Generalization;In-Context Learning", "primary_area": "", "supplementary_material": "", "author": "Arkil Patel;Satwik Bhattamishra;Siva Reddy;Dzmitry Bahdanau", "authorids": "~Arkil_Patel1;~Satwik_Bhattamishra1;~Siva_Reddy1;~Dzmitry_Bahdanau1", "gender": "M;M;M;M", "homepage": "https://arkilpatel.github.io/;https://satwikb.com/;http://sivareddy.in;", "dblp": "254/5212;242/4259;64/8153;151/6504", "google_scholar": "-5goVAsAAAAJ;https://scholar.google.com/citations?hl=en;;https://scholar.google.ca/citations?user=Nq0dVMcAAAAJ", "or_profile": "~Arkil_Patel1;~Satwik_Bhattamishra1;~Siva_Reddy1;~Dzmitry_Bahdanau1", "aff": "Allen Institute for Artificial Intelligence;University of Oxford;Mila, McGill University;ServiceNow Research", "aff_domain": "allenai.org;ox.ac.uk;mila.quebec;servicenow.com", "position": "Intern;PhD student;Assistant Professor;Research Scientist", "bibtex": "@inproceedings{\npatel2023magnifico,\ntitle={{MAGNIFIC}o: Evaluating the In-Context Learning Ability of Large Language Models to Generalize to Novel Interpretations},\nauthor={Arkil Patel and Satwik Bhattamishra and Siva Reddy and Dzmitry Bahdanau},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=rBfVlElyVW}\n}", "github": "", "project": "", "reviewers": "AoAT;9Hzn;jQoo;wHLA", "site": "https://openreview.net/forum?id=rBfVlElyVW", "pdf_size": 0, "rating": "5;5;5;5", "confidence": "4;3;4;3", "excitement": "4;4;4;3", "reproducibility": "3;4;5;3", "correctness": "4;4;5;4", "rating_avg": 5.0, "confidence_avg": 3.5, "excitement_avg": 3.75, "reproducibility_avg": 3.75, "correctness_avg": 4.25, "replies_avg": 14, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;", "linkedin": "arkil-patel;;;", "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Allen Institute for Artificial Intelligence;University of Oxford;McGill University;ServiceNow", "aff_unique_dep": ";;Mila;Research", "aff_unique_url": "https://allenai.org;https://www.ox.ac.uk;https://www.mcgill.ca;https://www.servicenow.com", "aff_unique_abbr": "AI2;Oxford;McGill;ServiceNow", "aff_campus_unique_index": "1", 
"aff_campus_unique": ";Montreal", "aff_country_unique_index": "0;1;2;0", "aff_country_unique": "United States;United Kingdom;Canada" }, { "id": "rBrzSCruKl", "title": "Promoting Topic Coherence and Inter-Document Consorts in Multi-Document Summarization via Simplicial Complex and Sheaf Graph", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Multi-document Summarization (MDS) characterizes compressing information from multiple source documents to its succinct summary. An ideal summary should encompass all topics and accurately model cross-document relations expounded upon in the source documents. However, existing systems either impose constraints on the length of tokens during the encoding or falter in capturing the intricate cross-document relationships. These limitations impel the systems to produce summaries that are non-factual and unfaithful, thereby imparting an unfair comprehension of the topic to the readers. To counter these limitations and promote the information equivalence between the source document and generated summary, we propose FIBER, a novel encoder-decoder model that uses pre-trained BART to comprehensively analyze linguistic nuances, simplicial complex layer to apprehend inherent properties that transcend pairwise associations and sheaf graph attention to effectively capture the heterophilic properties. We benchmark FIBER with eleven baselines over four widely-used MDS datasets -- Multinews, CQASumm, DUC and Opinosis, and show that FIBER achieves consistent performance improvement across all the evaluation metrics (syntactical, semantical and faithfulness). We corroborate these improvements further through qualitative human evaluation.", "keywords": "multi document summarization;abstractive summarization", "primary_area": "", "supplementary_material": "", "author": "Yash Kumar Atri;Arun Iyer;Tanmoy Chakraborty;Vikram Goyal", "authorids": "~Yash_Kumar_Atri1;~Arun_Iyer1;~Tanmoy_Chakraborty2;~Vikram_Goyal1", "gender": "M;M;M;M", "homepage": "https://yashkumaratri.github.io;;http://tanmoychak.com;https://www.iiitd.ac.in/vikram/", "dblp": "275/9924;262/6555;65/2136-2.html;70/6404.html", "google_scholar": "sTyStXIAAAAJ;https://scholar.google.co.in/citations?user=Ngm0j_EAAAAJ;https://scholar.google.co.in/citations?user=C5S9JnIAAAAJ;https://scholar.google.com.tw/citations?user=YJaVmSwAAAAJ", "or_profile": "~Yash_Kumar_Atri1;~Arun_Iyer1;~Tanmoy_Chakraborty2;~Vikram_Goyal1", "aff": "Indraprastha Institute of Information Technology, Delhi, India;Microsoft;Indian Institute of Technology, Delhi;Indraprastha Institute of Information Technology, Delhi", "aff_domain": "iiitd.ac.in;microsoft.com;iitd.ac.in;iiitd.ac.in", "position": "PhD student;Principal Researcher;Associate Professor;Full Professor", "bibtex": "@inproceedings{\natri2023promoting,\ntitle={Promoting Topic Coherence and Inter-Document Consorts in Multi-Document Summarization via Simplicial Complex and Sheaf Graph},\nauthor={Yash Kumar Atri and Arun Iyer and Tanmoy Chakraborty and Vikram Goyal},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=rBrzSCruKl}\n}", "github": "", "project": "", "reviewers": "EADd;W8oR;PARH", "site": "https://openreview.net/forum?id=rBrzSCruKl", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;5;1", "excitement": "3;4;2", "reproducibility": "3;5;4", "correctness": "4;4;4", "rating_avg": 4.0, "confidence_avg": 3.0, "excitement_avg": 3.0, "reproducibility_avg": 4.0, "correctness_avg": 4.0, 
"replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0003-4992-0505;0000-0001-7377-7599;0000-0002-0210-0369;0000-0003-0769-6381", "linkedin": "yashkumaratri/;iyerarunshankar/;tanmoy-chakraborty-89553324/;vikram-goyal-7a684213/", "aff_unique_index": "0;1;2;0", "aff_unique_norm": "Indraprastha Institute of Information Technology;Microsoft;Indian Institute of Technology Delhi", "aff_unique_dep": ";Microsoft Corporation;", "aff_unique_url": "http://www.iiitd.ac.in;https://www.microsoft.com;https://www.iitdelhi.ac.in", "aff_unique_abbr": "IIIT-D;Microsoft;IIT Delhi", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Delhi;", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "India;United States" }, { "id": "rDuv0LGf3T", "title": "Prompting ChatGPT in MNER: Enhanced Multimodal Named Entity Recognition with Auxiliary Refined Knowledge", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Multimodal Named Entity Recognition (MNER) on social media aims to enhance textual entity prediction by incorporating image-based clues. Existing studies mainly focus on maximizing the utilization of pertinent image information or incorporating external knowledge from explicit knowledge bases. However, these methods either neglect the necessity of providing the model with external knowledge, or encounter issues of high redundancy in the retrieved knowledge. In this paper, we present PGIM --- a two-stage framework that aims to leverage ChatGPT as an implicit knowledge base and enable it to heuristically generate auxiliary knowledge for more efficient entity prediction. Specifically, PGIM contains a Multimodal Similar Example Awareness module that selects suitable examples from a small number of predefined artificial samples. These examples are then integrated into a formatted prompt template tailored to the MNER and guide ChatGPT to generate auxiliary refined knowledge. Finally, the acquired knowledge is integrated with the original text and fed into a downstream model for further processing. 
Extensive experiments show that PGIM outperforms state-of-the-art methods on two classic MNER datasets and exhibits a stronger robustness and generalization capability.", "keywords": "Multimodal Named Entity Recognition;Information Extraction;Large Language Model", "primary_area": "", "supplementary_material": "", "author": "Jinyuan Li;Han Li;Zhuo Pan;Di Sun;Jiahao Wang;Wenkun Zhang;Gang Pan", "authorids": "~Jinyuan_Li1;~Han_Li14;~Zhuo_Pan2;~Di_Sun1;~Jiahao_Wang11;~Wenkun_Zhang1;~Gang_Pan3", "gender": ";;F;;;M;M", "homepage": "https://sites.google.com/view/jinyuanli;https://github.com/hacker-han;https://blog.csdn.net/p561234?spm=1018.2226.3001.5343;;https://wangjh9953.github.io/wjh.github.io/;https://github.com/ChuengMankwan;http://cic.tju.edu.cn/faculty/gpan/index.html", "dblp": ";;;;;;86/4183-2", "google_scholar": "https://scholar.google.com/citations?hl=en;;;;;;", "or_profile": "~Jinyuan_Li1;~Han_Li14;~Zhuo_Pan2;~Di_Sun1;~Jiahao_Wang11;~Wenkun_Zhang1;~Gang_Pan3", "aff": "Tianjin University;Taiyuan University of Technology;;;Tianjin University;University of Copenhagen ;Tianjin University", "aff_domain": "tju.edu.cn;tyut.edu.cn;;;tju.edu.cn;ku.dk;tju.edu.cn", "position": "MS student;MS student;;;PhD student;MS student;Associate Professor", "bibtex": "@inproceedings{\nli2023prompting,\ntitle={Prompting Chat{GPT} in {MNER}: Enhanced Multimodal Named Entity Recognition with Auxiliary Refined Knowledge},\nauthor={Jinyuan Li and Han Li and Zhuo Pan and Di Sun and Jiahao Wang and Wenkun Zhang and Gang Pan},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=rDuv0LGf3T}\n}", "github": "", "project": "", "reviewers": "hWkn;eSeH;KB7s", "site": "https://openreview.net/forum?id=rDuv0LGf3T", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;3;4", "excitement": "3;3;3", "reproducibility": "5;3;4", "correctness": "3;3;3", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.0, "reproducibility_avg": 4.0, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;0009-0000-5496-9720;;0000-0003-2155-4689", "linkedin": ";;;;;;", "aff_unique_index": "0;1;0;2;0", "aff_unique_norm": "Tianjin University;Taiyuan University of Technology;University of Copenhagen", "aff_unique_dep": ";;", "aff_unique_url": "http://www.tju.edu.cn;http://www.tyut.edu.cn/;https://www.ku.dk", "aff_unique_abbr": "TJU;TYUT;UCPH", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;0", "aff_country_unique": "China;Denmark" }, { "id": "rG3QZA7JXV", "title": "CRT-QA: A Dataset of Complex Reasoning Question Answering over Tabular Data", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Large language models (LLMs) show powerful reasoning abilities on various text-based tasks. However, their reasoning capability on structured data such as tables has not been systematically explored. In this work, we first establish a comprehensive taxonomy of reasoning and operation types for tabular data analysis. 
Then, we construct a complex reasoning QA dataset over tabular data, named CRT-QA dataset (Complex Reasoning QA over Tabular data), with the following unique features: (1) it is the first Table QA dataset with multi-step operation and informal reasoning; (2) it contains fine-grained annotations on questions' directness, composition types of sub-questions, and human reasoning paths which can be used to conduct a thorough investigation on LLMs' reasoning ability; (3) it contains a collection of unanswerable and indeterminate questions that commonly arise in real-world situations. We further introduce an efficient and effective tool-augmented method, named ARC (Auto-exemplar-guided Reasoning with Code), to use external tools such as Pandas to solve table reasoning tasks without handcrafted demonstrations. The experiment results show that CRT-QA presents a strong challenge for baseline methods and ARC achieves the best result.", "keywords": "Table QA;Table analysis;Large language model reasoning;Large language model with tool-use", "primary_area": "", "supplementary_material": "", "author": "Zhehao Zhang;Xitao Li;Yan Gao;Jian-Guang Lou", "authorids": "~Zhehao_Zhang1;~Xitao_Li1;~Yan_Gao7;~Jian-Guang_Lou1", "gender": "M;M;;M", "homepage": "https://zzh-sjtu.github.io/zhehaozhang.github.io/;;;https://www.microsoft.com/en-us/research/people/jlou/", "dblp": "223/7963.html;;;37/1917", "google_scholar": "QG-BAGwAAAAJ;;;alDxINIAAAAJ", "or_profile": "~Zhehao_Zhang1;~Xitao_Li1;~Yan_Gao7;~Jian-Guang_Lou1", "aff": "Shanghai Jiaotong University;Xi\u2019an Jiaotong University ;;Microsoft Research Asia", "aff_domain": "sjtu.edu.cn;xjtu.edu;;microsoft.com", "position": "Undergrad student;Undergrad student;;Principal Researcher", "bibtex": "@inproceedings{\nzhang2023crtqa,\ntitle={{CRT}-{QA}: A Dataset of Complex Reasoning Question Answering over Tabular Data},\nauthor={Zhehao Zhang and Xitao Li and Yan Gao and Jian-Guang Lou},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=rG3QZA7JXV}\n}", "github": "", "project": "", "reviewers": "dBCv;ejgp;r1r7", "site": "https://openreview.net/forum?id=rG3QZA7JXV", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;4;4", "excitement": "4;4;4", "reproducibility": "3;5;5", "correctness": "4;4;4", "rating_avg": 5.0, "confidence_avg": 4.0, "excitement_avg": 4.0, "reproducibility_avg": 4.333333333333333, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;", "linkedin": ";;;", "aff_unique_index": "0;1;2", "aff_unique_norm": "Shanghai Jiao Tong University;Xi'an Jiao Tong University;Microsoft", "aff_unique_dep": ";;Research", "aff_unique_url": "https://www.sjtu.edu.cn;https://www.xjtu.edu.cn;https://www.microsoft.com/en-us/research/group/asia", "aff_unique_abbr": "SJTU;XJTU;MSR Asia", "aff_campus_unique_index": "1", "aff_campus_unique": ";Asia", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "rHjZFQvj9k", "title": "Norm of Word Embedding Encodes Information Gain", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Distributed representations of words encode lexical semantic information, but what type of information is encoded and how? 
Focusing on the skip-gram with negative-sampling method, we found that the squared norm of static word embedding encodes the information gain conveyed by the word; the information gain is defined by the Kullback-Leibler divergence of the co-occurrence distribution of the word to the unigram distribution.\nOur findings are explained by the theoretical framework of the exponential family of probability distributions and confirmed through precise experiments that remove spurious correlations arising from word frequency. This theory also extends to contextualized word embeddings in language models or any neural networks with the softmax output layer.\nWe also demonstrate that both the KL divergence and the squared norm of embedding provide a useful metric of the informativeness of a word in tasks such as keyword extraction, proper-noun discrimination, and hypernym discrimination.", "keywords": "Word embedding;Euclidean norm;Skip-gram with Negative Sampling;Softmax function;Kullback-Leibler divergence;Information geometry;Exponential family of probability distributions", "primary_area": "", "supplementary_material": "", "author": "Momose Oyama;Sho Yokoi;Hidetoshi Shimodaira", "authorids": "~Momose_Oyama2;~Sho_Yokoi1;~Hidetoshi_Shimodaira1", "gender": "M;;M", "homepage": "https://momoseoyama.github.io/;http://www.cl.ecei.tohoku.ac.jp/~yokoi/;http://stat.sys.i.kyoto-u.ac.jp/", "dblp": "336/5078.html;184/8316;19/3393", "google_scholar": "https://scholar.google.co.jp/citations?user=NWFbcG4AAAAJ;https://scholar.google.co.jp/citations?user=EW2QPKoAAAAJ;LvoIaIsAAAAJ", "or_profile": "~Momose_Oyama2;~Sho_Yokoi1;~Hidetoshi_Shimodaira1", "aff": "Kyoto University;Tohoku University;RIKEN", "aff_domain": "i.kyoto-u.ac.jp;tohoku.ac.jp;riken.jp", "position": "MS student;Assistant Professor;Researcher", "bibtex": "@inproceedings{\noyama2023norm,\ntitle={Norm of Word Embedding Encodes Information Gain},\nauthor={Momose Oyama and Sho Yokoi and Hidetoshi Shimodaira},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=rHjZFQvj9k}\n}", "github": "", "project": "", "reviewers": "Jf3f;w5CX;Lew5", "site": "https://openreview.net/forum?id=rHjZFQvj9k", "pdf_size": 0, "rating": "5;5;5", "confidence": "3;4;2", "excitement": "3;3;4", "reproducibility": "4;5;4", "correctness": "4;4;4", "rating_avg": 5.0, "confidence_avg": 3.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.333333333333333, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0009-0002-4437-5245;0000-0002-3371-7724", "linkedin": ";shoyokoi/;shimo/", "aff_unique_index": "0;1;2", "aff_unique_norm": "Kyoto University;Tohoku University;RIKEN", "aff_unique_dep": ";;", "aff_unique_url": "https://www.kyoto-u.ac.jp;https://www.tohoku.ac.jp;https://www.riken.jp", "aff_unique_abbr": "Kyoto U;Tohoku U;RIKEN", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Japan" }, { "id": "rI7ebWPRLr", "title": "Efficient Long-Range Transformers: You Need to Attend More, but Not Necessarily at Every Layer", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Pretrained transformer models have demonstrated remarkable performance across various natural language processing tasks. These models leverage the attention mechanism to capture long- and short-range dependencies in the sequence. 
However, the (full) attention mechanism incurs high computational cost -- quadratic in the sequence length, which is not affordable in tasks with long sequences, e.g., inputs with 8k tokens. Although sparse attention can be used to improve computational efficiency, as suggested in existing work, it has limited modeling capacity and often fails to capture complicated dependencies in long sequences. To tackle this challenge, we propose MASFormer, an easy-to-implement transformer variant with mixed attention spans. Specifically, MASFormer is equipped with full attention to capture long-range dependencies, but only at a small number of layers. For the remaining layers, MASformer only employs sparse attention to capture short-range dependencies. Our experiments on natural language modeling and generation tasks show that a decoder-only MASFormer model of 1.3B parameters can achieve competitive performance to vanilla transformers with full attention while significantly reducing computational cost (up to 75\\%). Additionally, we investigate the effectiveness of continual training with long sequence data and how sequence length impacts downstream generation performance, which may be of independent interest.", "keywords": "Long-Range Transformer;Attention Mechanism;Mixed Attention Span", "primary_area": "", "supplementary_material": "", "author": "Qingru Zhang;Dhananjay Ram;Cole Hawkins;Sheng Zha;Tuo Zhao", "authorids": "~Qingru_Zhang2;~Dhananjay_Ram1;~Cole_Hawkins1;~Sheng_Zha1;~Tuo_Zhao2", "gender": "M;;;M;", "homepage": "https://qingruzhang.github.io/;;;https://github.com/szha;", "dblp": "228/6749;173/6604;;218/5471;", "google_scholar": "7YM-faYAAAAJ;yyK_4zMAAAAJ;;;", "or_profile": "~Qingru_Zhang2;~Dhananjay_Ram1;~Cole_Hawkins1;~Sheng_Zha1;~Tuo_Zhao2", "aff": "Georgia Institute of Technology;Amazon;;Amazon;", "aff_domain": "gatech.edu;amazon.com;;amazon.com;", "position": "PhD student;Researcher;;Researcher;", "bibtex": "@inproceedings{\nzhang2023efficient,\ntitle={Efficient Long-Range Transformers: You Need to Attend More, but Not Necessarily at Every Layer},\nauthor={Qingru Zhang and Dhananjay Ram and Cole Hawkins and Sheng Zha and Tuo Zhao},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=rI7ebWPRLr}\n}", "github": "", "project": "", "reviewers": "THaz;xgsE;F2Z8", "site": "https://openreview.net/forum?id=rI7ebWPRLr", "pdf_size": 0, "rating": "3;3;3", "confidence": "5;4;3", "excitement": "3;4;4", "reproducibility": "5;4;5", "correctness": "4;4;4", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.666666666666667, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;", "linkedin": "qingru-zhang-4b789a187;;;shengzha/;", "aff_unique_index": "0;1;1", "aff_unique_norm": "Georgia Institute of Technology;Amazon", "aff_unique_dep": ";Amazon.com, Inc.", "aff_unique_url": "https://www.gatech.edu;https://www.amazon.com", "aff_unique_abbr": "Georgia Tech;Amazon", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "rIc17Kziiq", "title": "Larger Probes Tell a Different Story: Extending Psycholinguistic Datasets Via In-Context Learning", "track": "main", "status": "Short Main", "tldr": "", "abstract": "Language model probing is often used to test specific capabilities of models. 
However, conclusions from such studies may be limited when the probing benchmarks are small and lack statistical power. In this work, we introduce new, larger datasets for negation (NEG-1500-SIMP) and role reversal (ROLE-1500) inspired by psycholinguistic studies. We dramatically extend existing NEG-136 and ROLE-88 benchmarks using GPT3, increasing their size from 18 and 44 sentence pairs to 750 each. We also create another version of the extended negation dataset (NEG-1500-SIMP-TEMP) using template-based generation. It consists of 770 sentence pairs. We evaluate 22 models on the extended datasets, seeing model performance dip 20-57% compared to the original smaller benchmarks. We observe high levels of negation sensitivity in models like BERT and ALBERT, demonstrating that previous findings might have been skewed due to smaller test sets. Finally, we observe that while GPT3 generated all the examples in ROLE-1500, it is only able to solve 24.6% of them during probing. The datasets and code are available on GitHub.", "keywords": "Psycholinguistic datasets;Negation;Role Reversal;Larger Dataset;ICL", "primary_area": "", "supplementary_material": "", "author": "Namrata Shivagunde;Vladislav Lialin;Anna Rumshisky", "authorids": "~Namrata_Shivagunde1;~Vladislav_Lialin1;~Anna_Rumshisky1", "gender": "F;;Not Specified", "homepage": "https://text-machine.cs.uml.edu/lab2/people/nshivagunde/;http://vladlialin.com;http://text-machine.cs.uml.edu", "dblp": "320/5719;https://dblp.uni-trier.de/pid/251/5456;63/873", "google_scholar": "RWNUtkkAAAAJ;B1Ijov0AAAAJ;https://scholar.google.com.tw/citations?user=_Q1uzVYAAAAJ", "or_profile": "~Namrata_Shivagunde1;~Vladislav_Lialin1;~Anna_Rumshisky1", "aff": "Department of Computer Science, University of Massachusetts at Lowell;University of Massachusetts, Lowell;University of Massachusetts, Lowell", "aff_domain": "cs.umass.edu;uml.edu;uml.edu", "position": "PhD student;PhD student;Associate Professor", "bibtex": "@inproceedings{\nshivagunde2023larger,\ntitle={Larger Probes Tell a Different Story: Extending Psycholinguistic Datasets Via In-Context Learning},\nauthor={Namrata Shivagunde and Vladislav Lialin and Anna Rumshisky},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=rIc17Kziiq}\n}", "github": "", "project": "", "reviewers": "Vfwk;qAnt;yjSX", "site": "https://openreview.net/forum?id=rIc17Kziiq", "pdf_size": 0, "rating": "5;5;5", "confidence": "5;5;3", "excitement": "4;5;3", "reproducibility": "4;5;4", "correctness": "4;4;3", "rating_avg": 5.0, "confidence_avg": 4.333333333333333, "excitement_avg": 4.0, "reproducibility_avg": 4.333333333333333, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": "namrata-shivagunde-b5823ba3/;vlialin/;", "aff_unique_index": "0;1;1", "aff_unique_norm": "University of Massachusetts at Lowell;University of Massachusetts Lowell", "aff_unique_dep": "Department of Computer Science;", "aff_unique_url": "https://www.uml.edu;https://www.uml.edu", "aff_unique_abbr": "UMass Lowell;UMass Lowell", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Lowell", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "rJXYb7D4ck", "title": "Tagging-Assisted Generation Model with Encoder and Decoder Supervision for Aspect Sentiment Triplet Extraction", "track": "main", "status": "Long Main", "tldr": "",
"abstract": "ASTE (Aspect Sentiment Triplet Extraction) has gained increasing attention. Recent advancements in the ASTE task have been primarily driven by Natural Language Generation-based (NLG) approaches. However, most NLG methods overlook the supervision of the encoder-decoder hidden representations and fail to fully utilize the semantic information provided by the labels to enhance supervision. These limitations can hinder the extraction of implicit aspects and opinions. To address these challenges, we propose a tagging-assisted generation model with encoder and decoder supervision (TAGS), which enhances the supervision of the encoder and decoder through multiple-perspective tagging assistance and label semantic representations.\n Specifically, TAGS enhances the generation task by integrating an additional sequence tagging task, which improves the encoder's capability to distinguish the words of triplets. Moreover, it utilizes sequence tagging probabilities to guide the decoder, improving the generated content's quality.\n Furthermore, TAGS employs a self-decoding process for labels to acquire the semantic representations of the labels and aligns the decoder's hidden states with these semantic representations, thereby achieving enhanced semantic supervision for the decoder's hidden states. Extensive experiments on various public benchmarks demonstrate that TAGS achieves state-of-the-art performance.", "keywords": "ABSA; ASTE; sentiment analysis; generation seq2seq model; sequence tagging; semantic alignment", "primary_area": "", "supplementary_material": "", "author": "Luo Xianlong;Meng Yang;Yihao Wang", "authorids": "~Luo_Xianlong1;~Meng_Yang5;~Yihao_Wang1", "gender": "M;M;M", "homepage": "https://github.com/lxianl455;http://www.smartllv.com/;https://github.com/xbdxwyh?tab=repositories", "dblp": ";44/2761-1;", "google_scholar": ";https://scholar.google.com/citations?hl=zh-CN;", "or_profile": "~Luo_Xianlong1;~Meng_Yang5;~Yihao_Wang1", "aff": "sun yet sun;SUN YAT-SEN UNIVERSITY, School of Computer Science and Engineering;Sun Yat-Sen University", "aff_domain": "sysu.edu;sysu.edu.cn;mail.sysu.edu.cn", "position": "MS student;Associate Professor;PhD student", "bibtex": "@inproceedings{\nxianlong2023taggingassisted,\ntitle={Tagging-Assisted Generation Model with Encoder and Decoder Supervision for Aspect Sentiment Triplet Extraction},\nauthor={Luo Xianlong and Meng Yang and Yihao Wang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=rJXYb7D4ck}\n}", "github": "", "project": "", "reviewers": "Uz23;3TTV;Gsym", "site": "https://openreview.net/forum?id=rJXYb7D4ck", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;3;4", "excitement": "4;4;4", "reproducibility": "4;3;4", "correctness": "4;4;4", "rating_avg": 5.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 4.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-1624-3707;;0009-0008-1786-2738", "linkedin": ";;", "aff_unique_index": "1;1", "aff_unique_norm": ";Sun Yat-sen University", "aff_unique_dep": ";School of Computer Science and Engineering", "aff_unique_url": ";http://www.sysu.edu.cn/", "aff_unique_abbr": ";SYSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "1;1", "aff_country_unique": ";China" }, { "id": "rJhk7Fpnvh", "title": "Sources of Hallucination by Large 
Language Models on Inference Tasks", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Large Language Models (LLMs) are claimed to be capable of Natural Language Inference (NLI), necessary for applied tasks like question answering and summarization. We present a series of behavioral studies on several LLM families (LLaMA, GPT-3.5, and PaLM) which probe their behavior using controlled experiments. We establish two biases originating from pretraining which predict much of their behavior, and show that these are major sources of hallucination in generative LLMs. First, memorization at the level of sentences: we show that, regardless of the premise, models falsely label NLI test samples as entailing when the hypothesis is attested in training data, and that entities are used as \"indices\" to access the memorized data. Second, statistical patterns of usage learned at the level of corpora: we further show a similar effect when the premise predicate is less frequent than that of the hypothesis in the training data, a bias following from previous studies. We demonstrate that LLMs perform significantly worse on NLI test samples which do not conform to these biases than those which do, and we offer these as valuable controls for future LLM evaluation.", "keywords": "LLM;LM;language model;hallucination;natural language inference;NLI;entailment;directional;attestation;relative frequency;predicate", "primary_area": "", "supplementary_material": "", "author": "Nick McKenna;Tianyi Li;Liang Cheng;Mohammad Javad Hosseini;Mark Johnson;Mark Steedman", "authorids": "~Nick_McKenna1;~Tianyi_Li2;~Liang_Cheng3;~Mohammad_Javad_Hosseini2;~Mark_Johnson8;~Mark_Steedman1", "gender": "M;M;M;M;M;M", "homepage": "https://nmckenna.me/;https://teddy-li.github.io/index.html;https://github.com/PonyCheng;https://homepages.inf.ed.ac.uk/shossein/index.html;http://web.science.mq.edu.au/~mjohnson/;https://homepages.inf.ed.ac.uk/steedman/", "dblp": "290/1567;;;;https://dblp.uni-trier.de/pers/hd/j/Johnson_0001:Mark;s/MarkSteedman", "google_scholar": "9MzRL9YAAAAJ;1HtnSIkAAAAJ;aqwszn0AAAAJ;https://scholar.google.co.uk/citations?hl=en;Z_kok3sAAAAJ;ccCd0_YAAAAJ", "or_profile": "~Nick_McKenna1;~Tianyi_Li2;~Liang_Cheng3;~Mohammad_Javad_Hosseini2;~Mark_Johnson8;~Mark_Steedman1", "aff": "University of Edinburgh, University of Edinburgh;University of Edinburgh;University of Edinburgh, University of Edinburgh;Google;Macquarie University;University of Edinburgh", "aff_domain": "ed.ac.uk;ed.ac.uk;ed.ac.uk;google.com;mq.edu.au;ed.ac.uk", "position": "PhD student;PhD student;PhD student;Researcher;Full Professor;Full Professor", "bibtex": "@inproceedings{\nmckenna2023sources,\ntitle={Sources of Hallucination by Large Language Models on Inference Tasks},\nauthor={Nick McKenna and Tianyi Li and Liang Cheng and Mohammad Javad Hosseini and Mark Johnson and Mark Steedman},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=rJhk7Fpnvh}\n}", "github": "", "project": "", "reviewers": "oXrA;4oz7;Fdep", "site": "https://openreview.net/forum?id=rJhk7Fpnvh", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;3;3", "excitement": "3;4;4", "reproducibility": "4;4;4", "correctness": "3;4;3", "rating_avg": 3.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, 
"orcid": ";;;;;", "linkedin": ";tianyi-li-a96506143/?originalSubdomain=uk;;mohammad-javad-hosseini-20297678/;;", "aff_unique_index": "0;0;0;1;2;0", "aff_unique_norm": "University of Edinburgh;Google;Macquarie University", "aff_unique_dep": ";Google;", "aff_unique_url": "https://www.ed.ac.uk;https://www.google.com;https://www.mq.edu.au", "aff_unique_abbr": "Edinburgh;Google;MQ", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0;1;2;0", "aff_country_unique": "United Kingdom;United States;Australia" }, { "id": "rKjzOYrXKd", "title": "GRENADE: Graph-Centric Language Model for Self-Supervised Representation Learning on Text-Attributed Graphs", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Self-supervised representation learning on text-attributed graphs, which aims to create expressive and generalizable representations for various downstream tasks, has received increasing research attention lately. However, existing methods either struggle to capture the full extent of structural context information or rely on task-specific training labels, which largely hampers their effectiveness and generalizability in practice. To solve the problem of self-supervised representation learning on text-attributed graphs, we develop a novel Graph-Centric Language model -- GRENADE. \nSpecifically, GRENADE harnesses the synergy of both pre-trained language model and graph neural network by optimizing with two specialized self-supervised learning algorithms: graph-centric contrastive learning and graph-centric knowledge alignment. The proposed graph-centric self-supervised learning algorithms effectively help GRENADE to capture informative textual semantics as well as structural context information on text-attributed graphs. 
Through extensive experiments, GRENADE shows its superiority over state-of-the-art methods.", "keywords": "textual attributed graph;text rich network;representation learning;graph neural network;language model", "primary_area": "", "supplementary_material": "", "author": "Yichuan Li;Kaize Ding;Kyumin Lee", "authorids": "~Yichuan_Li3;~Kaize_Ding1;~Kyumin_Lee1", "gender": ";M;M", "homepage": ";https://kaize0409.github.io/;https://web.cs.wpi.edu/~kmlee/", "dblp": "216/7478-1.html;234/6878;https://dblp.uni-trier.de/pid/22/8024.html", "google_scholar": "lLvYmOwAAAAJ;PI3myr8AAAAJ;zQKRsSEAAAAJ", "or_profile": "~Yichuan_Li3;~Kaize_Ding1;~Kyumin_Lee1", "aff": "Worcester Polytechnic Institute;Arizona State University;Worcester Polytechnic Institute", "aff_domain": "wpi.edu;asu.edu;wpi.edu", "position": "PhD student;PhD student;Associate Professor", "bibtex": "@inproceedings{\nli2023grenade,\ntitle={{GRENADE}: Graph-Centric Language Model for Self-Supervised Representation Learning on Text-Attributed Graphs},\nauthor={Yichuan Li and Kaize Ding and Kyumin Lee},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=rKjzOYrXKd}\n}", "github": "", "project": "", "reviewers": "6SrM;esjL;P3Mr", "site": "https://openreview.net/forum?id=rKjzOYrXKd", "pdf_size": 0, "rating": "3;3;3", "confidence": "2;4;4", "excitement": "4;3;3", "reproducibility": "3;4;4", "correctness": "4;3;3", "rating_avg": 3.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 9, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": ";;", "aff_unique_index": "0;1;0", "aff_unique_norm": "Worcester Polytechnic Institute;Arizona State University", "aff_unique_dep": ";", "aff_unique_url": "https://www.wpi.edu;https://www.asu.edu", "aff_unique_abbr": "WPI;ASU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "rLx2eDYcMK", "title": "VivesDebate-Speech: A Corpus of Spoken Argumentation to Leverage Audio Features for Argument Mining", "track": "main", "status": "Short Main", "tldr": "", "abstract": "In this paper, we describe VivesDebate-Speech, a corpus of spoken argumentation created to leverage audio features for argument mining tasks. The creation of this corpus represents an important contribution to the intersection of speech processing and argument mining communities, and one of the most complete publicly available resources in this topic. Moreover, we have performed a set of first-of-their-kind experiments which show an improvement when integrating audio features into the argument mining pipeline. 
The provided results can be used as a baseline for future research.", "keywords": "Argument Mining;Argument Segmentation;Argument Classification;Speech Corpus", "primary_area": "", "supplementary_material": "", "author": "Ramon Ruiz-Dolz;Javier Iranzo Sanchez", "authorids": "~Ramon_Ruiz-Dolz1;~Javier_Iranzo_Sanchez1", "gender": "Not Specified;", "homepage": "https://raruidol.github.io;", "dblp": "242/1924;", "google_scholar": "https://scholar.google.es/citations?user=nHVCVWgAAAAJ;", "or_profile": "~Ramon_Ruiz-Dolz1;~Javier_Iranzo_Sanchez1", "aff": "UPV, Universidad Polit\u00e9cnica de Valencia;", "aff_domain": "dsic.upv.es;", "position": "PhD student;", "bibtex": "@inproceedings{\nruiz-dolz2023vivesdebatespeech,\ntitle={VivesDebate-Speech: A Corpus of Spoken Argumentation to Leverage Audio Features for Argument Mining},\nauthor={Ramon Ruiz-Dolz and Javier Iranzo Sanchez},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=rLx2eDYcMK}\n}", "github": "", "project": "", "reviewers": "s6XU;hNqj;N8m1", "site": "https://openreview.net/forum?id=rLx2eDYcMK", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;3;3", "excitement": "4;4;3", "reproducibility": "3;4;3", "correctness": "4;4;3", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";", "linkedin": ";", "aff_unique_index": "0", "aff_unique_norm": "Universidad Polit\u00e9cnica de Valencia", "aff_unique_dep": "", "aff_unique_url": "https://www.upv.es", "aff_unique_abbr": "UPV", "aff_country_unique_index": "0", "aff_country_unique": "Spain" }, { "id": "rMLnxh4oT5", "title": "CASE: Commonsense-Augmented Score with an Expanded Answer Space", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "LLMs have demonstrated impressive zero-shot performance on NLP tasks thanks to the knowledge they acquired in their training. In multiple-choice QA tasks, the LM probabilities are used as an imperfect measure of the plausibility of each answer choice. One of the major limitations of the basic score is that it treats all words as equally important. We propose CASE, a Commonsense-Augmented Score with an Expanded Answer Space. CASE addresses this limitation by assigning importance weights for individual words based on their semantic relations to other words in the input. The dynamic weighting approach outperforms basic LM scores, not only because it reduces noise from unimportant words, but also because it informs the model of implicit commonsense knowledge that may be useful for answering the question. We then also follow prior work in expanding the answer space by generating lexically-divergent answers that are conceptually-similar to the choices. When combined with answer space expansion, our method outperforms strong baselines on 5 commonsense benchmarks. 
We further show that these two approaches are complementary and may be especially beneficial when using smaller LMs.", "keywords": "Commonsense Reasoning;Question Answering;Language Model;Zero-shot", "primary_area": "", "supplementary_material": "", "author": "Wenkai Chen;Sahithya Ravi;Vered Shwartz", "authorids": "~Wenkai_Chen3;~Sahithya_Ravi1;~Vered_Shwartz1", "gender": "M;F;F", "homepage": "https://wk-chen.github.io/;https://sahithyaravi.github.io/;https://www.cs.ubc.ca/~vshwartz/", "dblp": ";236/5862;166/2038", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;mlWmdUsAAAAJ;bbe4ResAAAAJ", "or_profile": "~Wenkai_Chen3;~Sahithya_Ravi1;~Vered_Shwartz1", "aff": "Utrecht University;University of British Columbia;University of British Columbia", "aff_domain": "uu.nl;cs.ubc.ca;ubc.ca", "position": "MS student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nchen2023case,\ntitle={{CASE}: Commonsense-Augmented Score with an Expanded Answer Space},\nauthor={Wenkai Chen and Sahithya Ravi and Vered Shwartz},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=rMLnxh4oT5}\n}", "github": "", "project": "", "reviewers": "8bm5;pNxV;sJkV;CJBH", "site": "https://openreview.net/forum?id=rMLnxh4oT5", "pdf_size": 0, "rating": "4;4;4;4", "confidence": "4;4;3;3", "excitement": "2;4;4;3", "reproducibility": "3;4;4;3", "correctness": "4;4;3;3", "rating_avg": 4.0, "confidence_avg": 3.5, "excitement_avg": 3.25, "reproducibility_avg": 3.5, "correctness_avg": 3.5, "replies_avg": 13, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": ";;vered-shwartz-99548633/", "aff_unique_index": "0;1;1", "aff_unique_norm": "Utrecht University;University of British Columbia", "aff_unique_dep": ";", "aff_unique_url": "https://www.uu.nl;https://www.ubc.ca", "aff_unique_abbr": "UU;UBC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1", "aff_country_unique": "Netherlands;Canada" }, { "id": "rQJAaOh4nr", "title": "Self-prompted Chain-of-Thought on Large Language Models for Open-domain Multi-hop Reasoning", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "In open-domain question-answering (ODQA), most existing questions require single-hop reasoning on commonsense. To further extend this task, we officially introduce open-domain multi-hop reasoning (ODMR) by answering multi-hop questions with explicit reasoning steps in an open-domain setting. Recently, large language models (LLMs) have found significant utility in facilitating ODQA without an external corpus. Furthermore, chain-of-thought (CoT) prompting boosts the reasoning capability of LLMs to a greater extent with manual or automated paradigms. However, existing automated methods lack quality assurance, while manual approaches suffer from limited scalability and poor diversity, hindering the capabilities of LLMs. In this paper, we propose Self-prompted Chain-of-Thought (SP-CoT), an automated framework to mass-produce high quality CoTs of LLMs, by LLMs and for LLMs. SP-CoT introduces an automated generation pipeline of high quality ODMR datasets, an adaptive sampler for in-context CoT selection and self-prompted inference via in-context learning.
Extensive experiments on four multi-hop question-answering benchmarks show that our proposed SP-CoT not only significantly surpasses the previous SOTA methods on large-scale (175B) LLMs, but also nearly doubles the zero-shot performance of small-scale (13B) LLMs. Further analysis reveals the remarkable capability of SP-CoT to elicit direct and concise intermediate reasoning steps by recalling $\\sim$50\\% of intermediate answers on MuSiQue-Ans dataset.", "keywords": "Chain-of-Thought;Large Language Models;In-context-Learning;Open-domain question-answering;Multi-hop question-answering", "primary_area": "", "supplementary_material": "", "author": "Jinyuan Wang;Junlong Li;hai zhao", "authorids": "~Jinyuan_Wang1;~Junlong_Li1;~hai_zhao1", "gender": "M;;M", "homepage": "https://website.noewang.top;;http://bcmi.sjtu.edu.cn/~zhaohai/", "dblp": ";115/6031;25/1145-1.html", "google_scholar": ";UX7TpSYAAAAJ;https://scholar.google.com.tw/citations?user=4dU5KS0AAAAJ", "or_profile": "~Jinyuan_Wang1;~Junlong_Li1;~hai_zhao1", "aff": "Shanghai Jiaotong University;Shanghai Jiaotong University;Shanghai Jiaotong University", "aff_domain": "sjtu.edu.cn;sjtu.edu.cn;sjtu.edu.cn", "position": "MS student;MS student;Full Professor", "bibtex": "@inproceedings{\nwang2023selfprompted,\ntitle={Self-prompted Chain-of-Thought on Large Language Models for Open-domain Multi-hop Reasoning},\nauthor={Jinyuan Wang and Junlong Li and hai zhao},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=rQJAaOh4nr}\n}", "github": "", "project": "", "reviewers": "mWKK;K9QY;4LDF", "site": "https://openreview.net/forum?id=rQJAaOh4nr", "pdf_size": 0, "rating": "3;3;3", "confidence": "2;1;5", "excitement": "4;3;3", "reproducibility": "3;3;3", "correctness": "3;3;3", "rating_avg": 3.0, "confidence_avg": 2.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.0, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": "jinyuan-wang-5141621ba/;;", "aff_unique_index": "0;0;0", "aff_unique_norm": "Shanghai Jiao Tong University", "aff_unique_dep": "", "aff_unique_url": "https://www.sjtu.edu.cn", "aff_unique_abbr": "SJTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "rRwPzcSFeL", "title": "TrueTeacher: Learning Factual Consistency Evaluation with Large Language Models", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Factual consistency evaluation is often conducted using Natural Language Inference (NLI) models, yet these models exhibit limited success in evaluating summaries. Previous work improved such models with synthetic training data. However, the data is typically based on perturbed human-written summaries, which often differ in their characteristics from real model-generated summaries and have limited coverage of possible factual errors. Alternatively, large language models (LLMs) have recently shown promising results in directly evaluating generative tasks, but are too computationally expensive for practical use. Motivated by these limitations, we introduce TrueTeacher, a method for generating synthetic data by annotating diverse model-generated summaries using a LLM. Unlike prior work, TrueTeacher does not rely on human-written summaries, and is multilingual by nature. 
Experiments on the TRUE benchmark show that a student model trained using our data substantially outperforms both the state-of-the-art model with similar capacity and the LLM teacher. In a systematic study, we compare TrueTeacher to existing synthetic data generation methods and demonstrate its superiority and robustness to domain shift. We also show that our method generalizes to multilingual scenarios. Lastly, we release our large-scale synthetic dataset (1.4M examples), generated using TrueTeacher, and a checkpoint trained on this data.", "keywords": "factuality;attribution;consistency;hallucinations", "primary_area": "", "supplementary_material": "", "author": "Zorik Gekhman;Jonathan Herzig;Roee Aharoni;Chen Elkind;Idan Szpektor", "authorids": "~Zorik_Gekhman1;~Jonathan_Herzig2;~Roee_Aharoni1;~Chen_Elkind1;~Idan_Szpektor1", "gender": "M;M;M;;", "homepage": ";https://jonathanherzig.github.io/;http://www.roeeaharoni.com;;", "dblp": "275/3280;133/3687.html;148/9506;;15/6513", "google_scholar": "c748UcIAAAAJ;https://scholar.google.co.il/citations?view_op=list_works;https://scholar.google.co.il/citations?user=wV0mHWgAAAAJ;https://scholar.google.com/citations?view_op=list_works;XI2CP68AAAAJ", "or_profile": "~Zorik_Gekhman1;~Jonathan_Herzig2;~Roee_Aharoni1;~Chen_Elkind1;~Idan_Szpektor1", "aff": "Technion, Technion;Research, Google;Google;Research, Google;Google", "aff_domain": "technion.ac.il;research.google.com;google.com;research.google.com;google.com", "position": "MS student;Researcher;Researcher;Researcher;Researcher", "bibtex": "@inproceedings{\ngekhman2023trueteacher,\ntitle={TrueTeacher: Learning Factual Consistency Evaluation with Large Language Models},\nauthor={Zorik Gekhman and Jonathan Herzig and Roee Aharoni and Chen Elkind and Idan Szpektor},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=rRwPzcSFeL}\n}", "github": "", "project": "", "reviewers": "TpWY;LjUF;AJ8x", "site": "https://openreview.net/forum?id=rRwPzcSFeL", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;3;3", "excitement": "4;3;3", "reproducibility": "3;0;3", "correctness": "4;4;4", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 2.0, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;", "linkedin": ";;roeeaharoni;;", "aff_unique_index": "0;1;1;1;1", "aff_unique_norm": "Technion - Israel Institute of Technology;Google", "aff_unique_dep": ";Google Research", "aff_unique_url": "https://www.technion.ac.il/en/;https://research.google", "aff_unique_abbr": "Technion;Google", "aff_campus_unique_index": "1;1;1;1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;1;1;1;1", "aff_country_unique": "Israel;United States" }, { "id": "rTAIgZe3wo", "title": "ACTOR: Active Learning with Annotator-specific Classification Heads to Embrace Human Label Variation", "track": "main", "status": "Short Main", "tldr": "", "abstract": "Label aggregation such as majority voting is commonly used to resolve annotator disagreement in dataset creation.
\nHowever, this may disregard minority values and opinions.\nRecent studies indicate that learning from individual annotations outperforms learning from aggregated labels, though they require a considerable amount of annotation.\nActive learning, as an annotation cost-saving strategy, has not been fully explored in the context of learning from disagreement. \nWe show that in the active learning setting, a multi-head model performs significantly better than a single-head model in terms of uncertainty estimation.\nBy designing and evaluating acquisition functions with annotator-specific heads on two datasets, we show that group-level entropy works generally well on both datasets. \nImportantly, it achieves performance in terms of both prediction and uncertainty estimation comparable to full-scale training from disagreement, while saving 70\\% of the annotation budget.", "keywords": "Active Learning;Human Label Variation;Multi-task Learning", "primary_area": "", "supplementary_material": "", "author": "Xinpeng Wang;Barbara Plank", "authorids": "~Xinpeng_Wang3;~Barbara_Plank2", "gender": "M;", "homepage": "https://xinpeng-wang.github.io/;https://bplank.github.io/", "dblp": "156/1668-3;46/521", "google_scholar": "QcNNM2YAAAAJ;", "or_profile": "~Xinpeng_Wang3;~Barbara_Plank2", "aff": "Ludwig-Maximilians-Universit\u00e4t M\u00fcnchen;IT University of Copenhagen", "aff_domain": "lmu.de;itu.dk", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\nwang2023actor,\ntitle={{ACTOR}: Active Learning with Annotator-specific Classification Heads to Embrace Human Label Variation},\nauthor={Xinpeng Wang and Barbara Plank},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=rTAIgZe3wo}\n}", "github": "", "project": "", "reviewers": "9qCH;vzgW;LA7Z", "site": "https://openreview.net/forum?id=rTAIgZe3wo", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;3;3", "excitement": "3;3;3", "reproducibility": "4;4;4", "correctness": "3;2;3", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.0, "reproducibility_avg": 4.0, "correctness_avg": 2.6666666666666665, "replies_avg": 10, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";", "linkedin": "xinpeng-wang-9177b510b/;", "aff_unique_index": "0;1", "aff_unique_norm": "Ludwig-Maximilians-Universit\u00e4t M\u00fcnchen;IT University of Copenhagen", "aff_unique_dep": ";", "aff_unique_url": "https://www.lmu.de;https://itu.dk", "aff_unique_abbr": "LMU;ITU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "Germany;Denmark" }, { "id": "rVsnAmxnR9", "title": "Ask To The Point: Open-Domain Entity-Centric Question Generation", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "We introduce a new task called *entity-centric question generation* (ECQG), motivated by real-world applications such as topic-specific learning, assisted reading, and fact-checking. The task aims to generate questions from an entity perspective. To solve ECQG, we propose a coherent PLM-based framework GenCONE with two novel modules: content focusing and question verification. The content focusing module first identifies a focus as \"what to ask\" to form draft questions, and the question verification module refines the questions afterwards by verifying the answerability. We also construct a large-scale open-domain dataset from SQuAD to support this task. 
Our extensive experiments demonstrate that GenCONE significantly and consistently outperforms various baselines, and two modules are effective and complementary in generating high-quality questions.", "keywords": "Question Generation;Entity-Centric;Multi-Task", "primary_area": "", "supplementary_material": "", "author": "Yuxiang Liu;Jie Huang;Kevin Chang", "authorids": "~Yuxiang_Liu3;~Jie_Huang3;~Kevin_Chang1", "gender": ";;M", "homepage": ";https://jeffhj.github.io/;https://siebelschool.illinois.edu/about/people/faculty/kcchang", "dblp": ";29/6643-9;c/KCCChang", "google_scholar": ";GIoPkMoAAAAJ;https://scholar.google.com.tw/citations?user=sugWZ6MAAAAJ", "or_profile": "~Yuxiang_Liu3;~Jie_Huang3;~Kevin_Chang1", "aff": ";University of Illinois, Urbana Champaign;University of Illinois, Urbana Champaign", "aff_domain": ";illinois.edu;illinois.edu", "position": ";PhD student;Full Professor", "bibtex": "@inproceedings{\nliu2023ask,\ntitle={Ask To The Point: Open-Domain Entity-Centric Question Generation},\nauthor={Yuxiang Liu and Jie Huang and Kevin Chang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=rVsnAmxnR9}\n}", "github": "", "project": "", "reviewers": "GFDG;G3ne;LJfA", "site": "https://openreview.net/forum?id=rVsnAmxnR9", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;4;4", "excitement": "2;4;3", "reproducibility": "3;4;4", "correctness": "2;3;4", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.0, "replies_avg": 10, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0003-0997-6803", "linkedin": ";jie-huang-4b0104151/;", "aff_unique_index": "0;0", "aff_unique_norm": "University of Illinois Urbana-Champaign", "aff_unique_dep": "", "aff_unique_url": "https://illinois.edu", "aff_unique_abbr": "UIUC", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Urbana-Champaign", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "rXn9WO4M2p", "title": "Self-Influence Guided Data Reweighting for Language Model Pre-training", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Language Models (LMs) pre-trained with selfsupervision on large text corpora have become the default starting point for developing models for various NLP tasks. Once the pre-training corpus has been assembled, all data samples in the corpus are treated with equal importance during LM pre-training. However, due to varying levels of relevance and quality of data, equal importance to all the data samples may not be the optimal choice. While data reweighting has been explored in the context of task-specific supervised learning and LM fine-tuning, model-driven reweighting for pretraining data has not been explored. We fill this important gap and propose PRESENCE, a method for jointly reweighting samples by leveraging self-influence (SI) scores as an indicator of sample importance and pre-training. PRESENCE promotes novelty and stability for model pre-training. 
Through extensive analysis spanning multiple model sizes, datasets, and tasks, we present PRESENCE as an important first step in the research direction of sample reweighting for pre-training language models.", "keywords": "pre-training;multilingual pretraining;self-influence;language models;data reweighting", "primary_area": "", "supplementary_material": "", "author": "Megh Thakkar;Tolga Bolukbasi;Sriram Ganapathy;Shikhar Vashishth;Sarath Chandar;Partha Talukdar", "authorids": "~Megh_Thakkar1;~Tolga_Bolukbasi1;~Sriram_Ganapathy1;~Shikhar_Vashishth1;~Sarath_Chandar1;~Partha_Talukdar1", "gender": "M;;M;;M;M", "homepage": "http://megh-thakkar.github.io;;http://leap.ee.iisc.ac.in/sriram/;;http://sarathchandar.in/;https://parthatalukdar.github.io/", "dblp": "92/6840;150/4230;23/4298.html;;45/8542;282/0169.html", "google_scholar": ";3rF9gtAAAAAJ;cgpzrtcAAAAJ;;https://scholar.google.co.in/citations?user=yxWtZLAAAAAJ;https://scholar.google.com.tw/citations?user=CIZwXAcAAAAJ", "or_profile": "~Megh_Thakkar1;~Tolga_Bolukbasi1;~Sriram_Ganapathy1;~Shikhar_Vashishth1;~Sarath_Chandar1;~Partha_Talukdar1", "aff": "Universit\u00e9 de Montr\u00e9al;Google;Google DeepMind;;\u00c9cole Polytechnique de Montr\u00e9al;Indian Institute of Science, Bangalore", "aff_domain": "umontreal.ca;google.com;google.com;;polymtl.ca;iisc.ac.in", "position": "MS student;Software Engineer;Researcher;;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nthakkar2023selfinfluence,\ntitle={Self-Influence Guided Data Reweighting for Language Model Pre-training},\nauthor={Megh Thakkar and Tolga Bolukbasi and Sriram Ganapathy and Shikhar Vashishth and Sarath Chandar and Partha Talukdar},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=rXn9WO4M2p}\n}", "github": "", "project": "", "reviewers": "nK5X;YoT7;8mAv", "site": "https://openreview.net/forum?id=rXn9WO4M2p", "pdf_size": 0, "rating": "4;4;4", "confidence": "5;5;4", "excitement": "3;3;4", "reproducibility": "4;4;4", "correctness": "3;3;4", "rating_avg": 4.0, "confidence_avg": 4.666666666666667, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;", "linkedin": "Megh-Thakkar;;;;;", "aff_unique_index": "0;1;1;2;3", "aff_unique_norm": "Universit\u00e9 de Montr\u00e9al;Google;\u00c9cole Polytechnique de Montr\u00e9al;Indian Institute of Science", "aff_unique_dep": ";Google;;", "aff_unique_url": "https://www.umontreal.ca;https://www.google.com;https://www.polymtl.ca;https://www.iisc.ac.in", "aff_unique_abbr": "UdeM;Google;Polytechnique Montr\u00e9al;IISc", "aff_campus_unique_index": "1;2;3", "aff_campus_unique": ";Mountain View;Montr\u00e9al;Bangalore", "aff_country_unique_index": "0;1;2;0;3", "aff_country_unique": "Canada;United States;United Kingdom;India" }, { "id": "rbaK24KnIO", "title": "Combining Denoising Autoencoders with Contrastive Learning to fine-tune Transformer Models", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Recently, using large pre-trained Transformer models for transfer learning tasks has evolved to the point where they have become one of the\nflagship trends in the Natural Language Processing (NLP) community, giving rise to various outlooks such as prompt-based, adapters, or combinations with unsupervised approaches, among many others. 
In this work, we propose a 3-Phase technique to adjust a base model for a classification task. First, we adapt the model\u2019s signal to the data distribution by performing further training with a Denoising Autoencoder (DAE). Second, we adjust the representation space of the output to the corresponding classes by clustering through a Contrastive Learning (CL) method. In addition, we introduce a new data augmentation approach for Supervised Contrastive Learning to correct the unbalanced datasets. Third, we apply fine-tuning to delimit the predefined categories. These different phases provide relevant and complementary knowledge to the model to learn the final task. We supply extensive experimental results on several datasets to demonstrate these claims. Moreover, we include an ablation study and compare the proposed method against other ways of combining these techniques.", "keywords": "transfer learning;contrastive learning;denoising autoencoders;NLU", "primary_area": "", "supplementary_material": "", "author": "Alejo Lopez-Avila;V\u00edctor Su\u00e1rez-Paniagua", "authorids": "~Alejo_Lopez-Avila1;~V\u00edctor_Su\u00e1rez-Paniagua1", "gender": "M;M", "homepage": ";https://linkedin.com/in/victorsuarezpaniagua", "dblp": ";184/8336.html", "google_scholar": "8x0SoaYAAAAJ;u8Ia51gAAAAJ", "or_profile": "~Alejo_Lopez-Avila1;~V\u00edctor_Su\u00e1rez-Paniagua1", "aff": "Huawei Technologies Ltd.;Huawei Technologies Ltd.", "aff_domain": "huawei.com;huawei.com", "position": "Researcher;Researcher", "bibtex": "@inproceedings{\nlopez-avila2023combining,\ntitle={Combining Denoising Autoencoders with Contrastive Learning to fine-tune Transformer Models},\nauthor={Alejo Lopez-Avila and V{\\'\\i}ctor Su{\\'a}rez-Paniagua},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=rbaK24KnIO}\n}", "github": "", "project": "", "reviewers": "j83o;Nkv3;oYVS", "site": "https://openreview.net/forum?id=rbaK24KnIO", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;2;4", "excitement": "2;4;4", "reproducibility": "4;4;4", "correctness": "3;4;4", "rating_avg": 4.0, "confidence_avg": 3.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.0, "correctness_avg": 3.6666666666666665, "replies_avg": 9, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-6036-5322", "linkedin": ";https://linkedin.com/in/victorsuarezpaniagua", "aff_unique_index": "0;0", "aff_unique_norm": "Huawei", "aff_unique_dep": "Huawei Technologies", "aff_unique_url": "https://www.huawei.com", "aff_unique_abbr": "Huawei", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "id": "rd0C4kD0o4", "title": "Efficient Latent Variable Modeling for Knowledge-Grounded Dialogue Generation", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Knowledge-grounded dialogue generation requires first retrieving appropriate external knowledge based on a conversational context and then generating a response grounded on the retrieved knowledge. In general, these two sequential modules, a knowledge retriever and a response generator, have been separately trained in a supervised manner. However, obtaining intermediate labels of the ground-truth knowledge is expensive, especially in open-domain conversations. Latent variable modeling avoids this need for the labels. 
In this paper, we propose an efficient algorithm for this latent variable modeling that is able to leverage a large amount of dialogue data. Rather than directly training the complex retriever, we adapt a query generator with an off-the-shelf retriever, and the query generator and response generator are simultaneously trained over the latent variable of query. Moreover, we employ lower bound of the evidence as a training objective and modify it to robustly perform the joint training. Experimental results on diverse knowledge-grounded dialogue datasets show that the proposed algorithm significantly outperforms the supervised learning algorithm even without the use of the annotated knowledge while maintaining efficiency and scalability.", "keywords": "knowledge-grounded dialogue generation", "primary_area": "", "supplementary_material": "", "author": "Gunsoo Han;Daejin Jo;Daniel Wontae Nam;Eunseop Yoon;Taehwan Kwon;Seungeun Rho;Kyoung-Woon On;Chang D. Yoo;Sungwoong Kim", "authorids": "~Gunsoo_Han1;~Daejin_Jo1;~Daniel_Wontae_Nam1;~Eunseop_Yoon1;~Taehwan_Kwon1;~Seungeun_Rho1;~Kyoung-Woon_On1;~Chang_D._Yoo1;~Sungwoong_Kim2", "gender": "M;M;M;F;;M;M;M;M", "homepage": "https://github.com/robinsongh381;;;https://esyoon7.github.io/;https://github.com/TaehwanKwon;;;https://sanctusfactory.com/family.php;", "dblp": ";264/5824;242/3858.html;331/3764;;239/5265;175/0873;31/7819;74/8063", "google_scholar": ";;;QbEnxx0AAAAJ;;;;gFWgUQEAAAAJ;https://scholar.google.co.kr/citations?user=3DSA90AAAAAJ", "or_profile": "~Gunsoo_Han1;~Daejin_Jo1;~Daniel_Wontae_Nam1;~Eunseop_Yoon1;~Taehwan_Kwon1;~Seungeun_Rho1;~Kyoung-Woon_On1;~Chang_D._Yoo1;~Sungwoong_Kim2", "aff": "Kakao Brain;Kakao Brain;Kakao Brain Corp.;KAIST;;Georgia Institute of Technology;Kakaobrain;Korea Advanced Institute of Science & Technology;Kakao Brain", "aff_domain": "kakaobrain.com;kakaobrain.com;kakaobrain.com;ee.kaist.ac.kr;;gatech.edu;kakaobrain.com;kaist.ac.kr;kakaobrain.com", "position": "Researcher;Researcher;Researcher;MS student;;PhD student;Researcher;Full Professor;Research Scientist", "bibtex": "@inproceedings{\nhan2023efficient,\ntitle={Efficient Latent Variable Modeling for Knowledge-Grounded Dialogue Generation},\nauthor={Gunsoo Han and Daejin Jo and Daniel Wontae Nam and Eunseop Yoon and Taehwan Kwon and Seungeun Rho and Kyoung-Woon On and Chang D. 
Yoo and Sungwoong Kim},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=rd0C4kD0o4}\n}", "github": "", "project": "", "reviewers": "UuJj;ivGY;unbb;doRo", "site": "https://openreview.net/forum?id=rd0C4kD0o4", "pdf_size": 0, "rating": "2;2;2;2", "confidence": "4;2;4;4", "excitement": "2;3;3;2", "reproducibility": "4;3;3;3", "correctness": "2;3;3;3", "rating_avg": 2.0, "confidence_avg": 3.5, "excitement_avg": 2.5, "reproducibility_avg": 3.25, "correctness_avg": 2.75, "replies_avg": 14, "authors#_avg": 9, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0000-0002-5580-5354;;;;0000-0002-0756-7179;", "linkedin": ";;daniel-w-nam;;;seungeun-rho-2943a0179/;;;", "aff_unique_index": "0;0;0;1;2;0;1;0", "aff_unique_norm": "Kakao Brain;Korea Advanced Institute of Science and Technology;Georgia Institute of Technology", "aff_unique_dep": ";;", "aff_unique_url": "https://brain.kakao.com;https://www.kaist.ac.kr;https://www.gatech.edu", "aff_unique_abbr": "Kakao Brain;KAIST;Georgia Tech", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;1;0;0;0", "aff_country_unique": "South Korea;United States" }, { "id": "rgKfPzAF2j", "title": "Byte Pair Encoding for Symbolic Music", "track": "main", "status": "Long Main", "tldr": "", "abstract": "When used with deep learning, the symbolic music modality is often coupled with language model architectures. To do so, the music needs to be tokenized, i.e. converted into a sequence of discrete tokens. This can be achieved by different approaches, as music can be composed of simultaneous tracks, of simultaneous notes with several attributes. Until now, the proposed tokenizations rely on small vocabularies of tokens describing the note attributes and time events, resulting in fairly long token sequences, and a sub-optimal use of the embedding space of language models. Recent research has put efforts on reducing the overall sequence length by merging embeddings or combining tokens. In this paper, we show that Byte Pair Encoding, a compression technique widely used for natural language, significantly decreases the sequence length while increasing the vocabulary size. By doing so, we leverage the embedding capabilities of such models with more expressive tokens, resulting in both better results and faster inference in generation and classification tasks. The [source code is shared on Github](https://github.com/Natooz/bpe-symbolic-music), along with a [companion website](https://Natooz.github.io/BPE-Symbolic-Music). 
Finally, BPE is directly implemented in [MidiTok](https://github.com/Natooz/MidiTok), allowing the reader to easily benefit from this method.", "keywords": "Symbolic music;BPE;Music generation;MIR", "primary_area": "", "supplementary_material": "", "author": "Nathan Fradet;Nicolas Gutowski;Fabien Chhel;Jean-Pierre Briot", "authorids": "~Nathan_Fradet1;~Nicolas_Gutowski1;~Fabien_Chhel1;~Jean-Pierre_Briot2", "gender": "M;M;Not Specified;M", "homepage": "https://nathanfradet.com;https://ngutowski.fr/;;https://webia.lip6.fr/~briot/", "dblp": ";217/9115;;https://dblp.uni-trier.de/pid/b/JPBriot.html", "google_scholar": "YdSSbXoAAAAJ;https://scholar.google.fr/citations?user=sTdjSqoAAAAJ;sk1j5vMAAAAJ;eTrhekUAAAAJ", "or_profile": "~Nathan_Fradet1;~Nicolas_Gutowski1;~Fabien_Chhel1;~Jean-Pierre_Briot2", "aff": "Sorbonne University;University of Angers;Ecole Sup\u00e9rieure d'Electronique de l'Ouest;CNRS", "aff_domain": "sorbonne-universite.fr;univ-angers.fr;eseo.fr;cnrs.fr", "position": "PhD student;Associate Professor;Associate Professor;Principal Researcher", "bibtex": "@inproceedings{\nfradet2023byte,\ntitle={Byte Pair Encoding for Symbolic Music},\nauthor={Nathan Fradet and Nicolas Gutowski and Fabien Chhel and Jean-Pierre Briot},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=rgKfPzAF2j}\n}", "github": "", "project": "", "reviewers": "ErZ1;ntvA;heEW;SeWr", "site": "https://openreview.net/forum?id=rgKfPzAF2j", "pdf_size": 0, "rating": "4;4;4;4", "confidence": "3;4;2;3", "excitement": "3;4;4;4", "reproducibility": "4;4;4;4", "correctness": "4;4;3;4", "rating_avg": 4.0, "confidence_avg": 3.0, "excitement_avg": 3.75, "reproducibility_avg": 4.0, "correctness_avg": 3.75, "replies_avg": 14, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0003-4729-570X;0000-0002-5765-9901;;0000-0003-1621-6335", "linkedin": "nathanfradet/;ngutowski/;;", "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Sorbonne University;University of Angers;Ecole Sup\u00e9rieure d'Electronique de l'Ouest;Centre National de la Recherche Scientifique", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.sorbonne.universite.fr;https://www.univ-angers.fr;https://www.eseo.fr;https://www.cnrs.fr", "aff_unique_abbr": "Sorbonne;UA;ESEO;CNRS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "France" }, { "id": "rgos321qpD", "title": "PEFTDebias : Capturing debiasing information using PEFTs", "track": "main", "status": "Short Main", "tldr": "", "abstract": "The increasing use of foundation models highlights the urgent need to address and eliminate implicit biases present in them that arise during pretraining. In this paper, we introduce PEFTDebias, a novel approach that employs parameter-efficient fine-tuning (PEFT) to mitigate the biases within foundation models. PEFTDebias consists of two main phases: an upstream phase for acquiring debiasing parameters along a specific bias axis, and a downstream phase where these parameters are incorporated into the model and frozen during the fine-tuning process. By evaluating on four datasets across two bias axes namely gender and race, we find that downstream biases can be effectively reduced with PEFTs. 
In addition, we show that these parameters possess axis-specific debiasing characteristics, enabling their effective transferability in mitigating biases in various downstream tasks.", "keywords": "parameter-efficient finetuning (PEFT);bias mitigation;debias;bias;gender;group;language model;debiasing;LoRA debias;prompt debias;adapter debias", "primary_area": "", "supplementary_material": "", "author": "Sumit Agarwal;Aditya Srikanth Veerubhotla;Srijan Bansal", "authorids": "~Sumit_Agarwal2;~Aditya_Srikanth_Veerubhotla1;~Srijan_Bansal1", "gender": "M;M;M", "homepage": ";https://aditya-srikanth.github.io/;", "dblp": "134/6808;257/4265;", "google_scholar": "FhP8jyIAAAAJ;RIQF-UwAAAAJ;q-r7dUAAAAAJ", "or_profile": "~Sumit_Agarwal2;~Aditya_Srikanth_Veerubhotla1;~Srijan_Bansal1", "aff": "Carnegie Mellon University;School of Computer Science, Carnegie Mellon University;Carnegie Mellon University", "aff_domain": "andrew.cmu.edu;cs.cmu.edu;cmu.edu", "position": "MS student;MS student;MS student", "bibtex": "@inproceedings{\nagarwal2023peftdebias,\ntitle={{PEFTD}ebias : Capturing debiasing information using {PEFT}s},\nauthor={Sumit Agarwal and Aditya Srikanth Veerubhotla and Srijan Bansal},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=rgos321qpD}\n}", "github": "", "project": "", "reviewers": "xV67;A5DV;LNGT", "site": "https://openreview.net/forum?id=rgos321qpD", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;3", "excitement": "4;4;3", "reproducibility": "4;4;2", "correctness": "4;4;3", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": ";aditya-srikanth-veerubhotla/;", "aff_unique_index": "0;0;0", "aff_unique_norm": "Carnegie Mellon University", "aff_unique_dep": "", "aff_unique_url": "https://www.cmu.edu", "aff_unique_abbr": "CMU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Pittsburgh", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "rhGh8jLOPd", "title": "MaXM: Towards Multilingual Visual Question Answering", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Visual Question Answering (VQA) has been primarily studied through the lens of the English language. Yet, tackling VQA in other languages in the same manner would require a considerable amount of resources. In this paper, we propose scalable solutions to multilingual visual question answering (mVQA), on both data and modeling fronts. We first propose a translation-based framework to mVQA data generation that requires much less human annotation effort than the conventional approach of directly collecting questions and answers. Then, we apply our framework to the multilingual captions in the Crossmodal-3600 dataset and develop an efficient annotation protocol to create MaXM, a test-only VQA benchmark in 7 diverse languages. Finally, we develop a simple, lightweight, and effective approach as well as benchmark state-of-the-art English and multilingual VQA models. 
We hope that our benchmark encourages further research on mVQA.", "keywords": "VQA;multilinguality", "primary_area": "", "supplementary_material": "", "author": "Soravit Changpinyo;Linting Xue;Michal Yarom;Ashish V Thapliyal;Idan Szpektor;Julien Amelot;Xi Chen;Radu Soricut", "authorids": "~Soravit_Changpinyo1;~Linting_Xue1;~Michal_Yarom1;~Ashish_V_Thapliyal1;~Idan_Szpektor1;~Julien_Amelot1;~Xi_Chen23;~Radu_Soricut2", "gender": "M;F;F;Not Specified;;;M;M", "homepage": "https://schangpi.github.io/;;;;;https://xchen147.github.io/;;", "dblp": "139/1319;143/0319.html;181/6577;42/4147;15/6513;16/3283;;74/1173", "google_scholar": "2TWx9x0AAAAJ;SuTADHAAAAAJ;GMVxiYgAAAAJ;1JtHXbAAAAAJ;XI2CP68AAAAJ;https://scholar.google.com/citations?hl=en;NAzD9mgAAAAJ;7CsenF4AAAAJ", "or_profile": "~Soravit_Changpinyo1;~Linting_Xue1;~Michal_Yarom1;~Ashish_V_Thapliyal1;~Idan_Szpektor1;~Xi_Chen23;~Radu_Soricut2;~Julien_Marc_Amelot1", "aff": "Google;;Research, Google;Google;Google;Google;Google;Research, Google", "aff_domain": "google.com;;research.google.com;google.com;google.com;google.com;google.com;research.google.com", "position": "Researcher;;Researcher;Research Software Engineer;Researcher;Researcher;Research Scientist;Researcher", "bibtex": "@inproceedings{\nchangpinyo2023maxm,\ntitle={Ma{XM}: Towards Multilingual Visual Question Answering},\nauthor={Soravit Changpinyo and Linting Xue and Michal Yarom and Ashish V Thapliyal and Idan Szpektor and Julien Amelot and Xi Chen and Radu Soricut},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=rhGh8jLOPd}\n}", "github": "", "project": "", "reviewers": "jdG6;PM6i;NrNZ", "site": "https://openreview.net/forum?id=rhGh8jLOPd", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;3", "excitement": "3;3;4", "reproducibility": "3;4;4", "correctness": "3;3;4", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 8, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-4013-1190;;;0000-0002-7219-0515;;;;", "linkedin": "soravit-changpinyo-b6a35944;;;;;;;", "aff_unique_index": "0;0;0;0;0;0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google", "aff_unique_url": "https://www.google.com", "aff_unique_abbr": "Google", "aff_campus_unique_index": "0;0;0;0;0;0;0", "aff_campus_unique": "Mountain View", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "rjDaTBwEBX", "title": "In What Languages are Generative Language Models the Most Formal? Analyzing Formality Distribution across Languages", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Multilingual generative language models (LMs) are increasingly fluent in a large variety of languages. Trained on the concatenation of corpora in multiple languages, they enable powerful transfer from high-resource languages to low-resource ones. However, it is still unknown what cultural biases are induced in the predictions of these models. In this work, we focus on one language property highly influenced by culture: formality. We analyze the formality distributions of XGLM and BLOOM's predictions, two popular generative multilingual language models, in 5 languages. We classify 1,200 generations per language as formal, informal, or incohesive and measure the impact of the prompt formality on the predictions. 
Overall, we observe a diversity of behaviors across the models and languages. For instance, XGLM generates informal text in Arabic and Bengali when conditioned with informal prompts, much more than BLOOM. In addition, even though both models are highly biased toward the formal style when prompted neutrally, we find that the models generate a significant amount of informal predictions even when prompted with formal text. We release with this work 6,000 annotated samples, paving the way for future work on the formality of generative multilingual LMs.", "keywords": "Multilingual language models;language models;formality bias analysis;bias analysis in language models", "primary_area": "", "supplementary_material": "", "author": "As\u0131m Ersoy;Gerson Vizcarra;Tahsin Mayeesha;Benjamin Muller", "authorids": "~As\u0131m_Ersoy1;~Gerson_Vizcarra1;~Tahsin_Mayeesha1;~Benjamin_Muller1", "gender": "M;F;M;", "homepage": "https://dyrson11.github.io/;https://tahsin-mayeesha.github.io/;https://scholar.google.com/citations?user=Ecl07CkAAAAJ&hl=en;https://github.com/asimokby", "dblp": "227/1894.html;;;", "google_scholar": "PTF8Bu4AAAAJ;MRDAGP8AAAAJ;;Vu_XvfwAAAAJ", "or_profile": "~Gerson_Vizcarra1;~Tahsin_Mayeesha1;~Benjamin_Muller1;~ASIM_ERSOY1", "aff": "Banco de Credito e Inversiones;;Meta;Ozyegin University", "aff_domain": "bci.cl;;meta.com;ozu.edu.tr", "position": "Researcher;;Postdoc;MS student", "bibtex": "@inproceedings{\nersoy2023in,\ntitle={In What Languages are Generative Language Models the Most Formal? Analyzing Formality Distribution across Languages},\nauthor={As{\\i}m Ersoy and Gerson Vizcarra and Tahsin Mayeesha and Benjamin Muller},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=rjDaTBwEBX}\n}", "github": "", "project": "", "reviewers": "4rv1;13h5;emAM", "site": "https://openreview.net/forum?id=rjDaTBwEBX", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;5;4", "excitement": "3;4;3", "reproducibility": "4;4;5", "correctness": "4;3;3", "rating_avg": 3.0, "confidence_avg": 4.333333333333333, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.333333333333333, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;", "linkedin": ";;;", "aff_unique_index": "0;1;2", "aff_unique_norm": "Banco de Credito e Inversiones;Meta;Ozyegin University", "aff_unique_dep": ";Meta Platforms, Inc.;", "aff_unique_url": "https://www.bci.cl;https://meta.com;https://www.ozyegin.edu.tr", "aff_unique_abbr": ";Meta;Ozyegin", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2", "aff_country_unique": "Chile;United States;T\u00fcrkiye" }, { "id": "rjd8AqRyW3", "title": "OpenAsp: A Benchmark for Multi-document Open Aspect-based Summarization", "track": "main", "status": "Long Main", "tldr": "", "abstract": "The performance of automatic summarization models has improved dramatically in recent years. Yet, there is still a gap in meeting specific information needs of users in real-world scenarios, particularly when a targeted summary is sought, such as in the useful aspect-based summarization setting targeted in this paper. Previous datasets and studies for this setting have predominantly concentrated on a limited set of pre-defined aspects, focused solely on single document inputs, or relied on synthetic data. 
To advance research on more realistic scenarios, we introduce OpenAsp, a benchmark for multi-document open aspect-based summarization. This benchmark is created using a novel and cost-effective annotation protocol, by which an open aspect dataset is derived from existing generic multi-document summarization datasets. We analyze the properties of OpenAsp showcasing its high-quality content. Further, we show that the realistic open-aspect setting realized in OpenAsp poses a challenge for current state-of-the-art summarization models, as well as for large language models.", "keywords": "Aspect-based Summarization;Datasets;Annotation;Multi-document Summarization", "primary_area": "", "supplementary_material": "", "author": "Shmuel Amar;Liat Schiff;Ori Ernst;Asi Shefer;Ori Shapira;Ido Dagan", "authorids": "~Shmuel_Amar1;~Liat_Schiff1;~Ori_Ernst1;~Asi_Shefer1;~Ori_Shapira1;~Ido_Dagan1", "gender": "M;F;M;M;;M", "homepage": "https://github.com/shmuelamar;;;https://www.linkedin.com/in/asi-sheffer-00897385;https://orishapira.wordpress.com/;http://u.cs.biu.ac.il/~dagan/", "dblp": "299/0265.html;;217/3552;;205/9013;95/284", "google_scholar": "_ioUYlYAAAAJ;;;;s7djZnUAAAAJ;https://scholar.google.com.tw/citations?user=YzGAGtoAAAAJ", "or_profile": "~Shmuel_Amar1;~Liat_Schiff1;~Ori_Ernst1;~Asi_Shefer1;~Ori_Shapira1;~Ido_Dagan1", "aff": ";Bar-Ilan University;Bar-Ilan University;One AI;Amazon;Bar-Ilan University", "aff_domain": ";biu.ac.il;biu.ac.il;oneai.com;amazon.com;biu.ac.il", "position": ";MS student;PhD student;Chief Scientist;Researcher;Full Professor", "bibtex": "@inproceedings{\namar2023openasp,\ntitle={OpenAsp: A Benchmark for Multi-document Open Aspect-based Summarization},\nauthor={Shmuel Amar and Liat Schiff and Ori Ernst and Asi Shefer and Ori Shapira and Ido Dagan},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=rjd8AqRyW3}\n}", "github": "", "project": "", "reviewers": "XNMG;zYF8;LFH5", "site": "https://openreview.net/forum?id=rjd8AqRyW3", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;3;4", "excitement": "4;3;4", "reproducibility": "4;4;4", "correctness": "4;4;4", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.0, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-6451-3732;;;;;", "linkedin": "shmuel-amar-883783b3;liat-schiff-aa1931184/;;;;", "aff_unique_index": "0;0;1;2;0", "aff_unique_norm": "Bar-Ilan University;One AI;Amazon", "aff_unique_dep": ";;Amazon.com, Inc.", "aff_unique_url": "https://www.biu.ac.il;;https://www.amazon.com", "aff_unique_abbr": "BIU;;Amazon", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;2;0", "aff_country_unique": "Israel;;United States" }, { "id": "rmhSMGjWPp", "title": "Multi-label and Multi-target Sampling of Machine Annotation for Computational Stance Detection", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "Data collection from manual labeling provides domain-specific and task-aligned supervision for data-driven approaches, and a critical mass of well-annotated resources is required to achieve reasonable performance in natural language processing tasks. 
However, manual annotations are often challenging to scale up in terms of time and budget, especially when domain knowledge, capturing subtle semantic features, and reasoning steps are needed. In this paper, we investigate the efficacy of leveraging large language models on automated labeling for computational stance detection. We empirically observe that while large language models show strong potential as an alternative to human annotators, their sensitivity to task-specific instructions and their intrinsic biases pose intriguing yet unique challenges in machine annotation. We introduce a multi-label and multi-target sampling strategy to optimize the annotation quality. Experimental results on the benchmark stance detection corpora show that our method can significantly improve performance and learning efficacy.", "keywords": "stance detection;machine annotation;computational social science", "primary_area": "", "supplementary_material": "", "author": "Zhengyuan Liu;Hai Leong Chieu;Nancy F. Chen", "authorids": "~Zhengyuan_Liu2;~Hai_Leong_Chieu1;~Nancy_F._Chen1", "gender": "M;M;", "homepage": ";http://chaileon.github.io/;http://alum.mit.edu/www/nancychen", "dblp": "229/9236;38/4132;84/8761", "google_scholar": ";https://scholar.google.com.sg/citations?user=9QO16LcAAAAJ;https://scholar.google.com.sg/citations?user=K3Z9UiAAAAAJ", "or_profile": "~Zhengyuan_Liu2;~Hai_Leong_Chieu1;~Nancy_F._Chen1", "aff": "I2R;DSO National Laboratories;I2R, A*STAR", "aff_domain": "astar.edu.sg;dso.org.sg;i2r.a-star.edu.sg", "position": "Researcher;Researcher;Principal Researcher", "bibtex": "@inproceedings{\nliu2023multilabel,\ntitle={Multi-label and Multi-target Sampling of Machine Annotation for Computational Stance Detection},\nauthor={Zhengyuan Liu and Hai Leong Chieu and Nancy F. Chen},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=rmhSMGjWPp}\n}", "github": "", "project": "", "reviewers": "KnpT;5SF3;jYAZ", "site": "https://openreview.net/forum?id=rmhSMGjWPp", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;4", "excitement": "4;2;2", "reproducibility": "3;5;3", "correctness": "4;3;3", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 2.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0009-0003-6396-7614;0000-0003-0872-5877", "linkedin": ";;nancy-chen-4644865/?originalSubdomain=sg", "aff_unique_index": "0;1;2", "aff_unique_norm": "Institute for Infocomm Research;DSO National Laboratories;A*STAR", "aff_unique_dep": ";;Institute for Infocomm Research", "aff_unique_url": "https://www.i2r.a-star.edu.sg;https://www.dso.org.sg;https://www.a-star.edu.sg", "aff_unique_abbr": "I2R;DSO;A*STAR", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Singapore" }, { "id": "rn7Fn3CV7b", "title": "CoVariance-based Causal Debiasing for Entity and Relation Extraction", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Joint entity and relation extraction tasks aim to recognize named entities and extract relations simultaneously. These tasks suffer from a variety of data biases, such as data selection bias and distribution bias (out-of-distribution, long-tail distribution), raising serious concerns about the model's transferability, robustness, and generalization. 
In this work, we address the above problems from a causality perspective. We propose a novel causal framework called c$\\underline{\\textbf{o}}$variance and $\\underline{\\textbf{v}}$ariance $\\underline{\\textbf{o}}$ptimization framework (OVO) to optimize feature representations and conduct general debiasing. In particular, the proposed $\\underline{\\textbf{c}}$ovariance $\\underline{\\textbf{op}}$timizing (COP) minimizes characterizing features' covariance for alleviating the selection and distribution bias and enhances feature representation in the feature space. Furthermore, based on the causal backdoor adjustment, we propose $\\underline{\\textbf{v}}$ariance $\\underline{\\textbf{op}}$timizing (VOP), which separates samples in terms of label information and minimizes the variance of each dimension in the feature vectors of the same class label for mitigating the distribution bias further. By applying it to three strong baselines in two widely used datasets, the results demonstrate the effectiveness and generalization of OVO for joint entity and relation extraction tasks. Furthermore, a fine-grained analysis reveals that OVO possesses the capability to mitigate the impact of long-tail distribution.", "keywords": "Causal Debiasing;Entity and Relation Extraction;Covariance Optimizing;Variance Optimizing", "primary_area": "", "supplementary_material": "", "author": "Lin Ren;Yongbin Liu;Yixin Cao;Chunping Ouyang", "authorids": "~Lin_Ren1;~Yongbin_Liu1;~Yixin_Cao2;~Chunping_Ouyang1", "gender": "M;M;M;F", "homepage": ";;https://sites.google.com/view/yixin-homepage;https://jsjxy.usc.edu.cn/info/2022/4741.htm", "dblp": ";79/9544;20/8038-2;", "google_scholar": "FZ_bjkQAAAAJ;9sXgL3MAAAAJ;https://scholar.google.co.uk/citations?user=CnhTvdoAAAAJ;", "or_profile": "~Lin_Ren1;~Yongbin_Liu1;~Yixin_Cao2;~Chunping_Ouyang1", "aff": "University of South China;University of South China;Singapore Management University;University of South China", "aff_domain": "usc.edu.cn;usc.edu.cn;smu.edu.sg;usc.edu.cn", "position": "MS student;Associate Professor;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nren2023covariancebased,\ntitle={CoVariance-based Causal Debiasing for Entity and Relation Extraction},\nauthor={Lin Ren and Yongbin Liu and Yixin Cao and Chunping Ouyang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=rn7Fn3CV7b}\n}", "github": "", "project": "", "reviewers": "5RrX;qNHL;debR;J2YN", "site": "https://openreview.net/forum?id=rn7Fn3CV7b", "pdf_size": 0, "rating": "3;3;3;3", "confidence": "4;2;4;4", "excitement": "4;4;2;3", "reproducibility": "3;4;3;4", "correctness": "3;4;2;3", "rating_avg": 3.0, "confidence_avg": 3.5, "excitement_avg": 3.25, "reproducibility_avg": 3.5, "correctness_avg": 3.0, "replies_avg": 14, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-2692-774X;;;", "linkedin": ";;;", "aff_unique_index": "0;0;1;0", "aff_unique_norm": "South China University;Singapore Management University", "aff_unique_dep": ";", "aff_unique_url": "http://www.scu.edu.cn;https://www.smu.edu.sg", "aff_unique_abbr": "SCU;SMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "China;Singapore" }, { "id": "rq4UfmpRA9", "title": "Democratizing Reasoning Ability: Tailored Learning from Large Language Model", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Large language models 
(LLMs) exhibit impressive emergent abilities in natural language processing, but their democratization is hindered due to huge computation requirements and closed-source nature. Recent research on advancing open-source smaller LMs by distilling knowledge from black-box LLMs has obtained promising results in the instruction-following ability. However, the reasoning ability which is more challenging to foster, is relatively rarely explored. In this paper, we propose a tailored learning approach to distill such reasoning ability to smaller LMs to facilitate the democratization of the exclusive reasoning ability. In contrast to merely employing LLM as a data annotator, we exploit the potential of LLM as a reasoning teacher by building an interactive multi-round learning paradigm. This paradigm enables the student to expose its deficiencies to the black-box teacher who then can provide customized training data in return. Further, to exploit the reasoning potential of the smaller LM, we propose self-reflection learning to motivate the student to learn from self-made mistakes. The learning from self-reflection and LLM are all tailored to the student's learning status, thanks to the seamless integration with the multi-round learning paradigm. Comprehensive experiments and analysis on mathematical and commonsense reasoning tasks demonstrate the effectiveness of our method. The code will be available at https://github.com/Raibows/Learn-to-Reason.", "keywords": "large language model;reasoning;distillation;open-source", "primary_area": "", "supplementary_material": "", "author": "Zhaoyang Wang;Shaohan Huang;Yuxuan Liu;Jiahai Wang;Minghui Song;Zihan Zhang;Haizhen Huang;Furu Wei;Weiwei Deng;Feng Sun;Qi Zhang", "authorids": "~Zhaoyang_Wang1;~Shaohan_Huang1;~Yuxuan_Liu10;~Jiahai_Wang1;~Minghui_Song1;~Zihan_Zhang4;~Haizhen_Huang1;~Furu_Wei1;~Weiwei_Deng2;~Feng_Sun1;~Qi_Zhang19", "gender": ";M;M;M;;M;M;M;M;M;M", "homepage": ";;https://bit.ly/yxliu;;https://github.com/TriLoo;;;https://www.microsoft.com/en-us/research/people/fuwei/;;;", "dblp": ";176/0380;42/7844-4;00/2989;;;304/7795;72/5870;311/3565.html;09/3224;", "google_scholar": ";;;;;;;G-V1VpwAAAAJ;;;", "or_profile": "~Zhaoyang_Wang1;~Shaohan_Huang1;~Yuxuan_Liu10;~Jiahai_Wang1;~Minghui_Song1;~Zihan_Zhang4;~Haizhen_Huang1;~Furu_Wei1;~Weiwei_Deng2;~Feng_Sun1;~Qi_Zhang19", "aff": ";Microsoft;Peking University;SUN YAT-SEN UNIVERSITY;Microsoft;;;Microsoft Research;Microsoft;;Microsoft", "aff_domain": ";microsoft.com;pku.edu.cn;sysu.edu.cn;microsoft.com;;;microsoft.com;microsoft.com;;microsoft.com", "position": ";Researcher;MS student;Full Professor;Researcher;;;Distinguished Scientist;Researcher;;Researcher", "bibtex": "@inproceedings{\nwang2023democratizing,\ntitle={Democratizing Reasoning Ability: Tailored Learning from Large Language Model},\nauthor={Zhaoyang Wang and Shaohan Huang and Yuxuan Liu and Jiahai Wang and Minghui Song and Zihan Zhang and Haizhen Huang and Furu Wei and Weiwei Deng and Feng Sun and Qi Zhang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=rq4UfmpRA9}\n}", "github": "", "project": "", "reviewers": "3M1o;BrcX;MSxv", "site": "https://openreview.net/forum?id=rq4UfmpRA9", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;4", "excitement": "4;3;4", "reproducibility": "4;4;4", "correctness": "4;3;4", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.0, "correctness_avg": 3.6666666666666665, 
"replies_avg": 11, "authors#_avg": 11, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0009-0002-9684-6416;;;;;;0009-0001-4793-9715;;", "linkedin": ";;;;;https://cn.linkedin.com/in/zihan-zhang-916bb7101;haizhen-huang-21a58824/;;;feng-sun/;qizhang07/", "aff_unique_index": "0;1;2;0;0;0;0", "aff_unique_norm": "Microsoft;Peking University;Sun Yat-sen University", "aff_unique_dep": "Microsoft Corporation;;", "aff_unique_url": "https://www.microsoft.com;http://www.pku.edu.cn;http://www.sysu.edu.cn", "aff_unique_abbr": "Microsoft;Peking U;SYSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;0;0;0;0", "aff_country_unique": "United States;China" }, { "id": "rs78DlnUB8", "title": "Complexity-Guided Curriculum Learning for Text Graphs", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Curriculum learning provides a systematic approach to training. It refines training progressively, tailors training to task requirements, and improves generalization through exposure to diverse examples. We present a curriculum learning approach that builds on existing knowledge about text and graph complexity formalisms for training with text graph data. The core part of our approach is a novel data scheduler, which employs ``spaced repetition'' and complexity formalisms to guide the training process. We demonstrate the effectiveness of the proposed approach on several text graph tasks and graph neural network architectures. The proposed model gains more and uses less data; consistently prefers text over graph complexity indices throughout training, while the best curricula derived from text and graph complexity indices are equally effective; and it learns transferable curricula across GNN models and datasets. 
In addition, we find that both node-level (local) and graph-level (global) graph complexity indices, as well as shallow and traditional text complexity indices play a crucial role in effective curriculum learning.", "keywords": "Curriculum Learning;Graph Neural Network;Text Graph", "primary_area": "", "supplementary_material": "", "author": "Nidhi Vakil;Hadi Amiri", "authorids": "~Nidhi_Vakil1;~Hadi_Amiri1", "gender": ";Not Specified", "homepage": "https://clu.cs.uml.edu/;https://cs.uml.edu/~hadi/", "dblp": "320/8251;41/7403", "google_scholar": "NIah6CsAAAAJ;https://scholar.google.com/citations?hl=en", "or_profile": "~Nidhi_Vakil1;~Hadi_Amiri1", "aff": "University of Massachusetts, Lowell;University of Massachusetts Lowell", "aff_domain": "uml.edu;uml.edu", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\nvakil2023complexityguided,\ntitle={Complexity-Guided Curriculum Learning for Text Graphs},\nauthor={Nidhi Vakil and Hadi Amiri},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=rs78DlnUB8}\n}", "github": "", "project": "", "reviewers": "G4id;xYVm;8ekE", "site": "https://openreview.net/forum?id=rs78DlnUB8", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;3;2", "excitement": "3;4;3", "reproducibility": "3;3;3", "correctness": "3;3;3", "rating_avg": 4.0, "confidence_avg": 2.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.0, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";", "linkedin": ";", "aff_unique_index": "0;0", "aff_unique_norm": "University of Massachusetts Lowell", "aff_unique_dep": "", "aff_unique_url": "https://www.uml.edu", "aff_unique_abbr": "UMass Lowell", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Lowell", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "rwHOXIBFwq", "title": "Combining Counting Processes and Classification Improves a Stopping Rule for Technology Assisted Review", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "Technology Assisted Review (TAR) stopping rules aim to reduce the cost of manually assessing documents for relevance by minimising the number of documents that need to be examined to ensure a desired level of recall. This paper extends an effective stopping rule using information derived from a text classifier that can be trained without the need for any additional annotation. 
Experiments on multiple data sets (CLEF e-Health, TREC Total Recall, TREC Legal and RCV1) showed that the proposed approach consistently improves performance and outperforms several alternative methods.", "keywords": "Technology assisted review;TAR;total recall;stopping criteria;counting processes;classification", "primary_area": "", "supplementary_material": "", "author": "Reem Bin Hezam;Mark Stevenson", "authorids": "~Reem_Bin_Hezam1;~Mark_Stevenson1", "gender": ";M", "homepage": ";http://staffwww.dcs.shef.ac.uk/people/M.Stevenson/", "dblp": ";68/6", "google_scholar": ";https://scholar.google.co.uk/citations?user=tXe1lgkAAAAJ", "or_profile": "~Reem_Bin_Hezam1;~Mark_Stevenson1", "aff": ";University of Sheffield", "aff_domain": ";shef.ac.uk", "position": ";(Senior) Lecturer/Researcher", "bibtex": "@inproceedings{\nhezam2023combining,\ntitle={Combining Counting Processes and Classification Improves a Stopping Rule for Technology Assisted Review},\nauthor={Reem Bin Hezam and Mark Stevenson},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=rwHOXIBFwq}\n}", "github": "", "project": "", "reviewers": "jZAo;Mr4c;yCws", "site": "https://openreview.net/forum?id=rwHOXIBFwq", "pdf_size": 0, "rating": "3;3;3", "confidence": "2;4;3", "excitement": "3;3;3", "reproducibility": "3;3;3", "correctness": "3;3;3", "rating_avg": 3.0, "confidence_avg": 3.0, "excitement_avg": 3.0, "reproducibility_avg": 3.0, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-9483-6006", "linkedin": ";", "aff_unique_index": "0", "aff_unique_norm": "University of Sheffield", "aff_unique_dep": "", "aff_unique_url": "https://www.sheffield.ac.uk", "aff_unique_abbr": "Sheffield", "aff_country_unique_index": "0", "aff_country_unique": "United Kingdom" }, { "id": "rwcLHjtUmn", "title": "A Suite of Generative Tasks for Multi-Level Multimodal Webpage Understanding", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Webpages have been a rich, scalable resource for vision-language and language only tasks. Yet only pieces of webpages are kept in existing datasets: image-caption pairs, long text articles, or raw HTML, never all in one place. Webpage tasks have resultingly received little attention and structured image-text data left underused. To study multimodal webpage understanding, we introduce the Wikipedia Webpage suite (WikiWeb2M) containing 2M pages with all of the associated image, text, and structure data. We verify its utility on three generative tasks: page description generation, section summarization, and contextual image captioning. We design a novel attention mechanism Prefix Global, which selects the most relevant image and text content as global tokens to attend to the rest of the webpage for context. By using page structure to separate such tokens, it performs better than full attention with lower computational complexity. Extensive experiments show that the new data in WikiWeb2M improves task performance compared to prior work.", "keywords": "Webpage Understanding;Multimodal Data;Text Generation", "primary_area": "", "supplementary_material": "", "author": "Andrea Burns;Krishna Srinivasan;Joshua Ainslie;Geoff Brown;Bryan A. 
Plummer;Kate Saenko;Jianmo Ni;Mandy Guo", "authorids": "~Andrea_Burns1;~Krishna_Srinivasan1;~Joshua_Ainslie1;~Geoff_Brown1;~Bryan_A._Plummer1;~Kate_Saenko1;~Jianmo_Ni2;~Mandy_Guo2", "gender": "F;M;;;F;;M;F", "homepage": "https://cs-people.bu.edu/aburns4/;https://krishna2.com;;https://www.linkedin.com/in/geoff-brown-8a8a3275/;http://ai.bu.edu;;http://bryanplummer.com/;", "dblp": "247/5872;50/145.html;263/3363;;88/2754;161/2449;163/2330;", "google_scholar": "1J-9WgoAAAAJ;aYn5qFUAAAAJ;;;https://scholar.google.com.tw/citations?user=9xDADY4AAAAJ;VECFLiAAAAAJ;https://scholar.google.com/citations?hl=en;qOiCKewAAAAJ", "or_profile": "~Andrea_Burns1;~Krishna_Srinivasan1;~Joshua_Ainslie1;~Geoff_Brown1;~Kate_Saenko1;~Jianmo_Ni2;~Bryan_Allen_Plummer1;~Xiaoyue_Guo1", "aff": "Boston University;Research, Google;Google;Google;Boston University, Boston University;Google;Boston University;", "aff_domain": "bu.edu;research.google.com;google.com;google.com;bu.edu;google.com;bu.edu;", "position": "PhD student;Researcher;Software Engineer;Researcher;Full Professor;Software engineer;Assistant Professor;", "bibtex": "@inproceedings{\nburns2023a,\ntitle={A Suite of Generative Tasks for Multi-Level Multimodal Webpage Understanding},\nauthor={Andrea Burns and Krishna Srinivasan and Joshua Ainslie and Geoff Brown and Bryan A. Plummer and Kate Saenko and Jianmo Ni and Mandy Guo},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=rwcLHjtUmn}\n}", "github": "", "project": "", "reviewers": "dKPZ;pHok;X6bk", "site": "https://openreview.net/forum?id=rwcLHjtUmn", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;4;4", "excitement": "4;4;4", "reproducibility": "3;4;3", "correctness": "5;5;4", "rating_avg": 5.0, "confidence_avg": 4.0, "excitement_avg": 4.0, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 4.666666666666667, "replies_avg": 11, "authors#_avg": 8, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0003-1366-0895;;;0000-0002-5704-7614;;;", "linkedin": "andrea-burns/;krishna2/;;;;;;", "aff_unique_index": "0;1;1;1;0;1;0", "aff_unique_norm": "Boston University;Google", "aff_unique_dep": ";Google Research", "aff_unique_url": "https://www.bu.edu;https://research.google", "aff_unique_abbr": "BU;Google", "aff_campus_unique_index": "1;1;1;2;1", "aff_campus_unique": ";Mountain View;Boston", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "rwcTxeSsVI", "title": "For Generated Text, Is NLI-Neutral Text the Best Text?", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "We explore incorporating natural language inference (NLI) into the text generative pipeline by using \na pre-trained NLI model to assess whether a generated sentence entails, contradicts, or is neutral to the prompt and preceding text. \nFirst, we show that the NLI task is predictive of generation errors made by GPT-3.\nWe use these results to develop an NLI-informed generation procedure for GPT-J.\nThen, we evaluate these generations by obtaining human annotations on error types and overall quality.\nWe find that an NLI strategy of maximizing entailment improves text generation when the nucleus sampling randomness parameter value is high, while one which maximizes contradiction is in fact productive when the parameter value is low. 
\nOverall, though, we demonstrate that an NLI strategy of maximizing the neutral class provides the highest quality of generated text (significantly better than the vanilla generations), regardless of parameter value.", "keywords": "generation;natural language inference", "primary_area": "", "supplementary_material": "", "author": "Michail Mersinias;Kyle Mahowald", "authorids": "~Michail_Mersinias1;~Kyle_Mahowald1", "gender": "M;M", "homepage": ";https://mahowak.github.io", "dblp": "266/1077;38/11196", "google_scholar": "GNo8fGgAAAAJ;XUmFLVUAAAAJ", "or_profile": "~Michail_Mersinias1;~Kyle_Mahowald1", "aff": "University of Texas at Austin;The University of Texas at Austin", "aff_domain": "utexas.edu;utexas.edu", "position": "MS student;Assistant Professor", "bibtex": "@inproceedings{\nmersinias2023for,\ntitle={For Generated Text, Is {NLI}-Neutral Text the Best Text?},\nauthor={Michail Mersinias and Kyle Mahowald},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=rwcTxeSsVI}\n}", "github": "", "project": "", "reviewers": "gehN;GiAa;XZPF", "site": "https://openreview.net/forum?id=rwcTxeSsVI", "pdf_size": 0, "rating": "3;3;3", "confidence": "2;4;4", "excitement": "3;2;3", "reproducibility": "5;3;4", "correctness": "4;4;3", "rating_avg": 3.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 2.6666666666666665, "reproducibility_avg": 4.0, "correctness_avg": 3.6666666666666665, "replies_avg": 10, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";", "linkedin": "michael-mersinias/;", "aff_unique_index": "0;0", "aff_unique_norm": "University of Texas at Austin", "aff_unique_dep": "", "aff_unique_url": "https://www.utexas.edu", "aff_unique_abbr": "UT Austin", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Austin", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "rwpv2kCt4X", "title": "Accuracy is not enough: Evaluating Personalization in Summarizers", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Text summarization models are evaluated in terms of their accuracy and quality using various measures such as ROUGE, BLEU, METEOR, BERTScore, PYRAMID, readability, and several other recently proposed ones. The central objective of all accuracy measures is to evaluate the model's ability to capture $\\textit{saliency}$ accurately. Since saliency is subjective w.r.t the readers' preferences, there cannot be a fit-all summary for a given document. This means that in many use-cases, summarization models need to be personalized w.r.t user-profiles. However, to our knowledge, there is no measure to evaluate the $\\textit{degree-of-personalization}$ of a summarization model. In this paper, we first establish that existing accuracy measures cannot evaluate the degree of personalization of any summarization model, and then propose a novel measure, called $EGISES$, for automatically computing the same. Using the PENS dataset released by Microsoft Research, we analyze the degree of personalization of ten different state-of-the-art summarization models (both extractive and abstractive), five of which are explicitly trained for personalized summarization, and the remaining are appropriated to exhibit personalization. 
We conclude by proposing a generalized accuracy measure, called $P$-$Accuracy$, for designing accuracy measures that should also take personalization into account and demonstrate the robustness and reliability of the measure through meta-evaluation.", "keywords": "Personalized Summarization Evaluation;Meta Evaluation;Automated Accuracy Metrics", "primary_area": "", "supplementary_material": "", "author": "Rahul Vansh;Darsh Rank;Sourish Dasgupta;Tanmoy Chakraborty", "authorids": "~Rahul_Vansh1;~Darsh_Rank1;~Sourish_Dasgupta2;~Tanmoy_Chakraborty2", "gender": "M;M;M;M", "homepage": ";;https://www.daiict.ac.in/faculty-details/2321;http://tanmoychak.com", "dblp": ";;17/3004;65/2136-2.html", "google_scholar": ";;A09ROeEAAAAJ;https://scholar.google.co.in/citations?user=C5S9JnIAAAAJ", "or_profile": "~Rahul_Vansh1;~Darsh_Rank1;~Sourish_Dasgupta2;~Tanmoy_Chakraborty2", "aff": "Dhirubhai Ambani Institute of Information and Communication Technology;Dhirubhai Ambani Institute of Information and Communication Technology;Dhirubhai Ambani Institute of Information & Communication Technology;Indian Institute of Technology, Delhi", "aff_domain": "daiict.ac.in;daiict.ac.in;daiict.ac.in;iitd.ac.in", "position": "MS student;Undergrad student;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nvansh2023accuracy,\ntitle={Accuracy is not enough: Evaluating Personalization in Summarizers},\nauthor={Rahul Vansh and Darsh Rank and Sourish Dasgupta and Tanmoy Chakraborty},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=rwpv2kCt4X}\n}", "github": "", "project": "", "reviewers": "GHAL;QpF4;wqBM", "site": "https://openreview.net/forum?id=rwpv2kCt4X", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;4", "excitement": "3;4;4", "reproducibility": "3;4;4", "correctness": "3;3;4", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0003-0507-6824;0000-0002-0210-0369", "linkedin": "rahulvansh66/;darsh-rank/;sourish-dasgupta-2432248/;tanmoy-chakraborty-89553324/", "aff_unique_index": "0;0;0;1", "aff_unique_norm": "Dhirubhai Ambani Institute of Information and Communication Technology;Indian Institute of Technology Delhi", "aff_unique_dep": ";", "aff_unique_url": "https://www.daiict.ac.in;https://www.iitdelhi.ac.in", "aff_unique_abbr": "DAIICT;IIT Delhi", "aff_campus_unique_index": "1", "aff_campus_unique": ";Delhi", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "India" }, { "id": "rzW3RouIXc", "title": "Query-as-context Pre-training for Dense Passage Retrieval", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Recently, methods have been developed to improve the performance of dense passage retrieval by using context-supervised pre-training. These methods simply consider two passages from the same document to be relevant, without taking into account the potential negative impacts of weakly correlated pairs. Thus, this paper proposes query-as-context pre-training, a simple yet effective pre-training technique to alleviate the issue. Query-as-context pre-training assumes that the query derived from a passage is more likely to be relevant to that passage and forms a passage-query pair. 
These passage-query pairs are then used in contrastive or generative context-supervised pre-training. The pre-trained models are evaluated on large-scale passage retrieval benchmarks and out-of-domain zero-shot benchmarks. Experimental results show that query-as-context pre-training brings considerable gains for retrieval performances, demonstrating its effectiveness and efficiency.", "keywords": "Query prediction;Dense passage retrieval;Pre-training", "primary_area": "", "supplementary_material": "", "author": "Xing W;Guangyuan Ma;Wanhui Qian;Zijia Lin;Songlin Hu", "authorids": "~Xing_W1;~Guangyuan_Ma1;~Wanhui_Qian1;~Zijia_Lin1;~Songlin_Hu2", "gender": "M;M;M;M;M", "homepage": "https://scholar.google.com.hk/citations?user=ZKd3UjkAAAAJ&hl=zh-CN;;;https://sites.google.com/site/linzijia72/;http://people.ucas.ac.cn/~0000967?language=en", "dblp": ";289/8498;245/3679;78/9911;67/4108-1.html", "google_scholar": "https://scholar.google.com.hk/citations?user=ZKd3UjkAAAAJ;GHBLzN0AAAAJ;;ghUYrHkAAAAJ;", "or_profile": "~Xing_W1;~Guangyuan_Ma1;~Wanhui_Qian1;~Zijia_Lin1;~Songiln_Hu1", "aff": "University of Chinese Academy of Sciences;University of Chinese Academy of Sciences;;Kuaishou Technology;Institute of Information Engineering, Chinese Academy of Sciences", "aff_domain": "ucas.edu.cn;ucas.ac.cn;;kuaishou.com;iie.ac.cn", "position": "PhD student;PhD student;;NLP expert;Full Professor", "bibtex": "@inproceedings{\nw2023queryascontext,\ntitle={Query-as-context Pre-training for Dense Passage Retrieval},\nauthor={Xing W and Guangyuan Ma and Wanhui Qian and Zijia Lin and Songlin Hu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=rzW3RouIXc}\n}", "github": "", "project": "", "reviewers": "qcxv;nEeh;DMcN", "site": "https://openreview.net/forum?id=rzW3RouIXc", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;3", "excitement": "3;3;3", "reproducibility": "4;4;4", "correctness": "4;4;3", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.0, "reproducibility_avg": 4.0, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0001-6916-9611;;0000-0002-1390-7424;", "linkedin": ";;;;", "aff_unique_index": "0;0;1;2", "aff_unique_norm": "University of Chinese Academy of Sciences;Kuaishou Technology;Chinese Academy of Sciences", "aff_unique_dep": ";;Institute of Information Engineering", "aff_unique_url": "http://www.ucas.ac.cn;https://www.kuaishou.com;http://www.cas.cn", "aff_unique_abbr": "UCAS;Kuaishou;CAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "rzdqmUFVnv", "title": "Multi-Granularity Information Interaction Framework for Incomplete Utterance Rewriting", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "Recent approaches in Incomplete Utterance Rewriting (IUR) fail to capture the source of important words, which is crucial to edit the incomplete utterance, and introduce words from irrelevant utterances. We propose a novel and effective multi-task information interaction framework including context selection, edit matrix construction, and relevance merging to capture\nthe multi-granularity of semantic information. 
Benefiting from fetching the relevant utterance and figuring out the important words, \nour approach outperforms existing state-of-the-art models on two benchmark datasets Restoration-200K and CANAND in this field.", "keywords": "Incomplete Utterance Rewriting;Information Interaction;Multi-Granularity", "primary_area": "", "supplementary_material": "", "author": "Haowei Du;Dinghao Zhang;Chen Li;Yang Li;Dongyan Zhao", "authorids": "~Haowei_Du1;~Dinghao_Zhang1;~Chen_Li37;~Yang_Li45;~Dongyan_Zhao2", "gender": "M;M;M;M;M", "homepage": ";https://github.com/goufugui/SCGD;https://github.com/ChenLi09;;https://www.wict.pku.edu.cn/zhaodongyan/en/", "dblp": "303/7899.html;;;;63/1870", "google_scholar": "uu9HarwAAAAJ;;7-eyqnoAAAAJ;AeCTbv8AAAAJ;lhR8-68AAAAJ", "or_profile": "~Haowei_Du1;~Dinghao_Zhang1;~Chen_Li37;~Yang_Li45;~Dongyan_Zhao2", "aff": "Peking University;;Ant Group;Alibaba Group;Peking University", "aff_domain": "pku.edu.cn;;antgroup.com;alibaba-inc.com;pku.edu.cn", "position": "PhD student;;Researcher;Researcher;Full Professor", "bibtex": "@inproceedings{\ndu2023multigranularity,\ntitle={Multi-Granularity Information Interaction Framework for Incomplete Utterance Rewriting},\nauthor={Haowei Du and Dinghao Zhang and Chen Li and Yang Li and Dongyan Zhao},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=rzdqmUFVnv}\n}", "github": "", "project": "", "reviewers": "VzV8;9o7i;DJDn", "site": "https://openreview.net/forum?id=rzdqmUFVnv", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;3;5", "excitement": "3;3;3", "reproducibility": "2;4;5", "correctness": "3;3;3", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 3.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0001-7683-0279;;;;", "linkedin": ";;;;", "aff_unique_index": "0;1;2;0", "aff_unique_norm": "Peking University;Ant Group;Alibaba Group", "aff_unique_dep": ";;", "aff_unique_url": "http://www.pku.edu.cn;https://www.antgroup.com;https://www.alibaba.com", "aff_unique_abbr": "Peking U;Ant Group;Alibaba", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "s1Lrw1HTcT", "title": "ChatGPT to Replace Crowdsourcing of Paraphrases for Intent Classification: Higher Diversity and Comparable Model Robustness", "track": "main", "status": "Long Main", "tldr": "", "abstract": "The emergence of generative large language models (LLMs) raises the question: what will be its impact on crowdsourcing? Traditionally, crowdsourcing has been used for acquiring solutions to a wide variety of human-intelligence tasks, including ones involving text generation, modification or evaluation. For some of these tasks, models like ChatGPT can potentially substitute human workers. In this study, we investigate whether this is the case for the task of paraphrase generation for intent classification. We apply data collection methodology of an existing crowdsourcing study (similar scale, prompts and seed data) using ChatGPT and Falcon-40B. 
We show that ChatGPT-created paraphrases are more diverse and lead to at least as robust models.", "keywords": "natural language generation;paraphrase generation;crowdsourcing;large language models;intent classification;text diversity", "primary_area": "", "supplementary_material": "", "author": "Jan Cegin;Jakub Simko;Peter Brusilovsky", "authorids": "~Jan_Cegin1;~Jakub_Simko1;~Peter_Brusilovsky1", "gender": "M;M;M", "homepage": "https://kinit.sk/sk/clen/jan-cegin/;https://kinit.sk/member/jakub-simko/;https://sites.pitt.edu/~peterb", "dblp": "272/2028;09/8578.html;b/PBrusilovsky", "google_scholar": "EFUgHF4AAAAJ;vjXMG2AAAAAJ;s6RpNfAAAAAJ", "or_profile": "~Jan_Cegin1;~Jakub_Simko1;~Peter_Brusilovsky1", "aff": "Brno University of Technology;Kempelen Institute of Intelligent Technologies;University of Pittsburgh", "aff_domain": "vut.cz;kinit.sk;pitt.edu", "position": "PhD student;Principal Researcher;Full Professor", "bibtex": "@inproceedings{\ncegin2023chatgpt,\ntitle={Chat{GPT} to Replace Crowdsourcing of Paraphrases for Intent Classification: Higher Diversity and Comparable Model Robustness},\nauthor={Jan Cegin and Jakub Simko and Peter Brusilovsky},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=s1Lrw1HTcT}\n}", "github": "", "project": "", "reviewers": "fC6f;YaLe;HLjE", "site": "https://openreview.net/forum?id=s1Lrw1HTcT", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;4;3", "excitement": "3;4;4", "reproducibility": "3;4;4", "correctness": "3;4;3", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0003-0239-4237;0000-0002-1902-1464", "linkedin": ";;brusilovsky/", "aff_unique_index": "0;1;2", "aff_unique_norm": "Brno University of Technology;Kempelen Institute of Intelligent Technologies;University of Pittsburgh", "aff_unique_dep": ";;", "aff_unique_url": "https://www.vutbr.cz;http://www.kempeleninstitute.com;https://www.pitt.edu", "aff_unique_abbr": "Brno UoT;;Pitt", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2", "aff_country_unique": "Czech Republic;Hungary;United States" }, { "id": "s4xIeYimGQ", "title": "Large Language Models are Better Reasoners with Self-Verification", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Recently, with the chain of thought (CoT) prompting, large language models (LLMs), e.g., GPT-3, have shown strong reasoning ability in several natural language processing tasks such as arithmetic, commonsense, and logical reasoning. However, LLMs with CoT require multi-step prompting and multi-token prediction, which is highly sensitive to individual mistakes and vulnerable to error accumulation. The above issues make the LLMs need the ability to verify the answers. In fact, after inferring conclusions in some thinking decision tasks, people often check them by re-verifying steps to avoid some mistakes. In this paper, we propose and prove that LLMs also have similar self-verification abilities. We take the conclusion obtained by CoT as one of the conditions for solving the original problem. 
By performing a backward verification of the answers that LLM deduced for itself, we can obtain interpretable answer validation scores to select the candidate answer with the highest score. Experimental results demonstrate that the proposed method can improve the reasoning performance on various arithmetic, commonsense, and logical reasoning datasets. Our code is publicly available at: https://github.com/WENGSYX/Self-Verification.", "keywords": "Large Language Models;Self-verification;Reasoning Ability;Chain of Thought;Backward Verification", "primary_area": "", "supplementary_material": "", "author": "Yixuan Weng;Minjun Zhu;Fei Xia;Bin Li;Shizhu He;Shengping Liu;Bin Sun;Kang Liu;Jun Zhao", "authorids": "~Yixuan_Weng1;~Minjun_Zhu2;~Fei_Xia4;~Bin_Li14;~Shizhu_He2;~Shengping_Liu1;~Bin_Sun4;~Kang_Liu1;~Jun_Zhao4", "gender": "M;F;M;M;M;M;M;M;M", "homepage": "https://wengsyx.github.io/;;https://github.com/Alex0xf;https://libincn.top;https://heshizhu.github.io/;;;http://www.nlpr.ia.ac.cn/cip/~liukang/index.html;http://nlpr-web.ia.ac.cn/cip/english/~junzhao/index.html", "dblp": "298/8205;271/6029;79/1081;89/6764-83;136/8650;21/5679;;42/4903.html;https://dblp.uni-trier.de/pid/47/2026-1.html", "google_scholar": "O1XsDEMAAAAJ;cm2ub2kAAAAJ;;2ZIBEWgAAAAJ;zBPIt3QAAAAJ;;lmVsnBsAAAAJ;DtZCfl0AAAAJ;https://scholar.google.com.hk/citations?user=HljRttwAAAAJ", "or_profile": "~Yixuan_Weng1;~Minjun_Zhu2;~Fei_Xia4;~Bin_Li14;~Shizhu_He2;~Shengping_Liu1;~Bin_Sun4;~Kang_Liu1;~Jun_Zhao4", "aff": "Institute of Automation, Chinese Academy of Sciences;University of Chinese Academy of Sciences;Institute of automation, Chinese Academy of Sciences;Hunan University;Institute of Automation, Chinese Academy of Sciences;Unisound;Hunan University;Institute of Automation, Chinese Academy of Sciences;Institute of automation, Chinese academy of science", "aff_domain": "ia.ac.cn;ucas.ac.cn;ia.ac.cn;hnu.edu.cn;ia.ac.cn;unisound.com;hnu.edu.cn;ia.ac.cn;nlpr.ia.ac.cn", "position": "MS student;MS student;MS student;PhD student;Associate Researcher;Principal Researcher;Associate Professor;Professor;Full Professor", "bibtex": "@inproceedings{\nweng2023large,\ntitle={Large Language Models are Better Reasoners with Self-Verification},\nauthor={Yixuan Weng and Minjun Zhu and Fei Xia and Bin Li and Shizhu He and Shengping Liu and Bin Sun and Kang Liu and Jun Zhao},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=s4xIeYimGQ}\n}", "github": "", "project": "", "reviewers": "hdXu;iNBL;yvPK", "site": "https://openreview.net/forum?id=s4xIeYimGQ", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;4", "excitement": "3;4;3", "reproducibility": "4;4;4", "correctness": "3;4;3", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 9, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-9720-8689;;0009-0002-4609-9950;0000-0002-6508-5071;;;;;", "linkedin": ";;;;;;;;", "aff_unique_index": "0;1;0;2;0;3;2;0;0", "aff_unique_norm": "Chinese Academy of Sciences;University of Chinese Academy of Sciences;Hunan University;Unisound", "aff_unique_dep": "Institute of Automation;;;", "aff_unique_url": "http://www.ia.cas.cn;http://www.ucas.ac.cn;http://www.hunu.edu.cn/;https://www.unisound.com/", "aff_unique_abbr": "CAS;UCAS;HNU;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": 
"0;0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "s7Vh8OIIm6", "title": "Hybrid Inverted Index Is a Robust Accelerator for Dense Retrieval", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Inverted file structure is a common technique for accelerating dense retrieval. \nIt clusters documents based on their embeddings; during searching, it probes nearby clusters w.r.t. an input query and only evaluates documents within them by subsequent codecs, thus avoiding the expensive cost from exhaustive traversal. \nHowever, the clustering is always lossy, which results in the miss of relevant documents in the probed clusters and hence degrades retrieval quality. \nIn contrast, lexical matching, such as overlaps of salient terms, tend to be strong features for identifying relevant documents.\nIn this work, we present the Hybrid Inverted Index (HI$^2$), where the embedding clusters and salient terms work collaboratively to accelerate dense retrieval. \nTo make best of both effectiveness and efficiency, we devise a cluster selector and a term selector, to construct compact inverted lists and efficiently searching through them.\nMoreover, we leverage simple unsupervised algorithms as well as end-to-end knowledge distillation to learn these two modules, with the latter further boosting the effectiveness.\nBased on comprehensive experiments on popular retrieval benchmarks, we verify that clusters and terms indeed complement each other, enabling HI$^2$ to achieve lossless retrieval quality with competitive efficiency across a variety of index settings.", "keywords": "Dense Retrieval;ANN Index;Inverted Index", "primary_area": "", "supplementary_material": "", "author": "Peitian Zhang;Zheng Liu;Shitao Xiao;Zhicheng Dou;Jing Yao", "authorids": "~Peitian_Zhang1;~Zheng_Liu4;~Shitao_Xiao1;~Zhicheng_Dou1;~Jing_Yao4", "gender": "M;;M;;F", "homepage": "https://www.namespace-pt.com;https://www.microsoft.com/en-us/research/people/zhengliu/;;https://playbigdata.ruc.edu.cn/dou;", "dblp": "304/3403;06/3580-11;286/1495;18/5740;24/5678.html", "google_scholar": "KyH5b58AAAAJ;https://scholar.google.com.hk/citations?user=k2SF4M0AAAAJ;https://scholar.google.com.hk/citations?hl=zh-CN;ChCjAAwAAAAJ;https://scholar.google.jp/citations?user=2FH8EjkAAAAJ", "or_profile": "~Peitian_Zhang1;~Zheng_Liu4;~Shitao_Xiao1;~Zhicheng_Dou1;~Jing_Yao4", "aff": "Renmin University of China;Microsoft Research;Beijing University of Posts and Telecommunications;Renmin University of China;Microsoft", "aff_domain": "ruc.edu.cn;research.microsoft.com;bupt.edu.cn;ruc.edu.cn;microsoft.com", "position": "MS student;Researcher;MS student;Full Professor;Researcher", "bibtex": "@inproceedings{\nzhang2023hybrid,\ntitle={Hybrid Inverted Index Is a Robust Accelerator for Dense Retrieval},\nauthor={Peitian Zhang and Zheng Liu and Shitao Xiao and Zhicheng Dou and Jing Yao},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=s7Vh8OIIm6}\n}", "github": "", "project": "", "reviewers": "8PkW;wBPq;4cBb", "site": "https://openreview.net/forum?id=s7Vh8OIIm6", "pdf_size": 0, "rating": "4;4;4", "confidence": "2;3;4", "excitement": "3;3;3", "reproducibility": "4;5;4", "correctness": "4;3;3", "rating_avg": 4.0, "confidence_avg": 3.0, "excitement_avg": 3.0, "reproducibility_avg": 4.333333333333333, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": 
"0009-0007-1926-7433;0000-0001-7765-8466;;0000-0002-9781-948X;", "linkedin": ";;;;", "aff_unique_index": "0;1;2;0;1", "aff_unique_norm": "Renmin University of China;Microsoft;Beijing University of Posts and Telecommunications", "aff_unique_dep": ";Microsoft Research;", "aff_unique_url": "http://www.ruc.edu.cn;https://www.microsoft.com/en-us/research;http://www.bupt.edu.cn/", "aff_unique_abbr": "RUC;MSR;BUPT", "aff_campus_unique_index": "1", "aff_campus_unique": ";Beijing", "aff_country_unique_index": "0;1;0;0;1", "aff_country_unique": "China;United States" }, { "id": "sCtJmxhvJe", "title": "\"Fifty Shades of Bias\": Normative Ratings of Gender Bias in GPT Generated English Text", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Language serves as a powerful tool for the manifestation of societal belief systems. In doing so, it also perpetuates the prevalent biases in our society. Gender bias is one of the most pervasive biases in our society and is seen in online and offline discourses. With LLMs increasingly gaining human-like fluency in text generation, gaining a nuanced understanding of the biases these systems can generate is imperative. Prior work often treats gender bias as a binary classification task. However, acknowledging that bias must be perceived at a relative scale; we investigate the generation and consequent receptivity of manual annotators to bias of varying degrees. Specifically, we create the first dataset of GPT-generated English text with normative ratings of gender bias. Ratings were obtained using Best--Worst Scaling -- an efficient comparative annotation framework. Next, we systematically analyze the variation of themes of gender biases in the observed ranking and show that identity-attack is most closely related to gender bias. 
Finally, we show the performance of existing automated models trained on related concepts on our dataset.", "keywords": "Gender bias;NLP;LLMs;GPT", "primary_area": "", "supplementary_material": "", "author": "Rishav Hada;Agrima Seth;Harshita Diddee;Kalika Bali", "authorids": "~Rishav_Hada1;~Agrima_Seth1;~Harshita_Diddee1;~Kalika_Bali1", "gender": "M;;F;F", "homepage": "https://sites.google.com/view/rishavhada;https://agrimaseth.github.io/;https://harshitadd.netlify.app/;https://www.microsoft.com/en-us/research/people/kalikab/", "dblp": ";;280/8888;19/5717", "google_scholar": "ctKGG_YAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?mauthors=Harshita+Diddee;HSIGxEgAAAAJ", "or_profile": "~Rishav_Hada1;~Agrima_Seth1;~Harshita_Diddee1;~Kalika_Bali1", "aff": "Microsoft Research India;University of Michigan - Ann Arbor;Microsoft;Microsoft Research Labs", "aff_domain": "microsoft.com;umich.edu;microsoft.com;microsoft.com", "position": "Researcher;PhD student;Researcher;Principal Researcher", "bibtex": "@inproceedings{\nhada2023fifty,\ntitle={''Fifty Shades of Bias'': Normative Ratings of Gender Bias in {GPT} Generated English Text},\nauthor={Rishav Hada and Agrima Seth and Harshita Diddee and Kalika Bali},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=sCtJmxhvJe}\n}", "github": "", "project": "", "reviewers": "HvWd;XADC;DxRa", "site": "https://openreview.net/forum?id=sCtJmxhvJe", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;5", "excitement": "2;4;4", "reproducibility": "4;3;3", "correctness": "3;4;4", "rating_avg": 4.0, "confidence_avg": 4.333333333333333, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0001-8547-6304;0000-0002-0852-7371;0000-0001-9275-742X", "linkedin": ";;harshita-diddee/;kalika-bali-b72bab9/", "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Microsoft;University of Michigan", "aff_unique_dep": "Microsoft Research India;", "aff_unique_url": "https://www.microsoft.com/en-us/research/group/microsoft-research-india;https://www.umich.edu", "aff_unique_abbr": "MSR India;UM", "aff_campus_unique_index": "1", "aff_campus_unique": ";Ann Arbor", "aff_country_unique_index": "0;1;1;1", "aff_country_unique": "India;United States" }, { "id": "sCu26OfxxZ", "title": "INA: An Integrative Approach for Enhancing Negotiation Strategies with Reward-Based Dialogue Agent", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "In this paper, we propose a novel negotiation agent designed for the online marketplace. Our dialogue agent is integrative in nature i.e, it possesses the capability to negotiate on price as well as other factors, such as the addition or removal of items from a deal bundle, thereby offering a more flexible and comprehensive negotiation experience. To enable this functionality, we create a new dataset called Integrative Negotiation Dataset (IND). For this dataset creation, we introduce a new semi-automated data creation method, which combines defining negotiation intents, actions, and intent-action simulation between users and the agent to generate potential dialogue flows. 
Finally, the prompting of GPT-J, a state-of-the-art language model, is done to generate dialogues for a given intent, with a human-in-the-loop process for post-editing and refining minor errors to ensure high data quality. We first train a maximum likelihood loss based model on IND, and then employ a set of novel rewards specifically tailored for the negotiation task to train our Integrative Negotiation Agent (INA). These rewards incentivize the agent to learn effective negotiation strategies that can adapt to various contextual requirements and price proposals. We train our model and conduct experiments to evaluate the effectiveness of our reward-based dialogue agent for negotiation. Our results demonstrate that the proposed approach and reward functions significantly enhance the negotiation capabilities of the dialogue agent. The INA successfully engages in integrative negotiations, displaying the ability to dynamically adjust prices and negotiate the inclusion or exclusion of items in a deal bundle.", "keywords": "Negotiation;Dialogue Agent;Prompting", "primary_area": "", "supplementary_material": "", "author": "Zishan Ahmad;Suman Saurabh;Vaishakh Sreekanth Menon;Asif Ekbal;Roshni Ramnani;ANUTOSH MAITRA", "authorids": "~Zishan_Ahmad2;~Suman_Saurabh1;~Vaishakh_Sreekanth_Menon1;~Asif_Ekbal1;~Roshni_Ramnani1;~ANUTOSH_MAITRA1", "gender": "M;M;M;M;F;", "homepage": ";;;https://ekbalasif.github.io;;", "dblp": ";;;11/3590;138/1138.html;", "google_scholar": "EtWzpyoAAAAJ;;;https://scholar.google.co.in/citations?user=IAL_F04AAAAJ;DThywGgAAAAJ;", "or_profile": "~Zishan_Ahmad2;~Suman_Saurabh1;~Vaishakh_Sreekanth_Menon1;~Asif_Ekbal1;~Roshni_Ramnani1;~ANUTOSH_MAITRA1", "aff": "Indian Institute of Technology, Patna;Indian Institute of Technology, Patna;Indian Institute of Technology, Patna;Indian Institute of Technology, Patna;Accenture;", "aff_domain": "iitp.ac.in;iitp.ac.in;iitp.ac.in;iitp.ac.in;accenture.com;", "position": "PhD student;MS student;Undergrad student;Associate Professor;Principal Researcher;", "bibtex": "@inproceedings{\nahmad2023ina,\ntitle={{INA}: An Integrative Approach for Enhancing Negotiation Strategies with Reward-Based Dialogue Agent},\nauthor={Zishan Ahmad and Suman Saurabh and Vaishakh Sreekanth Menon and Asif Ekbal and Roshni Ramnani and ANUTOSH MAITRA},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=sCu26OfxxZ}\n}", "github": "", "project": "", "reviewers": "PVmB;gPQD;ndma", "site": "https://openreview.net/forum?id=sCu26OfxxZ", "pdf_size": 0, "rating": "3;3;3", "confidence": "5;2;3", "excitement": "3;3;3", "reproducibility": "3;4;3", "correctness": "3;3;3", "rating_avg": 3.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.0, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0000-0003-3612-8834;0000-0002-0540-339X;", "linkedin": ";suman-saurabh-3a857514b;vaishakhsmn/;asif-ekbal-3b8a4517/?originalSubdomain=in;;", "aff_unique_index": "0;0;0;0;1", "aff_unique_norm": "Indian Institute of Technology Patna;Accenture", "aff_unique_dep": ";", "aff_unique_url": "https://www.iitp.ac.in;https://www.accenture.com", "aff_unique_abbr": "IIT Patna;Accenture", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Patna;", "aff_country_unique_index": "0;0;0;0;1", "aff_country_unique": "India;United States" }, { "id": "sCxiD2Rx4l", "title": "DUnE: Dataset 
for Unified Editing", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Even the most advanced language models remain susceptible to errors necessitating to modify these models without initiating a comprehensive retraining process. Model editing refers to the modification of a model's knowledge or representations in a manner that produces the desired outcomes. Prior research primarily centered around editing factual data e.g. \"Messi plays for Inter Miami\" confining the definition of an edit to a knowledge triplet i.e. (subject, object, relation). However, as the applications of language models expand, so do the diverse ways in which we wish to edit and refine their outputs. In this study, we broaden the scope of the editing problem to include an array of editing cases such as debiasing and rectifying reasoning errors and define an edit as any natural language expression that solicits a change in the model's outputs. We are introducing DUnE, an editing benchmark where edits are natural language sentences and propose that DUnE presents a challenging yet relevant task. To substantiate this claim, we conduct an extensive series of experiments testing various editing approaches to address DUnE, demonstrating their respective strengths and weaknesses. We argue that retrieval-augmented language modeling can outperform specialized editing techniques and neither set of approaches has fully solved the generalized editing problem covered by our benchmark.", "keywords": "Model Editing;Editing Large Language Models;Learning from Human Feedback", "primary_area": "", "supplementary_material": "", "author": "Afra Feyza Aky\u00fcrek;Eric L Pan;Garry Kuwanto;Derry Wijaya", "authorids": "~Afra_Feyza_Aky\u00fcrek1;~Eric_L_Pan1;~Garry_Kuwanto1;~Derry_Wijaya1", "gender": "M;M;F;F", "homepage": ";https://gkuwanto.github.io;https://derrywijaya.github.io/;https://feyzaakyurek.github.io", "dblp": ";;https://dblp.org/pers/w/Wijaya:Derry;268/0913.html", "google_scholar": ";CgJJ-PoAAAAJ;8lmWWD0AAAAJ;https://scholar.google.com/citations?hl=en", "or_profile": "~Eric_L_Pan1;~Garry_Kuwanto1;~Derry_Wijaya1;~Afra_Feyza_Akyurek1", "aff": "Yale University;Boston University, Boston University;Boston University;Boston University", "aff_domain": "yale.edu;bu.edu;bu.edu;bu.edu", "position": "Undergrad student;PhD student;Assistant Professor;PhD student", "bibtex": "@inproceedings{\naky{\\\"u}rek2023dune,\ntitle={{DU}nE: Dataset for Unified Editing},\nauthor={Afra Feyza Aky{\\\"u}rek and Eric L Pan and Garry Kuwanto and Derry Wijaya},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=sCxiD2Rx4l}\n}", "github": "", "project": "", "reviewers": "As6k;kaca;kptn", "site": "https://openreview.net/forum?id=sCxiD2Rx4l", "pdf_size": 0, "rating": "5;5;5", "confidence": "3;3;4", "excitement": "4;4;3", "reproducibility": "4;4;4", "correctness": "3;4;3", "rating_avg": 5.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0002-0848-4703;", "linkedin": "https://linkedin.com/in/ericlpan;;derry-wijaya-577b80178/;afrafeyzaakyurek/", "aff_unique_index": "0;1;1;1", "aff_unique_norm": "Yale University;Boston University", "aff_unique_dep": ";", "aff_unique_url": "https://www.yale.edu;https://www.bu.edu", "aff_unique_abbr": "Yale;BU", 
"aff_campus_unique_index": "1", "aff_campus_unique": ";Boston", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "sEvU6r8e7N", "title": "RefGPT: Dialogue Generation of GPT, by GPT, and for GPT", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Large Language Models (LLMs) have attained the impressive capability to resolve a wide range of NLP tasks by fine-tuning high-quality instruction data. However, collecting human-written data of high quality, especially multi-turn dialogues, is expensive and unattainable for most people. Though previous studies have used powerful LLMs to generate the dialogues automatically, they all suffer from generating untruthful dialogues because of the model hallucination. Therefore, we propose a method called RefGPT to generate enormous truthful and customized dialogues without worrying about factual errors caused by the model hallucination. RefGPT solves the model hallucination in dialogue generation by restricting the LLMs to leverage the given reference instead of reciting their own knowledge to generate dialogues. Additionally, RefGPT adds detailed controls on every utterance to enable high customization capability, which previous studies have ignored. On the basis of RefGPT, we also propose two high-quality dialogue datasets generated by GPT-4, namely **RefGPT-Fact** and **RefGPT-Code**. RefGPT-Fact is a dataset with 100k multi-turn dialogues based on factual knowledge and RefGPT-Code has 76k multi-turn dialogues covering a wide range of coding scenarios. Our code and datasets are released in https://github.com/mutonix/RefGPT.", "keywords": "Dialogue Generation;GPT;Model Hallucination;LLM", "primary_area": "", "supplementary_material": "", "author": "Dongjie Yang;Ruifeng Yuan;YuanTao Fan;Yifei Yang;Zili Wang;Shusen Wang;hai zhao", "authorids": "~Dongjie_Yang1;~Ruifeng_Yuan1;~YuanTao_Fan2;~Yifei_Yang2;~Zili_Wang1;~Shusen_Wang1;~hai_zhao1", "gender": "M;M;;;M;M;M", "homepage": "https://github.com/mutonix/;http://www4.comp.polyu.edu.hk/~csryuan/#;;;https://commencement.github.io/;http://wangshusen.github.io;http://bcmi.sjtu.edu.cn/~zhaohai/", "dblp": ";;;;;77/9625;25/1145-1.html", "google_scholar": "https://scholar.google.com/citations?hl=en;;https://scholar.google.com/citations?hl=zh-CN;UxAb3eQAAAAJ;E9zWgmwAAAAJ;HAf4pEoAAAAJ;https://scholar.google.com.tw/citations?user=4dU5KS0AAAAJ", "or_profile": "~Dongjie_Yang1;~Ruifeng_Yuan1;~YuanTao_Fan2;~Yifei_Yang2;~Zili_Wang1;~Shusen_Wang1;~hai_zhao1", "aff": "Shanghai Jiaotong University;Hong Kong Polytechnic University;Beijing University of Posts and Telecommunications;Shanghai Jiaotong University;;Xiaohongshu;Shanghai Jiaotong University", "aff_domain": "sjtu.edu.cn;polyu.edu.hk;bupt.edu.cn;sjtu.edu.cn;;xiaohongshu.com;sjtu.edu.cn", "position": "PhD student;PhD student; Beijing University of Posts and Telecommunications;PhD student;;Researcher;Full Professor", "bibtex": "@inproceedings{\nyang2023refgpt,\ntitle={Ref{GPT}: Dialogue Generation of {GPT}, by {GPT}, and for {GPT}},\nauthor={Dongjie Yang and Ruifeng Yuan and YuanTao Fan and Yifei Yang and Zili Wang and Shusen Wang and hai zhao},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=sEvU6r8e7N}\n}", "github": "", "project": "", "reviewers": "7XKU;8x7N;7RQX", "site": "https://openreview.net/forum?id=sEvU6r8e7N", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;3", "excitement": "3;4;3", "reproducibility": 
"5;4;4", "correctness": "3;4;3", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.333333333333333, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0000-0003-0997-9422;;;", "linkedin": ";;;;;;", "aff_unique_index": "0;1;2;0;3;0", "aff_unique_norm": "Shanghai Jiao Tong University;Hong Kong Polytechnic University;Beijing University of Posts and Telecommunications;Xiaohongshu", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.sjtu.edu.cn;https://www.polyu.edu.hk;http://www.bupt.edu.cn/;https://www.xiaohongshu.com", "aff_unique_abbr": "SJTU;PolyU;BUPT;XHS", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Hong Kong SAR;Beijing", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "sFtyaTTtap", "title": "Towards Example-Based NMT with Multi-Levenshtein Transformers", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Retrieval-Augmented Machine Translation (RAMT) is attracting growing attention. This is because RAMT not only improves translation metrics, but is also assumed to implement some form of domain adaptation. In this contribution, we study another salient trait of RAMT, its ability to make translation decisions more transparent by allowing users to go back to examples that contributed to these decisions.\nFor this, we propose a novel architecture aiming to increase this transparency. This model adapts a retrieval-augmented version of the Levenshtein Transformer and makes it amenable to simultaneously edit multiple fuzzy matches found in memory. We discuss how to perform training and inference in this model, based on multi-way alignment algorithms and imitation learning. 
Our experiments show that editing several examples positively impacts translation scores, notably increasing the number of target spans that are copied from existing instances.", "keywords": "Neural Machine Translation;Non Autoregressive Transformers;Levenshtein Transformer;Translation Memory;Computer Assisted Translation", "primary_area": "", "supplementary_material": "", "author": "Maxime Bouthors;Josep Crego;Fran\u00e7ois Yvon", "authorids": "~Maxime_Bouthors1;~Josep_Crego1;~Fran\u00e7ois_Yvon2", "gender": ";M;M", "homepage": ";;http://cv.archives-ouvertes.fr/francois-yvon", "dblp": "359/0915;94/361;05/2701.html", "google_scholar": "CJsmvEYAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.fr/citations?hl=fr", "or_profile": "~Maxime_Bouthors1;~Josep_Crego1;~Fran\u00e7ois_Yvon2", "aff": ";SYSTRAN By ChapsVision;LISN-CNRS / Universit\u00e9 Paris Saclay", "aff_domain": ";chapsvision.com;lisn.fr", "position": ";Researcher;Senior Researcher", "bibtex": "@inproceedings{\nbouthors2023towards,\ntitle={Towards Example-Based {NMT} with Multi-Levenshtein Transformers},\nauthor={Maxime Bouthors and Josep Crego and Fran{\c{c}}ois Yvon},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=sFtyaTTtap}\n}", "github": "", "project": "", "reviewers": "Fysf;k9eN;4qBu", "site": "https://openreview.net/forum?id=sFtyaTTtap", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;3;3", "excitement": "4;3;3", "reproducibility": "4;2;3", "correctness": "5;3;3", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.0, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0003-1400-4902;0009-0006-8034-3581;0000-0002-7972-7442", "linkedin": ";;", "aff_unique_index": "0;1", "aff_unique_norm": "SYSTRAN;Universit\u00e9 Paris Saclay", "aff_unique_dep": "ChapsVision;LISN-CNRS", "aff_unique_url": "https://www.systran.net;https://www.universite-paris-saclay.fr", "aff_unique_abbr": "SYSTRAN;UPS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "France" }, { "id": "sGrYJQZMQo", "title": "Active Instruction Tuning: Improving Cross-Task Generalization by Training on Prompt Sensitive Tasks", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Instruction tuning (IT) achieves impressive zero-shot generalization results by training large language models (LLMs) on a massive number of diverse tasks with instructions. However, how to select new tasks to improve the performance and generalizability of IT models remains an open question. Training on all existing tasks is impractical due to prohibitive computation requirements, and randomly selecting tasks can lead to suboptimal performance. In this work, we propose active instruction tuning based on prompt uncertainty, a novel framework to identify informative tasks, and then actively tune the models on the selected tasks. We represent the informativeness of new tasks with the disagreement of the current model outputs over perturbed prompts. Our experiments on NIV2 and Self-Instruct datasets demonstrate that our method consistently outperforms other baseline strategies for task selection, achieving better out-of-distribution generalization with fewer training tasks. 
Additionally, we introduce a task map that categorizes and diagnoses tasks based on prompt uncertainty and prediction probability. We discover that training on ambiguous (prompt-uncertain) tasks improves generalization while training on difficult (prompt-certain and low-probability) tasks offers no benefit, underscoring the importance of task selection for instruction tuning.", "keywords": "Instruction Tuning;Large Language Models;Active Learning", "primary_area": "", "supplementary_material": "", "author": "Po-Nien Kung;Fan Yin;Di Wu;Kai-Wei Chang;Nanyun Peng", "authorids": "~Po-Nien_Kung1;~Fan_Yin1;~Di_Wu14;~Kai-Wei_Chang1;~Nanyun_Peng1", "gender": "M;M;Not Specified;M;F", "homepage": ";;https://xiaowu0162.github.io/;http://kwchang.net;https://violetpeng.github.io/", "dblp": "278/2288;;52/328-54.html;18/2428;117/4036", "google_scholar": "wAjBsHAAAAAJ;klShdV0AAAAJ;vu1pDZgAAAAJ;fqDBtzYAAAAJ;XxRXvX0AAAAJ", "or_profile": "~Po-Nien_Kung1;~Fan_Yin1;~Di_Wu14;~Kai-Wei_Chang1;~Nanyun_Peng1", "aff": "UCLA Computer Science Department, University of California, Los Angeles;University of California, Los Angeles;University of California, Los Angeles;Amazon;University of California, Los Angeles", "aff_domain": "cs.ucla.edu;cs.ucla.edu;cs.ucla.edu;amazon.com;ucla.edu", "position": "PhD student;PhD student;PhD student;Researcher;Assistant Professor", "bibtex": "@inproceedings{\nkung2023active,\ntitle={Active Instruction Tuning: Improving Cross-Task Generalization by Training on Prompt Sensitive Tasks},\nauthor={Po-Nien Kung and Fan Yin and Di Wu and Kai-Wei Chang and Nanyun Peng},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=sGrYJQZMQo}\n}", "github": "", "project": "", "reviewers": "Yx6p;wiNR;26oa;daei", "site": "https://openreview.net/forum?id=sGrYJQZMQo", "pdf_size": 0, "rating": "4;4;4;4", "confidence": "4;3;4;5", "excitement": "3;4;4;3", "reproducibility": "3;4;5;3", "correctness": "4;3;3;3", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.5, "reproducibility_avg": 3.75, "correctness_avg": 3.25, "replies_avg": 14, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0000-0001-5365-0072;", "linkedin": ";fan-y-60b666180/;;kai-wei-chang-41239040;", "aff_unique_index": "0;0;0;1;0", "aff_unique_norm": "University of California, Los Angeles;Amazon", "aff_unique_dep": "Computer Science Department;Amazon.com, Inc.", "aff_unique_url": "https://www.ucla.edu;https://www.amazon.com", "aff_unique_abbr": "UCLA;Amazon", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Los Angeles;", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "sJUCMYtgIK", "title": "Augmenting Zero-Shot Dense Retrievers with Plug-in Mixture-of-Memories", "track": "main", "status": "Long Main", "tldr": "", "abstract": "In this paper we improve the zero-shot generalization ability of language models via Mixture-Of-Memory Augmentation (MoMA), a mechanism that retrieves augmentation documents from multiple information corpora (external memories), with the option to ''plug in'' unseen memory at inference time.\nWe develop a joint learning mechanism that trains the augmentation component with latent labels derived from the end retrieval task, paired with hard negatives from the memory mixture.\nWe instantiate the model in a zero-shot dense retrieval setting by augmenting strong T5-based retrievers with MoMA.\nWith only T5-base, our model obtains 
strong zero-shot retrieval accuracy on the eighteen tasks included in the standard BEIR benchmark, outperforming some systems with larger model sizes.\nAs a plug-in-play model, our model can efficiently generalize to any unseen corpus, meanwhile achieving comparable or even better performance than methods relying on target-specific pretraining.\nOur analysis further illustrates the necessity of augmenting with mixture-of-memory for robust generalization, the benefits of augmentation learning, and how MoMA utilizes the plug-in memory at inference time without changing its parameters.\nOur code can be found at https://github.com/gesy17/MoMA.", "keywords": "Retrieval Augmented Language Model;Zero-shot Dense Retrieval;Mixture of Memory", "primary_area": "", "supplementary_material": "", "author": "Suyu Ge;Chenyan Xiong;Corby Rosset;Arnold Overwijk;Jiawei Han;Paul N. Bennett", "authorids": "~Suyu_Ge1;~Chenyan_Xiong1;~Corby_Rosset2;~Arnold_Overwijk1;~Jiawei_Han1;~Paul_N._Bennett1", "gender": ";M;;M;M;", "homepage": ";https://www.cs.cmu.edu/~cx/;;;http://hanj.cs.illinois.edu/;https://www.microsoft.com/en-us/research/people/pauben/publications/", "dblp": ";18/10886;;16/7404;h/JiaweiHan.html;33/6188", "google_scholar": ";E9BaEBYAAAAJ;;zKiMGDgAAAAJ;https://scholar.google.com.tw/citations?user=Kv9AbjMAAAAJ;AIncPrIAAAAJ", "or_profile": "~Suyu_Ge1;~Chenyan_Xiong1;~Corby_Rosset2;~Arnold_Overwijk1;~Jiawei_Han1;~Paul_N._Bennett1", "aff": ";Microsoft Research;;Meta;University of Illinois at Urbana-Champaign (UIUC);Microsoft", "aff_domain": ";research.microsoft.com;;meta.com;illinois.edu;microsoft.com", "position": ";Principal Researcher;;Engineering Manager;Full Professor;Researcher", "bibtex": "@inproceedings{\nge2023augmenting,\ntitle={Augmenting Zero-Shot Dense Retrievers with Plug-in Mixture-of-Memories},\nauthor={Suyu Ge and Chenyan Xiong and Corby Rosset and Arnold Overwijk and Jiawei Han and Paul N. Bennett},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=sJUCMYtgIK}\n}", "github": "", "project": "", "reviewers": "i6oZ;3HsD;uvZi", "site": "https://openreview.net/forum?id=sJUCMYtgIK", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;3;3", "excitement": "4;3;4", "reproducibility": "4;4;4", "correctness": "4;3;4", "rating_avg": 4.0, "confidence_avg": 3.0, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.0, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;0000-0002-3629-2696;0009-0006-7852-9651", "linkedin": ";;;;;paulnbennett/", "aff_unique_index": "0;1;2;0", "aff_unique_norm": "Microsoft;Meta;University of Illinois Urbana-Champaign", "aff_unique_dep": "Microsoft Research;Meta Platforms, Inc.;", "aff_unique_url": "https://www.microsoft.com/en-us/research;https://meta.com;https://illinois.edu", "aff_unique_abbr": "MSR;Meta;UIUC", "aff_campus_unique_index": "1", "aff_campus_unique": ";Urbana-Champaign", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "sJb43ykK3o", "title": "RegaVAE: A Retrieval-Augmented Gaussian Mixture Variational Auto-Encoder for Language Modeling", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Retrieval-augmented language models show promise in addressing issues like outdated information and hallucinations in language models (LMs). 
However, current research faces two main problems: 1) determining what information to retrieve, and 2) effectively combining retrieved information during generation. We argue that valuable retrieved information should not only be related to the current source text but also consider the future target text, given the nature of LMs that model future tokens. Moreover, we propose that aggregation using latent variables derived from a compact latent space is more efficient than utilizing explicit raw text, which is limited by context length and susceptible to noise. Therefore, we introduce RegaVAE, a retrieval-augmented language model built upon the variational auto-encoder (VAE). It encodes the text corpus into a latent space, capturing current and future information from both source and target text. Additionally, we leverage the VAE to initialize the latent space and adopt the probabilistic form of the retrieval generation paradigm by expanding the Gaussian prior distribution into a Gaussian mixture distribution. Theoretical analysis provides an optimizable upper bound for RegaVAE. Experimental results on various datasets demonstrate significant improvements in text generation quality and hallucination removal.", "keywords": "Retrieval-Augmented Language Model;Hallucination;Variational Auto-Encoder", "primary_area": "", "supplementary_material": "", "author": "Jingcheng Deng;Liang Pang;Huawei Shen;Xueqi Cheng", "authorids": "~Jingcheng_Deng1;~Liang_Pang1;~Huawei_Shen1;~Xueqi_Cheng1", "gender": "M;M;M;M", "homepage": "https://scholar.google.com/citations?view_op=list_works&hl=zh-CN&hl=zh-CN&user=JBkt6EYAAAAJ;https://pl8787.github.io/;https://www.ict.ac.cn/sourcedb/cn/jssrck/201402/t20140221_4037648.html;https://people.ucas.ac.cn/~cxq?language=en", "dblp": "271/9292;37/11078;;44/912", "google_scholar": "https://scholar.google.com/citations?view_op=list_works;1dgQHBkAAAAJ;;hY8aLqAAAAAJ", "or_profile": "~Jingcheng_Deng1;~Liang_Pang1;~Huawei_Shen1;~Xueqi_Cheng1", "aff": "Institute of Computing Technology, CAS;Institute of Computing Technology, Chinese Academy of Sciences;Institute of Computing Technology, Chinese Academy of Sciences;Institute of Computing Technology, Chinese Academy", "aff_domain": "ict.cas.cn;ict.ac.cn;ict.ac.cn;ict.ac.cn", "position": "MS student;Associate Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\ndeng2023regavae,\ntitle={Rega{VAE}: A Retrieval-Augmented Gaussian Mixture Variational Auto-Encoder for Language Modeling},\nauthor={Jingcheng Deng and Liang Pang and Huawei Shen and Xueqi Cheng},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=sJb43ykK3o}\n}", "github": "", "project": "", "reviewers": "RR3J;oBTQ;MX59", "site": "https://openreview.net/forum?id=sJb43ykK3o", "pdf_size": 0, "rating": "4;4;4", "confidence": "5;2;3", "excitement": "2;3;3", "reproducibility": "3;4;3", "correctness": "2;4;3", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 2.6666666666666665, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.0, "replies_avg": 9, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0003-1161-8546;0000-0002-1081-8119;", "linkedin": ";;;", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Chinese Academy of Sciences", "aff_unique_dep": "Institute of Computing Technology", "aff_unique_url": "http://www.ict.ac.cn", "aff_unique_abbr": "CAS", "aff_campus_unique_index": "", 
"aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "sKdsBUAnts", "title": "Building Persona Consistent Dialogue Agents with Offline Reinforcement Learning", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Maintaining a consistent persona is a key quality for any open domain dialogue system. Current state-of-the-art systems do this by training agents with supervised learning or online reinforcement learning (RL). However, systems trained with supervised learning often lack consistency as they are never punished for uttering contradictions. Additional training with RL can alleviate some of these issues, however the training process is expensive. Instead, we propose an offline RL framework to improve the persona consistency of dialogue systems. Our framework allows us to combine the advantages of previous methods as we can inexpensively train our model on existing data as in supervised learning, while punishing and rewarding specific utterances as in RL. We also introduce a simple importance sampling method to reduce the variance of importance weights in offline RL training which we call Variance-Reducing MLE-Initialized (VaRMI) importance sampling. Our automatic and human evaluations show that our framework improves both the persona consistency and dialogue quality of a state-of-the-art social chatbot.", "keywords": "NLP;persona consistent dialogue;offline reinforcement learning", "primary_area": "", "supplementary_material": "", "author": "Ryan Shea;Zhou Yu", "authorids": "~Ryan_Shea1;~Zhou_Yu1", "gender": ";F", "homepage": ";http://www.cs.columbia.edu/~zhouyu/", "dblp": "119/1123;83/3205", "google_scholar": "8WABvQ4AAAAJ;https://scholar.google.com.tw/citations?user=jee2Dy0AAAAJ", "or_profile": "~Ryan_Shea1;~Zhou_Yu1", "aff": "Columbia University;Columbia University", "aff_domain": "columbia.edu;columbia.edu", "position": "MS student;Assistant Professor", "bibtex": "@inproceedings{\nshea2023building,\ntitle={Building Persona Consistent Dialogue Agents with Offline Reinforcement Learning},\nauthor={Ryan Shea and Zhou Yu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=sKdsBUAnts}\n}", "github": "", "project": "", "reviewers": "vjbp;Fcrt;2MMu", "site": "https://openreview.net/forum?id=sKdsBUAnts", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;3;4", "excitement": "2;3;3", "reproducibility": "3;4;5", "correctness": "3;2;4", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 2.6666666666666665, "reproducibility_avg": 4.0, "correctness_avg": 3.0, "replies_avg": 12, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";", "linkedin": "ryan-shea-192595202/;", "aff_unique_index": "0;0", "aff_unique_norm": "Columbia University", "aff_unique_dep": "", "aff_unique_url": "https://www.columbia.edu", "aff_unique_abbr": "Columbia", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "sM9NTLjsUh", "title": "Failures Pave the Way: Enhancing Large Language Models through Tuning-free Rule Accumulation", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Large Language Models (LLMs) have showcased impressive performance. However, due to their inability to capture relationships among samples, these frozen LLMs inevitably keep repeating similar mistakes. 
In this work, we propose our Tuning-free Rule Accumulation (TRAN) framework, which guides LLMs in improving their performance by learning from previous mistakes. Considering data arrives sequentially, LLMs gradually accumulate rules from incorrect cases, forming a rule collection. These rules are then utilized by the LLMs to avoid making similar mistakes when processing subsequent inputs. Moreover, the rules remain independent of the primary prompts, seamlessly complementing prompt design strategies. Experimentally, we show that TRAN improves over recent baselines by a large margin.", "keywords": "large langauge models;zero-shot learning;prompt", "primary_area": "", "supplementary_material": "", "author": "Zeyuan Yang;Peng Li;Yang Liu", "authorids": "~Zeyuan_Yang3;~Peng_Li2;~Yang_Liu19", "gender": "M;M;M", "homepage": "https://miicheyang.github.io/;http://www.lpeng.net/;http://nlp.csai.tsinghua.edu.cn/~ly/", "dblp": "260/6331-2.html;83/6353-30;51/3710-5", "google_scholar": "k_qpTh4AAAAJ;hgYzkOQAAAAJ;https://scholar.google.com.hk/citations?user=lVhoKNcAAAAJ", "or_profile": "~Zeyuan_Yang3;~Peng_Li2;~Yang_Liu19", "aff": ", Tsinghua University;Tsinghua University;Tsinghua University", "aff_domain": "cs.tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn", "position": "MS student;Associate Professor;Professor", "bibtex": "@inproceedings{\nyang2023failures,\ntitle={Failures Pave the Way: Enhancing Large Language Models through Tuning-free Rule Accumulation},\nauthor={Zeyuan Yang and Peng Li and Yang Liu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=sM9NTLjsUh}\n}", "github": "", "project": "", "reviewers": "Ucpj;ziX2;17gG", "site": "https://openreview.net/forum?id=sM9NTLjsUh", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;3", "excitement": "4;4;4", "reproducibility": "4;4;3", "correctness": "4;4;4", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 4.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 4.0, "replies_avg": 10, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0003-1374-5979;0000-0002-3087-242X", "linkedin": ";;", "aff_unique_index": "0;0;0", "aff_unique_norm": "Tsinghua University", "aff_unique_dep": "", "aff_unique_url": "https://www.tsinghua.edu.cn", "aff_unique_abbr": "THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "sOTbFCUrDj", "title": "A Generation-based Deductive Method for Math Word Problems", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Math word problems (MWP) involving advanced operators such as linear equation solver cannot be easily tackled by earlier MWP methods, because the existing generation methods suffer from repeated sub-expression generation and deductive methods are restricted to dealing with binary operations. This paper propose a new multivariate directed acyclic graph (mDAG) as an alternative to the generation methods' binary expression tree or the deductive methods' binary directed acyclic graph. Then to produce the topological ordering of mDAG, we propose a generation-based deductive (GeDe) model, which equips a generation model with a re-encoder to keep the deductive property but avoid the expensive enumeration of the deductive methods. 
GeDe performs well on math problems with many operators on the widely used benchmarks as well as solving multivariate operators on our own CMWPA benchmark. Our code is available at https://github.com/hyx1999/GeDe", "keywords": "math word problem;natural language processing", "primary_area": "", "supplementary_material": "", "author": "Yuxuan Hu;Jing Zhang;Haoyang Li;Cuiping Li;Hong Chen", "authorids": "~Yuxuan_Hu2;~Jing_Zhang24;~Haoyang_Li2;~Cuiping_Li1;~Hong_Chen5", "gender": "M;;M;F;F", "homepage": "https://hyx1999.github.io/;https://xiaojingzi.github.io/;http://www.lhystrive.com;;", "dblp": ";05/3499-1.html;;03/6827-1;52/4150-1", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;T7Wa3GQAAAAJ;;;", "or_profile": "~Yuxuan_Hu2;~Jing_Zhang24;~Haoyang_Li2;~Cuiping_Li1;~Hong_Chen5", "aff": "Renmin University of China;Renmin University of China;Renmin University of China;Renmin University of China;Renmin University of China", "aff_domain": "ruc.edu.cn;ruc.edu.cn;ruc.edu.cn;ruc.edu.cn;ruc.edu.cn", "position": "PhD student;Associate Professor;PhD student;Full Professor;Full Professor", "bibtex": "@inproceedings{\nhu2023a,\ntitle={A Generation-based Deductive Method for Math Word Problems},\nauthor={Yuxuan Hu and Jing Zhang and Haoyang Li and Cuiping Li and Hong Chen},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=sOTbFCUrDj}\n}", "github": "", "project": "", "reviewers": "asVd;SjPM;vqVh", "site": "https://openreview.net/forum?id=sOTbFCUrDj", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;3;4", "excitement": "4;3;4", "reproducibility": "3;3;4", "correctness": "3;4;4", "rating_avg": 5.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0009-0003-1494-7617;;;;", "linkedin": ";;;;", "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Renmin University of China", "aff_unique_dep": "", "aff_unique_url": "http://www.ruc.edu.cn", "aff_unique_abbr": "RUC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "sOngusZCsN", "title": "Knowledge-Augmented Language Model Verification", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Recent Language Models (LMs) have shown impressive capabilities in generating texts with the knowledge internalized in parameters. Yet, LMs often generate the factually incorrect responses to the given queries, since their knowledge may be inaccurate, incomplete, and outdated. To address this problem, previous works propose to augment LMs with the knowledge retrieved from an external knowledge source. However, such approaches often show suboptimal text generation performance due to two reasons: 1) the model may fail to retrieve the knowledge relevant to the given query, or 2) the model may not faithfully reflect the retrieved knowledge in the generated text. To overcome these, we propose to verify the output and the knowledge of the knowledge-augmented LMs with a separate verifier, which is a small LM that is trained to detect those two types of errors through instruction-finetuning. Then, when the verifier recognizes an error, we can rectify it by either retrieving new knowledge or generating new text. 
Further, we use an ensemble of the outputs from different instructions with a single verifier to enhance the reliability of the verification processes. We validate the effectiveness of the proposed verification steps on multiple question answering benchmarks, whose results show that the proposed verifier effectively identifies retrieval and generation errors, allowing LMs to provide more factually correct outputs. Our code is available at https://github.com/JinheonBaek/KALMV.", "keywords": "Knowledge-Augmented Language Models;Verification", "primary_area": "", "supplementary_material": "", "author": "Jinheon Baek;Soyeong Jeong;Minki Kang;Jong C. Park;Sung Ju Hwang", "authorids": "~Jinheon_Baek1;~Soyeong_Jeong1;~Minki_Kang1;~Jong_C._Park2;~Sung_Ju_Hwang1", "gender": "M;F;M;M;", "homepage": "https://jinheonbaek.github.io;https://starsuzi.github.io/;https://nardien.github.io;http://nlpcl.kaist.ac.kr/prof;", "dblp": "262/6003;164/0452;232/2406;73/5376;", "google_scholar": "U1FHaSUAAAAJ;0wnquCEAAAAJ;90G751oAAAAJ;XP5heVgAAAAJ;", "or_profile": "~Jinheon_Baek1;~Soyeong_Jeong1;~Minki_Kang1;~Jong_C._Park2;~Sung_Ju_Hwang1", "aff": "Microsoft Research;Korea Advanced Institute of Science & Technology;AITRICS;Korea Advanced Institute of Science & Technology;", "aff_domain": "microsoft.com;kaist.ac.kr;aitrics.com;kaist.ac.kr;", "position": "Intern;PhD student;Researcher;Full Professor;", "bibtex": "@inproceedings{\nbaek2023knowledgeaugmented,\ntitle={Knowledge-Augmented Language Model Verification},\nauthor={Jinheon Baek and Soyeong Jeong and Minki Kang and Jong C. Park and Sung Ju Hwang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=sOngusZCsN}\n}", "github": "", "project": "", "reviewers": "rWYL;QSUP;Fsm3", "site": "https://openreview.net/forum?id=sOngusZCsN", "pdf_size": 0, "rating": "5;5;5", "confidence": "3;4;4", "excitement": "3;3;4", "reproducibility": "4;4;4", "correctness": "4;4;4", "rating_avg": 5.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.0, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-9367-560X;;;0000-0002-8859-5111;", "linkedin": "jinheon-baek-8100a8144/;soyeong-jeong-900155141;;;", "aff_unique_index": "0;1;2;1", "aff_unique_norm": "Microsoft;Korea Advanced Institute of Science and Technology;AITRICS", "aff_unique_dep": "Microsoft Research;;", "aff_unique_url": "https://www.microsoft.com/en-us/research;https://www.kaist.ac.kr;https://www.aitrics.com", "aff_unique_abbr": "MSR;KAIST;AITRICS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1", "aff_country_unique": "United States;South Korea" }, { "id": "sPB354cbmL", "title": "Improved Training of Deep Text Clustering", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "The classical deep clustering optimization methods basically leverage information such as clustering centers, mutual information, and distance metrics to construct implicit generalized labels to establish information feedback (weak supervision) and thus optimize the deep model. However, the resulting generalized labels have different degrees of errors in the whole clustering process due to the limitation of clustering accuracy, which greatly interferes with the clustering process. 
To this end, this paper proposes a general deep clustering optimization method from the perspective of empirical risk minimization, using the correlation relationship between the samples. Experiments on two classical deep clustering methods demonstrate the necessity and effectiveness of the method. Code is available at https://github.com/yangzonghao1024/DCGLU.", "keywords": "Text Clustering;Deep Clustering", "primary_area": "", "supplementary_material": "", "author": "Zonghao Yang;Wenpeng Hu;Yushan Tan;Zhunchen Luo", "authorids": "~Zonghao_Yang1;~Wenpeng_Hu1;~Yushan_Tan1;~Zhunchen_Luo2", "gender": "M;M;F;M", "homepage": ";;;https://dblp.org/pid/82/11518.html", "dblp": ";191/6009;;82/11518.html", "google_scholar": ";YrTszToAAAAJ;;https://scholar.google.co.uk/citations?user=-4u9k60AAAAJ", "or_profile": "~Zonghao_Yang1;~Wenpeng_Hu1;~Yushan_Tan1;~Zhunchen_Luo2", "aff": "Information Research Center of Military Science, Academy of Military Science of the People's Liberation Army;Academy of Military Science;Information Research Center of Military Science, Academy of Military Science of the People's Liberation Army;National University of Defense Technology", "aff_domain": "163.com;ams.edu;163.com;nudt.edu.cn", "position": "MS student;Assistant Professor;Full Professor;Researcher", "bibtex": "@inproceedings{\nyang2023improved,\ntitle={Improved Training of Deep Text Clustering},\nauthor={Zonghao Yang and Wenpeng Hu and Yushan Tan and Zhunchen Luo},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=sPB354cbmL}\n}", "github": "", "project": "", "reviewers": "ynRi;kABq;xqXP", "site": "https://openreview.net/forum?id=sPB354cbmL", "pdf_size": 0, "rating": "1;1;1", "confidence": "4;4;3", "excitement": "3;3;2", "reproducibility": "3;4;3", "correctness": "4;4;2", "rating_avg": 1.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 2.6666666666666665, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-1075-3386;;0000-0002-5544-1239;", "linkedin": ";;;", "aff_unique_index": "0;1;0;2", "aff_unique_norm": "Academy of Military Science of the People's Liberation Army;Academy of Military Science;National University of Defense Technology", "aff_unique_dep": "Information Research Center of Military Science;;", "aff_unique_url": ";;http://www.nudt.edu.cn/", "aff_unique_abbr": ";;NUDT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "sPpft5DQJN", "title": "Interpreting Embedding Spaces by Conceptualization", "track": "main", "status": "Long Main", "tldr": "", "abstract": "One of the main methods for computational interpretation of a text is mapping it into a vector in some embedding space. Such vectors can then be used for a variety of textual processing tasks. Recently, most embedding spaces are a product of training large language models (LLMs). One major drawback of this type of representation is their incomprehensibility to humans. Understanding the embedding space is crucial for several important needs, including the need to debug the embedding method and compare it to alternatives, and the need to detect biases hidden in the model.\nIn this paper, we present a novel method of understanding embeddings by transforming a latent embedding space into a comprehensible \nconceptual space. 
We present an algorithm for deriving a conceptual space with dynamic on-demand granularity. We devise a new evaluation method, using either human raters or LLM-based raters, to show that the conceptualized vectors indeed represent the semantics of the original latent ones. We show the use of our method for various tasks, including comparing the semantics of alternative models and tracing the layers of the LLM. The code is available online https://github.com/adiSimhi/Interpreting-Embedding-Spaces-by-Conceptualization.", "keywords": "Interpretability;knowledge tracing", "primary_area": "", "supplementary_material": "", "author": "Adi Simhi;Shaul Markovitch", "authorids": "~Adi_Simhi1;~Shaul_Markovitch1", "gender": ";M", "homepage": ";http://www.cs.technion.ac.il/~shaulm/", "dblp": "329/0725.html;m/ShaulMarkovitch", "google_scholar": ";https://scholar.google.com/citations?hl=en", "or_profile": "~Adi_Simhi1;~Shaul_Markovitch1", "aff": "Computer Science Department, Technion - Israel Institute of Technology;Technion - Israel Institute of Technology, Technion", "aff_domain": "cs.technion.ac.il;technion.ac.il", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\nsimhi2023interpreting,\ntitle={Interpreting Embedding Spaces by Conceptualization},\nauthor={Adi Simhi and Shaul Markovitch},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=sPpft5DQJN}\n}", "github": "", "project": "", "reviewers": "nPyK;EU1c;iTU3", "site": "https://openreview.net/forum?id=sPpft5DQJN", "pdf_size": 0, "rating": "5;5;5", "confidence": "3;5;3", "excitement": "4;4;4", "reproducibility": "2;4;4", "correctness": "3;4;3", "rating_avg": 5.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 4.0, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0003-1730-7087;0000-0003-1485-996X", "linkedin": "adisimhi/;", "aff_unique_index": "0;0", "aff_unique_norm": "Technion - Israel Institute of Technology", "aff_unique_dep": "Computer Science Department", "aff_unique_url": "https://www.technion.ac.il", "aff_unique_abbr": "Technion", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Israel" }, { "id": "sQ1iTreITk", "title": "Density-Aware Prototypical Network for Few-Shot Relation Classification", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "In recent years, few-shot relation classification has evoked many research interests. Yet a more challenging problem, i.e. none-of-the-above (NOTA), is under-explored. Existing works mainly regard NOTA as an extra class and treat it the same as known relations. However, such a solution ignores the overall instance distribution, where NOTA instances are actually outliers and distributed unnaturally compared with known ones. In this paper, we propose a density-aware prototypical network (D-Proto) to treat various instances distinctly. Specifically, we design unique training objectives to separate known instances and isolate NOTA instances, respectively. This produces an ideal instance distribution, where known instances are dense yet NOTAs have a small density. Moreover, we propose a NOTA detection module to further enlarge the density of known samples, and discriminate NOTA and known samples accurately. 
Experimental results demonstrate that the proposed method outperforms strong baselines with robustness towards various NOTA rates. The code will be made public after the paper is accepted.", "keywords": "Few-shot relation classification;None-of-the-above challenge;Density estimation", "primary_area": "", "supplementary_material": "", "author": "Jianfeng Wu;Mengting Hu;Yike Wu;Bingzhe Wu;Yalan Xie;Mingming Liu;Renhong Cheng", "authorids": "~Jianfeng_Wu1;~Mengting_Hu1;~Yike_Wu2;~Bingzhe_Wu1;~Yalan_Xie1;~Mingming_Liu1;~Renhong_Cheng1", "gender": "M;F;;M;F;F;M", "homepage": "https://github.com/Pisces-29;https://hmt2014.github.io/homepage/;https://yikewu.tech/;;https://www.researchgate.net/profile/Yalan-Xie;;https://cc.nankai.edu.cn/", "dblp": ";;246/5764;207/4843;;;", "google_scholar": ";cYxJCNIAAAAJ;JOPICP0AAAAJ;_3hgtf8AAAAJ;;;", "or_profile": "~Jianfeng_Wu1;~Mengting_Hu1;~Yike_Wu2;~Bingzhe_Wu1;~Yalan_Xie1;~Mingming_Liu1;~Renhong_Cheng1", "aff": "Nankai University;Nankai University;Nankai University;Tencent AI Lab;Nankai University;Nankai University;Nankai University", "aff_domain": "nankai.edu.cn;nankai.edu.cn;nankai.edu.cn;tencent.com;nankai.edu.cn;nankai.edu.cn;nankai.edu.cn", "position": "MS student;Assistant Professor;Lecturer;Researcher;MS student;Lecturer;Full Professor", "bibtex": "@inproceedings{\nwu2023densityaware,\ntitle={Density-Aware Prototypical Network for Few-Shot Relation Classification},\nauthor={Jianfeng Wu and Mengting Hu and Yike Wu and Bingzhe Wu and Yalan Xie and Mingming Liu and Renhong Cheng},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=sQ1iTreITk}\n}", "github": "", "project": "", "reviewers": "sq7x;sQUS;vjzP", "site": "https://openreview.net/forum?id=sQ1iTreITk", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;3", "excitement": "4;3;4", "reproducibility": "4;3;4", "correctness": "4;3;4", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.6666666666666665, "replies_avg": 9, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0003-1536-5400;0000-0001-7384-8836;;;;", "linkedin": ";;;;;;", "aff_unique_index": "0;0;0;1;0;0;0", "aff_unique_norm": "Nankai University;Tencent", "aff_unique_dep": ";Tencent AI Lab", "aff_unique_url": "http://www.nankai.edu.cn;https://ai.tencent.com", "aff_unique_abbr": "NKU;Tencent AI Lab", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "sRHVpB7GE6", "title": "Fast and Accurate Factual Inconsistency Detection Over Long Documents", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Generative AI models exhibit remarkable potential; however, hallucinations across various tasks present a significant challenge, particularly for longer inputs that current approaches struggle to address effectively. We introduce SCALE (Source Chunking Approach for Large-scale inconsistency Evaluation), a task-agnostic model for detecting factual inconsistencies using a novel chunking strategy. Specifically, SCALE is a Natural Language Inference (NLI) based model that uses large text chunks to condition over long texts. This approach achieves state-of-the-art performance in factual inconsistency detection for diverse tasks and long inputs. 
Additionally, we leverage the chunking mechanism and employ a novel algorithm to explain SCALE's decisions through relevant source sentence retrieval. Our evaluations reveal that SCALE outperforms existing methods on both standard benchmarks and a new long-form dialogue dataset ScreenEval we constructed. Moreover, SCALE surpasses competitive systems in efficiency and model explanation evaluations. We have released our code and data publicly to GitHub.", "keywords": "Hallucination detection;Inconsistency detection;hallucination;automatic evaluation;metric;long document;efficient;fast;accurate;long;natural language generation;task agnostic;nlp;nlg", "primary_area": "", "supplementary_material": "", "author": "Barrett Martin Lattimer;Patrick CHen;Xinyuan Zhang;Yi Yang", "authorids": "~Barrett_Martin_Lattimer1;~Patrick_CHen1;~Xinyuan_Zhang2;~Yi_Yang16", "gender": "M;M;M;M", "homepage": ";https://patrick-h-chen.github.io/;;https://yiyangnlp.github.io/", "dblp": ";222/2938.html;22/4397-1;", "google_scholar": "hdbKdGMAAAAJ;;hat9GwIAAAAJ;u1s2HN4AAAAJ", "or_profile": "~Barrett_Martin_Lattimer1;~Patrick_CHen1;~Xinyuan_Zhang2;~Yi_Yang16", "aff": "ASAPP;;ASAPP;ASAPP Inc", "aff_domain": "asapp.com;;asapp.com;asapp.com", "position": "Researcher;;Researcher;Director of NLP", "bibtex": "@inproceedings{\nlattimer2023fast,\ntitle={Fast and Accurate Factual Inconsistency Detection Over Long Documents},\nauthor={Barrett Martin Lattimer and Patrick CHen and Xinyuan Zhang and Yi Yang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=sRHVpB7GE6}\n}", "github": "", "project": "", "reviewers": "xTxN;p7Jm;2Gpy", "site": "https://openreview.net/forum?id=sRHVpB7GE6", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;3;3", "excitement": "2;3;4", "reproducibility": "3;4;4", "correctness": "4;3;3", "rating_avg": 4.0, "confidence_avg": 3.0, "excitement_avg": 3.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;", "linkedin": ";;xinyuan-zhang-duke/;", "aff_unique_index": "0;0;0", "aff_unique_norm": "ASAPP", "aff_unique_dep": "", "aff_unique_url": "https://www.asapp.com", "aff_unique_abbr": "ASAPP", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "sS02W7Sloj", "title": "Diversify Question Generation with Retrieval-Augmented Style Transfer", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Given a textual passage and an answer, humans are able to ask questions with various expressions, but this ability is still challenging for most question generation (QG) systems. Existing solutions mainly focus on the internal knowledge within the given passage or the semantic word space for diverse content planning. These methods, however, have not considered the potential of external knowledge for expression diversity. To bridge this gap, we propose RAST, a framework for Retrieval-Augmented Style Transfer, where the objective is to utilize the style of diverse templates for question generation. For training RAST, we develop a novel Reinforcement Learning (RL) based approach that maximizes a weighted combination of diversity reward and consistency reward. 
Here, the consistency reward is computed by a Question-Answering (QA) model, whereas the diversity reward measures how much the final output mimics the retrieved template. Experimental results show that our method outperforms previous diversity-driven baselines on diversity while being comparable in terms of consistency scores. Our code is available at \\url{https://github.com/gouqi666/RAST}.", "keywords": "Question Generation; Retrieval Augmented Generation;Style Transfer", "primary_area": "", "supplementary_material": "", "author": "Qi Gou;Zehua Xia;Bowen Yu;Haiyang Yu;Fei Huang;Yongbin Li;Nguyen Cam-Tu", "authorids": "~Qi_Gou1;~Zehua_Xia1;~Bowen_Yu3;~Haiyang_Yu3;~Fei_Huang2;~Yongbin_Li2;~Nguyen_Cam-Tu2", "gender": "M;M;M;M;M;M;F", "homepage": "https://gouqi666.github.io/;https://dbm1.github.io/;https://yubowen-ph.github.io/;;https://sites.google.com/view/fei-huang;https://yongbin-li.github.io/;https://ai.nju.edu.cn/main.htm", "dblp": ";;95/10266-2.html;90/6643-3;h/FeiHuang.html;;14/5079.html", "google_scholar": ";;oHoEp34AAAAJ;VhWV-1wAAAAJ;9r98PpoAAAAJ;xF5VrokAAAAJ;https://scholar.google.com/citations?hl=en", "or_profile": "~Qi_Gou1;~Zehua_Xia1;~Bowen_Yu3;~Haiyang_Yu3;~Fei_Huang2;~Yongbin_Li2;~Nguyen_Cam-Tu2", "aff": "Nanjing University;Nanjing University;Alibaba Group;Alibaba Group;Alibaba Group US;Alibaba Group;Nanjing University", "aff_domain": "nju.edu.cn;nju.edu.cn;alibaba-inc.com;alibaba-inc.com;alibaba-inc.com;alibaba-inc.com;nju.edu.cn", "position": "MS student;MS student;Researcher;Researcher;Senior Research Director;Researcher;Associate Professor", "bibtex": "@inproceedings{\ngou2023diversify,\ntitle={Diversify Question Generation with Retrieval-Augmented Style Transfer},\nauthor={Qi Gou and Zehua Xia and Bowen Yu and Haiyang Yu and Fei Huang and Yongbin Li and Nguyen Cam-Tu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=sS02W7Sloj}\n}", "github": "", "project": "", "reviewers": "nwXM;Tuqy;Yvj5", "site": "https://openreview.net/forum?id=sS02W7Sloj", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;2;1", "excitement": "3;3;4", "reproducibility": "3;4;4", "correctness": "4;4;4", "rating_avg": 4.0, "confidence_avg": 2.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 4.0, "replies_avg": 10, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0002-6804-1859;;;;", "linkedin": ";;;;fei-huang-cas-cmu;;", "aff_unique_index": "0;0;1;1;1;1;0", "aff_unique_norm": "Nanjing University;Alibaba Group", "aff_unique_dep": ";", "aff_unique_url": "https://www.nju.edu.cn;https://www.alibaba.com", "aff_unique_abbr": "Nanjing U;Alibaba", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;1;0;0", "aff_country_unique": "China;United States" }, { "id": "sTeoqvTH2j", "title": "HiCL: Hierarchical Contrastive Learning of Unsupervised Sentence Embeddings", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "In this paper, we propose a hierarchical contrastive learning framework, HiCL, which considers local segment-level and global sequence-level relationships to improve training efficiency and effectiveness. \nTraditional methods typically encode a sequence in its entirety for contrast with others, often neglecting local representation learning, leading to challenges in generalizing to shorter texts. 
Conversely, HiCL improves its effectiveness by dividing the sequence into several segments and employing both local and global contrastive learning to model segment-level and sequence-level relationships. \nFurther, considering the quadratic time complexity of transformers over input tokens, HiCL boosts training efficiency by first encoding short segments and then aggregating them to obtain the sequence representation.\nExtensive experiments show that HiCL enhances the prior top-performing SNCSE model across seven extensively evaluated STS tasks, with an average increase of +0.2% observed on $BERT_{large}$ and +0.44% on $RoBERTa_{large}$.", "keywords": "Contrastive Learning;Semantic Textual Similarity;Hierarchical Training", "primary_area": "", "supplementary_material": "", "author": "Zhuofeng Wu;Chaowei Xiao;V.G.Vinod Vydiswaran", "authorids": "~Zhuofeng_Wu1;~Chaowei_Xiao2;~V.G.Vinod_Vydiswaran1", "gender": ";M;M", "homepage": "https://cserxy.github.io/;;https://xiaocw11.github.io/", "dblp": "153/7524-1;67/6469.html;150/3317", "google_scholar": "bqinFgYAAAAJ;;Juoqtj8AAAAJ", "or_profile": "~Zhuofeng_Wu1;~V.G.Vinod_Vydiswaran1;~chaowei_xiao1", "aff": "University of Michigan - Ann Arbor;University of Michigan - Ann Arbor;Arizona State University", "aff_domain": "umich.edu;umich.edu;asu.edu", "position": "PhD student;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\nwu2023hicl,\ntitle={Hi{CL}: Hierarchical Contrastive Learning of Unsupervised Sentence Embeddings},\nauthor={Zhuofeng Wu and Chaowei Xiao and V.G.Vinod Vydiswaran},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=sTeoqvTH2j}\n}", "github": "", "project": "", "reviewers": "pM2M;etFb;2q7u;69hi", "site": "https://openreview.net/forum?id=sTeoqvTH2j", "pdf_size": 0, "rating": "3;3;3;3", "confidence": "3;3;4;2", "excitement": "2;3;3;4", "reproducibility": "3;4;3;4", "correctness": "2;4;3;3", "rating_avg": 3.0, "confidence_avg": 3.0, "excitement_avg": 3.0, "reproducibility_avg": 3.5, "correctness_avg": 3.0, "replies_avg": 14, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0003-3775-2436;0000-0002-3122-1936;0000-0002-7043-4926", "linkedin": "zhuofeng-wu-914193127/;;", "aff_unique_index": "0;0;1", "aff_unique_norm": "University of Michigan;Arizona State University", "aff_unique_dep": ";", "aff_unique_url": "https://www.umich.edu;https://www.asu.edu", "aff_unique_abbr": "UM;ASU", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Ann Arbor;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "sVSeGRCZT8", "title": "Three Stream Based Multi-level Event Contrastive Learning for Text-Video Event Extraction", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Text-video based multimodal event extraction refers to identifying event information from the given text-video pairs. Existing methods predominantly utilize video appearance features (VAF) and text sequence features (TSF) as input information. Some of them employ contrastive learning to align VAF with the event types extracted from TSF. However, they disregard the motion representations in videos and the optimization of contrastive objective could be misguided by the background noise from RGB frames. We observe that the same event triggers correspond to similar motion trajectories, which are hardly affected by the background noise. 
Motivated by this, we propose a Three Stream Multimodal Event Extraction framework (TSEE) that simultaneously utilizes the features of text sequence and video appearance, as well as the motion representations to enhance the event extraction capacity. Firstly, we extract the optical flow features (OFF) as motion representations from videos to incorporate with VAF and TSF. Then we introduce a Multi-level Event Contrastive Learning module to align the embedding space between OFF and event triggers, as well as between event triggers and types. Finally, a Dual Querying Text module is proposed to enhance the interaction between modalities. Experimental results show that TSEE outperforms the state-of-the-art methods, which demonstrates its superiority.", "keywords": "Event extraction; Multimodal", "primary_area": "", "supplementary_material": "", "author": "Jiaqi Li;Chuanyi Zhang;Miaozeng Du;Dehai Min;Yongrui Chen;Guilin Qi", "authorids": "~Jiaqi_Li5;~Chuanyi_Zhang1;~Miaozeng_Du1;~Dehai_Min1;~Yongrui_Chen1;~Guilin_Qi2", "gender": "M;;M;M;M;M", "homepage": "https://scholar.google.com/citations?user=dhHKxpoAAAAJ&hl=en;;https://github.com/DiWHNJ;https://zhishanq.github.io/;;https://cse.seu.edu.cn/_s191/2023/1024/c23024a469541/page.psp", "dblp": ";87/6424;;342/7720;143/0948-2.html;71/5935", "google_scholar": "dhHKxpoAAAAJ;;;https://scholar.google.com/citations?hl=en;8ZjIHyEAAAAJ;", "or_profile": "~Jiaqi_Li5;~Chuanyi_Zhang1;~Miaozeng_Du1;~Dehai_Min1;~Yongrui_Chen1;~Guilin_Qi2", "aff": "Southeast University;Nanjing University of Science and Technology;Huaihua University;Monash University;Southeast University;Southeast University", "aff_domain": "seu.edu.cn;njust.edu.cn;hhu.edu.cn;monash.edu;seu.edu.cn;seu.edu.cn", "position": "PhD student;PhD student;Undergrad student;MS student;PhD student;Full Professor", "bibtex": "@inproceedings{\nli2023three,\ntitle={Three Stream Based Multi-level Event Contrastive Learning for Text-Video Event Extraction},\nauthor={Jiaqi Li and Chuanyi Zhang and Miaozeng Du and Dehai Min and Yongrui Chen and Guilin Qi},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=sVSeGRCZT8}\n}", "github": "", "project": "", "reviewers": "ZJRe;aGn9;necu", "site": "https://openreview.net/forum?id=sVSeGRCZT8", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;4;5", "excitement": "3;3;4", "reproducibility": "4;4;5", "correctness": "4;3;4", "rating_avg": 5.0, "confidence_avg": 4.333333333333333, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.333333333333333, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-4559-9868;0000-0001-8724-5796;;0009-0003-8528-6916;0000-0001-8934-3920;0000-0003-0150-7236", "linkedin": ";;;;;", "aff_unique_index": "0;1;2;3;0;0", "aff_unique_norm": "Southeast University;Nanjing University of Science and Technology;Huaihua University;Monash University", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.seu.edu.cn/;http://www.nust.edu.cn/;http://www.hhu.edu.cn;https://www.monash.edu", "aff_unique_abbr": "SEU;NUST;;Monash", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;0;0", "aff_country_unique": "China;Australia" }, { "id": "sX4yqbYlRm", "title": "Beneath Surface Similarity: Large Language Models Make Reasonable Scientific Analogies after Structure Abduction", "track": "main", "status": "Long Findings", "tldr": 
"", "abstract": "The vital role of analogical reasoning in human cognition allows us to grasp novel concepts by linking them with familiar ones through shared relational structures. Despite the attention previous research has given to word analogies, this work suggests that Large Language Models (LLMs) often overlook the structures that underpin these analogies, raising questions about the efficacy of word analogies as a measure of analogical reasoning skills akin to human cognition. In response to this, our paper introduces a task of analogical structure abduction, grounded in cognitive psychology, designed to abduce structures that form an analogy between two systems. In support of this task, we establish a benchmark called SCAR, containing 400 scientific analogies from 13 distinct fields, tailored for evaluating analogical reasoning with structure abduction. The empirical evidence underlines the continued challenges faced by LLMs, including ChatGPT and GPT-4, in mastering this task, signifying the need for future exploration to enhance their abilities.", "keywords": "Analogical Reasoning;Large Language Models;Resources and Benchmark", "primary_area": "", "supplementary_material": "", "author": "Siyu Yuan;Jiangjie Chen;Xuyang Ge;Yanghua Xiao;Deqing Yang", "authorids": "~Siyu_Yuan2;~Jiangjie_Chen1;~Xuyang_Ge1;~Yanghua_Xiao1;~Deqing_Yang1", "gender": "M;M;;M;F", "homepage": "https://jiangjiechen.github.io;https://dest1n1s.github.io/;;http://kw.fudan.edu.cn/people/yangdeqing/;https://siyuyuan.github.io/", "dblp": "236/6076;253/2381;96/999;01/2462.html;237/8189-01", "google_scholar": "https://scholar.google.com.hk/citations?user=XarNs8oAAAAJ;Eue1seYAAAAJ;https://scholar.google.com/citations?hl=zh-CN;uZdQxkwAAAAJ;6JMfD44AAAAJ", "or_profile": "~Jiangjie_Chen1;~Xuyang_Ge1;~Yanghua_Xiao1;~Deqing_Yang1;~siyu_Yuan1", "aff": "ByteDance;Fudan University;Fudan University;Fudan University;Fudan University", "aff_domain": "bytedance.com;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn", "position": "Intern;Undergrad student;Full Professor;Associate Professor;PhD student", "bibtex": "@inproceedings{\nyuan2023beneath,\ntitle={Beneath Surface Similarity: Large Language Models Make Reasonable Scientific Analogies after Structure Abduction},\nauthor={Siyu Yuan and Jiangjie Chen and Xuyang Ge and Yanghua Xiao and Deqing Yang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=sX4yqbYlRm}\n}", "github": "", "project": "", "reviewers": "1Lsr;SVGF;6uTV", "site": "https://openreview.net/forum?id=sX4yqbYlRm", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;3", "excitement": "4;4;2", "reproducibility": "4;4;4", "correctness": "4;4;2", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.0, "correctness_avg": 3.3333333333333335, "replies_avg": 9, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0001-8403-9591;0000-0002-1390-3861;0000-0001-8161-6429", "linkedin": ";;;;", "aff_unique_index": "0;1;1;1;1", "aff_unique_norm": "ByteDance;Fudan University", "aff_unique_dep": ";", "aff_unique_url": "https://www.bytedance.com;https://www.fudan.edu.cn", "aff_unique_abbr": "ByteDance;Fudan", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "sXErPfdA7Q", "title": "Document-Level Machine Translation with Large Language Models", 
"track": "main", "status": "Long Main", "tldr": "", "abstract": "Large language models (LLMs) such as ChatGPT can produce coherent, cohesive, relevant, and fluent answers for various natural language processing (NLP) tasks. Taking document-level machine translation (MT) as a testbed, this paper provides an in-depth evaluation of LLMs' ability on discourse modeling. The study focuses on three aspects: 1) Effects of Context-Aware Prompts, where we investigate the impact of different prompts on document-level translation quality and discourse phenomena; 2) Comparison of Translation Models, where we compare the translation performance of ChatGPT with commercial MT systems and advanced document-level MT methods; 3) Analysis of Discourse Modelling Abilities, where we further probe discourse knowledge encoded in LLMs and shed light on impacts of training techniques on discourse modeling. By evaluating on a number of benchmarks, we surprisingly find that LLMs have demonstrated superior performance and show potential to become a new paradigm for document-level translation: 1) leveraging their powerful long-text modeling capabilities, GPT-3.5 and GPT-4 outperform commercial MT systems in terms of human evaluation; 2) GPT-4 demonstrates a stronger ability for probing linguistic knowledge than GPT-3.5. This work highlights the challenges and opportunities of LLMs for MT, which we hope can inspire the future design and evaluation of LLMs (We release our data and annotations at https://github.com/longyuewangdcu/Document-MT-LLM).", "keywords": "Large Language Models;Document-Level Machine Translation;Evaluation and Explaination", "primary_area": "", "supplementary_material": "", "author": "Longyue Wang;Chenyang Lyu;Tianbo Ji;Zhirui Zhang;Dian Yu;Shuming Shi;Zhaopeng Tu", "authorids": "~Longyue_Wang3;~Chenyang_Lyu1;~Tianbo_Ji1;~Zhirui_Zhang1;~Dian_Yu3;~Shuming_Shi1;~Zhaopeng_Tu1", "gender": "M;M;M;M;F;M;M", "homepage": "http://longyuewang.com/;https://lyuchenyang.github.io;https://tianboji.github.io/;;https://sites.google.com/site/yudiandoris/;;http://www.zptu.net", "dblp": "127/3421;248/1663;227/8987;202/1838;136/8648-1.html;s/ShumingShi;71/9281", "google_scholar": "r1ctChkAAAAJ;;mLc1OxUAAAAJ;C8Ylo7sAAAAJ;ERdzqyYAAAAJ;Lg31AKMAAAAJ;IvE2zRgAAAAJ", "or_profile": "~Longyue_Wang3;~Chenyang_Lyu1;~Tianbo_Ji1;~Zhirui_Zhang1;~Dian_Yu3;~Shuming_Shi1;~Zhaopeng_Tu1", "aff": "Tencent AI Lab;Dublin City University;Nantong University;Tencent AI Lab;Tencent AI Lab;Tencent AI Lab;Tencent AI Lab", "aff_domain": "tencent.com;dcu.ie;ntu.edu.cn;tencent.com;tencent.com;tencent.com;tencent.com", "position": "Senior Researcher;PhD student;Lecturer;Senior Researcher;NLP researcher;Principal Researcher;Principal Researcher", "bibtex": "@inproceedings{\nwang2023documentlevel,\ntitle={Document-Level Machine Translation with Large Language Models},\nauthor={Longyue Wang and Chenyang Lyu and Tianbo Ji and Zhirui Zhang and Dian Yu and Shuming Shi and Zhaopeng Tu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=sXErPfdA7Q}\n}", "github": "", "project": "", "reviewers": "Dtpc;AmZG;GjYL", "site": "https://openreview.net/forum?id=sXErPfdA7Q", "pdf_size": 0, "rating": "5;5;5", "confidence": "5;4;3", "excitement": "4;4;4", "reproducibility": "3;4;3", "correctness": "4;4;4", "rating_avg": 5.0, "confidence_avg": 4.0, "excitement_avg": 4.0, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 4.0, "replies_avg": 10, "authors#_avg": 7, 
"corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-9062-6183;;0000-0003-0143-6220;;;;", "linkedin": "vincentwang0229/;;;;;;tuzhaopeng", "aff_unique_index": "0;1;2;0;0;0;0", "aff_unique_norm": "Tencent;Dublin City University;Nantong University", "aff_unique_dep": "Tencent AI Lab;;", "aff_unique_url": "https://ai.tencent.com;https://www.dcu.ie;https://www.ntu.edu.cn/", "aff_unique_abbr": "Tencent AI Lab;DCU;NTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;0;0;0", "aff_country_unique": "China;Ireland" }, { "id": "sYYRTVaG3n", "title": "Meta-Learning of Prompt Generation for Lightweight Prompt Engineering on Language-Model-as-a-Service", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Recently, many companies have been providing the capabilities of large language models as services.\nThese Language-Model-as-a-Service (LMaaS) offerings support a variety of user tasks through in-context learning from prompts, which include instructions and demonstrations of the task.\nHowever, for users, manually crafting prompts or running automatic prompt tuning methods themselves can be demanding.\nDespite these challenges, LMaaS providers do not offer automatic prompt engineering methods as part of their services.\nOne of the major obstacles to deploying them on an LMaaS is the heavy computational costs associated with automatic prompt engineering methods.\nThese methods are typically designed to iterate through tens of thousands of examples, which impose unaffordable overheads for LMaaS providers.\nIn this paper, we introduce MetaL-Prompt, a novel lightweight automatic prompt generation method for LMaaS. MetaL-Prompt meta-trains a prompt generation model (PGM) to enable robust learning by the language model from the contexts created by the generated prompts (i.e., in-context learning). Thanks to our meta-learning approach, a PGM can generate prompts for unseen tasks without requiring additional training for those specific tasks.\nFurthermore, the PGM can generate prompts with a single forward pass, significantly reducing computational costs compared to previous methods. 
We evaluate MetaL-Prompt on a range of unseen tasks and find that it improves performance by up to 19.4\\% in terms of mean F1 score on QA datasets compared to the state-of-the-art baseline P-tuning, with limited computational cost.", "keywords": "prompt tuning;prompt engineering;in-context learning;language model", "primary_area": "", "supplementary_material": "", "author": "Hyeonmin Ha;Jihye Lee;Wookje Han;Byung-Gon Chun", "authorids": "~Hyeonmin_Ha1;~Jihye_Lee2;~Wookje_Han1;~Byung-Gon_Chun1", "gender": "M;M;;F", "homepage": ";https://wookjehan.github.io/;;", "dblp": "198/6798.html;319/4219;34/3515;", "google_scholar": "IG6eO5cAAAAJ;NNS8zUsAAAAJ;;", "or_profile": "~Hyeonmin_Ha1;~Wookje_Han1;~Byung-Gon_Chun1;~JIHYE_LEE1", "aff": "Seoul National University;Seoul National University;FriendliAI;Seoul National University", "aff_domain": "snu.ac.kr;snu.ac.kr;friendli.ai;snu.ac.kr", "position": "PhD student;Undergrad student;Chief Executive Officer;MS student", "bibtex": "@inproceedings{\nha2023metalearning,\ntitle={Meta-Learning of Prompt Generation for Lightweight Prompt Engineering on Language-Model-as-a-Service},\nauthor={Hyeonmin Ha and Jihye Lee and Wookje Han and Byung-Gon Chun},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=sYYRTVaG3n}\n}", "github": "", "project": "", "reviewers": "fHF6;Y1iN;SZaX", "site": "https://openreview.net/forum?id=sYYRTVaG3n", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;3;4", "excitement": "3;4;3", "reproducibility": "3;4;2", "correctness": "4;4;2", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;", "linkedin": ";wookje-han-2052691a6;;jihye-lee-68843027b/", "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Seoul National University;FriendliAI", "aff_unique_dep": ";", "aff_unique_url": "https://www.snu.ac.kr;https://www.friendliai.com", "aff_unique_abbr": "SNU;FriendliAI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "South Korea;United States" }, { "id": "sZEAMUizsd", "title": "Outlier Suppression+: Accurate quantization of large language models by equivalent and effective shifting and scaling", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Post-training quantization (PTQ) of transformer language models faces significant challenges due to the existence of detrimental outliers in activations. We observe that these outliers are concentrated in specific channels and are asymmetric across channels. To address this issue, we propose the Outlier Suppression+ (OS+) framework, which contains the channel-wise shifting for asymmetry and channel-wise scaling for concentration. We show that these operations can be seamlessly migrated into subsequent modules while maintaining equivalence. Second, we propose a fast and stable scheme to calculate effective shifting and scaling values. The channel-wise shifting aligns the center of each channel for removal of outlier asymmetry. The channel-wise scaling quantitatively evaluates changes brought by migration and quantization for better quantization burden balance. We validate our OS+ under both standard and fine-grained quantization settings with models including BERT, OPT, BLOOM, BLOOMZ, and LLaMA. 
Comprehensive results across various tasks demonstrate the superiority of our approach. Especially, with standard quantization, OS+ can achieve near-floating-point performance on both small models and large language models on 8-bit and 6-bit. Besides, we establish a new state-of-the-art for 4-bit BERT with 15.5\\% improvement. Our code is available at \\url{https://github.com/ModelTC/Outlier_Suppression_Plus}.", "keywords": "quantization;large language models;outlier", "primary_area": "", "supplementary_material": "", "author": "Xiuying Wei;Yunchen Zhang;Yuhang Li;Xiangguo Zhang;Ruihao Gong;Jinyang Guo;Xianglong Liu", "authorids": "~Xiuying_Wei1;~Yunchen_Zhang2;~Yuhang_Li1;~Xiangguo_Zhang1;~Ruihao_Gong1;~Jinyang_Guo1;~Xianglong_Liu3", "gender": "F;M;M;M;M;M;", "homepage": "https://wimh966.github.io/;;;;https://xhplus.github.io;https://jinyangguo.github.io/;", "dblp": "315/9021;;;;247/1172;;", "google_scholar": ";;3UzXL-AAAAAJ;Rto6qmsAAAAJ;8i7Z15kAAAAJ;uJGeT1AAAAAJ;", "or_profile": "~Xiuying_Wei1;~Yunchen_Zhang2;~Yuhang_Li1;~Xiangguo_Zhang1;~Ruihao_Gong1;~Jinyang_Guo1;~Xianglong_Liu3", "aff": "Beihang University;;Yale University;;SenseTime;Beihang University;", "aff_domain": "buaa.edu.cn;;yale.edu;;sensetime.com;buaa.edu.cn;", "position": "MS student;;PhD student;;Principal Researcher;Assistant Professor;", "bibtex": "@inproceedings{\nwei2023outlier,\ntitle={Outlier Suppression+: Accurate quantization of large language models by equivalent and effective shifting and scaling},\nauthor={Xiuying Wei and Yunchen Zhang and Yuhang Li and Xiangguo Zhang and Ruihao Gong and Jinyang Guo and Xianglong Liu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=sZEAMUizsd}\n}", "github": "", "project": "", "reviewers": "NJj5;rLws;vshF;uJMf;AfNB", "site": "https://openreview.net/forum?id=sZEAMUizsd", "pdf_size": 0, "rating": "5;5;5;5;5", "confidence": "3;4;3;4;4", "excitement": "3;3;4;4;3", "reproducibility": "3;4;3;4;4", "correctness": "2;4;4;4;3", "rating_avg": 5.0, "confidence_avg": 3.6, "excitement_avg": 3.4, "reproducibility_avg": 3.6, "correctness_avg": 3.4, "replies_avg": 16, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;0000-0002-6024-7086;;", "linkedin": "%E7%A7%80%E9%A2%96-%E9%AD%8F-6b1277221/;;;;;;", "aff_unique_index": "0;1;2;0", "aff_unique_norm": "Beihang University;Yale University;SenseTime", "aff_unique_dep": ";;", "aff_unique_url": "http://www.buaa.edu.cn/;https://www.yale.edu;https://www.sensetime.com", "aff_unique_abbr": "BUAA;Yale;SenseTime", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "China;United States" }, { "id": "sZGAxcUcNU", "title": "Memory-Based Invariance Learning for Out-of-Domain Text Classification", "track": "main", "status": "Long Main", "tldr": "", "abstract": "We investigate the task of out-of-domain (OOD) text classification with the aim of extending a classification model, trained on multiple source domains, to an unseen target domain. Recent studies have shown that learning invariant representations can enhance the performance of OOD generalization. However, the inherent disparity in data distribution across different domains poses challenges for achieving effective invariance learning. This study addresses this issue by employing memory augmentations. 
Specifically, we augment the original feature space using key-value memory and employ a meta-learning-based approach to enhance the quality of the invariant representations. Experimental results on sentiment analysis and natural language inference tasks show the effectiveness of memory-based method for invariance learning, leading to state-of-the-art performance on six datasets.", "keywords": "Domain generalization; key-value memory; invariance learning; transfer learning", "primary_area": "", "supplementary_material": "", "author": "Chen Jia;Yue Zhang", "authorids": "~Chen_Jia1;~Yue_Zhang7", "gender": ";M", "homepage": ";http://frcchang.github.io", "dblp": ";47/722-4", "google_scholar": ";", "or_profile": "~Chen_Jia1;~Yue_Zhang7", "aff": ";Westlake University", "aff_domain": ";westlake.edu.cn", "position": ";Full Professor", "bibtex": "@inproceedings{\njia2023memorybased,\ntitle={Memory-Based Invariance Learning for Out-of-Domain Text Classification},\nauthor={Chen Jia and Yue Zhang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=sZGAxcUcNU}\n}", "github": "", "project": "", "reviewers": "545j;VVu8;goM8", "site": "https://openreview.net/forum?id=sZGAxcUcNU", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;3;2", "excitement": "4;3;3", "reproducibility": "3;4;3", "correctness": "3;3;4", "rating_avg": 4.0, "confidence_avg": 2.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.3333333333333335, "replies_avg": 10, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-5214-2268", "linkedin": ";", "aff_unique_index": "0", "aff_unique_norm": "Westlake University", "aff_unique_dep": "", "aff_unique_url": "https://www.westlake.edu.cn", "aff_unique_abbr": "WU", "aff_country_unique_index": "0", "aff_country_unique": "China" }, { "id": "sbLFUT4DaG", "title": "Increasing Coverage and Precision of Textual Information in Multilingual Knowledge Graphs", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Recent work in Natural Language Processing and Computer Vision has been using textual information \u2013 e.g., entity names and descriptions \u2013 available in knowledge graphs to ground neural models to high-quality structured data. However, when it comes to non-English languages, the quantity and quality of textual information are comparatively scarce. To address this issue, we introduce the novel task of automatic Knowledge Graph Completion (KGE) and perform a thorough investigation on bridging the gap in both the quantity and quality of textual information between English and non-English languages. More specifically, we: i) bring to light the problem of increasing multilingual coverage and precision of entity names and descriptions in Wikidata; ii) demonstrate that state-of-the-art methods, namely, Machine Translation (MT), Web Search (WS), and Large Language Models (LLMs), struggle with this task; iii) present M-NTA, a novel unsupervised approach that combines MT, WS, and LLMs to generate high-quality textual information; and, iv) study the impact of increasing multilingual coverage and precision of non-English textual information in Entity Linking, Knowledge Graph Completion, and Question Answering. 
As part of our effort towards better multilingual knowledge graphs, we also introduce WikiKGE-10, the first human-curated benchmark to evaluate KGE approaches in 10 languages across 7 language families.", "keywords": "knowledge graph;multilingual;entity linking;knowledge graph completion", "primary_area": "", "supplementary_material": "", "author": "Simone Conia;Min Li;Daniel Lee;Umar Farooq Minhas;Ihab Ilyas;Yunyao Li", "authorids": "~Simone_Conia1;~Min_Li17;~Daniel_Lee3;~Umar_Farooq_Minhas1;~Ihab_Ilyas1;~Yunyao_Li2", "gender": "M;F;M;;;M", "homepage": "https://c-simone.github.io;https://sites.google.com/site/liminresearch/;;;;https://cs.uwaterloo.ca/~ilyas/", "dblp": "254/8205;;;;60/2319;i/IhabFIlyas", "google_scholar": "S1tqbTcAAAAJ;rABgwqkAAAAJ;AsxRqm0AAAAJ;NKYtfsYAAAAJ;;https://scholar.google.com.tw/citations?user=YG6mTEIAAAAJ", "or_profile": "~Simone_Conia1;~Min_Li17;~Daniel_Lee3;~Umar_Farooq_Minhas1;~Yunyao_Li2;~Ihab_F_Ilyas1", "aff": "Sapienza University of Rome;Apple;Apple;;Apple;University of Waterloo", "aff_domain": "uniroma1.it;apple.com;apple.com;;apple.com;uwaterloo.ca", "position": "PhD student;Researcher;Intern;;Head of Machine Learning;Full Professor", "bibtex": "@inproceedings{\nconia2023increasing,\ntitle={Increasing Coverage and Precision of Textual Information in Multilingual Knowledge Graphs},\nauthor={Simone Conia and Min Li and Daniel Lee and Umar Farooq Minhas and Ihab Ilyas and Yunyao Li},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=sbLFUT4DaG}\n}", "github": "", "project": "", "reviewers": "N39A;JXeC;sJ2r", "site": "https://openreview.net/forum?id=sbLFUT4DaG", "pdf_size": 0, "rating": "5;5;5", "confidence": "3;4;4", "excitement": "3;4;4", "reproducibility": "4;4;4", "correctness": "3;4;5", "rating_avg": 5.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.0, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;", "linkedin": "simone-conia;;danieljslee;;;", "aff_unique_index": "0;1;1;1;2", "aff_unique_norm": "Sapienza University of Rome;Apple;University of Waterloo", "aff_unique_dep": ";Apple Inc.;", "aff_unique_url": "https://www.uniroma1.it;https://www.apple.com;https://uwaterloo.ca", "aff_unique_abbr": "Sapienza;Apple;UW", "aff_campus_unique_index": "0", "aff_campus_unique": "Rome;", "aff_country_unique_index": "0;1;1;1;2", "aff_country_unique": "Italy;United States;Canada" }, { "id": "sbuO0s1r71", "title": "Evaluating Cross-Domain Text-to-SQL Models and Benchmarks", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Text-to-SQL benchmarks play a crucial role in evaluating the progress made in the field and the ranking of different models. However, accurately matching a model-generated SQL query to a reference SQL query in a benchmark fails for various reasons, such as underspecified natural language queries, inherent assumptions in both model-generated and reference queries, and the non-deterministic nature of SQL output under certain conditions. In this paper, we conduct an extensive study of several prominent cross-domain text-to-SQL benchmarks and re-evaluate some of the top-performing models within these benchmarks, by both manually evaluating the SQL queries and rewriting them in equivalent expressions. 
Our evaluation reveals that attaining a perfect performance on these benchmarks is unfeasible due to the multiple interpretations that can be derived from the provided samples. Furthermore, we find that the true performance of the models is underestimated and their relative performance changes after a re-evaluation. Most notably, our evaluation reveals a surprising discovery: a recent GPT4-based model surpasses the gold standard reference queries in the Spider benchmark in our human evaluation. This finding highlights the importance of interpreting benchmark evaluations cautiously, while also acknowledging the critical role of additional independent evaluations in driving advancements in the field.", "keywords": "Text-to-SQL;Natural language interfaces to databases;Benchmarks;Evaluation", "primary_area": "", "supplementary_material": "", "author": "Mohammadreza Pourreza;Davood Rafiei", "authorids": "~Mohammadreza_Pourreza1;~Davood_Rafiei2", "gender": "M;M", "homepage": ";https://webdocs.cs.ualberta.ca/~drafiei/", "dblp": "338/7789;r/DRafiei", "google_scholar": "https://scholar.google.ca/citations?user=_rOg88EAAAAJ;https://scholar.google.com.tw/citations?user=lNxSDIwAAAAJ", "or_profile": "~Mohammadreza_Pourreza1;~Davood_Rafiei2", "aff": "University of Alberta;University of Alberta", "aff_domain": "ualberta.ca;ualberta.ca", "position": "MS student;Full Professor", "bibtex": "@inproceedings{\npourreza2023evaluating,\ntitle={Evaluating Cross-Domain Text-to-{SQL} Models and Benchmarks},\nauthor={Mohammadreza Pourreza and Davood Rafiei},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=sbuO0s1r71}\n}", "github": "", "project": "", "reviewers": "5LcL;FJpx;ReVM", "site": "https://openreview.net/forum?id=sbuO0s1r71", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;5;4", "excitement": "3;4;3", "reproducibility": "0;3;2", "correctness": "4;4;4", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 1.6666666666666667, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";", "linkedin": ";", "aff_unique_index": "0;0", "aff_unique_norm": "University of Alberta", "aff_unique_dep": "", "aff_unique_url": "https://www.ualberta.ca", "aff_unique_abbr": "UAlberta", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Canada" }, { "id": "scAXKWMJR3", "title": "Automated Few-Shot Classification with Instruction-Finetuned Language Models", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "A particularly successful class of approaches for few-shot learning combines language models with prompts - hand-crafted task descriptions that complement data samples. However, designing prompts by hand for each task commonly requires domain knowledge and substantial guesswork. We observe, in the context of classification tasks, that instruction finetuned language models are remarkably robust towards some dimensions of a prompt's design. We subsequently propose a simple method to eliminate the need for handcrafted prompts, named AuT-Few.\nThis approach consists of (i) a prompt retrieval module that selects suitable task instructions from the instruction-tuning knowledge base, and (ii) the generation of two distinct, semantically meaningful, class descriptions and a selection mechanism via cross-validation. 
Over 12 datasets, spanning 8 classification tasks, we show that AuT-Few outperforms current state-of-the-art few-shot learning methods. Moreover, AuT-Few is the best ranking method across datasets on the RAFT few-shot benchmark. Notably, these results are achieved without task-specific handcrafted prompts on unseen tasks.", "keywords": "few-shot classification;prompt automation;large language models", "primary_area": "", "supplementary_material": "", "author": "Rami Aly;Xingjian Shi;Kaixiang Lin;Aston Zhang;Andrew Gordon Wilson", "authorids": "~Rami_Aly1;~Xingjian_Shi1;~Kaixiang_Lin1;~Aston_Zhang2;~Andrew_Gordon_Wilson1", "gender": "M;M;;;Not Specified", "homepage": ";https://sxjscience.github.io/;http://kaixianglin.github.io;;https://cims.nyu.edu/~andrewgw", "dblp": "242/8351.html;145/9987;;;65/10453", "google_scholar": "dbzGY5YAAAAJ;https://scholar.google.com.hk/citations?user=P4G6H7oAAAAJ;egq785sAAAAJ;;https://scholar.google.com.tw/citations?user=twWX2LIAAAAJ", "or_profile": "~Rami_Aly1;~Xingjian_Shi1;~Kaixiang_Lin1;~Aston_Zhang2;~Andrew_Gordon_Wilson1", "aff": "Amazon (AWS);Amazon Web Services;Amazon;;New York University", "aff_domain": "amazon.com;amazon.com;amazon.com;;nyu.edu", "position": "Intern;Applied Scientist;Applied Scientist;;Associate Professor", "bibtex": "@inproceedings{\naly2023automated,\ntitle={Automated Few-Shot Classification with Instruction-Finetuned Language Models},\nauthor={Rami Aly and Xingjian Shi and Kaixiang Lin and Aston Zhang and Andrew Gordon Wilson},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=scAXKWMJR3}\n}", "github": "", "project": "", "reviewers": "YSqb;rdNw;YfFv", "site": "https://openreview.net/forum?id=scAXKWMJR3", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;3;4", "excitement": "3;3;3", "reproducibility": "4;5;4", "correctness": "4;3;3", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.0, "reproducibility_avg": 4.333333333333333, "correctness_avg": 3.3333333333333335, "replies_avg": 10, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;", "linkedin": ";;;;", "aff_unique_index": "0;0;0;1", "aff_unique_norm": "Amazon;New York University", "aff_unique_dep": "Amazon Web Services;", "aff_unique_url": "https://aws.amazon.com;https://www.nyu.edu", "aff_unique_abbr": "AWS;NYU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "sdC55K8cP0", "title": "WikiChat: Stopping the Hallucination of Large Language Model Chatbots by Few-Shot Grounding on Wikipedia", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "This paper presents the first few-shot LLM-based chatbot that almost never hallucinates and has high conversationality and low latency. WikiChat is grounded on the English Wikipedia, the largest curated free-text corpus.\n\nWikiChat generates a response from an LLM, retains only the grounded facts, and combines them with additional information it retrieves\nfrom the corpus to form factual and engaging responses. We distill WikiChat based on GPT-4 into a 7B-parameter LLaMA model with minimal loss of quality, to significantly improve its latency, cost and privacy, and facilitate research and deployment.\n\nUsing a novel hybrid human-and-LLM evaluation methodology, we show that our best system achieves 97.3% factual accuracy in simulated\nconversations. 
It significantly outperforms all retrieval-based and LLM-based baselines, and by 3.9%, 38.6% and 51.0% on head, tail and recent knowledge compared to GPT-4. Compared to previous state-of-the-art retrieval-based chatbots, WikiChat is also significantly more informative and engaging, just like an LLM.\n\nWikiChat achieves 97.9% factual accuracy in conversations with human users about recent topics, 55.0% better than GPT-4, while receiving significantly higher user ratings and more favorable comments.", "keywords": "large language models;hallucination;knowledge-grounded dialogue", "primary_area": "", "supplementary_material": "", "author": "Sina Semnani;Violet Yao;Heidi Chenyu Zhang;Monica Lam", "authorids": "~Sina_Semnani1;~Violet_Yao1;~Heidi_Chenyu_Zhang1;~Monica_Lam1", "gender": "M;;F;F", "homepage": "https://s-jse.com;;https://cs.stanford.edu/~lam/;", "dblp": "274/1427;348/4538;l/MonicaSLam;", "google_scholar": "ECn_7SYAAAAJ;;4hS0jZ8AAAAJ;", "or_profile": "~Sina_Semnani1;~Violet_Yao1;~Monica_Lam1;~Chenyu_Zhang1", "aff": "Stanford University;Stanford University;Stanford University;Stanford University", "aff_domain": "stanford.edu;stanford.edu;stanford.edu;stanford.edu", "position": "PhD student;MS student;Full Professor;MS student", "bibtex": "@inproceedings{\nsemnani2023wikichat,\ntitle={WikiChat: Stopping the Hallucination of Large Language Model Chatbots by Few-Shot Grounding on Wikipedia},\nauthor={Sina Semnani and Violet Yao and Heidi Chenyu Zhang and Monica Lam},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=sdC55K8cP0}\n}", "github": "", "project": "", "reviewers": "zMxx;BJ2C;ApEL", "site": "https://openreview.net/forum?id=sdC55K8cP0", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;4", "excitement": "3;3;4", "reproducibility": "2;3;3", "correctness": "2;3;3", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 2.6666666666666665, "correctness_avg": 2.6666666666666665, "replies_avg": 12, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-1472-5788;;;", "linkedin": "sina-semnani;violetyao/;lammonica/;chenyu-heidi-zhang-jhu/", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Stanford University", "aff_unique_dep": "", "aff_unique_url": "https://www.stanford.edu", "aff_unique_abbr": "Stanford", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Stanford", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "se0YmUUfPs", "title": "Manipulating the Perceived Personality Traits of Language Models", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Psychology research has long explored aspects of human personality like extroversion, agreeableness and emotional stability, three of the personality traits that make up the 'Big Five'. Categorizations like the 'Big Five' are commonly used to assess and diagnose personality types. In this work, we explore whether text generated from large language models exhibits consistency in its perceived 'Big Five' personality traits. 
For example, is a language model such as GPT2 likely to respond in a consistent way if asked to go out to a party?\n\nWe also show that when exposed to different types of contexts (such as personality descriptions, or answers to diagnostic questions about personality traits), language models such as BERT and GPT2 consistently identify and mirror personality markers in those contexts. This behavior illustrates an ability to be manipulated in a predictable way (with correlations up to 0.84 between intended and realized changes in personality traits), and frames them as tools for controlling personas in applications such as dialog systems. We contribute two datasets of personality descriptions of human subjects.", "keywords": "nlp", "primary_area": "", "supplementary_material": "", "author": "Graham McDougal Caron;Shashank Srivastava", "authorids": "~Graham_McDougal_Caron1;~Shashank_Srivastava1", "gender": "M;M", "homepage": ";https://www.ssriva.com/", "dblp": ";", "google_scholar": ";-vKI5s0AAAAJ", "or_profile": "~Graham_McDougal_Caron1;~Shashank_Srivastava1", "aff": ";University of North Carolina at Chapel Hill", "aff_domain": ";unc.edu", "position": ";Assistant Professor", "bibtex": "@inproceedings{\ncaron2023manipulating,\ntitle={Manipulating the Perceived Personality Traits of Language Models},\nauthor={Graham McDougal Caron and Shashank Srivastava},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=se0YmUUfPs}\n}", "github": "", "project": "", "reviewers": "gkMG;RXZa;9sqR;UeN6", "site": "https://openreview.net/forum?id=se0YmUUfPs", "pdf_size": 0, "rating": "3;3;3;3", "confidence": "4;4;3;4", "excitement": "3;4;3;3", "reproducibility": "3;3;3;2", "correctness": "2;5;4;2", "rating_avg": 3.0, "confidence_avg": 3.75, "excitement_avg": 3.25, "reproducibility_avg": 2.75, "correctness_avg": 3.25, "replies_avg": 14, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";", "linkedin": "gcaron00/;", "aff_unique_index": "0", "aff_unique_norm": "University of North Carolina", "aff_unique_dep": "", "aff_unique_url": "https://www.unc.edu", "aff_unique_abbr": "UNC", "aff_campus_unique_index": "0", "aff_campus_unique": "Chapel Hill", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "id": "sfkpJxeDzk", "title": "The Framework Tax: Disparities Between Inference Efficiency in NLP Research and Deployment", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Increased focus on the computational efficiency of systems in natural language processing has motivated the design of efficient model architectures and improvements to underlying hardware accelerators. However, the resulting increases in computational throughput and reductions in floating point operations have not directly translated to improvements in wall-clock inference latency. We demonstrate that these discrepancies can be largely attributed to bottlenecks introduced by deep learning frameworks. We denote this phenomenon as the framework tax, and observe that the disparity is growing as hardware speed increases over time. In this work, we examine this phenomenon through a series of case studies analyzing the effects of model design decisions, framework paradigms, and hardware platforms on total model latency. 
Based on our findings, we provide actionable recommendations to researchers and practitioners aimed at narrowing the gap between efficient NLP model research and practice.", "keywords": "efficiency;latency;inference", "primary_area": "", "supplementary_material": "", "author": "Jared Fernandez;Jacob Kahn;Clara Na;Yonatan Bisk;Emma Strubell", "authorids": "~Jared_Fernandez1;~Jacob_Kahn1;~Clara_Na1;~Yonatan_Bisk1;~Emma_Strubell1", "gender": "M;M;;M;Non-Binary", "homepage": "https://jaredfern.com;https://jacobkahn.me/;;http://www.YonatanBisk.com;http://strubell.github.io", "dblp": "205/9049;232/2341;;38/9282;153/2253", "google_scholar": "CQHpFLIAAAAJ;_-pugt8AAAAJ;;bWoGh8UAAAAJ;UCDMtM0AAAAJ", "or_profile": "~Jared_Fernandez1;~Jacob_Kahn1;~Clara_Na1;~Yonatan_Bisk1;~Emma_Strubell1", "aff": "Carnegie Mellon University;Meta AI;;Meta;Allen Institute for Artificial Intelligence", "aff_domain": "cmu.edu;meta.com;;meta.com;allenai.org", "position": "PhD student;Research Engineer;;Visiting Professor;Visiting Researcher", "bibtex": "@inproceedings{\nfernandez2023the,\ntitle={The Framework Tax: Disparities Between Inference Efficiency in {NLP} Research and Deployment},\nauthor={Jared Fernandez and Jacob Kahn and Clara Na and Yonatan Bisk and Emma Strubell},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=sfkpJxeDzk}\n}", "github": "", "project": "", "reviewers": "GHwB;fRoK;aBkf", "site": "https://openreview.net/forum?id=sfkpJxeDzk", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;4;3", "excitement": "4;4;4", "reproducibility": "4;4;3", "correctness": "4;4;4", "rating_avg": 5.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 4.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0003-2911-2500;;0000-0002-2111-9081;", "linkedin": ";jacobdavidkahn/;;yonatanbisk/;", "aff_unique_index": "0;1;1;2", "aff_unique_norm": "Carnegie Mellon University;Meta;Allen Institute for Artificial Intelligence", "aff_unique_dep": ";Meta AI;", "aff_unique_url": "https://www.cmu.edu;https://meta.com;https://allenai.org", "aff_unique_abbr": "CMU;Meta;AI2", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "siiVduxdRz", "title": "Condensing Multilingual Knowledge with Lightweight Language-Specific Modules", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Incorporating language-specific (LS) modules or Mixture-of-Experts (MoE) is a proven way to boost multilingual model performance, but scaling these approaches to hundreds of languages or experts is hard to manage. We present Language-specific Matrix Synthesis (LMS), a novel method that addresses the issue. LMS utilizes parameter-efficient and lightweight modules, reducing the number of parameters while outperforming existing methods, e.g., +1.73 BLEU over Switch Transformer on OPUS-100 multilingual translation. Additionally, we introduce Fuse Distillation (FD) to condense multilingual knowledge from multiple LS modules into a single shared module, improving model inference and storage efficiency. 
Our approach demonstrates superior scalability and performance compared to state-of-the-art methods.", "keywords": "Multilingual Machine Translation;Lightweight;Language interference;Distillation", "primary_area": "", "supplementary_material": "", "author": "Haoran Xu;Weiting Tan;Shuyue Stella Li;Yunmo Chen;Benjamin Van Durme;Philipp Koehn;Kenton Murray", "authorids": "~Haoran_Xu3;~Weiting_Tan1;~Shuyue_Stella_Li1;~Yunmo_Chen1;~Benjamin_Van_Durme2;~Philipp_Koehn2;~Kenton_Murray1", "gender": "M;M;F;M;;M;", "homepage": "https://www.fe1ixxu.com/;https://steventan0110.github.io/;http://stellalisy.com/;https://omnuy.me;;http://www.cs.jhu.edu/~phi/;http://www.kentonmurray.com", "dblp": ";208/0745;312/6501;252/7831;;84/4538.html;143/9465", "google_scholar": "rhcrGQ0AAAAJ;hD8E4gYAAAAJ;CRfOlOEAAAAJ;V-g2Tx8AAAAJ;;OsIZgIYAAAAJ;", "or_profile": "~Haoran_Xu3;~Weiting_Tan1;~Shuyue_Stella_Li1;~Yunmo_Chen1;~Benjamin_Van_Durme2;~Philipp_Koehn2;~Kenton_Murray1", "aff": "Johns Hopkins University;Johns Hopkins University;Johns Hopkins University;Johns Hopkins University;;Johns Hopkins University;Johns Hopkins University", "aff_domain": "jhu.edu;jhu.edu;jhu.edu;jhu.edu;;jhu.edu;jhu.edu", "position": "PhD student;MS student;Undergrad student;PhD student;;Full Professor;Researcher", "bibtex": "@inproceedings{\nxu2023condensing,\ntitle={Condensing Multilingual Knowledge with Lightweight Language-Specific Modules},\nauthor={Haoran Xu and Weiting Tan and Shuyue Stella Li and Yunmo Chen and Benjamin Van Durme and Philipp Koehn and Kenton Murray},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=siiVduxdRz}\n}", "github": "", "project": "", "reviewers": "LVVn;Khj5;fwLV", "site": "https://openreview.net/forum?id=siiVduxdRz", "pdf_size": 0, "rating": "5;5;5", "confidence": "5;4;3", "excitement": "4;3;4", "reproducibility": "5;3;3", "correctness": "4;3;5", "rating_avg": 5.0, "confidence_avg": 4.0, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;0000-0003-1565-064X;0000-0002-5628-1003", "linkedin": "haoran-xu-0842b3194/;weiting-steven-tan-30bb4a175/;;yunmochen;;philipp-koehn-bbb8024/;kentonmurray/", "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "Johns Hopkins University", "aff_unique_dep": "", "aff_unique_url": "https://www.jhu.edu", "aff_unique_abbr": "JHU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "sriK75T3kd", "title": "No offence, Bert - I insult only humans! Multilingual sentence-level attack on toxicity detection networks", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "We introduce a simple yet efficient sentence-level attack on black-box toxicity detector models. By adding several positive words or sentences to the end of a hateful message, we are able to change the prediction of a neural network and pass the toxicity detection system check. This approach is shown to be working on seven languages from three different language families. 
We also describe the defence mechanism against the aforementioned attack and discuss its limitations.", "keywords": "toxicity detection;adversarial attack;multilingual;neural networks", "primary_area": "", "supplementary_material": "", "author": "Sergey Berezin;Reza Farahbakhsh;Noel Crespi", "authorids": "~Sergey_Berezin1;~Reza_Farahbakhsh1;~Noel_Crespi1", "gender": "M;M;", "homepage": "https://www.linkedin.com/in/s-berezin/;;", "dblp": ";117/9325;", "google_scholar": ";https://scholar.google.com/citations?hl=en;https://scholar.google.fr/citations?user=z_-5wSkAAAAJ", "or_profile": "~Sergey_Berezin1;~Reza_Farahbakhsh1;~Noel_Crespi1", "aff": "Telecom SudParis;Telecom SudParis;Telecom SudParis", "aff_domain": "telecom-sudparis.fr;telecom-sudparis.fr;telecom-sudparis.fr", "position": "PhD student;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nberezin2023no,\ntitle={No offence, Bert - I insult only humans! Multilingual sentence-level attack on toxicity detection networks},\nauthor={Sergey Berezin and Reza Farahbakhsh and Noel Crespi},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=sriK75T3kd}\n}", "github": "", "project": "", "reviewers": "KyeZ;fLb3;RWV8", "site": "https://openreview.net/forum?id=sriK75T3kd", "pdf_size": 0, "rating": "2;2;2", "confidence": "4;4;4", "excitement": "2;3;3", "reproducibility": "4;3;4", "correctness": "3;4;2", "rating_avg": 2.0, "confidence_avg": 4.0, "excitement_avg": 2.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0003-3219-3700;", "linkedin": "s-berezin/;reza-farahbakhsh/;", "aff_unique_index": "0;0;0", "aff_unique_norm": "Telecom SudParis", "aff_unique_dep": "", "aff_unique_url": "https://www.telecom-sudparis.eu", "aff_unique_abbr": "TSP", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "France" }, { "id": "st5RaWdLTn", "title": "AdaTranS: Adapting with Boundary-based Shrinking for End-to-End Speech Translation", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "To alleviate the data scarcity problem in end-to-end speech translation (ST), pre-training on data for speech recognition and machine translation is considered an important technique. However, the modality gap between speech and text prevents the ST model from efficiently inheriting knowledge from the pre-trained models. In this work, we propose AdaTranS for end-to-end ST. It adapts the speech features with a new shrinking mechanism to mitigate the length mismatch between speech and text features by predicting word boundaries. Experiments on the MUST-C dataset demonstrate that AdaTranS achieves better performance than the other shrinking-based methods, with higher inference speed and lower memory usage. 
Further experiments also show that AdaTranS can be equipped with additional alignment losses to further improve performance.", "keywords": "speech translation;modality adaptation", "primary_area": "", "supplementary_material": "", "author": "Xingshan Zeng;Liangyou Li;Qun Liu", "authorids": "~Xingshan_Zeng1;~Liangyou_Li1;~Qun_Liu1", "gender": "M;M;M", "homepage": ";;http://liuquncn.github.io/", "dblp": "220/2024;78/7942;75/4402-1", "google_scholar": "Ca08I6AAAAAJ;PPDE-uIAAAAJ;2HhiGzcAAAAJ", "or_profile": "~Xingshan_Zeng1;~Liangyou_Li1;~Qun_Liu1", "aff": "Huawei Technologies Ltd.;Huawei Noah's Ark Lab;Huawei Noah's Ark Lab", "aff_domain": "huawei.com;huawei.com;huawei.com", "position": "Researcher;Researcher;Chief Scientist of Speech and Language Computing", "bibtex": "@inproceedings{\nzeng2023adatrans,\ntitle={AdaTranS: Adapting with Boundary-based Shrinking for End-to-End Speech Translation},\nauthor={Xingshan Zeng and Liangyou Li and Qun Liu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=st5RaWdLTn}\n}", "github": "", "project": "", "reviewers": "kuHR;FEtD;g78k;T3ym", "site": "https://openreview.net/forum?id=st5RaWdLTn", "pdf_size": 0, "rating": "4;4;4;4", "confidence": "3;4;4;4", "excitement": "3;4;3;3", "reproducibility": "4;4;3;4", "correctness": "2;4;4;3", "rating_avg": 4.0, "confidence_avg": 3.75, "excitement_avg": 3.25, "reproducibility_avg": 3.75, "correctness_avg": 3.25, "replies_avg": 14, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-0279-003X;0000-0002-7000-1792", "linkedin": ";;qunliu/", "aff_unique_index": "0;0;0", "aff_unique_norm": "Huawei", "aff_unique_dep": "Huawei Technologies", "aff_unique_url": "https://www.huawei.com", "aff_unique_abbr": "Huawei", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "sthusQGkef", "title": "Chain of Thought with Explicit Evidence Reasoning for Few-shot Relation Extraction", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Few-shot relation extraction involves identifying the type of relationship between two specific entities within a text, using a limited number of annotated samples. A variety of solutions to this problem have emerged by applying meta-learning and neural graph techniques which typically necessitate a training process for adaptation. Recently, the strategy of in-context learning has been demonstrating notable results without the need of training. Few studies have already utilized in-context learning for zero-shot information extraction. Unfortunately, the evidence for inference is either not considered or implicitly modeled during the construction of chain-of-thought prompts. In this paper, we propose a novel approach for few-shot relation extraction using large language models, named CoT-ER, chain-of-thought with explicit evidence reasoning. In particular, CoT-ER first induces large language models to generate evidences using task-specific and concept-level knowledge. Then these evidences are explicitly incorporated into chain-of-thought prompting for relation extraction. 
Experimental results demonstrate that our CoT-ER approach (with 0% training data) achieves competitive performance compared to the fully-supervised (with 100% training data) state-of-the-art approach on the FewRel1.0 and FewRel2.0 datasets.", "keywords": "Relation Extraction;In-context Learning;Few Shot Learning", "primary_area": "", "supplementary_material": "", "author": "Xilai Ma;Jing Li;Min Zhang", "authorids": "~Xilai_Ma1;~Jing_Li19;~Min_Zhang9", "gender": "M;M;M", "homepage": ";https://www.li-jing.com;https://zhangmin-nlp-ai.github.io/", "dblp": ";l/JingLi34;83/5342-5", "google_scholar": ";https://scholar.google.com.sg/citations?user=2QxEwWsAAAAJ;https://scholar.google.com/citations?hl=zh-CN", "or_profile": "~Xilai_Ma1;~Jing_Li19;~Min_Zhang9", "aff": "Harbin Institute of Technology, Shenzhen;Inception Institute of Artificial Intelligence, United Arab Emirates;Harbin Institute of Technology, Shenzhen", "aff_domain": "hit.edu.cn;inceptioniai.org;hit.edu.cn", "position": "Undergrad student;Researcher;Full Professor", "bibtex": "@inproceedings{\nma2023chain,\ntitle={Chain of Thought with Explicit Evidence Reasoning for Few-shot Relation Extraction},\nauthor={Xilai Ma and Jing Li and Min Zhang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=sthusQGkef}\n}", "github": "", "project": "", "reviewers": "6ESd;josS;cCve", "site": "https://openreview.net/forum?id=sthusQGkef", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;3", "excitement": "3;3;3", "reproducibility": "4;4;3", "correctness": "3;3;4", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": "https://www.linkedin.cn/incareer/in/ACoAAEQ6a8YBCFB2evJFFYYyHo7GcAVK8Jfwz6Q;;", "aff_unique_index": "0;1;0", "aff_unique_norm": "Harbin Institute of Technology;Inception Institute of Artificial Intelligence", "aff_unique_dep": ";", "aff_unique_url": "http://en.hhit.edu.cn/;", "aff_unique_abbr": "HIT;", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Shenzhen;", "aff_country_unique_index": "0;1;0", "aff_country_unique": "China;United Arab Emirates" }, { "id": "svSNikfCs1", "title": "Exploiting Asymmetry for Synthetic Training Data Generation: SynthIE and the Case of Information Extraction", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Large language models (LLMs) have great potential for synthetic data generation. This work shows that useful data can be synthetically generated even for tasks that cannot be solved directly by LLMs: for problems with structured outputs, it is possible to prompt an LLM to perform the task in the reverse direction, by generating plausible input text for a target output structure. Leveraging this asymmetry in task difficulty makes it possible to produce large-scale, high-quality data for complex tasks. We demonstrate the effectiveness of this approach on closed information extraction, where collecting ground-truth data is challenging, and no satisfactory dataset exists to date. 
We synthetically generate a dataset of 1.8M data points, establish its superior quality compared to existing datasets in a human evaluation, and use it to finetune small models (220M and 770M parameters), termed SynthIE, that outperform the prior state of the art (with equal model size) by a substantial margin of 57 absolute points in micro-F1 and 79 points in macro-F1. Code, data, and models are available at anonymous.", "keywords": "LLM;Synthetic Data Generation;Information Extraction;Closed Information Extraction;Parsing;Structured Output", "primary_area": "", "supplementary_material": "", "author": "Martin Josifoski;Marija Sakota;Maxime Peyrard;Robert West", "authorids": "~Martin_Josifoski1;~Marija_Sakota1;~Maxime_Peyrard2;~Robert_West1", "gender": "M;F;M;M", "homepage": ";;https://peyrardm.github.io;https://dlab.epfl.ch/people/west/", "dblp": "234/6886.html;255/5704;184/3721;20/7441-1", "google_scholar": "XpzKdlkAAAAJ;vMyxVeMAAAAJ;RFMdKLMAAAAJ;ZiFn598AAAAJ", "or_profile": "~Martin_Josifoski1;~Marija_Sakota1;~Maxime_Peyrard2;~Robert_West1", "aff": "Swiss Federal Institute of Technology Lausanne;EPFL - EPF Lausanne;Swiss Federal Institute of Technology Lausanne;EPFL - EPF Lausanne", "aff_domain": "epfl.ch;epfl.ch;epfl.ch;epfl.ch", "position": "PhD student;PhD student;Postdoc;Assistant Professor", "bibtex": "@inproceedings{\njosifoski2023exploiting,\ntitle={Exploiting Asymmetry for Synthetic Training Data Generation: Synth{IE} and the Case of Information Extraction},\nauthor={Martin Josifoski and Marija Sakota and Maxime Peyrard and Robert West},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=svSNikfCs1}\n}", "github": "", "project": "", "reviewers": "kAnw;CoFT;qw9q", "site": "https://openreview.net/forum?id=svSNikfCs1", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;3;4", "excitement": "4;3;3", "reproducibility": "4;4;4", "correctness": "4;4;4", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.0, "correctness_avg": 4.0, "replies_avg": 9, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-5192-5842;;", "linkedin": "martin-josifoski-56b395104/;;;", "aff_unique_index": "0;1;0;1", "aff_unique_norm": "Swiss Federal Institute of Technology Lausanne;EPFL", "aff_unique_dep": ";", "aff_unique_url": "https://www.epfl.ch;https://www.epfl.ch", "aff_unique_abbr": "EPFL;EPFL", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Lausanne", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Switzerland" }, { "id": "svUOik2Xu1", "title": "Robust Prompt Optimization for Large Language Models Against Distribution Shifts", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Large Language Models (LLMs) have demonstrated significant ability in various Natural Language Processing tasks. However, their effectiveness is highly dependent on the phrasing of the task prompt, leading to research on automatic prompt optimization using labeled task data. We reveal that these prompt optimization techniques are vulnerable to distribution shifts such as subpopulation shifts, which are common for LLMs in real-world scenarios such as customer reviews analysis. 
In this light, we propose a new problem of robust prompt optimization for LLMs against distribution shifts, which requires that the prompt optimized over the labeled source group can simultaneously generalize to an unlabeled target group. To solve this problem, we propose the Generalized Prompt Optimization framework, which incorporates the unlabeled data from the target group into prompt optimization. Extensive experimental results demonstrate the effectiveness of the proposed framework with significant performance improvement on the target group and comparable performance on the source group.", "keywords": "Large Language Model; prompt optimization; distribution shifts; sentiment analysis; question answering;", "primary_area": "", "supplementary_material": "", "author": "Moxin Li;Wenjie Wang;Fuli Feng;Yixin Cao;Jizhi Zhang;Tat-Seng Chua", "authorids": "~Moxin_Li2;~Wenjie_Wang1;~Fuli_Feng1;~Yixin_Cao2;~Jizhi_Zhang1;~Tat-Seng_Chua2", "gender": "M;M;M;M;F;M", "homepage": "https://wenjiewwj.github.io/;https://fulifeng.github.io/;https://sites.google.com/view/yixin-homepage;https://github.com/jizhi-zhang;https://li-moxin.github.io/HelloFromMoxin/;http://www.comp.nus.edu.sg/~chuats/", "dblp": "38/1956-7;183/9198;20/8038-2;122/4098;266/2836;", "google_scholar": "Ma5DtmoAAAAJ;https://scholar.google.com.sg/citations?user=QePM4u8AAAAJ;https://scholar.google.co.uk/citations?user=CnhTvdoAAAAJ;rfnARlcAAAAJ;5Yp7L3kAAAAJ;https://scholar.google.com.tw/citations?user=Z9DWCBEAAAAJ", "or_profile": "~Wenjie_Wang1;~Fuli_Feng1;~Yixin_Cao2;~Jizhi_Zhang1;~Li_Moxin1;~Tat-seng_Chua1", "aff": "National University of Singapore;University of Science and Technology of China;Singapore Management University;University of Science and Technology of China;National University of Singapore;National University of Singapore", "aff_domain": "nus.edu.sg;ustc.edu.cn;smu.edu.sg;ustc.edu.cn;nus.edu.sg;nus.edu.sg", "position": "PhD student;Full Professor;Assistant Professor;PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nli2023robust,\ntitle={Robust Prompt Optimization for Large Language Models Against Distribution Shifts},\nauthor={Moxin Li and Wenjie Wang and Fuli Feng and Yixin Cao and Jizhi Zhang and Tat-Seng Chua},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=svUOik2Xu1}\n}", "github": "", "project": "", "reviewers": "Uxx8;P2wd;XdbA", "site": "https://openreview.net/forum?id=svUOik2Xu1", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;3;3", "excitement": "3;4;3", "reproducibility": "4;4;3", "correctness": "3;4;4", "rating_avg": 5.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-5199-1428;0000-0002-5828-9842;;0000-0002-0251-465X;;0000-0001-6097-7807", "linkedin": ";;;;;", "aff_unique_index": "0;1;2;1;0;0", "aff_unique_norm": "National University of Singapore;University of Science and Technology of China;Singapore Management University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.nus.edu.sg;http://www.ustc.edu.cn;https://www.smu.edu.sg", "aff_unique_abbr": "NUS;USTC;SMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;1;0;0", "aff_country_unique": "Singapore;China" }, { "id": "sxJU7X2ZG0", "title": "Generative Calibration for In-context 
Learning", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "As one of the most exciting features of large language models (LLMs), in-context learning is a mixed blessing. While it allows users to fast-prototype a task solver with only a few training examples, the performance is generally sensitive to various configurations of the prompt such as the choice or order of the training examples. In this paper, we for the first time theoretically and empirically identify that such a paradox is mainly due to the label shift of the in-context model to the data distribution, in which LLMs shift the label marginal $p(y)$ while having a good label conditional $p(x|y)$. With this understanding, we can simply calibrate the in-context predictive distribution by adjusting the label marginal, which is estimated via Monte-Carlo sampling over the in-context model, i.e., generation of LLMs. We call our approach generative calibration. We conduct exhaustive experiments with 12 text classification tasks and 12 LLMs ranging from 774M to 33B, and generally find that the proposed method greatly and consistently outperforms ICL as well as state-of-the-art calibration methods, by up to 27% absolute in macro-F1. Meanwhile, the proposed method is also stable under different prompt configurations.", "keywords": "Calibration;In-context Learning", "primary_area": "", "supplementary_material": "", "author": "Zhongtao Jiang;Yuanzhe Zhang;Cao Liu;Jun Zhao;Kang Liu", "authorids": "~Zhongtao_Jiang1;~Yuanzhe_Zhang1;~Cao_Liu1;~Jun_Zhao4;~Kang_Liu1", "gender": "M;M;M;M;M", "homepage": ";https://yuanzhe-zhang.github.io/;;http://nlpr-web.ia.ac.cn/cip/english/~junzhao/index.html;http://www.nlpr.ia.ac.cn/cip/~liukang/index.html", "dblp": "268/1025;141/4448;26/6730;https://dblp.uni-trier.de/pid/47/2026-1.html;42/4903.html", "google_scholar": "z1BoOUkAAAAJ;H4GYRx8AAAAJ;;https://scholar.google.com.hk/citations?user=HljRttwAAAAJ;DtZCfl0AAAAJ", "or_profile": "~Zhongtao_Jiang1;~Yuanzhe_Zhang1;~Cao_Liu1;~Jun_Zhao4;~Kang_Liu1", "aff": "University of Chinese Academy of Sciences;Institute of Automation, Chinese Academy of Sciences;;Institute of automation, Chinese academy of science;Institute of Automation, Chinese Academy of Sciences", "aff_domain": "ucas.ac.cn;ia.ac.cn;;nlpr.ia.ac.cn;ia.ac.cn", "position": "PhD student;Associate Professor;;Full Professor;Professor", "bibtex": "@inproceedings{\njiang2023generative,\ntitle={Generative Calibration for In-context Learning},\nauthor={Zhongtao Jiang and Yuanzhe Zhang and Cao Liu and Jun Zhao and Kang Liu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=sxJU7X2ZG0}\n}", "github": "", "project": "", "reviewers": "eRKA;Vz7F;HMv7", "site": "https://openreview.net/forum?id=sxJU7X2ZG0", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;3", "excitement": "4;4;4", "reproducibility": "4;2;4", "correctness": "3;3;3", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 4.0, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;", "linkedin": ";;;;", "aff_unique_index": "0;1;1;1", "aff_unique_norm": "University of Chinese Academy of Sciences;Chinese Academy of Sciences", "aff_unique_dep": ";Institute of Automation", "aff_unique_url": "http://www.ucas.ac.cn;http://www.ia.cas.cn", "aff_unique_abbr": "UCAS;CAS", "aff_campus_unique_index": "", 
"aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "syj9VaxutQ", "title": "A Framework for Exploring Player Perceptions of LLM-Generated Dialogue in Commercial Video Games", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "The growing capabilities of large language models (LLMs) have inspired recent efforts to integrate LLM-generated dialogue into video games. However, evaluation remains a major challenge: how do we assess the player experience in a commercial game augmented with LLM-generated dialogue? To explore this question, we introduce a dynamic evaluation framework for the dialogue management systems that govern the task-oriented dialogue often found in roleplaying video games. We first extract dialogue from the widely-acclaimed role-playing game *Disco Elysium: The Final Cut*, which contains 1.1M words of dialogue spread across a complex graph of utterances where node reachability depends on game state (e.g., whether a certain item is held). Using this dataset, we have GPT-4 perform *dialogue infilling* to generate grounded utterances based on game state represented via code. In a statistically robust study of 28 players recruited from the r/DiscoElysium subreddit, the LLM outputs are evaluated against the game designers' writing via both preference judgments and free-form feedback using a web interface that recreates the game's core conversation functionality. Overall, the game designers' prose is significantly preferred to GPT-4 generations, with participants citing reasons such as improved logical flow and grounding with the game state. To spur more principled future research in this area, we release our web interface and tools to enable researchers to build upon our work. 
https://pl.aiwright.dev", "keywords": "human-centered;dialogue generation;video games;interactive storytelling", "primary_area": "", "supplementary_material": "", "author": "Nader Akoury;Qian Yang;Mohit Iyyer", "authorids": "~Nader_Akoury1;~Qian_Yang2;~Mohit_Iyyer1", "gender": ";;M", "homepage": "https://cs.umass.edu/~nsa;;http://cs.umass.edu/~miyyer", "dblp": "211/6802;;148/9178", "google_scholar": "8Zz8fR0AAAAJ;yaSMILkAAAAJ;rBVA5tcAAAAJ", "or_profile": "~Nader_Akoury1;~Qian_Yang2;~Mohit_Iyyer1", "aff": "Department of Computer Science, University of Massachusetts, Amherst;Cornell University;University of Massachusetts Amherst", "aff_domain": "cs.umass.edu;cornell.edu;cs.umass.edu", "position": "PhD student;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nakoury2023a,\ntitle={A Framework for Exploring Player Perceptions of {LLM}-Generated Dialogue in Commercial Video Games},\nauthor={Nader Akoury and Qian Yang and Mohit Iyyer},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=syj9VaxutQ}\n}", "github": "", "project": "", "reviewers": "NDsa;aXdf;h39F", "site": "https://openreview.net/forum?id=syj9VaxutQ", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;4;3", "excitement": "3;4;4", "reproducibility": "1;3;2", "correctness": "4;3;4", "rating_avg": 5.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 2.0, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": ";;", "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Massachusetts Amherst;Cornell University", "aff_unique_dep": "Department of Computer Science;", "aff_unique_url": "https://www.umass.edu;https://www.cornell.edu", "aff_unique_abbr": "UMass Amherst;Cornell", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Amherst;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "t035Emm4Vt", "title": "WSDMS: Debunk Fake News via Weakly Supervised Detection of Misinforming Sentences with Contextualized Social Wisdom", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Fake news debunking primarily focuses on determining the truthfulness of news articles, which oversimplifies the issue as fake news often combines elements of both truth and falsehood. Thus, it becomes crucial to identify specific instances of misinformation within the articles. In this research, we investigate a novel task in the field of fake news debunking, which involves detecting sentence-level misinformation. One of the major challenges in this task is the absence of a training dataset with sentence-level annotations regarding veracity. Inspired by the Multiple Instance Learning (MIL) approach, we propose a model called Weakly Supervised Detection of Misinforming Sentences (WSDMS). This model only requires bag-level labels for training but is capable of inferring both sentence-level misinformation and article-level veracity, aided by relevant social media conversations that are attentively contextualized with news sentences. 
We evaluate WSDMS on three real-world benchmarks and demonstrate that it outperforms existing state-of-the-art baselines in debunking fake news at both the sentence and article levels.", "keywords": "Fake News Detection;Weakly Supervised Learning;Misinformation;Social Network", "primary_area": "", "supplementary_material": "", "author": "Ruichao Yang;Wei Gao;Jing Ma;Hongzhan Lin;Zhiwei Yang", "authorids": "~Ruichao_Yang1;~Wei_Gao1;~Jing_Ma4;~Hongzhan_Lin1;~Zhiwei_Yang1", "gender": "F;M;F;M;M", "homepage": ";https://sites.google.com/view/gaowei/;https://majingcuhk.github.io/;https://daniellin97.github.io;https://sites.google.com/view/yangzhiwei", "dblp": ";28/2073-1;96/6129-4.html;292/1751-1;78/8054-5", "google_scholar": ";8uaZwkwAAAAJ;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com.hk/citations?user=hOF1SLoAAAAJ;SmOi-WYAAAAJ", "or_profile": "~Ruichao_Yang1;~Wei_Gao1;~Jing_Ma4;~Hongzhan_Lin1;~Zhiwei_Yang1", "aff": "Hong Kong Baptist University;Singapore Management University;Hong Kong Baptist University;Hong Kong Baptist University;Jilin University\uff1bHong Kong Baptist University", "aff_domain": "hkbu.edu.hk;smu.edu.sg;hkbu.edu.hk;hkbu.edu.hk;jlu.edu.cn", "position": "PhD student;Assistant Professor;Assistant Professor;PhD student;PhD student", "bibtex": "@inproceedings{\nyang2023wsdms,\ntitle={{WSDMS}: Debunk Fake News via Weakly Supervised Detection of Misinforming Sentences with Contextualized Social Wisdom},\nauthor={Ruichao Yang and Wei Gao and Jing Ma and Hongzhan Lin and Zhiwei Yang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=t035Emm4Vt}\n}", "github": "", "project": "", "reviewers": "np1j;HG47;BSaA", "site": "https://openreview.net/forum?id=t035Emm4Vt", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;5", "excitement": "4;4;4", "reproducibility": "3;4;3", "correctness": "4;4;3", "rating_avg": 4.0, "confidence_avg": 4.333333333333333, "excitement_avg": 4.0, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.6666666666666665, "replies_avg": 10, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0003-3749-3622;;;0000-0002-4111-8334;0000-0002-0534-158X", "linkedin": ";;;;", "aff_unique_index": "0;1;0;0;2", "aff_unique_norm": "Hong Kong Baptist University;Singapore Management University;Jilin University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.hkbu.edu.hk;https://www.smu.edu.sg;http://www.jlu.edu.cn", "aff_unique_abbr": "HKBU;SMU;JLU", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;1;0;0;0", "aff_country_unique": "China;Singapore" }, { "id": "t42YUsyv3d", "title": "DRAFT: Dense Retrieval Augmented Few-shot Topic classifier Framework", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "With the growing volume of diverse information, the demand for classifying arbitrary topics has become increasingly critical. To address this challenge, we introduce DRAFT, a simple framework designed to train a classifier for few-shot topic classification. DRAFT uses a few examples of a specific topic as queries to construct Customized dataset with a dense retriever model. Multi-query retrieval (MQR) algorithm, which effectively handles multiple queries related to a specific topic, is applied to construct the Customized dataset. Subsequently, we fine-tune a classifier using the Customized dataset to identify the topic. 
To demonstrate the efficacy of our proposed approach, we conduct evaluations on both widely used classification benchmark datasets and manually constructed datasets with 291 diverse topics, which simulate diverse contents encountered in real-world applications. DRAFT shows competitive or superior performance compared to baselines that use in-context learning, such as GPT-3 175B and InstructGPT 175B, on few-shot topic classification tasks despite having 177 times fewer parameters, demonstrating its effectiveness.", "keywords": "few-shot topic classification;real-world application", "primary_area": "", "supplementary_material": "", "author": "Keonwoo Kim;Younggun Lee", "authorids": "~Keonwoo_Kim2;~Younggun_Lee1", "gender": "M;M", "homepage": ";", "dblp": "58/2926;", "google_scholar": "IRStRngAAAAJ;https://scholar.google.co.kr/citations?user=HlMBJpMAAAAJ", "or_profile": "~Keonwoo_Kim2;~Younggun_Lee1", "aff": "Seoul National University;Neosapience, Inc.", "aff_domain": "snu.ac.kr;neosapience.com", "position": "MS student;Research Scientist", "bibtex": "@inproceedings{\nkim2023draft,\ntitle={{DRAFT}: Dense Retrieval Augmented Few-shot Topic classifier Framework},\nauthor={Keonwoo Kim and Younggun Lee},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=t42YUsyv3d}\n}", "github": "", "project": "", "reviewers": "r9Me;dsxP;1bbv", "site": "https://openreview.net/forum?id=t42YUsyv3d", "pdf_size": 0, "rating": "1;1;1", "confidence": "4;3;4", "excitement": "2;2;3", "reproducibility": "3;5;4", "correctness": "3;3;3", "rating_avg": 1.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 2.3333333333333335, "reproducibility_avg": 4.0, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";", "linkedin": "keonwookim97;", "aff_unique_index": "0;1", "aff_unique_norm": "Seoul National University;Neosapience, Inc.", "aff_unique_dep": ";", "aff_unique_url": "https://www.snu.ac.kr;", "aff_unique_abbr": "SNU;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "South Korea;United States" }, { "id": "t6p5LtTlqr", "title": "Enhancing Neural Machine Translation with Semantic Units", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Conventional neural machine translation (NMT) models typically use subwords and words as the basic units for model input and comprehension. However, complete words and phrases composed of several tokens are often the fundamental units for expressing semantics, referred to as semantic units. To address this issue, we propose a method Semantic Units for Machine Translation (SU4MT) which models the integral meanings of semantic units within a sentence, and then leverages them to provide a new perspective for understanding the sentence. Specifically, we first propose Word Pair Encoding (WPE), a phrase extraction method to help identify the boundaries of semantic units. Next, we design an Attentive Semantic Fusion (ASF) layer to integrate the semantics of multiple subwords into a single vector: the semantic unit representation. Lastly, the semantic-unit-level sentence representation is concatenated to the token-level one, and they are combined as the input of encoder. 
Experimental results demonstrate that our method effectively models and leverages semantic-unit-level information and outperforms the strong baselines.", "keywords": "machine translation", "primary_area": "", "supplementary_material": "", "author": "Langlin Huang;Shuhao Gu;Zhang Zhuocheng;Yang Feng", "authorids": "~Langlin_Huang1;~Shuhao_Gu1;~Zhang_Zhuocheng1;~Yang_Feng4", "gender": "Not Specified;M;M;", "homepage": "https://shrango.github.io/;;https://github.com/salvation-z;http://people.ucas.edu.cn/~yangfeng?language=en", "dblp": "349/8478;239/5079;;07/6095-4.html", "google_scholar": "Mt9xdjYAAAAJ;PED7pDIAAAAJ;;https://scholar.google.com/citations?hl=en", "or_profile": "~Langlin_Huang1;~Shuhao_Gu1;~Zhang_Zhuocheng1;~Yang_Feng4", "aff": "Institute of Computing Technology, Chinese Academy of Sciences;, Chinese Academy of Sciences;Institute of Computing Technology, Chinese Academy of Sciences;Institute of Computing Technology, Chinese Academy of Sciences", "aff_domain": "ict.ac.cn;ict.ac.cn;ict.ac.cn;ict.ac.cn", "position": "MS student;PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nhuang2023enhancing,\ntitle={Enhancing Neural Machine Translation with Semantic Units},\nauthor={Langlin Huang and Shuhao Gu and Zhang Zhuocheng and Yang Feng},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=t6p5LtTlqr}\n}", "github": "", "project": "", "reviewers": "mJLU;8dZL;V8Rx;PSur", "site": "https://openreview.net/forum?id=t6p5LtTlqr", "pdf_size": 0, "rating": "4;4;4;4", "confidence": "4;5;4;1", "excitement": "3;3;3;2", "reproducibility": "4;4;2;3", "correctness": "4;4;3;3", "rating_avg": 4.0, "confidence_avg": 3.5, "excitement_avg": 2.75, "reproducibility_avg": 3.25, "correctness_avg": 3.5, "replies_avg": 13, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0001-9631-0334;;;", "linkedin": ";;;", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Chinese Academy of Sciences", "aff_unique_dep": "Institute of Computing Technology", "aff_unique_url": "http://www.ict.ac.cn", "aff_unique_abbr": "CAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "tBtc4Ousge", "title": "Intervention-Based Alignment of Code Search with Execution Feedback", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "One of the fundamental goals in code search is to retrieve a functionally correct code for a given natural language query. \nAs annotating for correctness requires executing test cases (i.e. obtaining execution feedback), existing code search training datasets approximate text-code co-occurrences as positive execution feedback. However, this approximation may misalign models\u2019 retrieval decisions from ground-truth correctness. \nTo address such limitation, we propose Code Intervention-based Reinforcement Learning (CIRL) that perturbs training code to result in misalignment (i.e. code intervention), then tests models\u2019 decisions and corrects them with the execution feedback by reinforcement learning. The first technical contribution of CIRL is to induce the execution feedback from perturbation, without actual execution. Secondly, CIRL introduces structural perturbations using abstract syntax trees, going beyond simple lexical changes. 
Experimental results on various datasets demonstrate the effectiveness of CIRL compared to conventional approaches.", "keywords": "Code Search;Misalignment;Reinforcement Learning;Intervention", "primary_area": "", "supplementary_material": "", "author": "Hojae Han;Minsoo Kim;seung-won hwang;Nan Duan;Shuai Lu", "authorids": "~Hojae_Han1;~Minsoo_Kim1;~seung-won_hwang2;~Nan_Duan1;~Shuai_Lu1", "gender": "M;M;;M;M", "homepage": ";;http://seungwonh.github.io;https://nanduan.github.io/;", "dblp": "254/8084;;h/SeungwonHwang;;", "google_scholar": "Jard20IAAAAJ;FCjc3TsAAAAJ;63bBmc3mYrAC;Qaa6OxIAAAAJ;GAokfukAAAAJ", "or_profile": "~Hojae_Han1;~Minsoo_Kim1;~seung-won_hwang2;~Nan_Duan1;~Shuai_Lu1", "aff": "LG AI Research;Seoul National University;Seoul National University;Microsoft Research Asia;Microsoft", "aff_domain": "lgresearch.ai;snu.ac.kr;snu.ac.kr;microsoft.com;microsoft.com", "position": "Intern;PhD student;Full Professor;Principal Researcher;Research SDE", "bibtex": "@inproceedings{\nhan2023interventionbased,\ntitle={Intervention-Based Alignment of Code Search with Execution Feedback},\nauthor={Hojae Han and Minsoo Kim and seung-won hwang and Nan Duan and Shuai Lu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=tBtc4Ousge}\n}", "github": "", "project": "", "reviewers": "4Eqj;Xw4P;fqev", "site": "https://openreview.net/forum?id=tBtc4Ousge", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;2", "excitement": "3;3;3", "reproducibility": "5;4;5", "correctness": "3;3;3", "rating_avg": 3.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.0, "reproducibility_avg": 4.666666666666667, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;", "linkedin": "hojae-han/;;;;", "aff_unique_index": "0;1;1;2;2", "aff_unique_norm": "LG;Seoul National University;Microsoft", "aff_unique_dep": "LG AI Research;;Research", "aff_unique_url": "https://www.lgaires.com;https://www.snu.ac.kr;https://www.microsoft.com/en-us/research/group/asia", "aff_unique_abbr": "LG AI;SNU;MSR Asia", "aff_campus_unique_index": "1", "aff_campus_unique": ";Asia", "aff_country_unique_index": "0;0;0;1;2", "aff_country_unique": "South Korea;China;United States" }, { "id": "tCEtFcrq8n", "title": "Generalizing Few-Shot Named Entity Recognizers to Unseen Domains with Type-Related Features", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Few-shot named entity recognition (NER) has shown remarkable progress in identifying entities in low-resource domains. However, few-shot NER methods still struggle with out-of-domain (OOD) examples due to their reliance on manual labeling for the target domain. To address this limitation, recent studies enable generalization to an unseen target domain with only a few labeled examples using data augmentation techniques. Two important challenges remain: First, augmentation is limited to the training data, resulting in minimal overlap between the generated data and OOD examples. Second, knowledge transfer is implicit and insufficient, severely hindering model generalizability and the integration of knowledge from the source domain. In this paper, we propose a framework, prompt learning with type-related features (PLTR), to address these challenges. 
To identify useful knowledge in the source domain and enhance knowledge transfer, PLTR automatically extracts entity type-related features (TRFs) based on mutual information criteria. To bridge the gap between training and OOD data, PLTR generates a unique prompt for each unseen example by selecting relevant TRFs. We show that PLTR achieves significant performance improvements on in-domain and cross-domain datasets. The use of PLTR facilitates model adaptation and increases representation similarities between the source and unseen domains.", "keywords": "Named entity recognition;Few-shot learning;Domain generalization;Prompt learning", "primary_area": "", "supplementary_material": "", "author": "Zihan Wang;Ziqi Zhao;Zhumin Chen;Pengjie Ren;Maarten de Rijke;Zhaochun Ren", "authorids": "~Zihan_Wang13;~Ziqi_Zhao2;~Zhumin_Chen1;~Pengjie_Ren1;~Maarten_de_Rijke1;~Zhaochun_Ren1", "gender": "M;M;;;;M", "homepage": "https://wzh-nlp.github.io/;https://github.com/ZiqiZhao1;https://ir.sdu.edu.cn/~zhuminchen/~zhuminchen_en.htm;;https://staff.fnwi.uva.nl/m.derijke/;https://renzhaochun.github.io/", "dblp": "152/5077-2;;88/1081;;r/MdRijke;58/10440", "google_scholar": "npvYA9MAAAAJ;;;;https://scholar.google.com/citations?hl=en;fPcIPt0AAAAJ", "or_profile": "~Zihan_Wang13;~Ziqi_Zhao2;~Zhumin_Chen1;~Pengjie_Ren1;~Maarten_de_Rijke1;~Zhaochun_Ren1", "aff": "University of Amsterdam;Tongji University;Shandong University;;University of Amsterdam;Shandong University", "aff_domain": "uva.nl;tongji.edu.cn;sdu.edu.cn;;uva.nl;sdu.edu.cn", "position": "PhD student;Undergrad student;Full Professor;;Full Professor;Full Professor", "bibtex": "@inproceedings{\nwang2023generalizing,\ntitle={Generalizing Few-Shot Named Entity Recognizers to Unseen Domains with Type-Related Features},\nauthor={Zihan Wang and Ziqi Zhao and Zhumin Chen and Pengjie Ren and Maarten de Rijke and Zhaochun Ren},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=tCEtFcrq8n}\n}", "github": "", "project": "", "reviewers": "vj1P;2X7Q;NpEC", "site": "https://openreview.net/forum?id=tCEtFcrq8n", "pdf_size": 0, "rating": "2;2;2", "confidence": "4;4;4", "excitement": "2;4;4", "reproducibility": "3;4;4", "correctness": "3;3;3", "rating_avg": 2.0, "confidence_avg": 4.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0003-0493-2668;0009-0008-3011-5745;0000-0003-4592-4074;;0000-0002-1086-0202;0000-0002-9076-6565", "linkedin": ";;;;;zhaochun-ren-460491296/?locale=nl_NL", "aff_unique_index": "0;1;2;0;2", "aff_unique_norm": "University of Amsterdam;Tongji University;Shandong University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.uva.nl;https://www.tongji.edu.cn;http://www.sdu.edu.cn", "aff_unique_abbr": "UvA;Tongji;SDU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;0;1", "aff_country_unique": "Netherlands;China" }, { "id": "tCGyM6CpRI", "title": "Optimizing Retrieval-augmented Reader Models via Token Elimination", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Fusion-in-Decoder (FiD) is an effective retrieval-augmented language model applied across a variety of open-domain tasks, such as\nquestion answering, fact checking, etc. 
In FiD, supporting passages are first retrieved and then processed using a generative model (Reader),\nwhich can cause a significant bottleneck in decoding time, particularly with long outputs. In this work, we analyze the contribution and necessity of all the retrieved passages to the performance of reader models, and propose eliminating some of the retrieved information, at the\ntoken level, that might not contribute essential information to the answer generation process. We demonstrate that our method can reduce\nrun-time by up to 62.2%, with only a 2% reduction in performance, and in some cases, even improve the performance results.", "keywords": "fusion in decoder;efficiency;long-form question answering", "primary_area": "", "supplementary_material": "", "author": "Moshe Berchansky;Peter Izsak;Avi Caciularu;Ido Dagan;Moshe Wasserblat", "authorids": "~Moshe_Berchansky1;~Peter_Izsak1;~Avi_Caciularu1;~Ido_Dagan1;~Moshe_Wasserblat1", "gender": "M;M;M;M;", "homepage": ";https://peteriz.github.io/;http://aviclu.github.io/;http://u.cs.biu.ac.il/~dagan/;", "dblp": ";143/2297;https://dblp.uni-trier.de/pid/207/8509;95/284;140/9024", "google_scholar": "WqJSJ6AAAAAJ;rTE9adkAAAAJ;https://scholar.google.co.il/citations?user=fPG_0aQAAAAJ;https://scholar.google.com.tw/citations?user=YzGAGtoAAAAJ;", "or_profile": "~Moshe_Berchansky1;~Peter_Izsak1;~Avi_Caciularu1;~Ido_Dagan1;~Moshe_Wasserblat1", "aff": "Bar-Ilan University;Intel Labs;Google;Bar-Ilan University;Intel", "aff_domain": "biu.ac.il;intel.com;google.com;biu.ac.il;intel.com", "position": "Researcher;Researcher;Researcher;Full Professor;Researcher", "bibtex": "@inproceedings{\nberchansky2023optimizing,\ntitle={Optimizing Retrieval-augmented Reader Models via Token Elimination},\nauthor={Moshe Berchansky and Peter Izsak and Avi Caciularu and Ido Dagan and Moshe Wasserblat},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=tCGyM6CpRI}\n}", "github": "", "project": "", "reviewers": "jDvi;g1LA;cWny", "site": "https://openreview.net/forum?id=tCGyM6CpRI", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;4;3", "excitement": "3;3;4", "reproducibility": "2;3;4", "correctness": "3;3;4", "rating_avg": 5.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0001-9227-8939;0000-0001-8354-6823;;;", "linkedin": "moshe-berchansky-446515142;peteriz/;avicaciularu/;;", "aff_unique_index": "0;1;2;0;1", "aff_unique_norm": "Bar-Ilan University;Intel;Google", "aff_unique_dep": ";Intel Labs;Google", "aff_unique_url": "https://www.biu.ac.il;https://www.intel.com;https://www.google.com", "aff_unique_abbr": "BIU;Intel;Google", "aff_campus_unique_index": "1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;1;1;0;1", "aff_country_unique": "Israel;United States" }, { "id": "tEN5ONyUre", "title": "Interpreting Indirect Answers to Yes-No Questions in Multiple Languages", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Yes-no questions expect a yes or no for an answer, but people often skip polar keywords. Instead, they answer with long explanations that must be interpreted. In this paper, we focus on this challenging problem and release new benchmarks in eight languages. 
We present a distant supervision approach to collect training data, and demonstrate that direct answers (i.e., with polar keywords) are useful to train models to interpret indirect answers (i.e., without polar keywords). We show that monolingual fine-tuning is beneficial if training data can be obtained via distant supervision for the language of interest (5 languages). Additionally, we show that cross-lingual fine-tuning is always beneficial (8 languages).", "keywords": "Multilinguality;Question Answering;Yes-no Question", "primary_area": "", "supplementary_material": "", "author": "Zijie Wang;Md Mosharaf Hossain;Shivam Mathur;Terry Cruz Melo;Kadir Bulut Ozler;Keun Hee Park;Jacob Quintero;MohammadHossein Rezaei;Shreya Nupur Shakya;Md Nayem Uddin;Eduardo Blanco", "authorids": "~Zijie_Wang3;~Md_Mosharaf_Hossain1;~Shivam_Mathur1;~Terry_Cruz_Melo1;~Kadir_Bulut_Ozler1;~Keun_Hee_Park1;~Jacob_Quintero1;~MohammadHossein_Rezaei1;~Shreya_Nupur_Shakya1;~Md_Nayem_Uddin2;~Eduardo_Blanco1", "gender": "M;M;;M;;M;;M;F;;M", "homepage": "https://wang-zijie.github.io/;https://mosharafhossain.github.io/;;http://terry.pe;;;;https://mhrezaei.com;;;https://eduardoblanco.github.io/", "dblp": ";234/2994;;;;348/5286;;359/3280.html;;;32/369-2", "google_scholar": "g6wwJ6YAAAAJ;6wn-19gAAAAJ;AoD5dgEAAAAJ;;;-QseKLUAAAAJ;;https://scholar.google.com/citations?hl=en;;;AqGa3-MAAAAJ", "or_profile": "~Zijie_Wang3;~Md_Mosharaf_Hossain1;~Shivam_Mathur1;~Terry_Cruz_Melo1;~Kadir_Bulut_Ozler1;~Keun_Hee_Park1;~Jacob_Quintero1;~MohammadHossein_Rezaei1;~Shreya_Nupur_Shakya1;~Md_Nayem_Uddin2;~Eduardo_Blanco1", "aff": "University of Arizona;Amazon;Arizona State University;, University of Arizona;;Arizona State University;University of Arizona;University of Arizona;, University of Arizona;;University of Arizona", "aff_domain": "arizona.edu;amazon.com;asu.edu;cs.arizona.edu;;asu.edu;arizona.edu;arizona.edu;cs.arizona.edu;;arizona.edu", "position": "PhD student;Researcher;MS student;Researcher;;MS student;PhD student;Undergrad student;PhD student;;Associate Professor", "bibtex": "@inproceedings{\nwang2023interpreting,\ntitle={Interpreting Indirect Answers to Yes-No Questions in Multiple Languages},\nauthor={Zijie Wang and Md Mosharaf Hossain and Shivam Mathur and Terry Cruz Melo and Kadir Bulut Ozler and Keun Hee Park and Jacob Quintero and MohammadHossein Rezaei and Shreya Nupur Shakya and Md Nayem Uddin and Eduardo Blanco},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=tEN5ONyUre}\n}", "github": "", "project": "", "reviewers": "KQFS;jtqq;DJgs", "site": "https://openreview.net/forum?id=tEN5ONyUre", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;2;4", "excitement": "2;4;4", "reproducibility": "4;4;3", "correctness": "3;3;3", "rating_avg": 3.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.0, "replies_avg": 9, "authors#_avg": 11, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-4448-7151;;;;;0009-0004-2912-7248;;;;;", "linkedin": ";md-mosharaf-hossain-phd-490b5751/;shivam--mathur/;;;keun-park-b08411a9/;jacob-quintero-53b66b139/;brianrezaei/;shreya-nupur-shakya-5227a9b6;;", "aff_unique_index": "0;1;2;0;2;0;0;0;0", "aff_unique_norm": "University of Arizona;Amazon;Arizona State University", "aff_unique_dep": ";Amazon.com, Inc.;", "aff_unique_url": "https://www.arizona.edu;https://www.amazon.com;https://www.asu.edu", 
"aff_unique_abbr": "UA;Amazon;ASU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "tJt1v8eugw", "title": "Multi-Defendant Legal Judgment Prediction via Hierarchical Reasoning", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Multiple defendants in a criminal fact description generally exhibit complex interactions, and cannot be well handled by existing Legal Judgment Prediction (LJP) methods which focus on predicting judgment results (e.g., law articles, charges, and terms of penalty) for single-defendant cases. To address this problem, we propose the task of multi-defendant LJP, which aims to automatically predict the judgment results for each defendant of multi-defendant cases. Two challenges arise with the task of multi-defendant LJP: (1) indistinguishable judgment results among various defendants; and (2) the lack of a real-world dataset for training and evaluation. To tackle the first challenge, we formalize the multi-defendant judgment process as hierarchical reasoning chains and introduce a multi-defendant LJP method, named Hierarchical Reasoning Network (HRN), which follows the hierarchical reasoning chains to determine criminal relationships, sentencing circumstances, law articles, charges, and terms of penalty for each defendant. To tackle the second challenge, we collect a real-world multi-defendant LJP dataset, namely MultiLJP, to accelerate the relevant research in the future. Extensive experiments on MultiLJP verify the effectiveness of our proposed HRN.", "keywords": "legal text mining;legal judgment prediction;legal reasoning", "primary_area": "", "supplementary_material": "", "author": "Yougang Lyu;Jitai Hao;Zihan Wang;Kai Zhao;Shen Gao;Pengjie Ren;Zhumin Chen;Fang Wang;Zhaochun Ren", "authorids": "~Yougang_Lyu1;~Jitai_Hao1;~Zihan_Wang13;~Kai_Zhao13;~Shen_Gao1;~Pengjie_Ren1;~Zhumin_Chen1;~Fang_Wang7;~Zhaochun_Ren1", "gender": ";M;M;;M;;;F;M", "homepage": ";https://github.com/CURRENTF;https://wzh-nlp.github.io/;;https://shengaopku.github.io/;;https://ir.sdu.edu.cn/~zhuminchen/~zhuminchen_en.htm;https://dsi.sdu.edu.cn/szdw1/szjs.htm;https://renzhaochun.github.io/", "dblp": ";362/8635;152/5077-2;;85/7967;;88/1081;;58/10440", "google_scholar": ";https://scholar.google.com.hk/citations?user=mnCgkvYAAAAJ;npvYA9MAAAAJ;;Xb5yz-YAAAAJ;;;;fPcIPt0AAAAJ", "or_profile": "~Yougang_Lyu1;~Jitai_Hao1;~Zihan_Wang13;~Kai_Zhao13;~Shen_Gao1;~Pengjie_Ren1;~Zhumin_Chen1;~Fang_Wang7;~Zhaochun_Ren1", "aff": ";Shandong University;University of Amsterdam;;Shandong University;;Shandong University;Shandong University;Shandong University", "aff_domain": ";sdu.edu.cn;uva.nl;;sdu.edu.cn;;sdu.edu.cn;sdu.edu.cn;sdu.edu.cn", "position": ";MS student;PhD student;;Assistant Professor;;Full Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nlyu2023multidefendant,\ntitle={Multi-Defendant Legal Judgment Prediction via Hierarchical Reasoning},\nauthor={Yougang Lyu and Jitai Hao and Zihan Wang and Kai Zhao and Shen Gao and Pengjie Ren and Zhumin Chen and Fang Wang and Zhaochun Ren},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=tJt1v8eugw}\n}", "github": "", "project": "", "reviewers": "6vXn;udyE;TtcG", "site": "https://openreview.net/forum?id=tJt1v8eugw", "pdf_size": 0, "rating": "3;3;3", "confidence": "5;5;4", "excitement": "4;4;3", "reproducibility": "4;4;3", "correctness": 
"4;4;3", "rating_avg": 3.0, "confidence_avg": 4.666666666666667, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 9, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0003-0493-2668;;0000-0003-1301-3700;;0000-0003-4592-4074;;0000-0002-9076-6565", "linkedin": ";;;;;;;;zhaochun-ren-460491296/?locale=nl_NL", "aff_unique_index": "0;1;0;0;0;0", "aff_unique_norm": "Shandong University;University of Amsterdam", "aff_unique_dep": ";", "aff_unique_url": "http://www.sdu.edu.cn;https://www.uva.nl", "aff_unique_abbr": "SDU;UvA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0;0;0", "aff_country_unique": "China;Netherlands" }, { "id": "tL7hS11keH", "title": "CoAnnotating: Uncertainty-Guided Work Allocation between Human and Large Language Models for Data Annotation", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Annotated data plays a critical role in Natural Language Processing (NLP) in training models and evaluating their performance. Given recent developments in Large Language Models (LLMs), models such as ChatGPT demonstrate zero-shot capability on many text-annotation tasks, comparable with or even exceeding human annotators. Such LLMs can serve as alternatives for manual annotation, due to lower costs and higher scalability. However, limited work has leveraged LLMs as complementary annotators, nor explored how annotation work is best allocated among humans and LLMs to achieve both quality and cost objectives. We propose CoAnnotating, a novel paradigm for Human-LLM co-annotation of unstructured texts at scale. Under this framework, we utilize uncertainty to estimate LLMs' annotation capability. Our empirical study shows CoAnnotating to be an effective means to allocate work from results on different datasets, with up to 21% performance improvement over random baseline. For code implementation, see https://github.com/SALT-NLP/CoAnnotating.", "keywords": "Human\u2013Artificial Intelligence Collaboration;Large Language Model;Data Annotation;Weak Supervision", "primary_area": "", "supplementary_material": "", "author": "Minzhi Li;Taiwei Shi;Caleb Ziems;Min-Yen Kan;Nancy F. 
Chen;Zhengyuan Liu;Diyi Yang", "authorids": "~Minzhi_Li1;~Taiwei_Shi1;~Caleb_Ziems1;~Min-Yen_Kan1;~Nancy_F._Chen1;~Zhengyuan_Liu2;~Diyi_Yang2", "gender": "F;M;M;M;;M;F", "homepage": "https://www.linkedin.com/in/minzhi-li-b16930183/;https://maksimstw.github.io/;http://calebziems.com/;https://www.comp.nus.edu.sg/~kanmy/;http://alum.mit.edu/www/nancychen;;https://cs.stanford.edu/~diyiy/", "dblp": ";336/2150;252/5058;k/MinYenKan;84/8761;229/9236;70/11145", "google_scholar": ";yv6nCnMAAAAJ;Hm4XL1AAAAAJ;https://scholar.google.com.tw/citations?user=aNVcd3EAAAAJ;https://scholar.google.com.sg/citations?user=K3Z9UiAAAAAJ;;j9jhYqQAAAAJ", "or_profile": "~Minzhi_Li1;~Taiwei_Shi1;~Caleb_Ziems1;~Min-Yen_Kan1;~Nancy_F._Chen1;~Zhengyuan_Liu2;~Diyi_Yang2", "aff": "I2R, A*STAR;Georgia Institute of Technology;Georgia Institute of Technology;National University of Singapore;I2R, A*STAR;I2R;Stanford University", "aff_domain": "i2r.a-star.edu.sg;gatech.edu;gatech.edu;nus.edu.sg;i2r.a-star.edu.sg;astar.edu.sg;stanford.edu", "position": "PhD student;Undergrad student;PhD student;Associate Professor;Principal Researcher;Researcher;Assistant Professor", "bibtex": "@inproceedings{\nli2023coannotating,\ntitle={CoAnnotating: Uncertainty-Guided Work Allocation between Human and Large Language Models for Data Annotation},\nauthor={Minzhi Li and Taiwei Shi and Caleb Ziems and Min-Yen Kan and Nancy F. Chen and Zhengyuan Liu and Diyi Yang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=tL7hS11keH}\n}", "github": "", "project": "", "reviewers": "HARE;Tt3q;rzwv;mCqk", "site": "https://openreview.net/forum?id=tL7hS11keH", "pdf_size": 0, "rating": "5;5;5;5", "confidence": "5;4;3;4", "excitement": "4;4;4;4", "reproducibility": "4;4;3;4", "correctness": "3;5;3;4", "rating_avg": 5.0, "confidence_avg": 4.0, "excitement_avg": 4.0, "reproducibility_avg": 3.75, "correctness_avg": 3.75, "replies_avg": 12, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;0000-0003-0872-5877;;", "linkedin": ";maksimstw/;caleb-ziems-4b1283126/;;nancy-chen-4644865/?originalSubdomain=sg;;", "aff_unique_index": "0;1;1;2;0;3;4", "aff_unique_norm": "A*STAR;Georgia Institute of Technology;National University of Singapore;Institute for Infocomm Research;Stanford University", "aff_unique_dep": "Institute for Infocomm Research;;;;", "aff_unique_url": "https://www.a-star.edu.sg;https://www.gatech.edu;https://www.nus.edu.sg;https://www.i2r.a-star.edu.sg;https://www.stanford.edu", "aff_unique_abbr": "A*STAR;Georgia Tech;NUS;I2R;Stanford", "aff_campus_unique_index": "1", "aff_campus_unique": ";Stanford", "aff_country_unique_index": "0;1;1;0;0;0;1", "aff_country_unique": "Singapore;United States" }, { "id": "tNN3ToWzCM", "title": "Smart \u201cChef\u201d: Verifying the Effect of Role-based Paraphrasing for Aspect Term Extraction", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "We tackle Aspect Term Extraction (ATE), a task of automatically extracting aspect terms from sentences. The current Pretrained Language Model (PLM) based extractors have achieved significant improvements. They primarily benefit from context-aware encoding. However, a considerable number of sentences in ATE corpora contain uninformative or low-quality contexts. Such sentences frequently act as \"troublemakers\" during test. In this study, we explore the context-oriented quality improvement method. 
Specifically, we propose to automatically rewrite the sentences from the perspectives of virtual experts with different roles, such as a \"chef\" in the restaurant domain. On this basis, we perform ATE over the paraphrased sentences during test, using the well-trained extractors without any change. In the experiments, we leverage ChatGPT to determine virtual experts in the considered domains, and induce ChatGPT to generate paraphrases conditioned on the roles of virtual experts. We experiment on the benchmark SemEval datasets, including Laptop-domain L14 and Restaurant-domain R14-16. The experimental results show that our approach effectively recalls the inconspicuous aspect terms like \"al di la\", although it reduces the precision. In addition, it is proven that our approach can be substantially improved by redundancy elimination and multi-role voting. More importantly, our approach can be used to expand the predictions obtained on the original sentences. This yields state-of-the-art performance (i.e., F1-scores of 86.2%, 89.3%, 77.7%, 82.7% on L14 and R14-16) without retraining or fine-tuning the baseline extractors.", "keywords": "Information Extraction;Aspect Term Extraction;ChatGPT", "primary_area": "", "supplementary_material": "", "author": "Jiaxiang Chen;Yu Hong;Qingting Xu;Jianmin YAO", "authorids": "~Jiaxiang_Chen1;~Yu_Hong1;~Qingting_Xu1;~Jianmin_YAO2", "gender": "M;M;;M", "homepage": "https://github.com/chenjx56;;https://blog.csdn.net/weixin_41862755;", "dblp": ";66/5306;;07/176-1.html", "google_scholar": ";;;", "or_profile": "~Jiaxiang_Chen1;~Yu_Hong1;~Qingting_Xu1;~Jianmin_YAO2", "aff": "Suzhou University;Suzhou University;Suzhou University;Soochow University, China", "aff_domain": "suda.edu.cn;suda.edu.cn;suda.edu.cn;suda.edu.cn", "position": "MS student;Full Professor;PhD student;Researcher", "bibtex": "@inproceedings{\nchen2023smart,\ntitle={Smart {\\textquotedblleft}Chef{\\textquotedblright}: Verifying the Effect of Role-based Paraphrasing for Aspect Term Extraction},\nauthor={Jiaxiang Chen and Yu Hong and Qingting Xu and Jianmin YAO},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=tNN3ToWzCM}\n}", "github": "", "project": "", "reviewers": "FoJG;1MTq;m3uM;p6WA", "site": "https://openreview.net/forum?id=tNN3ToWzCM", "pdf_size": 0, "rating": "3;3;3;3", "confidence": "4;3;3;4", "excitement": "3;3;4;4", "reproducibility": "4;4;3;4", "correctness": "3;3;3;3", "rating_avg": 3.0, "confidence_avg": 3.5, "excitement_avg": 3.5, "reproducibility_avg": 3.75, "correctness_avg": 3.0, "replies_avg": 13, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;", "linkedin": ";;;", "aff_unique_index": "0;0;0;1", "aff_unique_norm": "Suzhou University;Soochow University", "aff_unique_dep": ";", "aff_unique_url": "https://www.suda.edu.cn;https://www.soochow.edu.cn", "aff_unique_abbr": "Suda;Soochow U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "tPJDg5G9SR", "title": "Attack Prompt Generation for Red Teaming and Defending Large Language Models", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Large language models (LLMs) are susceptible to red teaming attacks, which can induce LLMs to generate harmful content. Previous research constructs attack prompts via manual or automatic methods, which have their own limitations on construction cost and quality. 
To address these issues, we propose an integrated approach that combines manual and automatic methods to economically generate high-quality attack prompts. Specifically, considering the impressive capabilities of newly emerged LLMs, we propose an attack framework to instruct LLMs to mimic human-generated prompts through in-context learning. Furthermore, we propose a defense framework that fine-tunes victim LLMs through iterative interactions with the attack framework to enhance their safety against red teaming attacks. Extensive experiments on different LLMs validate the effectiveness of our proposed attack and defense frameworks. Additionally, we release a series of attack prompts datasets named SAP with varying sizes, facilitating the safety evaluation and enhancement of more LLMs.", "keywords": "Large Language Models;Red Teaming Attack;Defense;Safety", "primary_area": "", "supplementary_material": "", "author": "Boyi Deng;Wenjie Wang;Fuli Feng;Yang Deng;Qifan Wang;Xiangnan He", "authorids": "~Boyi_Deng1;~Wenjie_Wang1;~Fuli_Feng1;~Yang_Deng4;~Qifan_Wang2;~Xiangnan_He1", "gender": ";M;M;M;M;M", "homepage": ";https://wenjiewwj.github.io/;https://fulifeng.github.io/;https://dengyang17.github.io/;https://wqfcr.github.io/;http://staff.ustc.edu.cn/~hexn", "dblp": "359/3170;38/1956-7;183/9198;115/6282-2;33/8610;59/1007", "google_scholar": "ADSff4oAAAAJ;Ma5DtmoAAAAJ;https://scholar.google.com.sg/citations?user=QePM4u8AAAAJ;https://scholar.google.com.hk/citations?user=OshWT3UAAAAJ;LrSyLosAAAAJ;https://scholar.google.com.sg/citations?user=X45Go24AAAAJ", "or_profile": "~Boyi_Deng1;~Wenjie_Wang1;~Fuli_Feng1;~Yang_Deng4;~Qifan_Wang2;~Xiangnan_He1", "aff": "University of Science and Technology of China;National University of Singapore;University of Science and Technology of China;The Chinese University of Hong Kong;Meta AI;University of Science and Technology of China", "aff_domain": "ustc.edu.cn;nus.edu.sg;ustc.edu.cn;cuhk.edu.hk;fb.com;ustc.edu.cn", "position": "Undergrad student;PhD student;Full Professor;PhD student;Principal Researcher;Professor", "bibtex": "@inproceedings{\ndeng2023attack,\ntitle={Attack Prompt Generation for Red Teaming and Defending Large Language Models},\nauthor={Boyi Deng and Wenjie Wang and Fuli Feng and Yang Deng and Qifan Wang and Xiangnan He},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=tPJDg5G9SR}\n}", "github": "", "project": "", "reviewers": "NAMT;sck6;8KS1", "site": "https://openreview.net/forum?id=tPJDg5G9SR", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;4", "excitement": "3;4;3", "reproducibility": "2;4;4", "correctness": "3;4;3", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-5199-1428;0000-0002-5828-9842;;0000-0002-7570-5756;0000-0001-8472-7992", "linkedin": ";;;;;", "aff_unique_index": "0;1;0;2;3;0", "aff_unique_norm": "University of Science and Technology of China;National University of Singapore;Chinese University of Hong Kong;Meta", "aff_unique_dep": ";;;Meta AI", "aff_unique_url": "http://www.ustc.edu.cn;https://www.nus.edu.sg;https://www.cuhk.edu.hk;https://meta.com", "aff_unique_abbr": "USTC;NUS;CUHK;Meta", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;1;0;0;2;0", 
"aff_country_unique": "China;Singapore;United States" }, { "id": "tQOncmMEVO", "title": "G-SPEED: General SParse Efficient Editing MoDel", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Large Language Models (LLMs) have demonstrated incredible capabilities in understanding, generating, and manipulating languages.\nThrough human-model interactions, LLMs can automatically understand human-issued instructions and output the expected contents, which can significantly increase working efficiency.\nIn various types of real-world demands, editing-oriented tasks account for a considerable proportion, which involves an interactive process that entails the continuous refinement of existing texts to meet specific criteria.\nDue to the need for multi-round human-model interaction and the generation of complicated editing tasks, there is an emergent need for efficient general editing models.\nIn this paper, we propose \\underline{\\textbf{G}}eneral \\underline{\\textbf{SP}}arse \\underline{\\textbf{E}}fficient \\underline{\\textbf{E}}diting Mo\\underline{\\textbf{D}}el~(\\textbf{G-SPEED}), which can fulfill diverse editing requirements through a single model while maintaining low computational costs.\nSpecifically, we first propose a novel unsupervised text editing data clustering algorithm to deal with the data scarcity problem.\nSubsequently, we introduce a sparse editing model architecture to mitigate the inherently limited learning capabilities of small language models.\nThe experimental outcomes indicate that G-SPEED, with its 508M parameters, can surpass LLMs equipped with 175B parameters.\nOur code and model checkpoints are available at \\url{https://github.com/Banner-Z/G-SPEED}.", "keywords": "Text Editing", "primary_area": "", "supplementary_material": "", "author": "Haoke Zhang;Yue Wang;Juntao Li;Xiabing Zhou;Min Zhang", "authorids": "~Haoke_Zhang1;~Yue_Wang17;~Juntao_Li2;~Xiabing_Zhou1;~Min_Zhang9", "gender": "M;M;M;F;M", "homepage": "https://banner-z.github.io/;https://wangyuenlp.github.io/;https://lijuntaopku.github.io/;;https://zhangmin-nlp-ai.github.io/", "dblp": ";33/4822-39.html;;161/0414;83/5342-5", "google_scholar": ";;sZSygsYAAAAJ;;https://scholar.google.com/citations?hl=zh-CN", "or_profile": "~Haoke_Zhang1;~Yue_Wang17;~Juntao_Li2;~Xiabing_Zhou1;~Min_Zhang9", "aff": "Baidu;Soochow University, China;Soochow University, China;Soochow University;Harbin Institute of Technology, Shenzhen", "aff_domain": "baidu.com;suda.edu.cn;suda.edu.cn;suda.edu.cn;hit.edu.cn", "position": "Intern;PhD student;Associate Professor;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nzhang2023gspeed,\ntitle={G-{SPEED}: General {SP}arse Efficient Editing MoDel},\nauthor={Haoke Zhang and Yue Wang and Juntao Li and Xiabing Zhou and Min Zhang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=tQOncmMEVO}\n}", "github": "", "project": "", "reviewers": "VHDk;XAag;qsvy", "site": "https://openreview.net/forum?id=tQOncmMEVO", "pdf_size": 0, "rating": "2;2;2", "confidence": "3;2;4", "excitement": "3;4;4", "reproducibility": "3;4;2", "correctness": "2;4;4", "rating_avg": 2.0, "confidence_avg": 3.0, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0002-6286-7529;;", "linkedin": ";;;;", "aff_unique_index": "0;1;1;1;2", 
"aff_unique_norm": "Baidu;Soochow University;Harbin Institute of Technology", "aff_unique_dep": "Baidu, Inc.;;", "aff_unique_url": "https://www.baidu.com;https://www.soochow.edu.cn;http://en.hhit.edu.cn/", "aff_unique_abbr": "Baidu;Soochow U;HIT", "aff_campus_unique_index": "1", "aff_campus_unique": ";Shenzhen", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "tRYqTsaSyZ", "title": "Causal Intervention for Abstractive Related Work Generation", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Abstractive related work generation has attracted increasing attention in generating coherent related work that helps readers grasp the current research. However, most existing models ignore the inherent causality during related work generation, leading to spurious correlations which downgrade the models\u2019 generation quality and generalizability. In this study, we argue that causal intervention can address such limitations and improve the quality and coherence of generated related work. To this end, we propose a novel Causal Intervention Module for Related Work Generation (CaM) to effectively capture causalities in the generation process. Specifically, we first model the relations among the sentence order, document (reference) correlations, and transitional content in related work generation using a causal graph. Then, to implement causal interventions and mitigate the negative impact of spurious correlations, we use do-calculus to derive ordinary conditional probabilities and identify causal effects through CaM. Finally, we subtly fuse CaM with Transformer to obtain an end-to-end related work generation framework. Extensive experiments on two real-world datasets show that CaM can effectively promote the model to learn causal relations and thus produce related work of higher quality and coherence.", "keywords": "Related work generation;text summarization;causal intervention", "primary_area": "", "supplementary_material": "", "author": "Jiachang Liu;Qi Zhang;Chongyang Shi;Usman Naseem;Shoujin Wang;Liang Hu;Ivor Tsang", "authorids": "~Jiachang_Liu2;~Qi_Zhang25;~Chongyang_Shi1;~Usman_Naseem1;~Shoujin_Wang1;~Liang_Hu1;~Ivor_Tsang1", "gender": "M;M;M;;M;M;M", "homepage": "https://dm4m.github.io/;https://sites.google.com/view/qizhang-bit-uts/home;https://cs.bit.edu.cn/szdw/jsml2/rjznyrjgcyjs2/3c137ad5c6484e8d931719b1612dd35c.htm;https://usmaann.github.io/;https://shoujinwang1.github.io/;https://sites.google.com/view/lianghu/home;https://www.a-star.edu.sg/cfar/about-cfar/management/prof-ivor-tsang", "dblp": ";52/323-20;68/7942-1.html;253/6972.html;16/8492;48/5388-4;35/5873", "google_scholar": ";8UAk1p4AAAAJ;;https://scholar.google.com.au/citations?hl=en;BQ0mBRIAAAAJ;https://scholar.google.com.au/citations?user=cj6wAgYAAAAJ;rJMOlVsAAAAJ", "or_profile": "~Jiachang_Liu2;~Qi_Zhang25;~Chongyang_Shi1;~Usman_Naseem1;~Shoujin_Wang1;~Liang_Hu1;~Ivor_W_Tsang1", "aff": "Beijing Institute of Technology;Tongji University;Beijing Institute of Technology;University of Sydney;University of Technology Sydney;Tongji University;University of Technology Sydney", "aff_domain": "bit.edu.cn;tongji.edu.cn;bit.edu.cn;sydney.edu.au;uts.edu.au;tongji.edu.cn;uts.edu.au", "position": "MS student;Researcher;Associate Professor;PhD student;Lecturer;Full Professor;Full Professor", "bibtex": "@inproceedings{\nliu2023causal,\ntitle={Causal Intervention for Abstractive Related Work Generation},\nauthor={Jiachang Liu and Qi Zhang and Chongyang Shi and Usman Naseem and Shoujin Wang and 
Liang Hu and Ivor Tsang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=tRYqTsaSyZ}\n}", "github": "", "project": "", "reviewers": "WopL;DEd8;nTMu", "site": "https://openreview.net/forum?id=tRYqTsaSyZ", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;4;3", "excitement": "2;3;4", "reproducibility": "2;3;3", "correctness": "3;3;3", "rating_avg": 3.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.0, "reproducibility_avg": 2.6666666666666665, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-1037-1361;;0000-0003-0191-7171;0000-0003-1133-9379;;", "linkedin": ";;;usman-naseem-a1568a139/;;;", "aff_unique_index": "0;1;0;2;3;1;3", "aff_unique_norm": "Beijing Institute of Technology;Tongji University;University of Sydney;University of Technology Sydney", "aff_unique_dep": ";;;", "aff_unique_url": "http://www.bit.edu.cn/;https://www.tongji.edu.cn;https://www.sydney.edu.au;https://www.uts.edu.au", "aff_unique_abbr": "BIT;Tongji;USYD;UTS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;1;0;1", "aff_country_unique": "China;Australia" }, { "id": "tSfZo6nSN1", "title": "RECAP: Towards Precise Radiology Report Generation via Dynamic Disease Progression Reasoning", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Automating radiology report generation can significantly alleviate radiologists\u2019 workloads. Previous research has primarily focused on realizing highly concise observations while neglecting the precise attributes that determine the severity of diseases (e.g., small pleural effusion). Since incorrect attributes will lead to imprecise radiology reports, strengthening the generation process with precise attribute modeling becomes necessary. Additionally, the temporal information contained in the historical records, which is crucial in evaluating a patient\u2019s current condition (e.g., heart size is unchanged), has also been largely disregarded. To address these issues, we propose RECAP, which generates precise and accurate radiology reports via dynamic disease progression reasoning. Specifically, RECAP first predicts the observations and progressions (i.e., spatiotemporal information) given two consecutive radiographs. It then combines the historical records, spatiotemporal information, and radiographs for report generation, where a disease progression graph and dynamic progression reasoning mechanism are devised to accurately select the attributes of each observation and progression. 
Extensive experiments on two publicly available datasets demonstrate the effectiveness of our model.", "keywords": "radiology report generation;text generation grounded on vision", "primary_area": "", "supplementary_material": "", "author": "Wenjun Hou;Yi Cheng;Kaishuai Xu;Wenjie Li;Jiang Liu", "authorids": "~Wenjun_Hou1;~Yi_Cheng3;~Kaishuai_Xu1;~Wenjie_Li1;~Jiang_Liu5", "gender": "M;F;M;F;M", "homepage": "https://wjhou.github.io/;;https://kaishxu.github.io/;https://web.comp.polyu.edu.hk/cswjli/;https://faculty.sustech.edu.cn/liuj/", "dblp": ";;295/3979.html;33/3999-2.html;23/108-1.html", "google_scholar": "https://scholar.google.com/citations?hl=en;4FWRdrAAAAAJ;qfJ9u80AAAAJ;Rx5swD4AAAAJ;NHt3fUcAAAAJ", "or_profile": "~Wenjun_Hou1;~Yi_Cheng3;~Kaishuai_Xu1;~Wenjie_Li1;~Jiang_Liu5", "aff": "Southern University of Science and Technology;The Hong Kong Polytechnic University;Hong Kong Polytechnic University;The Hong Kong Polytechnic University, The Hong Kong Polytechnic University;Southern University of Science and Technology", "aff_domain": "sustech.edu.cn;polyu.edu.hk;polyu.edu.hk;comp.polyu.edu.hk;sustech.edu.cn", "position": "PhD student;PhD student;PhD student;Full Professor;Full Professor", "bibtex": "@inproceedings{\nhou2023recap,\ntitle={{RECAP}: Towards Precise Radiology Report Generation via Dynamic Disease Progression Reasoning},\nauthor={Wenjun Hou and Yi Cheng and Kaishuai Xu and Wenjie Li and Jiang Liu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=tSfZo6nSN1}\n}", "github": "", "project": "", "reviewers": "7aGE;6ocQ;Uawd", "site": "https://openreview.net/forum?id=tSfZo6nSN1", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;4", "excitement": "2;3;4", "reproducibility": "4;3;4", "correctness": "2;4;4", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0001-9747-5894;;;0000-0002-7360-8864;", "linkedin": ";;;;", "aff_unique_index": "0;1;1;1;0", "aff_unique_norm": "Southern University of Science and Technology;Hong Kong Polytechnic University", "aff_unique_dep": ";", "aff_unique_url": "https://www.sustech.edu.cn;https://www.polyu.edu.hk", "aff_unique_abbr": "SUSTech;PolyU", "aff_campus_unique_index": "1;1;1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "tZXaHWfsXB", "title": "Transcending Scaling Laws with 0.1% Extra Compute", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Scaling language models improves performance but comes with significant computational costs. This paper proposes UL2R, a method that substantially improves existing language models and their scaling curves with a relatively tiny amount of extra compute. The key idea is to continue training a state-of-the-art large language model on a few more steps with UL2's mixture-of-denoiser objective. We show that, with almost negligible extra computational costs and no new sources of data, we are able to substantially improve the scaling properties of large language models on downstream metrics. In this paper, we continue training a baseline language model, PaLM, with UL2R, introducing a new set of models at 8B, 62B, and 540B scale which we call U-PaLM. 
Impressively, at 540B scale, we show an approximately 2x computational savings rate where U-PaLM achieves the same performance as the final PaLM 540B model at around half its computational budget (i.e., saving ~4.4 million TPUv4 hours). We further show that this improved scaling curve leads to \"emergent abilities\" on challenging BIG-Bench tasks---for instance, U-PaLM does much better on some tasks or demonstrates better quality at much smaller scale (62B as opposed to 540B). Overall, we show that U-PaLM outperforms PaLM on many few-shot setups, including reasoning tasks with chain-of-thought (e.g., GSM8K), multilingual tasks (MGSM, TydiQA), MMLU and challenging BIG-Bench tasks.", "keywords": "language models;scaling laws;emergent abilities;efficiency;pretraining", "primary_area": "", "supplementary_material": "", "author": "Yi Tay;Jason Wei;Hyung Won Chung;Vinh Q. Tran;David So;Siamak Shakeri;Xavier Garcia;Steven Zheng;Jinfeng Rao;Aakanksha Chowdhery;Denny Zhou;Donald Metzler;Slav Petrov;Neil Houlsby;Quoc V Le;Mostafa Dehghani", "authorids": "~Yi_Tay1;~Jason_Wei1;~Hyung_Won_Chung1;~Vinh_Q._Tran1;~David_So1;~Siamak_Shakeri1;~Xavier_Garcia1;~Steven_Zheng1;~Jinfeng_Rao2;~Aakanksha_Chowdhery1;~Denny_Zhou1;~Donald_Metzler1;~Slav_Petrov1;~Neil_Houlsby1;~Quoc_V_Le1;~Mostafa_Dehghani1", "gender": "M;M;M;M;M;M;;;;;;M;M;M;M;M", "homepage": "http://yitay.net;https://jasonwei20.github.io;;https://vqtran.github.io;https://www.davidrso.com/;;;;;http://www.achowdhery.com;;https://research.google/people/DonaldMetzler/;;https://neilhoulsby.github.io/;;http://mostafadehghani.com/", "dblp": ";02/11220.html;;77/2885-2.html;;;;;;;;95/2272;18/5906;91/10669;29/6166;125/4062", "google_scholar": "VBclY_cAAAAJ;;1CAlXvYAAAAJ;ot3WsOwAAAAJ;;tG6MWNgAAAAJ;;;;7KDSCpQAAAAJ;;bmXpOd8AAAAJ;ipb9-GEAAAAJ;https://scholar.google.com/citations?hl=en;;https://scholar.google.nl/citations?user=MiHOX3QAAAAJ", "or_profile": "~Yi_Tay1;~Jason_Wei1;~Hyung_Won_Chung1;~Vinh_Q._Tran1;~David_So1;~Siamak_Shakeri1;~Xavier_Garcia1;~Steven_Zheng1;~Jinfeng_Rao2;~Aakanksha_Chowdhery1;~Denny_Zhou1;~Donald_Metzler1;~Slav_Petrov1;~Neil_Houlsby1;~Quoc_V_Le1;~Mostafa_Dehghani1", "aff": "Google;OpenAI;Google Brain;Google;Google DeepMind;Research, Google;;;;Google;;Google;Google;Google;Google;Google DeepMind", "aff_domain": "google.com;openai.com;google.com;google.com;google.com;research.google.com;;;;google.com;;google.com;google.com;google.com;google.com;google.com", "position": "Research Scientist;Researcher;Researcher;Researcher;Research Engineer;Software Engineer;;;;Researcher;;Research Scientist;Researcher;Researcher;Scientist;Research Scientist", "bibtex": "@inproceedings{\ntay2023transcending,\ntitle={Transcending Scaling Laws with 0.1\\% Extra Compute},\nauthor={Yi Tay and Jason Wei and Hyung Won Chung and Vinh Q. 
Tran and David So and Siamak Shakeri and Xavier Garcia and Steven Zheng and Jinfeng Rao and Aakanksha Chowdhery and Denny Zhou and Donald Metzler and Slav Petrov and Neil Houlsby and Quoc V Le and Mostafa Dehghani},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=tZXaHWfsXB}\n}", "github": "", "project": "", "reviewers": "upoq;zMDr;jpZa;8nct", "site": "https://openreview.net/forum?id=tZXaHWfsXB", "pdf_size": 0, "rating": "5;5;5;5", "confidence": "4;3;4;3", "excitement": "5;4;4;4", "reproducibility": "2;2;2;4", "correctness": "4;5;4;5", "rating_avg": 5.0, "confidence_avg": 3.5, "excitement_avg": 4.25, "reproducibility_avg": 2.5, "correctness_avg": 4.5, "replies_avg": 13, "authors#_avg": 16, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;;;;;;;0000-0003-4276-6269;;;;", "linkedin": ";;;vinh-tran-32597468/;;;;;;;;donmetzler/;slavpetrov/;;;", "aff_unique_index": "0;1;0;0;0;0;0;0;0;0;0;0", "aff_unique_norm": "Google;OpenAI", "aff_unique_dep": "Google;", "aff_unique_url": "https://www.google.com;https://openai.com", "aff_unique_abbr": "Google;OpenAI", "aff_campus_unique_index": "0;0;0;0;0;0;0;0;0", "aff_campus_unique": "Mountain View;", "aff_country_unique_index": "0;0;0;0;1;0;0;0;0;0;0;1", "aff_country_unique": "United States;United Kingdom" }, { "id": "taXJRZs43y", "title": "Where to start? Analyzing the potential value of intermediate models", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Previous studies observed that finetuned models may be better base models than the vanilla pretrained model. Such a model, finetuned on some source dataset, may provide a better starting point for a new finetuning process on a desired target dataset.\nHere, we perform a systematic analysis of this \\emph{intertraining} scheme, over a wide range of English classification tasks. Surprisingly, our analysis suggests that the potential intertraining gain can be analyzed \\emph{independently} for the target dataset under consideration, and for a base model being considered as a starting point. Hence, a performant model is generally strong, even if its training data was not aligned with the target dataset. Furthermore, we leverage our analysis to propose a practical and efficient approach to determine if and how to select a base model in real-world settings. 
Last, we release an updating ranking of best models in the HuggingFace hub per architecture.", "keywords": "Intertraining;fine tuned;intermediate;finetune", "primary_area": "", "supplementary_material": "", "author": "Leshem Choshen;Elad Venezian;Shachar Don-Yehiya;Noam Slonim;Yoav Katz", "authorids": "~Leshem_Choshen1;~Elad_Venezian1;~Shachar_Don-Yehiya1;~Noam_Slonim1;~Yoav_Katz1", "gender": "Not Specified;M;;M;M", "homepage": "https://ktilana.wixsite.com/leshem-choshen;;;https://researcher.watson.ibm.com/researcher/view.php?person=il-NOAMS;https://researcher.watson.ibm.com/researcher/view.php?person=il-KATZ", "dblp": "218/5237;206/6812;;62/7001;40/21", "google_scholar": "https://scholar.google.com/citations?hl=en;;;https://scholar.google.co.il/citations?user=KjvrNGMAAAAJ;EfW-wnAAAAAJ", "or_profile": "~Leshem_Choshen1;~Elad_Venezian1;~Shachar_Don-Yehiya1;~Noam_Slonim1;~Yoav_Katz1", "aff": "International Business Machines;International Business Machines;;International Business Machines;International Business Machines", "aff_domain": "ibm.com;ibm.com;;ibm.com;ibm.com", "position": "Researcher;Researcher;;Principal Researcher;IBM", "bibtex": "@inproceedings{\nchoshen2023where,\ntitle={Where to start? Analyzing the potential value of intermediate models},\nauthor={Leshem Choshen and Elad Venezian and Shachar Don-Yehiya and Noam Slonim and Yoav Katz},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=taXJRZs43y}\n}", "github": "", "project": "", "reviewers": "mPxL;hzbj;seM1;H4LS", "site": "https://openreview.net/forum?id=taXJRZs43y", "pdf_size": 0, "rating": "5;5;5;5", "confidence": "4;4;4;2", "excitement": "4;3;4;4", "reproducibility": "3;3;4;3", "correctness": "4;4;4;4", "rating_avg": 5.0, "confidence_avg": 3.5, "excitement_avg": 3.75, "reproducibility_avg": 3.25, "correctness_avg": 4.0, "replies_avg": 6, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-0085-6496;;;;", "linkedin": "leshemchoshen/;;;noam-slonim-28a80b63/;yoav-katz-0326b74/?originalSubdomain=il", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "International Business Machines Corporation", "aff_unique_dep": "", "aff_unique_url": "https://www.ibm.com", "aff_unique_abbr": "IBM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "tauoKi9IWO", "title": "LLMDet: A Third Party Large Language Models Generated Text Detection Tool", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Generated texts from large language models (LLMs) are remarkably close to high-quality human-authored text, raising concerns about their potential misuse in spreading false information and academic misconduct. Consequently, there is an urgent need for a highly practical detection tool capable of accurately identifying the source of a given text.\nHowever, existing detection tools typically rely on access to LLMs and can only differentiate between machine-generated and human-authored text, failing to meet the requirements of fine-grained tracing, intermediary judgment, and rapid detection. Therefore, we propose LLMDet, a model-specific, secure, efficient, and extendable detection tool, that can source text from specific LLMs, such as GPT-2, OPT, LLaMA, and others. In LLMDet, we record the next-token probabilities of salient n-grams as features to calculate proxy perplexity for each LLM. 
By jointly analyzing the proxy perplexities of LLMs, we can determine the source of the generated text. Experimental results show that LLMDet yields impressive detection performance while ensuring speed and security, achieving 98.54\\% precision and about $\\times 5.0$ faster for recognizing human-authored text. Additionally, LLMDet can effortlessly extend its detection capabilities to a new open-source model. We will provide an open-source tool at \\url{https://github.com/TrustedLLM/LLMDet}.", "keywords": "Text Detection;Large Language Model;Fine-grained Tracing;Proxy Perplexity", "primary_area": "", "supplementary_material": "", "author": "Kangxi Wu;Liang Pang;Huawei Shen;Xueqi Cheng;Tat-Seng Chua", "authorids": "~Kangxi_Wu2;~Liang_Pang1;~Huawei_Shen1;~Xueqi_Cheng1;~Tat-Seng_Chua2", "gender": "M;M;M;M;M", "homepage": "https://pl8787.github.io/;https://www.ict.ac.cn/sourcedb/cn/jssrck/201402/t20140221_4037648.html;https://people.ucas.ac.cn/~cxq?language=en;;http://www.comp.nus.edu.sg/~chuats/", "dblp": "37/11078;;44/912;337/9985;", "google_scholar": "1dgQHBkAAAAJ;;hY8aLqAAAAAJ;tbaFX48AAAAJ;https://scholar.google.com.tw/citations?user=Z9DWCBEAAAAJ", "or_profile": "~Liang_Pang1;~Huawei_Shen1;~Xueqi_Cheng1;~kangxi_wu1;~Tat-seng_Chua1", "aff": "Institute of Computing Technology, Chinese Academy of Sciences;Institute of Computing Technology, Chinese Academy of Sciences;Institute of Computing Technology, Chinese Academy;Institute of Computing Technology, Chinese Academy of Sciences;National University of Singapore", "aff_domain": "ict.ac.cn;ict.ac.cn;ict.ac.cn;ict.ac.cn;nus.edu.sg", "position": "Associate Professor;Full Professor;Full Professor;MS student;Full Professor", "bibtex": "@inproceedings{\nwu2023llmdet,\ntitle={{LLMD}et: A Third Party Large Language Models Generated Text Detection Tool},\nauthor={Kangxi Wu and Liang Pang and Huawei Shen and Xueqi Cheng and Tat-Seng Chua},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=tauoKi9IWO}\n}", "github": "", "project": "", "reviewers": "eKex;d37J;WGR6;RV13;Nwjg", "site": "https://openreview.net/forum?id=tauoKi9IWO", "pdf_size": 0, "rating": "3;3;3;3;3", "confidence": "3;4;4;4;4", "excitement": "4;3;3;3;3", "reproducibility": "4;4;4;5;4", "correctness": "3;3;3;3;3", "rating_avg": 3.0, "confidence_avg": 3.8, "excitement_avg": 3.2, "reproducibility_avg": 4.2, "correctness_avg": 3.0, "replies_avg": 17, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0003-1161-8546;0000-0002-1081-8119;;;0000-0001-6097-7807", "linkedin": ";;;;", "aff_unique_index": "0;0;0;0;1", "aff_unique_norm": "Chinese Academy of Sciences;National University of Singapore", "aff_unique_dep": "Institute of Computing Technology;", "aff_unique_url": "http://www.ict.ac.cn;https://www.nus.edu.sg", "aff_unique_abbr": "CAS;NUS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;1", "aff_country_unique": "China;Singapore" }, { "id": "tbHe97ENFD", "title": "Exploring the Impact of Corpus Diversity on Financial Pretrained Language Models", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "Over the past few years, various domain-specific pretrained language models (PLMs) have been proposed and have outperformed general-domain PLMs in specialized areas such as biomedical, scientific, and clinical domains. 
In addition, financial PLMs have been studied because of the high economic impact of financial data analysis. However, we found that financial PLMs were not pretrained on sufficiently diverse financial data. This lack of diverse training data leads to a subpar generalization performance, resulting in general-purpose PLMs, including BERT, often outperforming financial PLMs on many downstream tasks. To address this issue, we collected a broad range of financial corpus and trained the Financial Language Model (FiLM) on these diverse datasets. Our experimental results confirm that FiLM outperforms not only existing financial PLMs but also general domain PLMs. Furthermore, we provide empirical evidence that this improvement can be achieved even for unseen corpus groups.", "keywords": "Financial NLP;Pre-trained Language Model;Generalization", "primary_area": "", "supplementary_material": "", "author": "Jaeyoung Choe;Keonwoong Noh;Nayeon Kim;Seyun Ahn;Woohwan Jung", "authorids": "~Jaeyoung_Choe1;~Keonwoong_Noh1;~Nayeon_Kim2;~Seyun_Ahn1;~Woohwan_Jung1", "gender": "M;M;F;F;M", "homepage": ";https://nkw011.github.io/;https://github.com/na2na8;https://github.com/yunniya097;https://sites.google.com/view/whjung/", "dblp": ";359/3372;;;193/7295", "google_scholar": ";;;https://scholar.google.co.kr/citations?user=l9SCPhMAAAAJ;KsU7NzIAAAAJ", "or_profile": "~Jaeyoung_Choe1;~Keonwoong_Noh1;~Nayeon_Kim2;~Seyun_Ahn1;~Woohwan_Jung1", "aff": "Hanyang University;Hanyang University;Hanyang University;Hanyang University;Hanyang University", "aff_domain": "hanyang.ac.kr;hanyang.ac.kr;hanyang.ac.kr;hanyang.ac.kr;hanyang.ac.kr", "position": "PhD student;Undergrad student;MS student;Undergrad student;Assistant Professor", "bibtex": "@inproceedings{\nchoe2023exploring,\ntitle={Exploring the Impact of Corpus Diversity on Financial Pretrained Language Models},\nauthor={Jaeyoung Choe and Keonwoong Noh and Nayeon Kim and Seyun Ahn and Woohwan Jung},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=tbHe97ENFD}\n}", "github": "", "project": "", "reviewers": "ptSf;7icf;dzK5;8cJY", "site": "https://openreview.net/forum?id=tbHe97ENFD", "pdf_size": 0, "rating": "3;3;3;3", "confidence": "5;3;2;5", "excitement": "4;2;4;3", "reproducibility": "4;4;5;4", "correctness": "3;3;4;3", "rating_avg": 3.0, "confidence_avg": 3.75, "excitement_avg": 3.25, "reproducibility_avg": 4.25, "correctness_avg": 3.25, "replies_avg": 14, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;0000-0003-4561-2214", "linkedin": ";keonwoong-noh-661188305;;;", "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Hanyang University", "aff_unique_dep": "", "aff_unique_url": "https://www.hanyang.ac.kr", "aff_unique_abbr": "HYU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "South Korea" }, { "id": "tbRPPWDy76", "title": "MEEP: Is this Engaging? Prompting Large Language Models for Dialogue Evaluation in Multilingual Settings", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "As dialogue systems become more popular, evaluation of their response quality gains importance. Engagingness highly correlates with overall quality and creates a sense of connection that gives human participants a more fulfilling experience. 
Although qualities like coherence and fluency are readily measured with well-worn automatic metrics, evaluating engagingness often relies on human assessment, which is a costly and time-consuming process. Existing automatic engagingness metrics evaluate the response without the conversation history, are designed for one dataset, or have limited correlation with human annotations. Furthermore, they have been tested exclusively on English conversations. Given that dialogue systems are increasingly available in languages beyond English, multilingual evaluation capabilities are essential. We propose that large language models (LLMs) may be used for evaluation of engagingness in dialogue through prompting, and ask how prompt constructs and translated prompts compare in a multilingual setting. We provide a prompt-design taxonomy for engagingness and find that using selected prompt elements with LLMs, including our comprehensive definition of engagingness, outperforms state-of-the-art methods on evaluation of engagingness in dialogue across multiple languages.", "keywords": "automatic evaluation of dialogue;dialogue evaluation;multilingual;metrics;engagingness;prompting;LLM;large language model;multilinguality", "primary_area": "", "supplementary_material": "", "author": "Amila Ferron;Amber Shore;Ekata Mitra;Ameeta Agrawal", "authorids": "~Amila_Ferron1;~Amber_Shore1;~Ekata_Mitra1;~Ameeta_Agrawal1", "gender": "Not Specified;F;F;", "homepage": ";;;", "dblp": ";362/8606;;", "google_scholar": "aKIZSRQAAAAJ;;GjmC2wsAAAAJ;", "or_profile": "~Amila_Ferron1;~Amber_Shore1;~Ekata_Mitra1;~Ameeta_Agrawal1", "aff": "Portland State University;Portland State University;Portland State University;", "aff_domain": "pdx.edu;pdx.edu;pdx.edu;", "position": "MS student;MS student;PhD student;", "bibtex": "@inproceedings{\nferron2023meep,\ntitle={{MEEP}: Is this Engaging? Prompting Large Language Models for Dialogue Evaluation in Multilingual Settings},\nauthor={Amila Ferron and Amber Shore and Ekata Mitra and Ameeta Agrawal},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=tbRPPWDy76}\n}", "github": "", "project": "", "reviewers": "F5G1;amui;ppYc", "site": "https://openreview.net/forum?id=tbRPPWDy76", "pdf_size": 0, "rating": "2;2;2", "confidence": "4;3;3", "excitement": "2;2;3", "reproducibility": "3;4;2", "correctness": "3;3;3", "rating_avg": 2.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 2.3333333333333335, "reproducibility_avg": 3.0, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;", "linkedin": "amila-ferron/;amber-shore-15469b116/;ekatamitra/;", "aff_unique_index": "0;0;0", "aff_unique_norm": "Portland State University", "aff_unique_dep": "", "aff_unique_url": "https://www.pdx.edu", "aff_unique_abbr": "PSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "te3pXuiVk3", "title": "MemeCap: A Dataset for Captioning and Interpreting Memes", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Memes are a widely popular tool for web users to express their thoughts using visual metaphors. Understanding memes requires recognizing and interpreting visual metaphors with respect to the text inside or around the meme, often while employing background knowledge and reasoning abilities. 
We present the task of meme captioning and release a new dataset, MemeCap. Our dataset contains 6.3K memes along with the title of the post containing the meme, the meme captions, the literal image caption, and the visual metaphors. Despite the recent success of vision and language (VL) models on tasks such as image captioning and visual question answering, our extensive experiments using state-of-the-art VL models show that they still struggle with visual metaphors, and perform substantially worse than humans.", "keywords": "meme;captioning;image;large multimodal model", "primary_area": "", "supplementary_material": "", "author": "EunJeong Hwang;Vered Shwartz", "authorids": "~EunJeong_Hwang1;~Vered_Shwartz1", "gender": "F;F", "homepage": "https://eujhwang.github.io/;https://www.cs.ubc.ca/~vshwartz/", "dblp": ";166/2038", "google_scholar": "Z0TA4NEAAAAJ;bbe4ResAAAAJ", "or_profile": "~EunJeong_Hwang1;~Vered_Shwartz1", "aff": "University of British Columbia;University of British Columbia", "aff_domain": "cs.ubc.ca;ubc.ca", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\nhwang2023memecap,\ntitle={MemeCap: A Dataset for Captioning and Interpreting Memes},\nauthor={EunJeong Hwang and Vered Shwartz},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=te3pXuiVk3}\n}", "github": "", "project": "", "reviewers": "XU7o;sFDe;Bzu6", "site": "https://openreview.net/forum?id=te3pXuiVk3", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;4", "excitement": "4;4;3", "reproducibility": "4;4;4", "correctness": "3;3;3", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.0, "correctness_avg": 3.0, "replies_avg": 10, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";", "linkedin": ";vered-shwartz-99548633/", "aff_unique_index": "0;0", "aff_unique_norm": "University of British Columbia", "aff_unique_dep": "", "aff_unique_url": "https://www.ubc.ca", "aff_unique_abbr": "UBC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Canada" }, { "id": "tfHJ9uLNlR", "title": "BiSPN: Generating Entity Set and Relation Set Coherently in One Pass", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "By modeling the interaction among instances and avoiding error propagation, Set Prediction Networks (SPNs) achieve state-of-the-art performance on the tasks of named entity recognition and relation triple extraction respectively. However, how to jointly extract entities and relation triples via SPNs remains an unexplored problem, where the main challenge is the maintenance of coherence between the predicted entity/relation sets during one-pass generation. In this work, we present Bipartite Set Prediction Network (BiSPN), a novel joint entity-relation extraction model that can efficiently generate entity set and relation set in parallel. To overcome the challenge of coherence, BiSPN is equipped with a novel bipartite consistency loss as well as an entity-relation linking loss during training. 
Experiments on three biomedical/clinical datasets and a general-domain dataset show that BiSPN achieves new state of the art in knowledge-intensive scene and performs competitively in general-domain, while being more efficient than two-stage joint extraction methods.", "keywords": "Information Extraction;Joint Entity-Relation Extraction;Non-autoregressive Generation", "primary_area": "", "supplementary_material": "", "author": "Yuxin He;Buzhou Tang", "authorids": "~Yuxin_He1;~Buzhou_Tang1", "gender": "M;M", "homepage": "https://stardust-hyx.github.io/;", "dblp": ";00/7437", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com/citations?hl=zh-CN", "or_profile": "~Yuxin_He1;~Buzhou_Tang1", "aff": "Harbin Institute of Technology;Harbin Institute of Technology", "aff_domain": "hit.edu.cn;hit.edu.cn", "position": "MS student;Full Professor", "bibtex": "@inproceedings{\nhe2023bispn,\ntitle={Bi{SPN}: Generating Entity Set and Relation Set Coherently in One Pass},\nauthor={Yuxin He and Buzhou Tang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=tfHJ9uLNlR}\n}", "github": "", "project": "", "reviewers": "6vNJ;w2i7;DE1H", "site": "https://openreview.net/forum?id=tfHJ9uLNlR", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;4", "excitement": "3;2;4", "reproducibility": "4;4;4", "correctness": "4;3;4", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 3.0, "reproducibility_avg": 4.0, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0003-1105-0014;", "linkedin": ";", "aff_unique_index": "0;0", "aff_unique_norm": "Harbin Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "http://www.hit.edu.cn/", "aff_unique_abbr": "HIT", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Harbin", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "id": "tkY0l8mHii", "title": "A Query-Parallel Machine Reading Comprehension Framework for Low-resource NER", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Named entity recognition (NER) is a fundamental task in natural language processing. Recently, NER has been formulated as a machine reading comprehension (MRC) task, in which manually-crafted queries are used to extract entities of different types. However, current MRC-based NER techniques are limited to extracting a single type of entities at a time and are largely geared towards resource-rich settings. This renders them inefficient during the inference phase, while also leaving their potential untapped for utilization in low-resource settings. We suggest a query-parallel MRC-based approach to address these issues, which is capable of extracting multiple entity types concurrently and is applicable to both resource-rich and resource-limited settings. Specifically, we propose a query-parallel encoder which uses a query-segmented attention mechanism to isolate the semantics of queries and model the query-context interaction with a unidirectional flow. This allows for easier generalization to new entity types or transfer to new domains. After obtaining the query and context representations through the encoder, they are fed into a query-conditioned biaffine predictor to extract multiple entities at once. The model is trained with parameter-efficient tuning technique, making it more data-efficient. 
We conduct extensive experiments and demonstrate that our model performs competitively against strong baseline methods in resource-rich settings, and achieves state-of-the-art results in low-resource settings, including training-from-scratch, in-domain transfer and cross-domain transfer tasks.", "keywords": "NER;low-resource;in-domain transfer;cross-domain transfer", "primary_area": "", "supplementary_material": "", "author": "Yuhao Zhang;Yongliang Wang", "authorids": "~Yuhao_Zhang5;~Yongliang_Wang2", "gender": ";M", "homepage": ";https://github.com/wangyongliang", "dblp": ";", "google_scholar": ";", "or_profile": "~Yuhao_Zhang5;~Yongliang_Wang2", "aff": "Alibaba Group;", "aff_domain": "alibaba-inc.com;", "position": "Researcher;", "bibtex": "@inproceedings{\nzhang2023a,\ntitle={A Query-Parallel Machine Reading Comprehension Framework for Low-resource {NER}},\nauthor={Yuhao Zhang and Yongliang Wang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=tkY0l8mHii}\n}", "github": "", "project": "", "reviewers": "HR2U;Km8b;Nr7p", "site": "https://openreview.net/forum?id=tkY0l8mHii", "pdf_size": 0, "rating": "2;2;2", "confidence": "3;3;4", "excitement": "3;4;3", "reproducibility": "3;4;3", "correctness": "3;4;3", "rating_avg": 2.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";", "linkedin": ";", "aff_unique_index": "0", "aff_unique_norm": "Alibaba Group", "aff_unique_dep": "", "aff_unique_url": "https://www.alibaba.com", "aff_unique_abbr": "Alibaba", "aff_country_unique_index": "0", "aff_country_unique": "China" }, { "id": "tm5UxNFrlD", "title": "Location-Aware Visual Question Generation with Lightweight Models", "track": "main", "status": "Long Main", "tldr": "", "abstract": "This work introduces a novel task, location-aware visual question generation (LocaVQG), which aims to generate engaging questions from data relevant to a particular geographical location. Specifically, we represent such location-aware information with surrounding images and a GPS coordinate. To tackle this task, we present a dataset generation pipeline that leverages GPT-4 to produce diverse and sophisticated questions. Then, we aim to learn a lightweight model that can address the LocaVQG task and fit on an edge device, such as a mobile phone. To this end, we propose a method which can reliably generate engaging questions from location-aware information. Our proposed method outperforms baselines regarding human evaluation (e.g., engagement, grounding, coherence) and automatic evaluation metrics (e.g., BERTScore, ROUGE-2). 
Moreover, we conduct extensive ablation studies to justify our proposed techniques for both generating the dataset and solving the task.", "keywords": "Location-aware Visual Question Generation;Visual Question Generation;Question Generation;Lightweight Models", "primary_area": "", "supplementary_material": "", "author": "Nicholas Collin Suwono;Justin Chen;Tun Min Hung;Ting-Hao Kenneth Huang;I-Bin Liao;Yung-Hui Li;Lun-Wei Ku;Shao-Hua Sun", "authorids": "~Nicholas_Collin_Suwono1;~Justin_Chen1;~Tun_Min_Hung1;~Ting-Hao_Kenneth_Huang1;~I-Bin_Liao1;~Yung-Hui_Li3;~Lun-Wei_Ku1;~Shao-Hua_Sun1", "gender": "M;M;M;F;M;M;M;M", "homepage": ";https://allenhung1025.github.io/;https://dblp.org/pid/83/1564.html;http://www.lunweiku.com/;http://shaohua0116.github.io;https://dinobby.github.io/;http://kennethhuang.cc/;https://www.hh-ri.com/ai/", "dblp": ";;83/1564.html;82/2054;158/9680;248/8754.html;215/4581;", "google_scholar": ";;;SzcLXlkAAAAJ;uXsfnaQAAAAJ;https://scholar.google.com.tw/citations?user=ODoG9isAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com.tw/citations?user=0yBU2_YAAAAJ", "or_profile": "~Nicholas_Collin_Suwono1;~Tun_Min_Hung1;~I-Bin_Liao1;~Lun-Wei_Ku1;~Shao-Hua_Sun1;~Chih_Yao_Chen1;~Ting-Hao_Huang1;~Yung-hui_Li1", "aff": "Academia Sinica;;Chunghwa Telecom laboratories.;Academia Sinica;National Taiwan University;University of North Carolina at Chapel Hill;Pennsylvania State University;Hon Hai Research Institute", "aff_domain": "sinica.edu.tw;;cht.com.tw;sinica.edu.tw;ntu.edu.tw;unc.edu;psu.edu;foxconn.com", "position": "MS student;;Researcher;Researcher;Assistant Professor;PhD student;Assistant Professor;Senior Director", "bibtex": "@inproceedings{\nsuwono2023locationaware,\ntitle={Location-Aware Visual Question Generation with Lightweight Models},\nauthor={Nicholas Collin Suwono and Justin Chen and Tun Min Hung and Ting-Hao Kenneth Huang and I-Bin Liao and Yung-Hui Li and Lun-Wei Ku and Shao-Hua Sun},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=tm5UxNFrlD}\n}", "github": "", "project": "", "reviewers": "BPM2;Gg8Z;BRvQ", "site": "https://openreview.net/forum?id=tm5UxNFrlD", "pdf_size": 0, "rating": "5;5;5", "confidence": "3;3;3", "excitement": "4;4;4", "reproducibility": "4;4;4", "correctness": "4;4;4", "rating_avg": 5.0, "confidence_avg": 3.0, "excitement_avg": 4.0, "reproducibility_avg": 4.0, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 8, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0000-0003-2691-5404;0000-0001-7579-6734;0009-0006-4125-6418;0000-0001-7021-4627;0000-0002-0475-3689", "linkedin": "nicholas-collin-suwono/;tun-min-hung-9057871a9/;;lun-wei-ku/;shaohua0116/;;;yung-hui-li-8a363120/", "aff_unique_index": "0;1;0;2;3;4;5", "aff_unique_norm": "Academia Sinica;Chunghwa Telecom;National Taiwan University;University of North Carolina;Pennsylvania State University;Hon Hai Research Institute", "aff_unique_dep": ";Chunghwa Telecom laboratories;;;;", "aff_unique_url": "https://www.sinica.edu.tw;https://www.cht.com.tw;https://www.ntu.edu.tw;https://www.unc.edu;https://www.psu.edu;https://www.honhai.com/", "aff_unique_abbr": "Academia Sinica;CHT;NTU;UNC;PSU;HRI", "aff_campus_unique_index": "0;0;0;0;1;0", "aff_campus_unique": "Taiwan;Chapel Hill;", "aff_country_unique_index": "0;0;0;0;1;1;0", "aff_country_unique": "China;United States" }, { "id": "toUPGCAMic", "title": "ALCUNA: Large Language Models Meet New Knowledge", "track": 
"main", "status": "Long Main", "tldr": "", "abstract": "With the rapid development of NLP, large-scale language models (LLMs) excel in various tasks across multiple domains now. However, existing benchmarks may not adequately measure these models' capabilities, especially when faced with new knowledge.\nIn this paper, we address the lack of benchmarks to evaluate LLMs' ability to handle new knowledge, an important and challenging aspect in the rapidly evolving world. We propose an approach called KnowGen that generates new knowledge by altering existing entity attributes and relationships, resulting in artificial entities that are distinct from real-world entities. With KnowGen, we introduce a benchmark named ALCUNA to assess LLMs' abilities in knowledge understanding, differentiation, and association. \nWe benchmark several LLMs, reveals that their performance in face of new knowledge is not satisfactory, particularly in reasoning between new and internal knowledge. We also explore the impact of entity similarity on the model's understanding of entity knowledge and the influence of contextual entities. \nWe appeal to the need for caution when using LLMs in new scenarios or with new knowledge, and hope that our benchmarks can help drive the development of LLMs in face of new knowledge.", "keywords": "Large Language Model;Model Evaluation;Knowledge", "primary_area": "", "supplementary_material": "", "author": "Xunjian Yin;Baizhou Huang;Xiaojun Wan", "authorids": "~Xunjian_Yin1;~Baizhou_Huang1;~Xiaojun_Wan1", "gender": ";M;M", "homepage": "https://xunjianyin.github.io/;;https://wanxiaojun.github.io", "dblp": "320/5519;329/4291;07/1521", "google_scholar": "PociQ5EAAAAJ;1Zx1wi8AAAAJ;lTTeBdkAAAAJ", "or_profile": "~Xunjian_Yin1;~Baizhou_Huang1;~Xiaojun_Wan1", "aff": "Peking University;Peking University;Peking University", "aff_domain": "pku.edu.cn;pku.edu.cn;pku.edu.cn", "position": "MS student;Undergrad student;Full Professor", "bibtex": "@inproceedings{\nyin2023alcuna,\ntitle={{ALCUNA}: Large Language Models Meet New Knowledge},\nauthor={Xunjian Yin and Baizhou Huang and Xiaojun Wan},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=toUPGCAMic}\n}", "github": "", "project": "", "reviewers": "Hv6T;NN51;wiV6", "site": "https://openreview.net/forum?id=toUPGCAMic", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;4", "excitement": "3;4;4", "reproducibility": "3;3;4", "correctness": "3;4;4", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.6666666666666665, "replies_avg": 10, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": ";;", "aff_unique_index": "0;0;0", "aff_unique_norm": "Peking University", "aff_unique_dep": "", "aff_unique_url": "http://www.pku.edu.cn", "aff_unique_abbr": "Peking U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "tquKyw04gE", "title": "MultiCoNER v2: a Large Multilingual dataset for Fine-grained and Noisy Named Entity Recognition", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "We present MULTICONER V2, a dataset for fine-grained Named Entity Recognition covering 33 entity classes across 12 languages, in both monolingual and multilingual settings. 
This dataset aims to tackle the following practical challenges in NER: (i) effective handling of fine-grained classes that include complex entities like movie titles, and (ii) performance degradation due to noise generated from typing mistakes or OCR errors. The dataset is compiled from open resources like Wikipedia and Wikidata, and is publicly available. Evaluation based on the XLM-RoBERTa baseline highlights the unique challenges posed by MULTICONER V2: (i) the fine-grained taxonomy is challenging, where the scores are low with macro-F1=0.63 (across all languages), and (ii) the corruption strategy significantly impairs performance, with entity corruption resulting in 9% lower performance relative to non-entity corruptions across all languages. This highlights the greater impact of entity noise in contrast to context noise.", "keywords": "ner;multilingual ner;fine-grained ner;noisy ner", "primary_area": "", "supplementary_material": "", "author": "Besnik Fetahu;Zhiyu Chen;Sudipta Kar;Oleg Rokhlenko;Shervin Malmasi", "authorids": "~Besnik_Fetahu2;~Zhiyu_Chen4;~Sudipta_Kar1;~Oleg_Rokhlenko1;~Shervin_Malmasi1", "gender": "M;M;M;M;", "homepage": "https://l3s.de/~fetahu;https://zhiyuchen.com/;http://sudiptakar.info;;https://www.amazon.science/author/shervin-malmasi", "dblp": "117/4348.html;71/1661-1;186/7220;31/5148;148/4567", "google_scholar": "-CQlI8EAAAAJ;KSBmL64AAAAJ;MK_tdhEAAAAJ;rgKKn-kAAAAJ;https://scholar.google.com.au/citations?user=lT1-HZsAAAAJ", "or_profile": "~Besnik_Fetahu2;~Zhiyu_Chen4;~Sudipta_Kar1;~Oleg_Rokhlenko1;~Shervin_Malmasi1", "aff": "Amazon;Amazon;Amazon;Amazon;Amazon", "aff_domain": "amazon.com;amazon.com;amazon.com;amazon.com;amazon.com", "position": "Applied Scientist;Researcher;Researcher;Principal Researcher;Researcher", "bibtex": "@inproceedings{\nfetahu2023multiconer,\ntitle={MultiCo{NER} v2: a Large Multilingual dataset for Fine-grained and Noisy Named Entity Recognition},\nauthor={Besnik Fetahu and Zhiyu Chen and Sudipta Kar and Oleg Rokhlenko and Shervin Malmasi},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=tquKyw04gE}\n}", "github": "", "project": "", "reviewers": "SHVo;iV2c;kR1G", "site": "https://openreview.net/forum?id=tquKyw04gE", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;5;4", "excitement": "4;4;2", "reproducibility": "5;4;3", "correctness": "3;3;2", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.0, "correctness_avg": 2.6666666666666665, "replies_avg": 10, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-3096-7912;;;", "linkedin": ";zhiyu-chen-ir-nlp/;%F0%9F%A4%9F-sudipta-kar-85180621/;;", "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Amazon", "aff_unique_dep": "Amazon.com, Inc.", "aff_unique_url": "https://www.amazon.com", "aff_unique_abbr": "Amazon", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "tueh30tKiv", "title": "Length is a Curse and a Blessing for Document-level Semantics", "track": "main", "status": "Long Main", "tldr": "", "abstract": "In recent years, contrastive learning (CL) has been extensively utilized to recover sentence and document-level encoding capability from pre-trained language models.\nIn this work, we question the length generalizability of CL-based models, i.e., their vulnerability towards length-induced 
semantic shift. \nWe verify not only that length vulnerability is a significant yet overlooked research gap, but also that we can devise unsupervised CL methods solely depending on the semantic signal provided by document length. \nWe first derive the theoretical foundations underlying length attacks, showing that elongating a document would intensify the high intra-document similarity that is already brought by CL. Moreover, we find that the isotropy promised by CL is highly dependent on the length range of text exposed in training.\nInspired by these findings, we introduce a simple yet universal document representation learning framework, \n**LA(SER)$^3$**: length-agnostic self-reference for semantically robust sentence representation learning, achieving state-of-the-art unsupervised performance on the standard information retrieval benchmark. [Our code is publicly available.](https://github.com/gowitheflow-1998/LA-SER-cubed)", "keywords": "contrastive learning;sentence representation learning;document representation learning;semantics;document length", "primary_area": "", "supplementary_material": "", "author": "Chenghao Xiao;Yizhi LI;G Thomas Hudson;Chenghua Lin;Noura Al Moubayed", "authorids": "~Chenghao_Xiao1;~Yizhi_LI1;~G_Thomas_Hudson1;~Chenghua_Lin1;~Noura_Al_Moubayed1", "gender": "M;M;M;;F", "homepage": ";https://yizhilll.github.io;;;https://www.durham.ac.uk/staff/noura-al-moubayed/", "dblp": ";;;;27/8509", "google_scholar": "1GtWLmIAAAAJ;l5NEL4wAAAAJ;S_bOPrsAAAAJ;;https://scholar.google.co.uk/citations?user=GHecv14AAAAJ", "or_profile": "~Chenghao_Xiao1;~Yizhi_LI1;~G_Thomas_Hudson1;~Chenghua_Lin1;~Noura_Al_Moubayed1", "aff": "Durham University;University of Manchester ;University of Durham;;Durham University", "aff_domain": "durham.ac.uk;manchester.ac.uk;dur.ac.uk;;durham.ac.uk", "position": "PhD student;PhD student;Postdoc;;Associate Professor", "bibtex": "@inproceedings{\nxiao2023length,\ntitle={Length is a Curse and a Blessing for Document-level Semantics},\nauthor={Chenghao Xiao and Yizhi LI and G Thomas Hudson and Chenghua Lin and Noura Al Moubayed},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=tueh30tKiv}\n}", "github": "", "project": "", "reviewers": "ZBQ8;wNeu;oY3S;omiY", "site": "https://openreview.net/forum?id=tueh30tKiv", "pdf_size": 0, "rating": "4;4;4;4", "confidence": "3;3;4;5", "excitement": "4;3;3;4", "reproducibility": "3;2;3;4", "correctness": "3;4;3;5", "rating_avg": 4.0, "confidence_avg": 3.75, "excitement_avg": 3.5, "reproducibility_avg": 3.0, "correctness_avg": 3.75, "replies_avg": 13, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-3932-9706;;;0000-0001-8942-355X", "linkedin": ";;;;noura-al-moubayed-6832b424?utm_source=share&utm_campaign=share_via&utm_content=profile&utm_medium=ios_app", "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Durham University;University of Manchester", "aff_unique_dep": ";", "aff_unique_url": "https://www.dur.ac.uk;https://www.manchester.ac.uk", "aff_unique_abbr": "Durham;UoM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United Kingdom" }, { "id": "u03xn1COsO", "title": "Is ChatGPT a General-Purpose Natural Language Processing Task Solver?", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Spurred by advancements in scale, large language models (LLMs) have demonstrated the ability to perform a variety of 
natural language processing (NLP) tasks zero-shot---i.e., without adaptation on downstream data. Recently, the debut of ChatGPT has drawn a great deal of attention from the natural language processing (NLP) community due to the fact that it can generate high-quality responses to human input and self-correct previous mistakes based on subsequent conversations. However, it is not yet known whether ChatGPT can serve as a generalist model that can perform many NLP tasks zero-shot. In this work, we empirically analyze the zero-shot learning ability of ChatGPT by evaluating it on 20 popular NLP datasets covering 7 representative task categories. With extensive empirical studies, we demonstrate both the effectiveness and limitations of the current version of ChatGPT. We find that ChatGPT performs well on many tasks favoring reasoning capabilities (e.g., arithmetic reasoning) while it still faces challenges when solving specific tasks such as sequence tagging. We additionally provide in-depth analysis through qualitative case studies.", "keywords": "Chatgpt evaluation;general-purpose task solver;zero-shot learning", "primary_area": "", "supplementary_material": "", "author": "Chengwei Qin;Aston Zhang;Zhuosheng Zhang;Jiaao Chen;Michihiro Yasunaga;Diyi Yang", "authorids": "~Chengwei_Qin1;~Aston_Zhang2;~Zhuosheng_Zhang1;~Jiaao_Chen2;~Michihiro_Yasunaga1;~Diyi_Yang2", "gender": "M;;M;M;;F", "homepage": ";;https://bcmi.sjtu.edu.cn/~zhangzs/;https://cs.stanford.edu/people/jiaaoc/;;https://cs.stanford.edu/~diyiy/", "dblp": "195/2732;;06/9708;230/3663;202/1809;70/11145", "google_scholar": ";;https://scholar.google.co.jp/citations?user=63LTQhgAAAAJ;Pi9IVvUAAAAJ;SieJYoEAAAAJ;j9jhYqQAAAAJ", "or_profile": "~Chengwei_Qin1;~Aston_Zhang2;~Zhuosheng_Zhang1;~Jiaao_Chen2;~Michihiro_Yasunaga1;~Diyi_Yang2", "aff": "Nanyang Technological University;;Shanghai Jiaotong University;Georgia Institute of Technology;Stanford University;Stanford University", "aff_domain": "ntu.edu.sg;;sjtu.edu.cn;gatech.edu;stanford.edu;stanford.edu", "position": "PhD student;;PhD student;PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nqin2023is,\ntitle={Is Chat{GPT} a General-Purpose Natural Language Processing Task Solver?},\nauthor={Chengwei Qin and Aston Zhang and Zhuosheng Zhang and Jiaao Chen and Michihiro Yasunaga and Diyi Yang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=u03xn1COsO}\n}", "github": "", "project": "", "reviewers": "ZytF;uDLT;DEqi;JJED", "site": "https://openreview.net/forum?id=u03xn1COsO", "pdf_size": 0, "rating": "4;4;4;4", "confidence": "4;5;2;3", "excitement": "3;3;3;3", "reproducibility": "1;3;2;4", "correctness": "3;4;5;4", "rating_avg": 4.0, "confidence_avg": 3.5, "excitement_avg": 3.0, "reproducibility_avg": 2.5, "correctness_avg": 4.0, "replies_avg": 13, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0002-4183-3645;;;", "linkedin": "chengwei-qin-3401a1107/;;;;;", "aff_unique_index": "0;1;2;3;3", "aff_unique_norm": "Nanyang Technological University;Shanghai Jiao Tong University;Georgia Institute of Technology;Stanford University", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.ntu.edu.sg;https://www.sjtu.edu.cn;https://www.gatech.edu;https://www.stanford.edu", "aff_unique_abbr": "NTU;SJTU;Georgia Tech;Stanford", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Stanford", "aff_country_unique_index": "0;1;2;2;2", 
"aff_country_unique": "Singapore;China;United States" }, { "id": "u14dVx4rMW", "title": "ImageNetVC: Zero- and Few-Shot Visual Commonsense Evaluation on 1000 ImageNet Categories", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Recently, Large Language Models (LLMs) have been serving as general-purpose interfaces, posing a significant demand for comprehensive visual knowledge. However, it remains unclear how well current LLMs and their visually augmented counterparts (VaLMs) can master visual commonsense knowledge. To investigate this, we propose ImageNetVC, a human-annotated dataset specifically designed for zero- and few-shot visual commonsense evaluation across 1,000 ImageNet categories. Utilizing ImageNetVC, we benchmark the fundamental visual commonsense knowledge of both unimodal LLMs and VaLMs. Furthermore, we analyze the factors affecting the visual commonsense knowledge of large-scale models, providing insights into the development of language models enriched with visual commonsense knowledge. Our code and dataset are available at https://github.com/hemingkx/ImageNetVC.", "keywords": "visual commonsense;large language model;visually-augmented language model", "primary_area": "", "supplementary_material": "", "author": "Heming Xia;Qingxiu Dong;Lei Li;Jingjing Xu;Tianyu Liu;Ziwei Qin;Zhifang Sui", "authorids": "~Heming_Xia1;~Qingxiu_Dong1;~Lei_Li14;~Jingjing_Xu1;~Tianyu_Liu3;~Ziwei_Qin1;~Zhifang_Sui1", "gender": "M;F;F;M;M;F;M", "homepage": "https://hemingkx.github.io/;https://dqxiu.github.io/;;;https://www.notion.so/Personal-Home-bc262a7887a94a45ba90166bae4bf973;http://eecs.pku.edu.cn/EN/People/Faculty/Detail/?ID=6024;https://lilei-nlp.github.io", "dblp": "278/2940;284/0673;25/624;134/1099-1;;;13/7007-39", "google_scholar": "6r2ESKkAAAAJ;ibcR7VkAAAAJ;;https://scholar.google.com.hk/citations?user=6hHbBwwAAAAJ;https://scholar.google.com/citations?hl=zh-CN;;MeV4GGsAAAAJ", "or_profile": "~Heming_Xia1;~Qingxiu_Dong1;~Jingjing_Xu1;~Tianyu_Liu3;~Ziwei_Qin1;~Zhifang_Sui1;~Tobias_Lee1", "aff": "Peking University;Peking University;;Tencent Cloud AI (LLM);Peking University;Peking University;Peking University", "aff_domain": "pku.edu.cn;pku.edu.cn;;tencent.com;pku.edu.cn;pku.edu.cn;pku.edu.cn", "position": "MS student;PhD student;;Senior Researcher;MS student;Full Professor;MS student", "bibtex": "@inproceedings{\nxia2023imagenetvc,\ntitle={ImageNet{VC}: Zero- and Few-Shot Visual Commonsense Evaluation on 1000 ImageNet Categories},\nauthor={Heming Xia and Qingxiu Dong and Lei Li and Jingjing Xu and Tianyu Liu and Ziwei Qin and Zhifang Sui},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=u14dVx4rMW}\n}", "github": "", "project": "", "reviewers": "MrcQ;eD4G;xCkP", "site": "https://openreview.net/forum?id=u14dVx4rMW", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;3", "excitement": "4;3;4", "reproducibility": "5;4;4", "correctness": "4;3;4", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.333333333333333, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0001-5074-3441;;;;;;0009-0008-6984-5104", "linkedin": ";qingxiu-dong-a3758a199/;;;;;", "aff_unique_index": "0;0;1;0;0;0", "aff_unique_norm": "Peking University;Tencent", "aff_unique_dep": ";LLM", "aff_unique_url": 
"http://www.pku.edu.cn;https://cloud.tencent.com", "aff_unique_abbr": "Peking U;Tencent AI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "u69aCtohTC", "title": "Unveiling the Implicit Toxicity in Large Language Models", "track": "main", "status": "Long Main", "tldr": "", "abstract": "The open-endedness of large language models (LLMs) combined with their impressive capabilities may lead to new safety issues when being exploited for malicious use. While recent studies primarily focus on probing toxic outputs that can be easily detected with existing toxicity classifiers, we show that LLMs can generate diverse implicit toxic outputs that are exceptionally difficult to detect via simply zero-shot prompting. Moreover, we propose a reinforcement learning (RL) based attacking method to further induce the implicit toxicity in LLMs. Specifically, we optimize the language model with a reward that prefers implicit toxic outputs to explicit toxic and non-toxic ones. Experiments on five widely-adopted toxicity classifiers demonstrate that the attack success rate can be significantly improved through RL fine-tuning. For instance, the RL-finetuned LLaMA-13B model achieves an attack success rate of 90.04\\% on BAD and 62.85\\% on Davinci003. Our findings suggest that LLMs pose a significant threat in generating undetectable implicit toxic outputs. We further show that fine-tuning toxicity classifiers on the annotated examples from our attacking method can effectively enhance their ability to detect LLM-generated implicit toxic language.", "keywords": "Large Language Model;Toxicity;Safety;Reinforcement Learning", "primary_area": "", "supplementary_material": "", "author": "Jiaxin Wen;Pei Ke;Hao Sun;Zhexin Zhang;Chengfei Li;Jinfeng Bai;Minlie Huang", "authorids": "~Jiaxin_Wen2;~Pei_Ke2;~Hao_Sun7;~Zhexin_Zhang2;~Chengfei_Li1;~Jinfeng_Bai1;~Minlie_Huang1", "gender": "M;M;M;M;;M;M", "homepage": "https://jiaxin-wen.github.io/;https://kepei1106.github.io/;;https://github.com/nonstopfor;;;http://coai.cs.tsinghua.edu.cn/hml", "dblp": "189/3085;10/2179;;225/5264;;120/7270.html;", "google_scholar": "jVRL96IAAAAJ;W_zPCtEAAAAJ;;I-Cn8gkAAAAJ;;;https://scholar.google.com/citations?hl=zh-CN", "or_profile": "~Jiaxin_Wen2;~Pei_Ke2;~Hao_Sun7;~Zhexin_Zhang2;~Chengfei_Li1;~Jinfeng_Bai1;~Minlie_Huang1", "aff": "Tsinghua University;Tsinghua University;Tsinghua University;Tsinghua University;;TAL;Tsinghua University", "aff_domain": "tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;tsinghua.edu.cn;;tal.com;tsinghua.edu.cn", "position": "MS student;Postdoc;MS student;PhD student;;Researcher;Full Professor", "bibtex": "@inproceedings{\nwen2023unveiling,\ntitle={Unveiling the Implicit Toxicity in Large Language Models},\nauthor={Jiaxin Wen and Pei Ke and Hao Sun and Zhexin Zhang and Chengfei Li and Jinfeng Bai and Minlie Huang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=u69aCtohTC}\n}", "github": "", "project": "", "reviewers": "CkEn;j5Tw;BA3Z;PPqG", "site": "https://openreview.net/forum?id=u69aCtohTC", "pdf_size": 0, "rating": "4;4;4;4", "confidence": "3;4;2;4", "excitement": "4;3;3;3", "reproducibility": "3;4;4;4", "correctness": "4;3;3;4", "rating_avg": 4.0, "confidence_avg": 3.25, "excitement_avg": 3.25, "reproducibility_avg": 3.75, "correctness_avg": 3.5, "replies_avg": 14, "authors#_avg": 7, "corr_rating_confidence": 0.0, 
"corr_rating_correctness": 0.0, "orcid": ";;;0009-0002-9601-3991;;;", "linkedin": ";;;;;;", "aff_unique_index": "0;0;0;0;1;0", "aff_unique_norm": "Tsinghua University;TAL", "aff_unique_dep": ";", "aff_unique_url": "https://www.tsinghua.edu.cn;", "aff_unique_abbr": "THU;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China;" }, { "id": "u9Fvsy8Brx", "title": "mmT5: Modular Multilingual Pre-Training Solves Source Language Hallucinations", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Multilingual sequence-to-sequence models perform poorly with increased language coverage and fail to consistently generate text in the correct target language in few-shot settings. To address these challenges, we propose mmT5, a modular multilingual sequence-to-sequence model. mmT5 utilizes language-specific modules during pre-training, which disentangle language-specific information from language-agnostic information. We identify representation drift during fine-tuning as a key limitation of modular generative models and develop strategies that enable effective zero-shot transfer. Our model outperforms mT5 at the same parameter sizes by a large margin on representative natural language understanding and generation tasks in 40+ languages. Compared to mT5, mmT5 raises the rate of generating text in the correct language under zero-shot settings from 7% to 99%, thereby greatly alleviating the source language hallucination problem.", "keywords": "Modularity;Multilinguality;Adapters;Parameter-efficiency", "primary_area": "", "supplementary_material": "", "author": "Jonas Pfeiffer;Francesco Piccinno;Massimo Nicosia;Xinyi Wang;Machel Reid;Sebastian Ruder", "authorids": "~Jonas_Pfeiffer1;~Francesco_Piccinno1;~Massimo_Nicosia1;~Xinyi_Wang1;~Machel_Reid1;~Sebastian_Ruder2", "gender": "M;M;M;F;;M", "homepage": "https://pfeiffer.ai;;https://maxnicosia.com;;https://machelreid.github.io/;http://sebastianruder.com/", "dblp": "222/9866.html;151/3088;136/8001;;260/6668;186/7066", "google_scholar": "https://scholar.google.com/citations?hl=en;KE62hDMAAAAJ;lcYzgzsAAAAJ;https://scholar.google.com/citations?view_op=list_works;N8ctPiIAAAAJ;https://scholar.google.de/citations?user=8ONXPV8AAAAJ", "or_profile": "~Jonas_Pfeiffer1;~Francesco_Piccinno1;~Massimo_Nicosia1;~Xinyi_Wang1;~Machel_Reid1;~Sebastian_Ruder1", "aff": "Google DeepMind;Google;Google;Google;Google DeepMind;Google", "aff_domain": "google.com;google.com;google.com;google.com;google.com;google.com", "position": "Researcher;SWE;Researcher;Researcher;Research Scientist;Research scientist", "bibtex": "@inproceedings{\npfeiffer2023mmt,\ntitle={mmT5: Modular Multilingual Pre-Training Solves Source Language Hallucinations},\nauthor={Jonas Pfeiffer and Francesco Piccinno and Massimo Nicosia and Xinyi Wang and Machel Reid and Sebastian Ruder},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=u9Fvsy8Brx}\n}", "github": "", "project": "", "reviewers": "Uj9i;14xE;QXjY", "site": "https://openreview.net/forum?id=u9Fvsy8Brx", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;3", "excitement": "4;3;2", "reproducibility": "4;4;4", "correctness": "4;3;3", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.0, "reproducibility_avg": 4.0, "correctness_avg": 3.3333333333333335, "replies_avg": 10, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, 
"orcid": ";;;;;", "linkedin": "jonas-pfeiffer/;;massimonicosia/;;;sebastianruder", "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "Google", "aff_unique_dep": "Google DeepMind", "aff_unique_url": "https://deepmind.com", "aff_unique_abbr": "DeepMind", "aff_campus_unique_index": "1;1;1;1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;1;1;1;0;1", "aff_country_unique": "United Kingdom;United States" }, { "id": "u9gI4JlOSj", "title": "How Does Generative Retrieval Scale to Millions of Passages?", "track": "main", "status": "Long Main", "tldr": "", "abstract": "The emerging paradigm of generative retrieval re-frames the classic information retrieval problem into a sequence-to-sequence modeling task, forgoing external indices and encoding an entire document corpus within a single Transformer.\nAlthough many different approaches have been proposed to improve the effectiveness of generative retrieval, they have only been evaluated on document corpora on the order of 100K in size.\nWe conduct the first empirical study of generative retrieval techniques across various corpus scales, ultimately scaling up to the entire MS MARCO passage ranking task with a corpus of 8.8M passages and evaluating model sizes up to 11B parameters.\nWe uncover several findings about scaling generative retrieval to millions of passages; notably, the central importance of using synthetic queries as document representations during indexing, the ineffectiveness of existing proposed architecture modifications when accounting for compute cost, and the limits of naively scaling model parameters with respect to retrieval performance.\nWhile we find that generative retrieval is competitive with state-of-the-art dual encoders on small corpora, scaling to millions of passages remains an important and unsolved challenge.\nWe believe these findings will be valuable for the community to clarify the current state of generative retrieval, highlight the unique challenges, and inspire new research directions.", "keywords": "generative retrieval;differentiable search index;information retrieval", "primary_area": "", "supplementary_material": "", "author": "Ronak Pradeep;Kai Hui;Jai Gupta;Adam D Lelkes;Honglei Zhuang;Jimmy Lin;Donald Metzler;Vinh Q. 
Tran", "authorids": "~Ronak_Pradeep1;~Kai_Hui1;~Jai_Gupta1;~Adam_D_Lelkes1;~Honglei_Zhuang1;~Jimmy_Lin2;~Donald_Metzler1;~Vinh_Q._Tran1", "gender": "M;M;M;M;M;;M;M", "homepage": "https://ronakice.github.io/;https://khui.github.io/;;https://research.google/people/AdamLelkes/;https://hongleizhuang.github.io/;https://cs.uwaterloo.ca/~jimmylin/;https://research.google/people/DonaldMetzler/;https://vqtran.github.io", "dblp": "270/1757;37/10077;154/6787-1;147/5184;10/9988;00/7739;95/2272;77/2885-2.html", "google_scholar": "xH7uDXgAAAAJ;VorTj3AAAAAJ;;PAAAaI4AAAAJ;FxEDj4wAAAAJ;;bmXpOd8AAAAJ;ot3WsOwAAAAJ", "or_profile": "~Ronak_Pradeep1;~Kai_Hui1;~Jai_Gupta1;~Adam_D_Lelkes1;~Honglei_Zhuang1;~Jimmy_Lin2;~Donald_Metzler1;~Vinh_Q._Tran1", "aff": "University of Waterloo;Google;Google Inc;Google;Google DeepMind;University of Waterloo;Google;Google", "aff_domain": "uwaterloo.ca;google.com;google.com;google.com;google.com;waterloo.ca;google.com;google.com", "position": "PhD student;Software Engineer;Researcher;Researcher;Research Scientist;Full Professor;Research Scientist;Researcher", "bibtex": "@inproceedings{\npradeep2023how,\ntitle={How Does Generative Retrieval Scale to Millions of Passages?},\nauthor={Ronak Pradeep and Kai Hui and Jai Gupta and Adam D Lelkes and Honglei Zhuang and Jimmy Lin and Donald Metzler and Vinh Q. Tran},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=u9gI4JlOSj}\n}", "github": "", "project": "", "reviewers": "JoZg;Gmvd;vuyp", "site": "https://openreview.net/forum?id=u9gI4JlOSj", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;5;2", "excitement": "3;3;4", "reproducibility": "4;4;3", "correctness": "3;3;4", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 8, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0001-6296-601X;0000-0002-3110-7404;;;0000-0001-8134-1509;;0000-0003-4276-6269;", "linkedin": ";;;adamlelkes;;;donmetzler/;vinh-tran-32597468/", "aff_unique_index": "0;1;1;1;1;0;1;1", "aff_unique_norm": "University of Waterloo;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://uwaterloo.ca;https://www.google.com", "aff_unique_abbr": "UW;Google", "aff_campus_unique_index": "1;1;1;1;1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;1;1;1;2;0;1;1", "aff_country_unique": "Canada;United States;United Kingdom" }, { "id": "uB9ZnBCBX6", "title": "Text-Transport: Toward Learning Causal Effects of Natural Language", "track": "main", "status": "Long Main", "tldr": "", "abstract": "As language technologies gain prominence in real-world settings, it is important to understand *how* changes to language affect reader perceptions. This can be formalized as the *causal effect* of varying a linguistic attribute (e.g., sentiment) on a reader\u2019s response to the text. In this paper, we introduce Text-Transport, a method for estimation of causal effects from natural language under any text distribution. Current approaches for valid causal effect estimation require strong assumptions about the data, meaning the data from which one *can* estimate valid causal effects often is not representative of the actual target domain of interest. 
To address this issue, we leverage the notion of distribution shift to describe an estimator that *transports* causal effects between domains, bypassing the need for strong assumptions in the target domain. We derive statistical guarantees on the uncertainty of this estimator, and we report empirical results and analyses that support the validity of Text-Transport across data settings. Finally, we use Text-Transport to study a realistic setting\u2014hate speech on social media\u2014in which causal effects do shift significantly between text domains, demonstrating the necessity of transport when conducting causal inference on natural language.", "keywords": "causal inference;causal effects;distribution shift;domain adaptation;transportability", "primary_area": "", "supplementary_material": "", "author": "Victoria Lin;Louis-Philippe Morency;Eli Ben-Michael", "authorids": "~Victoria_Lin2;~Louis-Philippe_Morency1;~Eli_Ben-Michael1", "gender": "F;M;M", "homepage": "https://torylin.github.io/;https://www.cs.cmu.edu/~morency/;https://ebenmichael.github.io", "dblp": "184/3732;31/739;", "google_scholar": "_rHMG-0AAAAJ;https://scholar.google.com.tw/citations?user=APgaFK0AAAAJ;", "or_profile": "~Victoria_Lin2;~Louis-Philippe_Morency1;~Eli_Ben-Michael1", "aff": "Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University", "aff_domain": "cmu.edu;cmu.edu;cmu.edu", "position": "PhD student;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\nlin2023texttransport,\ntitle={Text-Transport: Toward Learning Causal Effects of Natural Language},\nauthor={Victoria Lin and Louis-Philippe Morency and Eli Ben-Michael},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=uB9ZnBCBX6}\n}", "github": "", "project": "", "reviewers": "g3EB;aAKu;VqXU", "site": "https://openreview.net/forum?id=uB9ZnBCBX6", "pdf_size": 0, "rating": "5;5;5", "confidence": "2;3;3", "excitement": "4;4;4", "reproducibility": "4;4;4", "correctness": "4;4;4", "rating_avg": 5.0, "confidence_avg": 2.6666666666666665, "excitement_avg": 4.0, "reproducibility_avg": 4.0, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0001-6376-7696;", "linkedin": ";morency?challengeId=AQELGK_OvMa0vwAAAY72L-VV4X9hW8juuY80VHVeeSGHZ1PJHeeEa5LTFoeTmDGU0t1OL07MXJTYC9EAi6qgPDd2z9ztnbdFYA&submissionId=09a0ff34-04ac-c717-bef7-8c9c8811b463&challengeSource=AgFhxWkU3q7v4wAAAY72L-1xRE0eG-BnZUNE9e3eAG95pgOCZ9u1nxEg-1dK2Dw&challegeType=AgHMzV0lqKgEFwAAAY72L-11X6DHMd3V_A3Iur8XZeyYF2-oBzoufs8&memberId=AgH4yz7pZ_riCgAAAY72L-146jmR2pdr3dmhy2icxBtEQzQ&recognizeDevice=AgFDCNyrhKiFSAAAAY72L-16m7z2EH2t0ueWmMKjyk1_ZJAkfFVe;", "aff_unique_index": "0;0;0", "aff_unique_norm": "Carnegie Mellon University", "aff_unique_dep": "", "aff_unique_url": "https://www.cmu.edu", "aff_unique_abbr": "CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "uBnIvIcAFx", "title": "Vera: A General-Purpose Plausibility Estimation Model for Commonsense Statements", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Today's language models can be remarkably intelligent yet still produce text that contains trivial commonsense errors. 
Therefore, we seek a retrospective verification approach that can reflect on the commonsense plausibility of the machine text, and introduce Vera, a general-purpose model that learns to estimate the commonsense plausibility of declarative statements. To support diverse commonsense domains, \nVera is trained on $\\sim$7M commonsense statements that are automatically converted from 19 QA datasets and two commonsense knowledge bases, using a combination of three training objectives. When applied to solving commonsense problems in the verification format, Vera substantially outperforms existing models that can be repurposed for commonsense verification, even including GPT-3.5/ChatGPT/GPT-4, and it further exhibits generalization capabilities to unseen tasks and provides well-calibrated outputs. We find that Vera excels at filtering machine-generated commonsense knowledge and is useful in detecting erroneous commonsense statements generated by models like ChatGPT in real-world settings.", "keywords": "commonsense;verification;plausibility", "primary_area": "", "supplementary_material": "", "author": "Jiacheng Liu;Wenya Wang;Dianzhuo Wang;Noah A. Smith;Yejin Choi;Hannaneh Hajishirzi", "authorids": "~Jiacheng_Liu2;~Wenya_Wang1;~Dianzhuo_Wang1;~Noah_A._Smith2;~Yejin_Choi1;~Hannaneh_Hajishirzi1", "gender": "M;F;;F;F;M", "homepage": "https://github.com/liujch1998;https://personal.ntu.edu.sg/wangwy/;;https://yejinc.github.io/;https://homes.cs.washington.edu/~hannaneh/;https://homes.cs.washington.edu/~nasmith/", "dblp": "289/6273;;;89/579-1;52/1296;90/5204.html", "google_scholar": "GJfoBZAAAAAJ;https://scholar.google.com.sg/citations?user=eOKISncAAAAJ;y0HlEPQAAAAJ;vhP-tlcAAAAJ;LOV6_WIAAAAJ;https://scholar.google.com/citations?hl=en", "or_profile": "~Jiacheng_Liu2;~Wenya_Wang1;~Dianzhuo_Wang1;~Yejin_Choi1;~Hannaneh_Hajishirzi1;~Noah_Smith1", "aff": "Meta Facebook;University of Washington;Harvard University, Harvard University;Department of Computer Science, University of Washington;University of Washington;Allen Institute for Artificial Intelligence", "aff_domain": "meta.com;cs.washington.edu;g.harvard.edu;cs.washington.edu;uw.edu;allenai.org", "position": "Intern;Postdoc;PhD student;Full Professor;Associate Professor;Senior Director of NLP Research", "bibtex": "@inproceedings{\nliu2023vera,\ntitle={Vera: A General-Purpose Plausibility Estimation Model for Commonsense Statements},\nauthor={Jiacheng Liu and Wenya Wang and Dianzhuo Wang and Noah A. 
Smith and Yejin Choi and Hannaneh Hajishirzi},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=uBnIvIcAFx}\n}", "github": "", "project": "", "reviewers": "HDaL;HcPV;FtwG", "site": "https://openreview.net/forum?id=uBnIvIcAFx", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;5;4", "excitement": "4;4;3", "reproducibility": "3;5;3", "correctness": "3;4;3", "rating_avg": 4.0, "confidence_avg": 4.333333333333333, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0003-3308-2869;0000-0001-5612-7818;;;;0000-0002-2310-6380", "linkedin": "liujch1998/;;;;;", "aff_unique_index": "0;1;2;1;1;3", "aff_unique_norm": "Meta;University of Washington;Harvard University;Allen Institute for Artificial Intelligence", "aff_unique_dep": "Meta Platforms, Inc.;;;", "aff_unique_url": "https://meta.com;https://www.washington.edu;https://www.harvard.edu;https://allenai.org", "aff_unique_abbr": "Meta;UW;Harvard;AI2", "aff_campus_unique_index": "1", "aff_campus_unique": ";Seattle", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "uDMyRJw6ty", "title": "Clinical Contradiction Detection", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Detecting contradictions in text is essential in determining the validity of the literature and sources that we consume. Medical corpora are riddled with conflicting statements. This is due to the large throughput of new studies and the difficulty in replicating experiments, such as clinical trials. \nDetecting contradictions in this domain is hard since it requires clinical expertise. \nWe present a distant supervision approach that leverages a medical ontology to build a seed of potential clinical contradictions over 22 million medical abstracts. \n We automatically build a labeled training dataset consisting of paired clinical sentences that are grounded in an ontology and represent potential medical contradiction. 
The dataset is used to weakly-supervise state-of-the-art deep learning models showing significant empirical improvements across multiple medical contradiction datasets.", "keywords": "clinical contradiction detection;medical ontologies;clinical distant supervision", "primary_area": "", "supplementary_material": "", "author": "Dave Makhervaks;Plia Gillis;Kira Radinsky", "authorids": "~Dave_Makhervaks1;~Plia_Gillis1;~Kira_Radinsky1", "gender": "M;F;F", "homepage": ";;http://www.kiraradinsky.com", "dblp": "251/5579;;08/6560", "google_scholar": "mShVsKEAAAAJ;https://scholar.google.com/citations?view_op=list_works;", "or_profile": "~Dave_Makhervaks1;~Plia_Gillis1;~Kira_Radinsky1", "aff": "Computer Science Departmen, Technion-Israel Institute of Technology;Tel Aviv University;Computer Science Departmen, Technion-Israel Institute of Technology", "aff_domain": "cs.technion.ac.il;tau.post.ac.il;cs.technion.ac.il", "position": "MS student;MS student;Associate Professor", "bibtex": "@inproceedings{\nmakhervaks2023clinical,\ntitle={Clinical Contradiction Detection},\nauthor={Dave Makhervaks and Plia Gillis and Kira Radinsky},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=uDMyRJw6ty}\n}", "github": "", "project": "", "reviewers": "G3pb;Kpwr;ufkc", "site": "https://openreview.net/forum?id=uDMyRJw6ty", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;4;4", "excitement": "3;4;4", "reproducibility": "2;3;5", "correctness": "3;4;5", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0009-0007-7918-2204", "linkedin": ";;", "aff_unique_index": "0;1;0", "aff_unique_norm": "Technion-Israel Institute of Technology;Tel Aviv University", "aff_unique_dep": "Computer Science Department;", "aff_unique_url": "https://www.technion.ac.il;https://www.tau.ac.il", "aff_unique_abbr": "Technion;TAU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Israel" }, { "id": "uEAFmlWYig", "title": "Towards Robust Pruning: An Adaptive Knowledge-Retention Pruning Strategy for Language Models", "track": "main", "status": "Long Main", "tldr": "", "abstract": "The pruning objective has recently extended beyond accuracy and sparsity to robustness in language models. Despite this, existing methods struggle to enhance robustness against adversarial attacks when continually increasing model sparsity and require a retraining process. As humans step into the era of large language models, these issues become increasingly prominent. This paper proposes that the robustness of language models is proportional to the extent of pre-trained knowledge they encompass. Accordingly, we introduce a post-training pruning strategy designed to faithfully replicate the embedding space and feature space of dense language models, aiming to conserve more pre-trained knowledge during the pruning process. In this setup, each layer's reconstruction error not only originates from itself but also includes cumulative error from preceding layers, followed by an adaptive rectification. 
Compared to other state-of-the-art baselines, our approach demonstrates a superior balance between accuracy, sparsity, robustness, and pruning cost with BERT on the SST2, IMDB, and AGNews datasets, marking a significant stride towards robust pruning in language models.", "keywords": "Robust Pruning;Language Models", "primary_area": "", "supplementary_material": "", "author": "Jianwei Li;Qi Lei;Wei Cheng;Dongkuan Xu", "authorids": "~Jianwei_Li8;~Qi_Lei1;~Wei_Cheng1;~Dongkuan_Xu2", "gender": "M;F;M;M", "homepage": "https://jianwei.gatsbyjs.io/;https://cecilialeiqi.github.io/;https://chengw07.github.io/;https://dongkuanx27.github.io/", "dblp": ";;89/2506-2.html;142/8139", "google_scholar": ";kGOgaowAAAAJ;PRrGVmoAAAAJ;https://scholar.google.com/citations?hl=en", "or_profile": "~Jianwei_Li8;~Qi_Lei1;~Wei_Cheng1;~Dongkuan_Xu2", "aff": "North Carolina State University;New York University;NEC-Labs;North Carolina State University", "aff_domain": "ncsu.edu;nyu.edu;nec-labs.com;ncsu.edu", "position": "PhD student;Assistant Professor;Principal Researcher;Assistant Professor", "bibtex": "@inproceedings{\nli2023towards,\ntitle={Towards Robust Pruning: An Adaptive Knowledge-Retention Pruning Strategy for Language Models},\nauthor={Jianwei Li and Qi Lei and Wei Cheng and Dongkuan Xu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=uEAFmlWYig}\n}", "github": "", "project": "", "reviewers": "1hiE;YNs7;VnKg;MLxF;q9t9", "site": "https://openreview.net/forum?id=uEAFmlWYig", "pdf_size": 0, "rating": "5;5;5;5;5", "confidence": "2;2;2;4;2", "excitement": "3;4;3;4;4", "reproducibility": "3;3;3;4;4", "correctness": "3;3;3;4;4", "rating_avg": 5.0, "confidence_avg": 2.4, "excitement_avg": 3.6, "reproducibility_avg": 3.4, "correctness_avg": 3.4, "replies_avg": 17, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-5372-223X;;;0000-0002-1456-9658", "linkedin": "fourteenljw/;;wei-cheng-ml/;dongkuan-dk-xu-%F0%9F%87%BA%F0%9F%87%A6-05038087/", "aff_unique_index": "0;1;2;0", "aff_unique_norm": "North Carolina State University;New York University;NEC Laboratories", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ncsu.edu;https://www.nyu.edu;https://www.nec-labs.com", "aff_unique_abbr": "NCSU;NYU;NEC-Labs", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "uPz5a2NvrG", "title": "Normal-Abnormal Decoupling Memory for Medical Report Generation", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "The automatic generation of medical reports plays a crucial role in clinical automation. In contrast to natural images, radiological images exhibit a high degree of similarity, while medical data are prone to data bias and complex noise, posing challenges for existing methods in capturing nuanced visual information. To address these challenges, we introduce a novel normal-abnormal semantic decoupling network that utilizes abnormal pattern memory. Different from directly optimizing the network using medical reports, we optimize visual extraction through the extraction of abnormal semantics from the reports. Moreover, we independently learn normal semantics based on abnormal semantics, ensuring that the optimization of the visual network remains unaffected by normal semantics learning. 
Then, we divided the words in the report into four parts: normal/abnormal sentences and normal/abnormal semantics, optimizing the network with distinct weights for each partition. The two semantic components, along with visual information, are seamlessly integrated to facilitate the generation of precise and coherent reports. This approach mitigates the impact of noisy normal semantics and reports. Moreover, we develop a novel encoder for abnormal pattern memory, which improves the network's ability to detect anomalies by capturing and embedding the abnormal patterns of images in the visual encoder. This approach demonstrates excellent performance on the benchmark MIMIC-CXR, surpassing the current state-of-the-art methods.", "keywords": "Medical Report Generation;Normal-Abnormal Decoupling;Semantic Extraction;Abnormal Mode Memory", "primary_area": "", "supplementary_material": "", "author": "Guosheng Zhao;Yan Yan;Zijian Zhao", "authorids": "~Guosheng_Zhao2;~Yan_Yan6;~Zijian_Zhao1", "gender": "M;M;M", "homepage": ";;", "dblp": ";13/3953-2;42/2820", "google_scholar": ";;", "or_profile": "~Guosheng_Zhao2;~Yan_Yan6;~Zijian_Zhao1", "aff": "Shandong University;;", "aff_domain": "sdu.edu.cn;;", "position": "MS student;;", "bibtex": "@inproceedings{\nzhao2023normalabnormal,\ntitle={Normal-Abnormal Decoupling Memory for Medical Report Generation},\nauthor={Guosheng Zhao and Yan Yan and Zijian Zhao},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=uPz5a2NvrG}\n}", "github": "", "project": "", "reviewers": "VhyG;dM6t;6Vvb", "site": "https://openreview.net/forum?id=uPz5a2NvrG", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;2;5", "excitement": "4;4;3", "reproducibility": "4;4;4", "correctness": "4;3;3", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0001-9643-7538;;", "linkedin": ";;", "aff_unique_index": "0", "aff_unique_norm": "Shandong University", "aff_unique_dep": "", "aff_unique_url": "http://www.sdu.edu.cn", "aff_unique_abbr": "SDU", "aff_country_unique_index": "0", "aff_country_unique": "China" }, { "id": "uQfyuhhHBq", "title": "Penalty Decoding: Well Suppress the Self-Reinforcement Effect in Open-Ended Text Generation", "track": "main", "status": "Short Main", "tldr": "", "abstract": "The decoding algorithm is critical for open-ended text generation, transforming latent representations into coherent and meaningful outputs. This paper investigates the self-reinforcement effect in text generation and the effectiveness of a repetition penalty to mitigate it. However, determining the optimal repetition penalty value is challenging. To tackle this, we propose a forgetting mechanism that disregards distant tokens, reducing the burden of penalty selection. In addition, we introduce a length penalty to address overly short sentences caused by excessive penalties. Our penalty decoding approach incorporating three strategies helps resolve issues with sampling methods deviating from factual information. 
Experimental results demonstrate the efficacy of our approach in generating high-quality sentences resembling human output.", "keywords": "decoding algorithm;open-ended text generation;self-reinforcement;repetition penalty", "primary_area": "", "supplementary_material": "", "author": "Wenhong Zhu;Hongkun Hao;Rui Wang", "authorids": "~Wenhong_Zhu1;~Hongkun_Hao1;~Rui_Wang10", "gender": "M;M;M", "homepage": "https://github.com/zwhong714;https://hongkunhao.github.io/;https://wangruinlp.github.io/", "dblp": ";349/2933;w/RuiWang15", "google_scholar": "psCdg8EAAAAJ;a3UulbMAAAAJ;oTU0v5IAAAAJ", "or_profile": "~Wenhong_Zhu1;~Hongkun_Hao1;~Rui_Wang7", "aff": ";Shanghai Jiaotong University;Shanghai Jiaotong University", "aff_domain": ";sjtu.edu.cn;sjtu.edu.cn", "position": ";MS student;Associate Professor", "bibtex": "@inproceedings{\nzhu2023penalty,\ntitle={Penalty Decoding: Well Suppress the Self-Reinforcement Effect in Open-Ended Text Generation},\nauthor={Wenhong Zhu and Hongkun Hao and Rui Wang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=uQfyuhhHBq}\n}", "github": "", "project": "", "reviewers": "FifM;tmfW;rLC5", "site": "https://openreview.net/forum?id=uQfyuhhHBq", "pdf_size": 0, "rating": "4;4;4", "confidence": "5;4;5", "excitement": "3;3;3", "reproducibility": "5;4;4", "correctness": "3;3;4", "rating_avg": 4.0, "confidence_avg": 4.666666666666667, "excitement_avg": 3.0, "reproducibility_avg": 4.333333333333333, "correctness_avg": 3.3333333333333335, "replies_avg": 10, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0001-8007-2503", "linkedin": ";hongkun-hao-372090172/;", "aff_unique_index": "0;0", "aff_unique_norm": "Shanghai Jiao Tong University", "aff_unique_dep": "", "aff_unique_url": "https://www.sjtu.edu.cn", "aff_unique_abbr": "SJTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "id": "uRIFDS3gtG", "title": "LACMA: Language-Aligning Contrastive Learning with Meta-Actions for Embodied Instruction Following", "track": "main", "status": "Long Main", "tldr": "", "abstract": "End-to-end Transformers have demonstrated an impressive success rate for Embodied Instruction Following when the environment has been seen in training. However, they tend to struggle when deployed in an unseen environment. This lack of generalizability is due to\nthe agent\u2019s insensitivity to subtle changes in natural language instructions. To mitigate this issue, we propose explicitly aligning the agent\u2019s hidden states with the instructions via contrastive learning. Nevertheless, the semantic gap between high-level language instructions and the agent\u2019s low-level action space remains an obstacle. Therefore, we further introduce a novel concept of meta-actions to bridge the gap. Meta-actions are ubiquitous action patterns that can be parsed from the original action sequence. These patterns represent higher-level semantics that are intuitively aligned closer to the instructions. When meta-actions are applied as additional training signals, the agent generalizes better to unseen environments. Compared to a strong multi-modal Transformer baseline, we achieve a significant 4.5% absolute gain in success rate in unseen environments of ALFRED Embodied Instruction Following. 
Additional\u00a0analysis shows that the contrastive objective and meta-actions are complementary in achieving the best results, and the resulting agent better aligns its states with corresponding instructions, making it more suitable for real-world embodied agents.", "keywords": "Embodied AI;Vision and Language Navigation", "primary_area": "", "supplementary_material": "", "author": "Cheng-Fu Yang;Yen-Chun Chen;Jianwei Yang;Xiyang Dai;Lu Yuan;Yu-Chiang Frank Wang;Kai-Wei Chang", "authorids": "~Cheng-Fu_Yang1;~Yen-Chun_Chen1;~Jianwei_Yang1;~Xiyang_Dai4;~Lu_Yuan1;~Yu-Chiang_Frank_Wang2;~Kai-Wei_Chang1", "gender": "M;M;M;M;M;M;M", "homepage": "https://joeyy5588.github.io/;;https://www.microsoft.com/en-us/research/people/luyuan/;http://vllab.ee.ntu.edu.tw/ycwang.html;http://kwchang.net;https://jwyang.github.io/;https://sites.google.com/site/xiyangdai/", "dblp": "51/8564;160/0623-1;;30/1690;18/2428;;176/5470", "google_scholar": "https://scholar.google.com.tw/citations?user=cJ5oowQAAAAJ;Gptgy4YAAAAJ;k9TsUVsAAAAJ;HSGvdtoAAAAJ;fqDBtzYAAAAJ;Cl9byD8AAAAJ;QC8RwcoAAAAJ", "or_profile": "~Cheng-Fu_Yang1;~Yen-Chun_Chen1;~Lu_Yuan1;~Yu-Chiang_Frank_Wang2;~Kai-Wei_Chang1;~Jianwei_Yang2;~Xiyang_Dai2", "aff": "University of California, Los Angeles;Microsoft;Microsoft;National Taiwan University;Amazon;Microsoft;Microsoft", "aff_domain": "cs.ucla.edu;microsoft.com;microsoft.com;ntu.edu.tw;amazon.com;microsoft.com;microsoft.com", "position": "PhD student;Researcher;Principal Research Manager;Full Professor;Researcher;Researcher;Researcher", "bibtex": "@inproceedings{\nyang2023lacma,\ntitle={{LACMA}: Language-Aligning Contrastive Learning with Meta-Actions for Embodied Instruction Following},\nauthor={Cheng-Fu Yang and Yen-Chun Chen and Jianwei Yang and Xiyang Dai and Lu Yuan and Yu-Chiang Frank Wang and Kai-Wei Chang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=uRIFDS3gtG}\n}", "github": "", "project": "", "reviewers": "3Xca;UNnF;jyH4", "site": "https://openreview.net/forum?id=uRIFDS3gtG", "pdf_size": 0, "rating": "4;4;4", "confidence": "5;3;4", "excitement": "3;3;3", "reproducibility": "4;4;4", "correctness": "4;4;3", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.0, "reproducibility_avg": 4.0, "correctness_avg": 3.6666666666666665, "replies_avg": 10, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0000-0002-2333-157X;0000-0001-5365-0072;;", "linkedin": ";;;;kai-wei-chang-41239040;;", "aff_unique_index": "0;1;1;2;3;1;1", "aff_unique_norm": "University of California, Los Angeles;Microsoft;National Taiwan University;Amazon", "aff_unique_dep": ";Microsoft Corporation;;Amazon.com, Inc.", "aff_unique_url": "https://www.ucla.edu;https://www.microsoft.com;https://www.ntu.edu.tw;https://www.amazon.com", "aff_unique_abbr": "UCLA;Microsoft;NTU;Amazon", "aff_campus_unique_index": "0;2", "aff_campus_unique": "Los Angeles;;Taiwan", "aff_country_unique_index": "0;0;0;1;0;0;0", "aff_country_unique": "United States;China" }, { "id": "uUvlXyriM7", "title": "CLASS: A Design Framework for Building Intelligent Tutoring Systems Based on Learning Science principles", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "We present a design framework called Conversational Learning with Analytical Step-by-Step Strategies (CLASS) for building advanced Intelligent Tutoring Systems (ITS) powered by high-performance Large Language Models (LLMs). 
The CLASS framework empowers ITS with two key capabilities. First, through a carefully curated scaffolding dataset, CLASS equips ITS with essential problem-solving strategies, enabling it to provide tutor-like, step-by-step guidance to students. Second, by using a dynamic conversational dataset, CLASS assists ITS in facilitating natural language interactions, fostering engaging student-tutor conversations. The CLASS framework also provides valuable insights into ITS\u2019s internal decision-making process which allows seamless integration of user feedback, thus enabling continuous refinement and improvement. We also present a proof-of-concept ITS, referred to as SPOCK, which is trained using the CLASS framework with a focus on introductory college level biology content. A carefully constructed protocol was developed for SPOCK\u2019s preliminary evaluation, examining aspects such as the factual accuracy and relevance of its responses. Experts in the field of biology offered favorable remarks, particularly highlighting SPOCK\u2019s capability to break down questions into manageable subproblems and provide encouraging responses to students.", "keywords": "Intelligent Tutoring Systems;NLP for education;AI Tutors;Learning Science", "primary_area": "", "supplementary_material": "", "author": "Shashank Sonkar;Naiming Liu;Debshila Basu Mallick;Richard Baraniuk", "authorids": "~Shashank_Sonkar1;~Naiming_Liu1;~Debshila_Basu_Mallick1;~Richard_Baraniuk1", "gender": "M;F;Non-Binary;", "homepage": "https://sites.google.com/view/shashanksonkar;;;http://richb.rice.edu/", "dblp": "266/1460;277/5856;;32/2804", "google_scholar": "4Rv56n4AAAAJ;J0sOe2gAAAAJ;Zjroh4UAAAAJ;https://scholar.google.com.tw/citations?user=N-BBA20AAAAJ", "or_profile": "~Shashank_Sonkar1;~Naiming_Liu1;~Debshila_Basu_Mallick1;~Richard_Baraniuk1", "aff": "Rice University;Rice University;Rice University;William Marsh Rice University", "aff_domain": "rice.edu;rice.edu;rice.edu;rice.edu", "position": "PhD student;PhD student;Researcher;C. 
Sidney Burrus Professor", "bibtex": "@inproceedings{\nsonkar2023class,\ntitle={{CLASS}: A Design Framework for Building Intelligent Tutoring Systems Based on Learning Science principles},\nauthor={Shashank Sonkar and Naiming Liu and Debshila Basu Mallick and Richard Baraniuk},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=uUvlXyriM7}\n}", "github": "", "project": "", "reviewers": "g1rc;c1QD;85Eg", "site": "https://openreview.net/forum?id=uUvlXyriM7", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;3;5", "excitement": "4;3;3", "reproducibility": "3;3;4", "correctness": "3;3;3", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;", "linkedin": ";naiming-liu-lucy0817;;richard-baraniuk", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Rice University", "aff_unique_dep": "", "aff_unique_url": "https://www.rice.edu", "aff_unique_abbr": "Rice", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "uZp3i8yEs4", "title": "`Don't Get Too Technical with Me': A Discourse Structure-Based Framework for Automatic Science Journalism", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Science journalism refers to the task of reporting technical findings of a scientific paper as a less technical news article to the general public audience. We aim to design an automated system to support this real-world task (i.e., automatic science journalism ) by 1) introducing a newly-constructed and real-world dataset (SciTechNews), with tuples of a publicly-available scientific paper, its corresponding news article, and an expert-written short summary snippet; 2) proposing a novel technical framework that integrates a paper's discourse structure with its metadata to guide generation; and, 3) demonstrating with extensive automatic and human experiments that our model outperforms other baseline methods (e.g. 
Alpaca and ChatGPT) in elaborating a content plan meaningful for the target audience, simplify the information selected, and produce a coherent final report in a layman's style.", "keywords": "automatic scientific journalism;summarization;style transfer", "primary_area": "", "supplementary_material": "", "author": "Ronald Cardenas;Bingsheng Yao;Dakuo Wang;Yufang Hou", "authorids": "~Ronald_Cardenas1;~Bingsheng_Yao1;~Dakuo_Wang1;~Yufang_Hou2", "gender": "M;M;M;F", "homepage": "https://ronaldahmed.github.io/;;https://www.dakuowang.com;https://yufanghou.github.io/", "dblp": "222/9480;256/9562;161/3389;", "google_scholar": "QoPko3IAAAAJ;hJlsDfAAAAAJ;Uno8dugAAAAJ;-fBym-EAAAAJ", "or_profile": "~Ronald_Cardenas1;~Bingsheng_Yao1;~Dakuo_Wang1;~Yufang_Hou2", "aff": "University of Edinburgh;Rensselaer Polytechnic Institute;Northeastern University;IBM Research Ireland", "aff_domain": "ed.ac.uk;rpi.edu;northeastern.edu;ibm.com", "position": "PhD student;PhD student;Associate Professor;Principal Researcher", "bibtex": "@inproceedings{\ncardenas2023dont,\ntitle={`Don't Get Too Technical with Me': A Discourse Structure-Based Framework for Automatic Science Journalism},\nauthor={Ronald Cardenas and Bingsheng Yao and Dakuo Wang and Yufang Hou},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=uZp3i8yEs4}\n}", "github": "", "project": "", "reviewers": "MWo1;qvrA;6Mgm", "site": "https://openreview.net/forum?id=uZp3i8yEs4", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;4;4", "excitement": "3;4;4", "reproducibility": "4;5;3", "correctness": "3;5;4", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.0, "correctness_avg": 4.0, "replies_avg": 10, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0009-0004-8329-4610;0000-0001-9371-9441;", "linkedin": "https://linkedin.com/in/ronald-cardenas-acosta-77b90580;;dakuowang/;", "aff_unique_index": "0;1;2;3", "aff_unique_norm": "University of Edinburgh;Rensselaer Polytechnic Institute;Northeastern University;IBM", "aff_unique_dep": ";;;Research", "aff_unique_url": "https://www.ed.ac.uk;https://www.rpi.edu;https://www.northeastern.edu;https://www.ibm.com/research", "aff_unique_abbr": "Edinburgh;RPI;NEU;IBM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;2", "aff_country_unique": "United Kingdom;United States;Ireland" }, { "id": "uaZQ21cuzW", "title": "From Wrong To Right: A Recursive Approach Towards Vision-Language Explanation", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Addressing the challenge of adapting pre-trained vision-language models for generating insightful explanations for visual reasoning tasks with limited annotations, we present ReVisE: a Recursive Visual Explanation algorithm. Our method iteratively computes visual features (conditioned on the text input), an answer, and an explanation, to improve the explanation quality step by step until the answer converges. We find that this multi-step approach guides the model to correct its own answers and outperforms single-step explanation generation. Furthermore, explanations generated by ReVisE also serve as valuable annotations for few-shot self-training. 
Our approach outperforms previous methods while utilizing merely 5\\% of the human-annotated explanations across 10 metrics, demonstrating up to a 4.2 and 1.3 increase in BLEU-1 score on the VCR and VQA-X datasets, underscoring the efficacy and data-efficiency of our method.", "keywords": "Vision-Language Models;Visual Reasoning;Vision-Language Explanation;Self Training", "primary_area": "", "supplementary_material": "", "author": "Jiaxin Ge;Sanjay Subramanian;Trevor Darrell;Boyi Li", "authorids": "~Jiaxin_Ge1;~Sanjay_Subramanian1;~Trevor_Darrell2;~Boyi_Li1", "gender": "F;M;F;M", "homepage": "https://jiaxin.ge/;https://sanjayss34.github.io;https://sites.google.com/site/boyilics/home;https://people.eecs.berkeley.edu/~trevor/", "dblp": ";228/8258;;d/TrevorDarrell", "google_scholar": "I6P0SwgAAAAJ;7D1e-A0AAAAJ;;https://scholar.google.com.tw/citations?user=bh-uRFMAAAAJ", "or_profile": "~Jiaxin_Ge1;~Sanjay_Subramanian1;~Boyi_Li1;~trevor_darrell1", "aff": "Peking University;Snap Inc.;University of California, Berkeley;Electrical Engineering & Computer Science Department", "aff_domain": "pku.edu.cn;snap.com;berkeley.edu;eecs.berkeley.edu", "position": "Undergrad student;Intern;Postdoc;Professor", "bibtex": "@inproceedings{\nge2023from,\ntitle={From Wrong To Right: A Recursive Approach Towards Vision-Language Explanation},\nauthor={Jiaxin Ge and Sanjay Subramanian and Trevor Darrell and Boyi Li},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=uaZQ21cuzW}\n}", "github": "", "project": "", "reviewers": "86Pj;yJ8S;UABU", "site": "https://openreview.net/forum?id=uaZQ21cuzW", "pdf_size": 0, "rating": "5;5;5", "confidence": "3;2;5", "excitement": "3;4;4", "reproducibility": "3;4;4", "correctness": "3;3;3", "rating_avg": 5.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;", "linkedin": ";;;", "aff_unique_index": "0;1;2;3", "aff_unique_norm": "Peking University;Snap Inc.;University of California, Berkeley;Electrical Engineering & Computer Science Department", "aff_unique_dep": ";;;Electrical Engineering & Computer Science", "aff_unique_url": "http://www.pku.edu.cn;https://www.snapinc.com;https://www.berkeley.edu;", "aff_unique_abbr": "Peking U;Snap;UC Berkeley;", "aff_campus_unique_index": "1", "aff_campus_unique": ";Berkeley", "aff_country_unique_index": "0;1;1", "aff_country_unique": "China;United States;" }, { "id": "ubXaboYnzN", "title": "QTSumm: Query-Focused Summarization over Tabular Data", "track": "main", "status": "Long Main", "tldr": "", "abstract": "People primarily consult tables to conduct data analysis or answer specific questions. Text generation systems that can provide accurate table summaries tailored to users' information needs can facilitate more efficient access to relevant data insights. \nMotivated by this, we define a new query-focused table summarization task, where text generation models have to perform human-like reasoning and analysis over the given table to generate a tailored summary. We introduce a new benchmark named QTSumm for this task, which contains 7,111 human-annotated query-summary pairs over 2,934 tables covering diverse topics.\nWe investigate a set of strong baselines on QTSumm, including text generation, table-to-text generation, and large language models. 
\nExperimental results and manual analysis reveal that the new task presents significant challenges in table-to-text generation for future research.\nMoreover, we propose a new approach named ReFactor, to retrieve and reason over query-relevant information from tabular data to generate several natural language facts. Experimental results demonstrate that ReFactor can bring effective improvements to baselines by concatenating the generated facts to the model input.\nOur data and code are publicly available at https://github.com/yale-nlp/QTSumm.", "keywords": "Summarization;Text Generation;Reasoning over Structured Data", "primary_area": "", "supplementary_material": "", "author": "Yilun Zhao;Zhenting Qi;Linyong Nan;Boyu Mi;Yixin Liu;Weijin Zou;SIMENG HAN;RUIZHE CHEN;Xiangru Tang;Yumo Xu;Dragomir Radev;Arman Cohan", "authorids": "~Yilun_Zhao1;~Zhenting_Qi1;~Linyong_Nan1;~Boyu_Mi1;~Yixin_Liu2;~Weijin_Zou1;~SIMENG_HAN1;~RUIZHE_CHEN4;~Xiangru_Tang2;~Yumo_Xu1;~Dragomir_Radev2;~Arman_Cohan1", "gender": ";M;M;M;;F;F;M;M;M;M;", "homepage": "https://yilunzhao.github.io/;https://zhentingqi.github.io/;https://linyongnan.github.io/;;https://yixinl7.github.io/;;https://shirleyhan6.github.io/;https://github.com/rrrrrzz/rrrrzzz;https://xiangrutang.github.io/;;http://www.armancohan.com;http://www.cs.yale.edu/~radev", "dblp": "271/8391;329/2118;;;140/7348-3;321/1052;;;246/8064;222/9446;160/1727;r/DragomirRRadev", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com/citations?hl=en;b-HaNvYAAAAJ;https://scholar.google.com/citations?view_op=list_works;sFtxaMkAAAAJ;;D0dpploAAAAJ;;;;https://scholar.google.com/citations?hl=en;vIqWvgwAAAAJ", "or_profile": "~Yilun_Zhao1;~Zhenting_Qi1;~Linyong_Nan1;~Boyu_Mi1;~Yixin_Liu2;~Weijin_Zou1;~SIMENG_HAN1;~RUIZHE_CHEN4;~Xiangru_Tang2;~Yumo_Xu1;~Arman_Cohan1;~Dragomir_Radkov_Radev1", "aff": "Yale University;University of Illinois, Urbana Champaign;Yale University;Zhejiang University;Yale University;Yale University;Yale University;Zhejiang University;Yale University;University of Edinburgh, University of Edinburgh;Allen Institute for Artificial Intelligence;Yale University", "aff_domain": "yale.edu;illinois.edu;yale.edu;zju.edu.cn;yale.edu;yale.edu;yale.edu;zju.edu.cn;yale.edu;ed.ac.uk;allenai.org;yale.edu", "position": "PhD student;Undergrad student;PhD student;Undergrad student;PhD student;MS student;PhD student;Undergrad student;PhD student;Research Associate ;Research Scientist;Full Professor", "bibtex": "@inproceedings{\nzhao2023qtsumm,\ntitle={{QTS}umm: Query-Focused Summarization over Tabular Data},\nauthor={Yilun Zhao and Zhenting Qi and Linyong Nan and Boyu Mi and Yixin Liu and Weijin Zou and SIMENG HAN and RUIZHE CHEN and Xiangru Tang and Yumo Xu and Dragomir Radev and Arman Cohan},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=ubXaboYnzN}\n}", "github": "", "project": "", "reviewers": "Pktq;vhSV;PL53", "site": "https://openreview.net/forum?id=ubXaboYnzN", "pdf_size": 0, "rating": "4;4;4", "confidence": "5;3;3", "excitement": "2;4;4", "reproducibility": "4;4;4", "correctness": "3;4;4", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.0, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 12, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;;;;;;;0000-0002-0213-7487", "linkedin": 
";zhentingqi/;;;;weijin-vivian-zou;simeng-sophia-han-746135159/;;;;;dragomir-radev/", "aff_unique_index": "0;1;0;2;0;0;0;2;0;3;4;0", "aff_unique_norm": "Yale University;University of Illinois Urbana-Champaign;Zhejiang University;University of Edinburgh;Allen Institute for Artificial Intelligence", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.yale.edu;https://illinois.edu;https://www.zju.edu.cn;https://www.ed.ac.uk;https://allenai.org", "aff_unique_abbr": "Yale;UIUC;ZJU;Edinburgh;AI2", "aff_campus_unique_index": "1", "aff_campus_unique": ";Urbana-Champaign", "aff_country_unique_index": "0;0;0;1;0;0;0;1;0;2;0;0", "aff_country_unique": "United States;China;United Kingdom" }, { "id": "uckh15CSS1", "title": "ExplainCPE: A Free-text Explanation Benchmark of Chinese Pharmacist Examination", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "In the field of Large Language Models (LLMs), researchers are increasingly exploring their effectiveness across a wide range of tasks. \nHowever, a critical area that requires further investigation is the interpretability of these models, particularly the ability to generate rational explanations for their decisions. Most existing explanation datasets are limited to the English language and the general domain, which leads to a scarcity of linguistic diversity and a lack of resources in specialized domains, such as medical. To mitigate this, we propose ExplainCPE, a challenging medical dataset consisting of over 7K problems from Chinese Pharmacist Examination, specifically tailored to assess the model-generated explanations. From the overall results, only GPT-4 passes the pharmacist examination with a 75.7\\% accuracy, while other models like ChatGPT fail. Further detailed analysis of LLM-generated explanations reveals the limitations of LLMs in understanding medical text and executing computational reasoning. 
With the increasing importance of AI safety and trustworthiness, ExplainCPE takes a step towards improving and evaluating the interpretability of LLMs in the medical domain.", "keywords": "datasets;interpretability;large language model", "primary_area": "", "supplementary_material": "", "author": "Dongfang Li;Jindi Yu;Baotian Hu;Zhenran Xu;Min Zhang", "authorids": "~Dongfang_Li2;~Jindi_Yu1;~Baotian_Hu1;~Zhenran_Xu1;~Min_Zhang9", "gender": "M;M;;M;M", "homepage": "https://github.com/1ittlesnow;;;https://zhangmin-nlp-ai.github.io/;http://crazyofapple.github.io", "dblp": "348/0519;155/1902;322/2310;83/5342-5;https://dblp.uni-trier.de/pid/98/6118.html", "google_scholar": ";5NiJ1VoAAAAJ;1m5X_28AAAAJ;https://scholar.google.com/citations?hl=zh-CN;_OOzj40AAAAJ", "or_profile": "~Jindi_Yu1;~Baotian_Hu1;~Zhenran_Xu1;~Min_Zhang9;~dongfang_li1", "aff": "Harbin Institute of Technology;Harbin Institute of Technology, Shenzhen;Harbin Institute of Technology, Shenzhen;Harbin Institute of Technology, Shenzhen;Harbin Institute of Technology", "aff_domain": "hit.edu.cn;hhit.edu.cn;hit.edu.cn;hit.edu.cn;hit.edu.cn", "position": "MS student;Associate Professor;PhD student;Full Professor;PhD student", "bibtex": "@inproceedings{\nli2023explaincpe,\ntitle={Explain{CPE}: A Free-text Explanation Benchmark of Chinese Pharmacist Examination},\nauthor={Dongfang Li and Jindi Yu and Baotian Hu and Zhenran Xu and Min Zhang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=uckh15CSS1}\n}", "github": "", "project": "", "reviewers": "CyCb;xaZa;1fgk", "site": "https://openreview.net/forum?id=uckh15CSS1", "pdf_size": 0, "rating": "2;2;2", "confidence": "5;2;4", "excitement": "3;3;2", "reproducibility": "5;3;3", "correctness": "3;3;3", "rating_avg": 2.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 2.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0001-7490-684X;0000-0002-5536-806X;;", "linkedin": ";;zhenran-xu/;;", "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Harbin Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "http://www.hit.edu.cn/", "aff_unique_abbr": "HIT", "aff_campus_unique_index": "0;1;1;1;0", "aff_campus_unique": "Harbin;Shenzhen", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "udiNCxGKLl", "title": "Transformer-Based Language Model Surprisal Predicts Human Reading Times Best with About Two Billion Training Tokens", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "Recent psycholinguistic studies have drawn conflicting conclusions about the relationship between the quality of a language model and the ability of its surprisal estimates to predict human reading times, which has been speculated to be due to the large gap in both the amount of training data and model capacity across studies. The current work aims to consolidate these findings by evaluating surprisal estimates from Transformer-based language model variants that vary systematically in the amount of training data and model capacity on their ability to predict human reading times. The results show that surprisal estimates from most variants with contemporary model capacities provide the best fit after seeing about two billion training tokens, after which they begin to diverge from humanlike expectations. 
Additionally, newly-trained smaller model variants reveal a 'tipping point' at convergence, after which the decrease in language model perplexity begins to result in poorer fits to human reading times. These results suggest that the massive amount of training data is mainly responsible for the poorer fit achieved by surprisal from larger pre-trained language models, and that a certain degree of model capacity is necessary for Transformer-based language models to capture humanlike expectations.", "keywords": "surprisal theory;human sentence processing;large language models", "primary_area": "", "supplementary_material": "", "author": "Byung-Doh Oh;William Schuler", "authorids": "~Byung-Doh_Oh1;~William_Schuler1", "gender": ";M", "homepage": "https://byungdoh.github.io;https://www.asc.ohio-state.edu/schuler.77/", "dblp": "297/4206;21/41", "google_scholar": "61AeLAEAAAAJ;", "or_profile": "~Byung-Doh_Oh1;~William_Schuler1", "aff": "Ohio State University, Columbus;Ohio State University, Columbus", "aff_domain": "osu.edu;osu.edu", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\noh2023transformerbased,\ntitle={Transformer-Based Language Model Surprisal Predicts Human Reading Times Best with About Two Billion Training Tokens},\nauthor={Byung-Doh Oh and William Schuler},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=udiNCxGKLl}\n}", "github": "", "project": "", "reviewers": "AxD9;vi7K;nrTc", "site": "https://openreview.net/forum?id=udiNCxGKLl", "pdf_size": 0, "rating": "4;4;4", "confidence": "5;5;5", "excitement": "4;3;3", "reproducibility": "5;3;1", "correctness": "4;4;3", "rating_avg": 4.0, "confidence_avg": 5.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.0, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";", "linkedin": ";", "aff_unique_index": "0;0", "aff_unique_norm": "Ohio State University", "aff_unique_dep": "", "aff_unique_url": "https://www.osu.edu", "aff_unique_abbr": "OSU", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Columbus", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "udl5f2seyU", "title": "DiFair: A Benchmark for Disentangled Assessment of Gender Knowledge and Bias", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Numerous debiasing techniques have been proposed to mitigate the gender bias that is prevalent in pretrained language models. \nThese are often evaluated on datasets that check the extent to which the model is gender-neutral in its predictions.\nImportantly, this evaluation protocol overlooks the possible adverse impact of bias mitigation on useful gender knowledge. 
\nTo fill this gap, we propose **DiFair**, a manually curated dataset based on masked language modeling objectives.\n**DiFair** allows us to introduce a unified metric, *gender invariance score*, that not only quantifies a model's biased behavior, but also checks if useful gender knowledge is preserved.\nWe use **DiFair** as a benchmark for a number of widely-used pretained language models and debiasing techniques.\nExperimental results corroborate previous findings on the existing gender biases, while also demonstrating that although debiasing techniques ameliorate the issue of gender bias, this improvement usually comes at the price of lowering useful gender knowledge of the model.", "keywords": "Bias;Gender-Bias;Transformers;Task;Dataset;Language Modeling", "primary_area": "", "supplementary_material": "", "author": "Mahdi Zakizadeh;Kaveh Eskandari Miandoab;Mohammad Taher Pilehvar", "authorids": "~Mahdi_Zakizadeh1;~Kaveh_Eskandari_Miandoab1;~Mohammad_Taher_Pilehvar2", "gender": "M;M;M", "homepage": ";https://www.linkedin.com/in/kaveh-eskandari-ab9564145/;http://pilehvar.github.io", "dblp": "359/3865;359/3829.html;", "google_scholar": "UEv0uSsAAAAJ;GmQ7n6EAAAAJ;P-c9CsIAAAAJ", "or_profile": "~Mahdi_Zakizadeh1;~Kaveh_Eskandari_Miandoab1;~Mohammad_Taher_Pilehvar2", "aff": "Tehran Institute for Advanced Studies;Worcester Polytechnic Institute;TeIAS", "aff_domain": "teias.institute;wpi.edu;teias.institute", "position": "MS student;MS student;Assistant Professor", "bibtex": "@inproceedings{\nzakizadeh2023difair,\ntitle={DiFair: A Benchmark for Disentangled Assessment of Gender Knowledge and Bias},\nauthor={Mahdi Zakizadeh and Kaveh Eskandari Miandoab and Mohammad Taher Pilehvar},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=udl5f2seyU}\n}", "github": "", "project": "", "reviewers": "118u;Mwxm;kmvZ", "site": "https://openreview.net/forum?id=udl5f2seyU", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;4", "excitement": "4;3;4", "reproducibility": "3;4;4", "correctness": "3;3;3", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": "mahdizakizadeh/;kaveh-eskandari-ab9564145/;", "aff_unique_index": "0;1;2", "aff_unique_norm": "Tehran Institute for Advanced Studies;Worcester Polytechnic Institute;TeIAS", "aff_unique_dep": ";;", "aff_unique_url": "http://www.tias.ir;https://www.wpi.edu;", "aff_unique_abbr": ";WPI;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "Iran;United States;" }, { "id": "uemYdRTVvP", "title": "SeqXGPT: Sentence-Level AI-Generated Text Detection", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Widely applied large language models (LLMs) can generate human-like content, raising concerns about the abuse of LLMs. Therefore, it is important to build strong AI-generated text (AIGT) detectors. Current works only consider document-level AIGT detection, therefore, in this paper, we first introduce a sentence-level detection challenge by synthesizing a dataset that contains documents that are polished with LLMs, that is, the documents contain sentences written by humans and sentences modified by LLMs. 
Then we propose \\textbf{Seq}uence \\textbf{X} (Check) \\textbf{GPT}, a novel method that utilizes log probability lists from white-box LLMs as features for sentence-level AIGT detection. These features are composed like \\textit{waves} in speech processing and cannot be studied by LLMs. Therefore, we build SeqXGPT based on convolution and self-attention networks. We test it in both sentence and document-level detection challenges. Experimental results show that previous methods struggle in solving sentence-level AIGT detection, while our method not only significantly surpasses baseline methods in both sentence and document-level detection challenges but also exhibits strong generalization capabilities.", "keywords": "AI generated text detection;security in NLP;LLM", "primary_area": "", "supplementary_material": "", "author": "Pengyu Wang;Linyang Li;Ke Ren;Botian Jiang;Dong Zhang;Xipeng Qiu", "authorids": "~Pengyu_Wang2;~Linyang_Li1;~Ke_Ren2;~Botian_Jiang1;~Dong_Zhang9;~Xipeng_Qiu1", "gender": "M;M;M;M;M;M", "homepage": ";https://github.com/LinyangLee;https://github.com/renke999;;;https://xpqiu.github.io/", "dblp": "14/3832-6;228/8051;;;;69/1395", "google_scholar": "https://scholar.google.co.jp/citations?user=NGniJS0AAAAJ;T6eEqcMAAAAJ;https://scholar.google.com/citations?view_op=list_works;https://scholar.google.com/citations?hl=zh-CN;ScVbeu0AAAAJ;Pq4Yp_kAAAAJ", "or_profile": "~Pengyu_Wang2;~Linyang_Li1;~Ke_Ren2;~Botian_Jiang1;~Dong_Zhang9;~Xipeng_Qiu1", "aff": "Fudan University;Fudan University;Fudan University;Fudan University;Fudan University;Fudan University", "aff_domain": "fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn", "position": "PhD student;PhD student;MS student;MS student;MS student;Full Professor", "bibtex": "@inproceedings{\nwang2023seqxgpt,\ntitle={Seq{XGPT}: Sentence-Level {AI}-Generated Text Detection},\nauthor={Pengyu Wang and Linyang Li and Ke Ren and Botian Jiang and Dong Zhang and Xipeng Qiu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=uemYdRTVvP}\n}", "github": "", "project": "", "reviewers": "PKrT;nDFk;8nB6", "site": "https://openreview.net/forum?id=uemYdRTVvP", "pdf_size": 0, "rating": "5;5;5", "confidence": "3;4;4", "excitement": "4;3;3", "reproducibility": "1;4;4", "correctness": "4;4;4", "rating_avg": 5.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.0, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;0000-0001-7163-5247", "linkedin": ";;;%E5%8D%9A%E5%A4%A9-%E5%A7%9C-01a120227?lipi=urn%3Ali%3Apage%3Ad_flagship3_profile_view_base_contact_details%3BDQvpyioVTMKEM8AgLhbJKQ%3D%3D;;", "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "Fudan University", "aff_unique_dep": "", "aff_unique_url": "https://www.fudan.edu.cn", "aff_unique_abbr": "Fudan", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "ufu4C0bTwB", "title": "Target-oriented Proactive Dialogue Systems with Personalization: Problem Formulation and Dataset Curation", "track": "main", "status": "Short Main", "tldr": "", "abstract": "Target-oriented dialogue systems, designed to proactively steer conversations toward predefined targets or accomplish specific system-side goals, are an exciting area in conversational AI. 
In this work, by formulating a pair as the conversation target, we explore a novel problem of personalized target-oriented dialogue by considering personalization during the target accomplishment process. However, there remains an emergent need for high-quality datasets, and building one from scratch requires tremendous human effort. To address this, we propose an automatic dataset curation framework using a role-playing approach. Based on this framework, we construct a large-scale personalized target-oriented dialogue dataset, TopDial, which comprises about 18K multi-turn dialogues. The experimental results show that this dataset is of high quality and could contribute to exploring personalized target-oriented dialogue.", "keywords": "Target-oriented dialogue;Proactive dialogue;Personalization;Dataset curation", "primary_area": "", "supplementary_material": "", "author": "Jian Wang;Yi Cheng;Dongding Lin;Chak Tou Leong;Wenjie Li", "authorids": "~Jian_Wang18;~Yi_Cheng3;~Dongding_Lin1;~Chak_Tou_Leong1;~Wenjie_Li1", "gender": "M;F;M;M;F", "homepage": "https://iwangjian.github.io/;;https://lindongding.com;;https://web.comp.polyu.edu.hk/cswjli/", "dblp": "39/449-54.html;;221/1510.html;358/9146;33/3999-2.html", "google_scholar": "HUtas_0gHGIC;4FWRdrAAAAAJ;JM4i0R8AAAAJ;https://scholar.google.com/citations?view_op=list_works;Rx5swD4AAAAJ", "or_profile": "~Jian_Wang18;~Yi_Cheng3;~Dongding_Lin1;~Chak_Tou_Leong1;~Wenjie_Li1", "aff": "The Hong Kong Polytechnic University;The Hong Kong Polytechnic University;Hong Kong Polytechnic University;Hong Kong Polytechnic University;The Hong Kong Polytechnic University, The Hong Kong Polytechnic University", "aff_domain": "polyu.edu.hk;polyu.edu.hk;polyu.edu.hk;polyu.edu.hk;comp.polyu.edu.hk", "position": "PhD student;PhD student;PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nwang2023targetoriented,\ntitle={Target-oriented Proactive Dialogue Systems with Personalization: Problem Formulation and Dataset Curation},\nauthor={Jian Wang and Yi Cheng and Dongding Lin and Chak Tou Leong and Wenjie Li},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=ufu4C0bTwB}\n}", "github": "", "project": "", "reviewers": "kAUS;CF2z;4n1y", "site": "https://openreview.net/forum?id=ufu4C0bTwB", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;2", "excitement": "3;4;4", "reproducibility": "4;4;3", "correctness": "3;4;4", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-8992-8336;;0000-0002-8846-8622;;0000-0002-7360-8864", "linkedin": ";;;;", "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Hong Kong Polytechnic University", "aff_unique_dep": "", "aff_unique_url": "https://www.polyu.edu.hk", "aff_unique_abbr": "PolyU", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "uh5euNmL7t", "title": "Hyperpolyglot LLMs: Cross-Lingual Interpretability in Token Embeddings", "track": "main", "status": "Short Main", "tldr": "", "abstract": "Cross-lingual transfer learning is an important property of multilingual large language models (LLMs). 
But how do LLMs represent relationships between languages?\nEvery language model has an input layer that maps tokens to vectors. This ubiquitous layer of language models is often overlooked. \nWe find that similarities between these input embeddings are highly interpretable and that the geometry of these embeddings differs between model families. In one case (XLM-RoBERTa), embeddings encode language: tokens in different writing systems can be linearly separated with an average of 99.2\\% accuracy. Another family (mT5) represents cross-lingual semantic similarity: the 50 nearest neighbors for any token represent an average of 7.61 writing systems, and are frequently translations. \nThis result is surprising given that there is no explicit parallel cross-lingual training corpora and no explicit incentive for translations in pre-training objectives. \nOur research opens the door for investigations in 1) The effect of pre-training and model architectures on representations of languages and 2) The applications of cross-lingual representations embedded in language models.", "keywords": "Multilingual models;cross-lingual representations", "primary_area": "", "supplementary_material": "", "author": "Andrea W Wen-Yi;David Mimno", "authorids": "~Andrea_W_Wen-Yi1;~David_Mimno1", "gender": "M;", "homepage": "https://mimno.infosci.cornell.edu/;https://andreawwenyi.github.io/", "dblp": "39/5487;", "google_scholar": "uBFV6SUAAAAJ;TE24FMUAAAAJ", "or_profile": "~David_Mimno1;~Andrea_W_Wang1", "aff": "Cornell University;Cornell University", "aff_domain": "cornell.edu;cornell.edu", "position": "Associate Professor;PhD student", "bibtex": "@inproceedings{\nwen-yi2023hyperpolyglot,\ntitle={Hyperpolyglot {LLM}s: Cross-Lingual Interpretability in Token Embeddings},\nauthor={Andrea W Wen-Yi and David Mimno},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=uh5euNmL7t}\n}", "github": "", "project": "", "reviewers": "E6Zp;KmTx;Eu6p", "site": "https://openreview.net/forum?id=uh5euNmL7t", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;3;2", "excitement": "3;4;4", "reproducibility": "3;4;3", "correctness": "3;3;4", "rating_avg": 4.0, "confidence_avg": 2.6666666666666665, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";", "linkedin": ";", "aff_unique_index": "0;0", "aff_unique_norm": "Cornell University", "aff_unique_dep": "", "aff_unique_url": "https://www.cornell.edu", "aff_unique_abbr": "Cornell", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "uhVJ3SLq80", "title": "BioT5: Enriching Cross-modal Integration in Biology with Chemical Knowledge and Natural Language Associations", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Recent advancements in biological research leverage the integration of molecules, proteins, and natural language to enhance drug discovery. However, current models exhibit several limitations, such as the generation of invalid molecular SMILES, underutilization of contextual information, and equal treatment of structured and unstructured knowledge. 
To address these issues, we propose BioT5, a comprehensive pre-training framework that enriches cross-modal integration in biology with chemical knowledge and natural language associations. BioT5 utilizes SELFIES for 100\\% robust molecular representations and extracts knowledge from the surrounding context of bio-entities in unstructured biological literature. Furthermore, BioT5 distinguishes between structured and unstructured knowledge, leading to more effective utilization of information. After fine-tuning, BioT5 shows superior performance across a wide range of tasks, demonstrating its strong capability of capturing underlying relations and properties of bio-entities. Our code is available at https://github.com/QizhiPei/BioT5.", "keywords": "cross-modal;biology;text;molecule;protein", "primary_area": "", "supplementary_material": "", "author": "Qizhi Pei;Wei Zhang;Jinhua Zhu;Kehan Wu;Kaiyuan Gao;Lijun Wu;Yingce Xia;Rui Yan", "authorids": "~Qizhi_Pei1;~Wei_Zhang72;~Jinhua_Zhu1;~Kehan_Wu1;~Kaiyuan_Gao1;~Lijun_Wu1;~Yingce_Xia1;~Rui_Yan2", "gender": ";M;M;F;M;M;M;M", "homepage": "https://qizhipei.github.io/;;https://github.com/teslacool;https://github.com/HankerWu;https://kygao.github.io;https://apeterswu.github.io/;https://www.microsoft.com/en-us/research/people/yinxia/;https://gsai.ruc.edu.cn/english/ruiyan", "dblp": "322/9716;;18/1965-1;;180/6731;68/1284-3;http://dblp.uni-trier.de/pers/hd/x/Xia:Yingce;19/2405-1", "google_scholar": "sf3xGU8AAAAJ;;https://scholar.google.com.hk/citations?user=FvGy0LQAAAAJ;;Or77MPQAAAAJ;https://scholar.google.com/citations?hl=en;GS5wRxYAAAAJ;eLw6g-UAAAAJ", "or_profile": "~Qizhi_Pei1;~Wei_Zhang72;~Jinhua_Zhu1;~Kehan_Wu1;~Kaiyuan_Gao1;~Lijun_Wu1;~Yingce_Xia1;~Rui_Yan2", "aff": "Microsoft;University of Science and Technology of China;University of Science and Technology of China;University of Science and Technology of China;Huazhong University of Science and Technology;Microsoft Research;Microsoft;Renmin University of China", "aff_domain": "microsoft.com;ustc.edu.cn;ustc.edu.cn;ustc.edu.cn;hust.edu.cn;microsoft.com;microsoft.com;ruc.edu.cn", "position": "Intern;Undergrad student;PhD student;PhD student;PhD student;Researcher;Researcher;Associate Professor", "bibtex": "@inproceedings{\npei2023biot,\ntitle={BioT5: Enriching Cross-modal Integration in Biology with Chemical Knowledge and Natural Language Associations},\nauthor={Qizhi Pei and Wei Zhang and Jinhua Zhu and Kehan Wu and Kaiyuan Gao and Lijun Wu and Yingce Xia and Rui Yan},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=uhVJ3SLq80}\n}", "github": "", "project": "", "reviewers": "tZxv;AqiU;fMEa;BcEQ", "site": "https://openreview.net/forum?id=uhVJ3SLq80", "pdf_size": 0, "rating": "4;4;4;4", "confidence": "2;4;4;4", "excitement": "3;4;3;4", "reproducibility": "3;4;3;4", "correctness": "4;4;3;4", "rating_avg": 4.0, "confidence_avg": 3.5, "excitement_avg": 3.5, "reproducibility_avg": 3.5, "correctness_avg": 3.75, "replies_avg": 14, "authors#_avg": 8, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-7242-422X;;0000-0003-2157-9077;;0009-0002-8862-8320;0000-0002-3530-590X;;0000-0002-3356-6823", "linkedin": "%E5%90%AF%E6%99%BA-%E8%A3%B4-680192218/en?trk=people-guest_people_search-card;%E6%9C%AA-%E7%AB%A0-a8793124a/;;;;lijun-wu-59340478/;;", "aff_unique_index": "0;1;1;1;2;0;0;3", "aff_unique_norm": "Microsoft;University of Science and Technology of China;Huazhong University of Science and 
Technology;Renmin University of China", "aff_unique_dep": "Microsoft Corporation;;;", "aff_unique_url": "https://www.microsoft.com;http://www.ustc.edu.cn;http://www.hust.edu.cn;http://www.ruc.edu.cn", "aff_unique_abbr": "Microsoft;USTC;HUST;RUC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1;1;0;0;1", "aff_country_unique": "United States;China" }, { "id": "ul47tFdRn6", "title": "DiffuVST: Narrating Fictional Scenes with Global-History-Guided Denoising Models", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Recent advances in image and video creation, especially AI-based image synthesis, have led to the production of numerous visual scenes that exhibit a high level of abstractness and diversity. Consequently, Visual Storytelling (VST), a task that involves generating meaningful and coherent narratives from a collection of images, has become even more challenging and is increasingly desired beyond real-world imagery. While existing VST techniques, which typically use autoregressive decoders, have made significant progress, they suffer from low inference speed and are not well-suited for synthetic scenes. To this end, we propose a novel diffusion-based system DiffuVST, which models the generation of a series of visual descriptions as a single conditional denoising process. The stochastic and non-autoregressive nature of DiffuVST at inference time allows it to generate highly diverse narratives more efficiently. In addition, DiffuVST features a unique design with bi-directional text history guidance and multimodal adapter modules, which effectively improve inter-sentence coherence and image-to-text fidelity. Extensive experiments on the story generation task covering four fictional visual-story datasets demonstrate the superiority of DiffuVST over traditional autoregressive models in terms of both text quality and inference speed.", "keywords": "visual storytelling;diffusion language models;global history guidance", "primary_area": "", "supplementary_material": "", "author": "Shengguang Wu;Mei Yuan;Qi Su", "authorids": "~Shengguang_Wu1;~Mei_Yuan1;~Qi_Su1", "gender": "M;F;F", "homepage": "https://cs.stanford.edu/~shgwu;https://github.com/yumiko424;", "dblp": ";;25/221-1", "google_scholar": "QZmepnEAAAAJ;;9f4JUrUAAAAJ", "or_profile": "~Shengguang_Wu1;~Mei_Yuan1;~Qi_Su1", "aff": "Peking University;Peking University;Peking University", "aff_domain": "pku.edu.cn;pku.edu.cn;pku.edu.cn", "position": "MS student;MS student;Associate Professor", "bibtex": "@inproceedings{\nwu2023diffuvst,\ntitle={Diffu{VST}: Narrating Fictional Scenes with Global-History-Guided Denoising Models},\nauthor={Shengguang Wu and Mei Yuan and Qi Su},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=ul47tFdRn6}\n}", "github": "", "project": "", "reviewers": "Xen1;sjRb;AmK5", "site": "https://openreview.net/forum?id=ul47tFdRn6", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;4;3", "excitement": "2;4;3", "reproducibility": "2;3;4", "correctness": "2;4;3", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.0, "reproducibility_avg": 3.0, "correctness_avg": 3.0, "replies_avg": 10, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": ";;", "aff_unique_index": "0;0;0", "aff_unique_norm": "Peking University", "aff_unique_dep": "", "aff_unique_url": "http://www.pku.edu.cn", "aff_unique_abbr": 
"Peking U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "ulqYwmcUnL", "title": "XTREME-UP: A User-Centric Scarce-Data Benchmark for Under-Represented Languages", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Data scarcity is a crucial issue for the development of highly multilingual NLP systems. Yet for many under-represented languages (ULs) --- languages for which NLP research is particularly far behind in meeting user needs --- it is feasible to annotate small amounts of data. Motivated by this, we propose XTREME-UP, a benchmark defined by: its focus on the scarce-data scenario rather than zero-shot; its focus on user-centric tasks --- tasks with broad adoption by speakers of high-resource languages; and its focus on under-represented languages where this scarce-data scenario tends to be most realistic. XTREME-UP evaluates the capabilities of language models across 88 under-represented languages over 9 key user-centric technologies including ASR, OCR, MT, and information access tasks that are of general utility. We create new datasets for OCR, autocomplete, semantic parsing, and transliteration, and build on and refine existing datasets for other tasks. XTREME-UP provides methodology for evaluating many modeling scenarios including text only, multi-modal (vision, audio, and text), supervised parameter tuning, and in-context learning. We evaluate commonly used models on the benchmark. We release all code and scripts to train and evaluate models.", "keywords": "nlp;low-resource;under-represented;scarce data;benchmark", "primary_area": "", "supplementary_material": "", "author": "Sebastian Ruder;Jonathan H. Clark;Alexander Gutkin;Mihir Kale;Min Ma;Massimo Nicosia;Shruti Rijhwani;Parker Riley;Jean Michel Amath Sarr;Xinyi Wang;John Frederick Wieting;Nitish Gupta;Anna Katanova;Christo Kirov;Dana L Dickinson;Brian Roark;Bidisha Samanta;Connie Tao;David Ifeoluwa Adelani;Vera Axelrod;Isaac Rayburn Caswell;Colin Cherry;Dan Garrette;Reeve Ingle;Melvin Johnson;Dmitry Panteleev;Partha Talukdar", "authorids": "~Sebastian_Ruder2;~Jonathan_H._Clark1;~Alexander_Gutkin2;~Mihir_Kale1;~Min_Ma1;~Massimo_Nicosia1;~Shruti_Rijhwani1;~Parker_Riley1;~Jean_Michel_Amath_Sarr1;~Xinyi_Wang1;~John_Frederick_Wieting1;~Nitish_Gupta1;~Anna_Katanova1;~Christo_Kirov2;~Dana_L_Dickinson1;~Brian_Roark1;~Bidisha_Samanta2;~Connie_Tao1;~David_Ifeoluwa_Adelani1;~Vera_Axelrod1;~Isaac_Rayburn_Caswell1;~Colin_Cherry1;~Dan_Garrette1;~Reeve_Ingle1;~Melvin_Johnson1;~Dmitry_Panteleev1;~Partha_Talukdar1", "gender": "M;;M;Not Specified;M;;M;M;F;M;M;;M;;M;F;;M;F;;M;M;M;M;M;M;M", "homepage": ";;;https://www.linkedin.com/in/min-ma-b5a1345a/;https://maxnicosia.com;https://shrutirij.github.io;;;;;http://nitishgupta.github.io/;https://research.google/people/AnnaKatanova/;;;http://www.lanzaroark.org/brian-roark;https://sites.google.com/view/bidisha-samanta/;https://www.linkedin.com/in/connietao/;https://dadelani.github.io/;;;https://sites.google.com/site/colinacherry/;http://www.dhgarrette.com/;;;;https://parthatalukdar.github.io/;http://sebastianruder.com/", "dblp": "02/786;33/5120;;;136/8001;188/9080;222/9463;;;156/0158;45/10343;266/1034;33/11059.html;;09/246;198/0754;;230/6973;228/8002.html;236/5919.html;99/6601;117/4050;63/9389;186/7972;;282/0169.html;186/7066", "google_scholar": 
"WfWxwlIAAAAJ;yXlkfhoAAAAJ;UWfTXIkAAAAJ;;lcYzgzsAAAAJ;_MQ_lNgAAAAJ;yb7ah5sAAAAJ;qdrePlgAAAAJ;https://scholar.google.com/citations?view_op=list_works;;STiAua8AAAAJ;;OPQn46oAAAAJ;;xPjpSs4AAAAJ;https://scholar.google.co.in/citations?user=qJrVMhMAAAAJ;;https://scholar.google.ca/citations?user=W9sTkS0AAAAJ;-_wK-BUAAAAJ;myh9l2AAAAAJ;TNr_OWMAAAAJ;tT9mhNMAAAAJ;goH3JU8AAAAJ;g4oMRgsAAAAJ;8_sPHr4AAAAJ;https://scholar.google.com.tw/citations?user=CIZwXAcAAAAJ;https://scholar.google.de/citations?user=8ONXPV8AAAAJ", "or_profile": "~Jonathan_H._Clark1;~Alexander_Gutkin2;~Mihir_Kale1;~Min_Ma1;~Massimo_Nicosia1;~Shruti_Rijhwani1;~Parker_Riley1;~Jean_Michel_Amath_Sarr1;~Xinyi_Wang1;~John_Frederick_Wieting1;~Nitish_Gupta1;~Anna_Katanova1;~Christo_Kirov2;~Dana_L_Dickinson1;~Brian_Roark1;~Bidisha_Samanta2;~Connie_Tao1;~David_Ifeoluwa_Adelani1;~Vera_Axelrod1;~Isaac_Rayburn_Caswell1;~Colin_Cherry1;~Dan_Garrette1;~Reeve_Ingle1;~Melvin_Johnson1;~Dmitry_Panteleev1;~Partha_Talukdar1;~Sebastian_Ruder1", "aff": "Google DeepMind;Google;Google;Google;Google;Google DeepMind;Google;Google;Google;Google DeepMind;Google;Research, Google;Google;;Google;Research, Google;;University College London, University of London;Research, Google;Google;Google;Google DeepMind;Google;Google;Research, Google;Indian Institute of Science, Bangalore;Google", "aff_domain": "google.com;google.com;google.com;google.com;google.com;google.com;google.com;google.com;google.com;google.com;google.com;research.google.com;google.com;;google.com;research.google.com;;ucl.ac.uk;research.google.com;google.com;google.com;google.com;google.com;google.com;research.google.com;iisc.ac.in;google.com", "position": "Researcher;Researcher;Researcher;Researcher;Researcher;Researcher;Researcher;Researcher;Researcher;Researcher;Researcher;Researcher;Researcher;;Research Scientist;Researcher;;Postdoc;Researcher;Researcher;Researcher;Researcher;Researcher;Senior Software Engineer;Researcher;Associate Professor;Research scientist", "bibtex": "@inproceedings{\nruder2023xtremeup,\ntitle={{XTREME}-{UP}: A User-Centric Scarce-Data Benchmark for Under-Represented Languages},\nauthor={Sebastian Ruder and Jonathan H. 
Clark and Alexander Gutkin and Mihir Kale and Min Ma and Massimo Nicosia and Shruti Rijhwani and Parker Riley and Jean Michel Amath Sarr and Xinyi Wang and John Frederick Wieting and Nitish Gupta and Anna Katanova and Christo Kirov and Dana L Dickinson and Brian Roark and Bidisha Samanta and Connie Tao and David Ifeoluwa Adelani and Vera Axelrod and Isaac Rayburn Caswell and Colin Cherry and Dan Garrette and Reeve Ingle and Melvin Johnson and Dmitry Panteleev and Partha Talukdar},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=ulqYwmcUnL}\n}", "github": "", "project": "", "reviewers": "qsF6;tT4X;jMj2", "site": "https://openreview.net/forum?id=ulqYwmcUnL", "pdf_size": 0, "rating": "4;4;4", "confidence": "5;3;3", "excitement": "3;3;3", "reproducibility": "4;3;3", "correctness": "3;3;3", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.0, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 27, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0009-0008-2036-2777;0000-0001-6327-4824;;;;;;;;;;;;;;;;0000-0002-0193-2083;;;;;;;;;", "linkedin": ";https://linkedin.com/in/alexandergutkin;;;massimonicosia/;;;;;;;;;;;bidisha-samanta-baa73b5b/;;david-adelani-7557b337/;vera-axelrod-044b5026/;;colincherry/;;;;;;sebastianruder", "aff_unique_index": "0;0;0;0;0;0;0;0;0;0;0;0;0;0;0;1;0;0;0;0;0;0;0;2;0", "aff_unique_norm": "Google;University College London;Indian Institute of Science", "aff_unique_dep": "Google DeepMind;;", "aff_unique_url": "https://deepmind.com;https://www.ucl.ac.uk;https://www.iisc.ac.in", "aff_unique_abbr": "DeepMind;UCL;IISc", "aff_campus_unique_index": "1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;1;2;1", "aff_campus_unique": ";Mountain View;Bangalore", "aff_country_unique_index": "0;1;1;1;1;0;1;1;1;0;1;1;1;1;1;0;1;1;1;0;1;1;1;2;1", "aff_country_unique": "United Kingdom;United States;India" }, { "id": "unKIy4mpnn", "title": "Making Body Movement in Sign Language Corpus Accessible for Linguists and Machines with Three-Dimensional Normalization of MediaPipe", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Linguists can access movement in the sign language video corpus through manual annotation or computational methods. The first relies on a predefinition of features, and the second requires technical knowledge. Methods like MediaPipe and OpenPose are now more often used in sign language processing. MediaPipe detects a two-dimensional (2D) body pose in a single image with a limited approximation of the depth coordinate. Such 2D projection of a three-dimensional (3D) body pose limits the potential application of the resulting models outside the capturing camera settings and position. 2D pose data does not provide linguists with direct and human-readable access to the collected movement data. We propose our four main contributions: A novel 3D normalization method for MediaPipe's 2D pose, a novel human-readable way of representing the 3D normalized pose data, an analysis of Japanese Sign Language (JSL) sociolinguistic features using the proposed techniques, where we show how an individual signer can be identified based on unique personal movement patterns suggesting a potential threat to anonymity. Our method outperforms the common 2D normalization on a small, diverse JSL dataset. 
We demonstrate its benefit for deep learning approaches by significantly outperforming the pose-based state-of-the-art models on the open sign language recognition benchmark.", "keywords": "sign language;pose processing;wlasl;movement processing", "primary_area": "", "supplementary_material": "", "author": "Victor Skobov;Mayumi Bono", "authorids": "~Victor_Skobov1;~Mayumi_Bono1", "gender": ";F", "homepage": ";http://research.nii.ac.jp/~bono/en/index.html", "dblp": ";", "google_scholar": ";", "or_profile": "~Victor_Skobov1;~Mayumi_Bono1", "aff": ";NII, Tokyo Institute of Technology", "aff_domain": ";nii.ac.jp", "position": ";Associate Professor", "bibtex": "@inproceedings{\nskobov2023making,\ntitle={Making Body Movement in Sign Language Corpus Accessible for Linguists and Machines with Three-Dimensional Normalization of MediaPipe},\nauthor={Victor Skobov and Mayumi Bono},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=unKIy4mpnn}\n}", "github": "", "project": "", "reviewers": "iboj;2kwG;ynFA", "site": "https://openreview.net/forum?id=unKIy4mpnn", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;5;4", "excitement": "3;2;4", "reproducibility": "4;3;4", "correctness": "4;3;3", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";", "linkedin": ";", "aff_unique_index": "0", "aff_unique_norm": "Tokyo Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.titech.ac.jp", "aff_unique_abbr": "Titech", "aff_campus_unique_index": "0", "aff_campus_unique": "Tokyo", "aff_country_unique_index": "0", "aff_country_unique": "Japan" }, { "id": "up8EYzyrKV", "title": "Towards Mitigating LLM Hallucination via Self Reflection", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Large language models (LLMs) have shown promise for generative and knowledge-intensive tasks including question-answering (QA) tasks. However, the practical deployment still faces challenges, notably the issue of \"hallucination\", where models generate plausible-sounding but unfaithful or nonsensical information. This issue becomes particularly critical in the medical domain due to the uncommon professional concepts and potential social risks involved. This paper analyses the phenomenon of hallucination in medical generative QA systems using widely adopted LLMs and datasets. Our investigation centers on the identification and comprehension of common problematic answers, with a specific emphasis on hallucination. To tackle this challenge, we present an interactive self-reflection methodology that incorporates knowledge acquisition and answer generation. Through this feedback process, our approach steadily enhances the factuality, consistency, and entailment of the generated answers. Consequently, we harness the interactivity and multitasking ability of LLMs and produce progressively more precise and accurate answers. 
Experimental results on both automatic and human evaluation demonstrate the superiority of our approach in hallucination reduction compared to baselines.", "keywords": "Hallucination;Large Language Model;Medical Question Answering;Generative Question Answering;Question Answering", "primary_area": "", "supplementary_material": "", "author": "Ziwei Ji;Tiezheng YU;Yan Xu;Nayeon Lee;Etsuko Ishii;Pascale Fung", "authorids": "~Ziwei_Ji2;~Tiezheng_YU1;~Yan_Xu6;~Nayeon_Lee1;~Etsuko_Ishii1;~Pascale_Fung1", "gender": "F;M;F;F;;F", "homepage": "https://ziweiji.github.io/;https://tysonyu.github.io;https://yana-xuyan.github.io/;;;http://pascale.home.ece.ust.hk/", "dblp": "176/4574;;03/4702-12;;;29/4187", "google_scholar": "oSnZ9mMAAAAJ;https://scholar.google.com.hk/citations?user=JK7nNekAAAAJ;j1t9_ScAAAAJ;https://scholar.google.com.hk/citations?user=HN6Y7z0AAAAJ;;", "or_profile": "~Ziwei_Ji2;~Tiezheng_YU1;~Yan_Xu6;~Nayeon_Lee1;~Etsuko_Ishii1;~Pascale_Fung1", "aff": "Hong Kong University of Science and Technology;Hong Kong University of Science and Technology;Hong Kong University of Science and Technology;Hong Kong University of Science and Technology;;HKUST", "aff_domain": "ust.hk;ust.hk;ust.hk;ust.hk;;ece.ust.hk", "position": "PhD student;PhD student;PhD student;PhD student;;Full Professor", "bibtex": "@inproceedings{\nji2023towards,\ntitle={Towards Mitigating {LLM} Hallucination via Self Reflection},\nauthor={Ziwei Ji and Tiezheng YU and Yan Xu and Nayeon Lee and Etsuko Ishii and Pascale Fung},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=up8EYzyrKV}\n}", "github": "", "project": "", "reviewers": "tvQQ;coTz;8mL5;5JJj", "site": "https://openreview.net/forum?id=up8EYzyrKV", "pdf_size": 0, "rating": "4;4;4;4", "confidence": "3;3;5;4", "excitement": "4;2;3;4", "reproducibility": "5;4;3;4", "correctness": "4;3;3;3", "rating_avg": 4.0, "confidence_avg": 3.75, "excitement_avg": 3.25, "reproducibility_avg": 4.0, "correctness_avg": 3.25, "replies_avg": 14, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-0206-7861;;;;;", "linkedin": "ziwei-ji-a516b91a7/;;yan-xu-4a822a172/;;;", "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Hong Kong University of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.ust.hk", "aff_unique_abbr": "HKUST", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "us7p0VsOhl", "title": "GLGR: Question-aware Global-to-Local Graph Reasoning for Multi-party Dialogue Reading Comprehension", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Graph reasoning contributes to the integration of discretely-distributed attentive information (clues) for Multi-party Dialogue Reading Comprehension (MDRC). This is attributed primarily to multi-hop reasoning over global conversational structures. However, existing approaches barely apply questions for anti-noise graph reasoning. More seriously, the local semantic structures in utterances are neglected, although they are beneficial for bridging across semantically-related clues. In this paper, we propose a question-aware global-to-local graph reasoning approach. It expands the canonical Interlocutor-Utterance graph by introducing a question node, enabling comprehensive global graph reasoning. 
More importantly, it constructs a semantic-role graph for each utterance, and accordingly performs local graph reasoning conditioned on the semantic relations. We design a two-stage encoder network to implement the progressive reasoning from the global graph to local. The experiments on the benchmark datasets Molweni and FriendsQA show that our approach yields significant improvements, compared to BERT and ELECTRA baselines. It achieves 73.6\\% and 77.2\\% F$1$-scores on Molweni and FriendsQA, respectively, outperforming state-of-the-art methods that employ different pretrained language models as backbones.", "keywords": "Multi-party dialogue reading comprehension;Global-to-local graph reasoning", "primary_area": "", "supplementary_material": "", "author": "Yanling Li;Bowei Zou;Yifan Fan;Xibo Li;AiTi Aw;Yu Hong", "authorids": "~Yanling_Li2;~Bowei_Zou1;~Yifan_Fan1;~Xibo_Li2;~AiTi_Aw1;~Yu_Hong1", "gender": "F;M;F;;;M", "homepage": "https://github.com/YanLingLi-AI;;;;;", "dblp": ";136/9191;https://dblp.org/;;;66/5306", "google_scholar": ";https://scholar.google.com/citations?hl=zh-CN;;;;", "or_profile": "~Yanling_Li2;~Bowei_Zou1;~Yifan_Fan1;~Xibo_Li2;~AiTi_Aw1;~Yu_Hong1", "aff": "Suzhou University;A*STAR;Suzhou University;;;Suzhou University", "aff_domain": "suda.edu.cn;a-star.edu.sg;suda.edu.cn;;;suda.edu.cn", "position": "MS student;Researcher;PhD student;;;Full Professor", "bibtex": "@inproceedings{\nli2023glgr,\ntitle={{GLGR}: Question-aware Global-to-Local Graph Reasoning for Multi-party Dialogue Reading Comprehension},\nauthor={Yanling Li and Bowei Zou and Yifan Fan and Xibo Li and AiTi Aw and Yu Hong},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=us7p0VsOhl}\n}", "github": "", "project": "", "reviewers": "BDx5;wxqV;28uk;F64H", "site": "https://openreview.net/forum?id=us7p0VsOhl", "pdf_size": 0, "rating": "3;3;3;3", "confidence": "2;4;3;4", "excitement": "3;3;3;3", "reproducibility": "4;4;3;3", "correctness": "4;3;3;4", "rating_avg": 3.0, "confidence_avg": 3.25, "excitement_avg": 3.0, "reproducibility_avg": 3.5, "correctness_avg": 3.5, "replies_avg": 14, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;", "linkedin": ";;;;;", "aff_unique_index": "0;1;0;0", "aff_unique_norm": "Suzhou University;Agency for Science, Technology and Research", "aff_unique_dep": ";", "aff_unique_url": "https://www.suda.edu.cn;https://www.a-star.edu.sg", "aff_unique_abbr": "Suda;A*STAR", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "China;Singapore" }, { "id": "usnEi3Bfnt", "title": "Structural generalization in COGS: Supertagging is (almost) all you need", "track": "main", "status": "Long Main", "tldr": "", "abstract": "In many Natural Language Processing applications, neural networks have been found to fail to generalize on out-of-distribution examples.\nIn particular, several recent semantic parsing datasets have put forward important limitations of neural networks in cases where compositional generalization is required.\nIn this work, we extend a neural graph-based parsing framework in several ways to alleviate this issue, notably:\n(1) the introduction of a supertagging step with valency constraints, expressed as an integer linear program;\n(2) the reduction of the graph prediction problem to the maximum matching problem;\n(3) the design of an incremental early-stopping training strategy to 
prevent overfitting.\nExperimentally, our approach significantly improves results on examples that require structural generalization in the COGS dataset,\na known challenging benchmark for compositional generalization.\nOverall, these results confirm that structural constraints are important for generalization in semantic parsing.", "keywords": "semantic parsing;compositional generalization;graph-based parsing", "primary_area": "", "supplementary_material": "", "author": "Alban Petit;Caio Corro;Fran\u00e7ois Yvon", "authorids": "~Alban_Petit1;~Caio_Corro2;~Fran\u00e7ois_Yvon2", "gender": ";M;M", "homepage": "https://alban-petit.github.io/;http://caio-corro.fr/;http://cv.archives-ouvertes.fr/francois-yvon", "dblp": ";184/3727;05/2701.html", "google_scholar": ";Q_DmlucAAAAJ;https://scholar.google.fr/citations?hl=fr", "or_profile": "~Alban_Petit1;~Caio_Corro2;~Fran\u00e7ois_Yvon2", "aff": "LIMSI-CNRS / Universit\u00e9 Paris-Sud;LIMSI-CNRS / Universit\u00e9 Paris-Sud;LISN-CNRS / Universit\u00e9 Paris Saclay", "aff_domain": "limsi.fr;limsi.fr;lisn.fr", "position": "PhD student;Associate Professor;Senior Researcher", "bibtex": "@inproceedings{\npetit2023structural,\ntitle={Structural generalization in {COGS}: Supertagging is (almost) all you need},\nauthor={Alban Petit and Caio Corro and Fran{\\c{c}}ois Yvon},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=usnEi3Bfnt}\n}", "github": "", "project": "", "reviewers": "6hiA;KQiC;Gx4y", "site": "https://openreview.net/forum?id=usnEi3Bfnt", "pdf_size": 0, "rating": "5;5;5", "confidence": "2;3;4", "excitement": "4;4;2", "reproducibility": "4;3;3", "correctness": "4;4;3", "rating_avg": 5.0, "confidence_avg": 3.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0002-7972-7442", "linkedin": ";;", "aff_unique_index": "0;0;1", "aff_unique_norm": "LIMSI-CNRS;Universit\u00e9 Paris Saclay", "aff_unique_dep": ";LISN-CNRS", "aff_unique_url": "https://www.limsi.fr;https://www.universite-paris-saclay.fr", "aff_unique_abbr": "LIMSI-CNRS;UPS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "France" }, { "id": "uu6Oq7MN7g", "title": "CodeT5+: Open Code Large Language Models for Code Understanding and Generation", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Large language models (LLMs) pretrained on vast source code have achieved prominent progress in code intelligence. However, existing code LLMs have two main limitations. First, they often adopt a specific architecture (encoder-only or decoder-only) or rely on a unified encoder-decoder network for different downstream tasks, lacking the flexibility to operate in the optimal architecture for a specific task. Secondly, they often employ a limited set of pretraining objectives which might not be relevant to some tasks and hence result in substantial performance degrade. To address these limitations, we propose \u201cCodeT5+\u201d, a family of encoder-decoder LLMs for code in which component modules can be flexibly combined to suit a wide range of code tasks. 
Such flexibility is enabled by our proposed mixture of pretraining objectives, which cover span denoising, contrastive learning, text-code matching, and causal LM pretraining tasks, on both unimodal and bimodal multilingual code corpora. Furthermore, we propose to initialize CodeT5+ with frozen off-the-shelf LLMs without training from scratch to efficiently scale up our models, and explore instruction-tuning to align with natural language instructions. We extensively evaluate CodeT5+ on over 20 code-related benchmarks in different settings, including zero-shot, finetuning, and instruction-tuning. We observe state-of-the-art (SoTA) performance on various code-related tasks, and our instruction-tuned CodeT5+ 16B achieves new SoTA results of 35.0% pass@1 and 54.5% pass@10 on the HumanEval code generation task against other open code LLMs, even surpassing the OpenAI code-cushman-001 model.", "keywords": "Large Language Model;Code Intelligence;Code Understanding and Generation", "primary_area": "", "supplementary_material": "", "author": "Yue Wang;Hung Le;Akhilesh Deepak Gotmare;Nghi D. Q. Bui;Junnan Li;Steven Hoi", "authorids": "~Yue_Wang19;~Hung_Le2;~Akhilesh_Deepak_Gotmare1;~Nghi_D._Q._Bui1;~Junnan_Li2;~Steven_Hoi2", "gender": "M;M;M;M;M;M", "homepage": "https://yuewang-cuhk.github.io/;https://akhileshgotmare.github.io/;https://bdqnghi.github.io/;http://stevenhoi.com;https://sites.google.com/view/henryle2018/home;https://sites.google.com/site/junnanlics/", "dblp": "60/9374-34;156/0933;207/7870;;;193/6773-1.html", "google_scholar": "iyxbtcEAAAAJ;https://scholar.google.ch/citations?user=2S-aFwIAAAAJ;QwybxYsAAAAJ;JoLjflYAAAAJ;https://scholar.google.com/citations?hl=en;MuUhwi0AAAAJ", "or_profile": "~Yue_Wang19;~Akhilesh_Deepak_Gotmare1;~Nghi_D._Q._Bui1;~Steven_Hoi2;~Henry_Le1;~Junnan_li1", "aff": "SalesForce.com;SalesForce.com;SalesForce.com;Singapore Management University;Salesforce Research;Salesforce Research", "aff_domain": "salesforce.com;salesforce.com;salesforce.com;smu.edu.sg;salesforce.com;salesforce.com", "position": "Researcher;Researcher;Researcher;Associate Professor;Researcher;Research Scientist", "bibtex": "@inproceedings{\nwang2023codet,\ntitle={CodeT5+: Open Code Large Language Models for Code Understanding and Generation},\nauthor={Yue Wang and Hung Le and Akhilesh Deepak Gotmare and Nghi D. Q. 
Bui and Junnan Li and Steven Hoi},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=uu6Oq7MN7g}\n}", "github": "", "project": "", "reviewers": "kUr4;Ke7W;MqQi", "site": "https://openreview.net/forum?id=uu6Oq7MN7g", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;3;4", "excitement": "4;4;4", "reproducibility": "4;4;2", "correctness": "4;4;4", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 4.0, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 4.0, "replies_avg": 9, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;", "linkedin": "yue-wang-37458795/;akhilesh-gotmare/;;;hungle2012;", "aff_unique_index": "0;0;0;1;0;0", "aff_unique_norm": "Salesforce;Singapore Management University", "aff_unique_dep": ";", "aff_unique_url": "https://www.salesforce.com;https://www.smu.edu.sg", "aff_unique_abbr": "Salesforce;SMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;0;0", "aff_country_unique": "United States;Singapore" }, { "id": "uuUQraD4XX", "title": "Large Language Models Can Self-Improve", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Large Language Models (LLMs) have achieved excellent performances in various tasks. However, fine-tuning an LLM requires extensive supervision. Humans, on the other hand, may improve their reasoning abilities by self-thinking without external inputs. In this work, we demonstrate that an LLM is also capable of self-improving with only unlabeled datasets.\nWe use a pre-trained LLM to generate ``high-confidence'' rationale-augmented answers for unlabeled questions using Chain-of-Thought (CoT) prompting and self-consistency, and fine-tune the LLM using those self-generated solutions as target outputs. We show that without any ground truth label, our approach improves the general reasoning ability of a 540B-parameter LLM (74.4\\%$\\rightarrow$82.1\\% on GSM8K, 90.0\\%$\\rightarrow$94.4\\% on OpenBookQA, and 63.4\\%$\\rightarrow$67.9\\% on ANLI-A3) and can also be adapted to extreme low-resource cases where even training questions and CoT prompts are limited. 
We conduct ablation studies and show that fine-tuning on diverse reasoning paths is critical for self-improvement.", "keywords": "Large Language Models;Commonsense Reasoning;Arithmetic Reasoning;Chain of Thought", "primary_area": "", "supplementary_material": "", "author": "Jiaxin Huang;Shixiang Shane Gu;Le Hou;Yuexin Wu;Xuezhi Wang;Hongkun Yu;Jiawei Han", "authorids": "~Jiaxin_Huang1;~Shixiang_Shane_Gu1;~Le_Hou1;~Yuexin_Wu1;~Xuezhi_Wang3;~Hongkun_Yu2;~Jiawei_Han1", "gender": "F;M;M;;M;M;M", "homepage": "https://teapot123.github.io/;http://vision.cs.stonybrook.edu/~lehhou/home/index.html;https://crickwu.github.io;https://research.google/people/105995/;;http://hanj.cs.illinois.edu/;https://sites.google.com/view/gugurus/home", "dblp": "187/2874-1;161/9892;09/1661;70/4090-2;;h/JiaweiHan.html;121/0550", "google_scholar": "DnxrVXgAAAAJ;kQ0HeQIAAAAJ;sd0nprMAAAAJ;ScLUQ-YAAAAJ;;https://scholar.google.com.tw/citations?user=Kv9AbjMAAAAJ;B8wslVsAAAAJ", "or_profile": "~Jiaxin_Huang1;~Le_Hou1;~Yuexin_Wu1;~Xuezhi_Wang3;~Hongkun_Yu2;~Jiawei_Han1;~Shixiang_Gu1", "aff": "University of Illinois, Urbana Champaign;Google Research;Google;Google DeepMind;;University of Illinois at Urbana-Champaign (UIUC);OpenAI", "aff_domain": "illinois.edu;google.com;google.com;google.com;;illinois.edu;openai.com", "position": "PhD student;Software Engineer;Software Engineer;Research Scientist;;Full Professor;Researcher", "bibtex": "@inproceedings{\nhuang2023large,\ntitle={Large Language Models Can Self-Improve},\nauthor={Jiaxin Huang and Shixiang Shane Gu and Le Hou and Yuexin Wu and Xuezhi Wang and Hongkun Yu and Jiawei Han},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=uuUQraD4XX}\n}", "github": "", "project": "", "reviewers": "g6YS;AVsp;p95e;jR8L", "site": "https://openreview.net/forum?id=uuUQraD4XX", "pdf_size": 0, "rating": "5;5;5;5", "confidence": "5;4;4;4", "excitement": "3;4;4;4", "reproducibility": "2;3;3;2", "correctness": "3;4;4;3", "rating_avg": 5.0, "confidence_avg": 4.25, "excitement_avg": 3.75, "reproducibility_avg": 2.5, "correctness_avg": 3.5, "replies_avg": 14, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0001-7323-5300;;;;0000-0002-3629-2696;", "linkedin": ";;;;;;", "aff_unique_index": "0;1;1;1;0;2", "aff_unique_norm": "University of Illinois Urbana-Champaign;Google;OpenAI", "aff_unique_dep": ";Google Research;", "aff_unique_url": "https://illinois.edu;https://research.google;https://openai.com", "aff_unique_abbr": "UIUC;Google Research;OpenAI", "aff_campus_unique_index": "0;1;1;0", "aff_campus_unique": "Urbana-Champaign;Mountain View;", "aff_country_unique_index": "0;0;0;1;0;0", "aff_country_unique": "United States;United Kingdom" }, { "id": "uvbbsn4l6y", "title": "Look-back Decoding for Open-Ended Text Generation", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Given a prefix (context), open-ended generation aims to decode texts that are coherent, which do not abruptly drift from previous topics, and informative, which do not suffer from undesired repetitions. In this paper, we propose Look-back, an improved decoding algorithm that leverages the Kullback\u2013Leibler divergence to track the distribution distance between current and historical decoding steps. 
Thus Look-back can automatically predict potential repetitive phrase and topic drift, and remove tokens that may cause the failure modes, restricting the next token probability distribution within a plausible distance to the history. We perform decoding experiments on document continuation and story generation, and demonstrate that Look-back is able to generate more fluent and coherent text, outperforming other strong decoding methods significantly in both automatic and human evaluations.", "keywords": "open-ended text generation;decoding;story generation;document continuation", "primary_area": "", "supplementary_material": "", "author": "Nan Xu;Chunting Zhou;Asli Celikyilmaz;Xuezhe Ma", "authorids": "~Nan_Xu2;~Chunting_Zhou1;~Asli_Celikyilmaz1;~Xuezhe_Ma1", "gender": "F;F;F;M", "homepage": "https://sites.google.com/site/xunannancy;https://violet-zct.github.io/;https://asli.us;https://xuezhemax.github.io/", "dblp": ";161/2679;15/3724;127/0230", "google_scholar": "https://scholar.google.co.uk/citations?hl=en;mR5W7EgAAAAJ;https://scholar.google.com/citations?hl=en;6_MQLIcAAAAJ", "or_profile": "~Nan_Xu2;~Chunting_Zhou1;~Asli_Celikyilmaz1;~Xuezhe_Ma1", "aff": "University of Southern California;Meta AI;FAIR ;USC/ISI", "aff_domain": "usc.edu;meta.com;meta.com;isi.edu", "position": "PhD student;Researcher;Principal Researcher;Assistant Professor", "bibtex": "@inproceedings{\nxu2023lookback,\ntitle={Look-back Decoding for Open-Ended Text Generation},\nauthor={Nan Xu and Chunting Zhou and Asli Celikyilmaz and Xuezhe Ma},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=uvbbsn4l6y}\n}", "github": "", "project": "", "reviewers": "7Euy;Ch7J;q9mD", "site": "https://openreview.net/forum?id=uvbbsn4l6y", "pdf_size": 0, "rating": "5;5;5", "confidence": "2;3;3", "excitement": "4;4;4", "reproducibility": "4;3;4", "correctness": "4;4;4", "rating_avg": 5.0, "confidence_avg": 2.6666666666666665, "excitement_avg": 4.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;", "linkedin": "https://linkedin.com/in/nan-xu-b52777125;;aslicelikyilmaz/;xuezhe-ma-b5354731", "aff_unique_index": "0;1;1;0", "aff_unique_norm": "University of Southern California;Meta", "aff_unique_dep": ";Meta AI", "aff_unique_url": "https://www.usc.edu;https://meta.com", "aff_unique_abbr": "USC;Meta", "aff_campus_unique_index": "0;2", "aff_campus_unique": "Los Angeles;;ISI", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "ux826WlJtt", "title": "DemoSG: Demonstration-enhanced Schema-guided Generation for Low-resource Event Extraction", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Most current Event Extraction (EE) methods focus on the high-resource scenario, which requires a large amount of annotated data and can hardly be applied to low-resource domains. To address EE more effectively with limited resources, we propose the Demonstration-enhanced Schema-guided Generation (DemoSG) model, which benefits low-resource EE from two aspects: Firstly, we propose the demonstration-based learning paradigm for EE to fully use the annotated data, which transforms them into demonstrations to illustrate the extraction process and help the model learn effectively. 
Secondly, we formulate EE as a natural language generation task guided by schema-based prompts, thereby leveraging label semantics and promoting knowledge transfer in low-resource scenarios. We conduct extensive experiments under in-domain and domain adaptation low-resource settings on three datasets, and study the robustness of DemoSG. The results show that DemoSG significantly outperforms current methods in low-resource scenarios.", "keywords": "Low-resource;Event Extraction;Few-shot;Domain Adaptation;Demonstration-based Learning", "primary_area": "", "supplementary_material": "", "author": "Gang Zhao;Xiaocheng Gong;Xinjie Yang;Guanting Dong;Shudong Lu;Si Li", "authorids": "~Gang_Zhao2;~Xiaocheng_Gong1;~Xinjie_Yang2;~Guanting_Dong1;~Shudong_Lu1;~Si_Li5", "gender": "M;M;F;M;M;", "homepage": ";;https://github.com/yangxinjie;https://dongguanting.github.io/;;http://www.pris.net.cn/introduction/teacher/lisi", "dblp": ";;;;;54/6603-1.html", "google_scholar": ";;;amozZDkAAAAJ;;", "or_profile": "~Gang_Zhao2;~Xiaocheng_Gong1;~Xinjie_Yang2;~Guanting_Dong1;~Shudong_Lu1;~Si_Li5", "aff": "Beijing University of Posts and Telecommunications;Beijing University of Posts and Telecommunications;Beijing University of Posts and Telecommunications;Beijing University of Posts and Telecommunications;Beijing University of Posts and Telecommunications;Beijing University of Posts and Telecommunications", "aff_domain": "bupt.edu.cn;bupt.edu.cn;bupt.edu.cn;bupt.edu.cn;bupt.edu.cn;bupt.edu.cn", "position": "MS student;MS student;MS student;MS student;PhD student;Associate Professor", "bibtex": "@inproceedings{\nzhao2023demosg,\ntitle={Demo{SG}: Demonstration-enhanced Schema-guided Generation for Low-resource Event Extraction},\nauthor={Gang Zhao and Xiaocheng Gong and Xinjie Yang and Guanting Dong and Shudong Lu and Si Li},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=ux826WlJtt}\n}", "github": "", "project": "", "reviewers": "99nK;7B7C;XB14", "site": "https://openreview.net/forum?id=ux826WlJtt", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;2", "excitement": "4;3;4", "reproducibility": "3;4;5", "correctness": "4;3;4", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.0, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0001-7965-8531;0009-0005-5028-2848;;;0000-0002-4214-186X;", "linkedin": ";;;;;", "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "Beijing University of Posts and Telecommunications", "aff_unique_dep": "", "aff_unique_url": "http://www.bupt.edu.cn/", "aff_unique_abbr": "BUPT", "aff_campus_unique_index": "0;0;0;0;0;0", "aff_campus_unique": "Beijing", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "uxzlH5bLrJ", "title": "Can Brain Signals Reveal Inner Alignment with Human Languages?", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "Brain Signals, such as Electroencephalography (EEG), and human languages have been widely explored independently for many downstream tasks, however, the connection between them has not been well explored. In this study, we explore the relationship and dependency between EEG and language. 
To study at the representation level, we introduced \\textbf{MTAM}, a \\textbf{M}ultimodal \\textbf{T}ransformer \\textbf{A}lignment \\textbf{M}odel, to observe coordinated representations between the two modalities. We used various relationship alignment-seeking techniques, such as Canonical Correlation Analysis and Wasserstein Distance, as loss functions to transfigure features. On downstream applications, sentiment analysis and relation detection, we achieved new state-of-the-art results on two datasets, ZuCo and K-EmoCon. Our method achieved an F1-score improvement of 1.7\\% on K-EmoCon and 9.3\\% on Zuco datasets for sentiment analysis, and 7.4\\% on ZuCo for relation detection. In addition, we provide interpretations of the performance improvement: (1) feature distribution shows the effectiveness of the alignment module for discovering and encoding the relationship between EEG and language; (2) alignment weights show the influence of different language semantics as well as EEG frequency features; (3) brain topographical maps provide an intuitive demonstration of the connectivity in the brain regions. Our code is available at \\url{https://github.com/Jason-Qiu/EEG_Language_Alignment}.", "keywords": "EEG;Human Languages;Inner Alignment;Interpretation", "primary_area": "", "supplementary_material": "", "author": "Jielin Qiu;William Han;Jiacheng Zhu;Mengdi Xu;Douglas J Weber;Bo Li;Ding Zhao", "authorids": "~Jielin_Qiu2;~William_Han1;~Jiacheng_Zhu1;~Mengdi_Xu3;~Douglas_J_Weber1;~Bo_Li19;~Ding_Zhao1", "gender": "M;M;M;F;M;F;", "homepage": "https://www.cs.cmu.edu/~jielinq/;https://willxxy.github.io/;https://jiachengzhuml.github.io/;https://mxu34.github.io/;https://www.meche.engineering.cmu.edu/directory/bios/weber-douglas.html;http://boli.cs.illinois.edu/;https://safeai-lab.github.io", "dblp": "313/2707;326/6668;40/10195;;;50/3402-26;", "google_scholar": "2khNwjoAAAAJ;https://scholar.google.com/citations?hl=en;rKUnBPgAAAAJ;https://scholar.google.com/citations?hl=zh-CN;;K8vJkTcAAAAJ;z7tPc9IAAAAJ", "or_profile": "~Jielin_Qiu2;~William_Han1;~Jiacheng_Zhu1;~Mengdi_Xu3;~Douglas_J_Weber1;~Bo_Li19;~Ding_Zhao1", "aff": "Computer Science Department, Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University;;University of Illinois, Urbana Champaign;Carnegie Mellon University", "aff_domain": "cs.cmu.edu;cmu.edu;andrew.cmu.edu;cmu.edu;;illinois.edu;cmu.edu", "position": "PhD student;PhD student;PhD student;PhD student;;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nqiu2023can,\ntitle={Can Brain Signals Reveal Inner Alignment with Human Languages?},\nauthor={Jielin Qiu and William Han and Jiacheng Zhu and Mengdi Xu and Douglas J Weber and Bo Li and Ding Zhao},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=uxzlH5bLrJ}\n}", "github": "", "project": "", "reviewers": "HtV4;VHS7;9CNo;fE1A", "site": "https://openreview.net/forum?id=uxzlH5bLrJ", "pdf_size": 0, "rating": "1;1;1;1", "confidence": "4;3;4;3", "excitement": "3;3;2;4", "reproducibility": "4;5;4;5", "correctness": "3;3;2;3", "rating_avg": 1.0, "confidence_avg": 3.5, "excitement_avg": 3.0, "reproducibility_avg": 4.5, "correctness_avg": 2.75, "replies_avg": 14, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0000-0001-9332-4175;;;", "linkedin": ";william-han327/;;;;;", "aff_unique_index": "0;0;0;0;1;0", "aff_unique_norm": "Carnegie Mellon 
University;University of Illinois Urbana-Champaign", "aff_unique_dep": "Computer Science Department;", "aff_unique_url": "https://www.cmu.edu;https://illinois.edu", "aff_unique_abbr": "CMU;UIUC", "aff_campus_unique_index": "1", "aff_campus_unique": ";Urbana-Champaign", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "uyUO80sbm0", "title": "Explain-then-translate: an analysis on improving program translation with self-generated explanations", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "This work explores the use of self-generated natural language explanations as an intermediate step for code-to-code translation with language models. Across three types of explanations and 19 programming languages constructed from the MultiPL-E dataset, we find the explanations to be particularly effective in the zero-shot case, improving performance by 12\\% on average. Improvements with natural language explanations are particularly pronounced on difficult programs. We release our dataset, code, and canonical solutions in all 19 languages.", "keywords": "code generation;machine translation;program translation;large language model;prompting;chain-of-thought", "primary_area": "", "supplementary_material": "", "author": "Zilu Tang;Mayank Agarwal;Alexander G Shypula;Bailin Wang;Derry Wijaya;Jie Chen;Yoon Kim", "authorids": "~Zilu_Tang3;~Mayank_Agarwal1;~Alexander_G_Shypula1;~Bailin_Wang3;~Derry_Wijaya1;~Jie_Chen1;~Yoon_Kim1", "gender": "M;M;F;;;M;M", "homepage": ";;https://derrywijaya.github.io/;https://jiechenjiechen.github.io;https://people.csail.mit.edu/yoonkim/;https://berlino.github.io/;https://pootiet.github.io/", "dblp": "38/5693;;https://dblp.org/pers/w/Wijaya:Derry;92/6289-7;;218/7334;266/2889", "google_scholar": ";;8lmWWD0AAAAJ;Z-lkme8AAAAJ;n_ts4eYAAAAJ;;E9g28XEAAAAJ", "or_profile": "~Mayank_Agarwal1;~Alexander_G_Shypula1;~Derry_Wijaya1;~Jie_Chen1;~Yoon_Kim1;~bailin_wang1;~Zilu_Tang1", "aff": "International Business Machines;University of Pennsylvania;Boston University;International Business Machines;Massachusetts Institute of Technology;Massachusetts Institute of Technology;Boston University, Boston University", "aff_domain": "ibm.com;seas.upenn.edu;bu.edu;ibm.com;mit.edu;mit.edu;bu.edu", "position": "Research Engineeer;PhD student;Assistant Professor;Research Staff Member;Assistant Professor;Postdoc;MS student", "bibtex": "@inproceedings{\ntang2023explainthentranslate,\ntitle={Explain-then-translate: an analysis on improving program translation with self-generated explanations},\nauthor={Zilu Tang and Mayank Agarwal and Alexander G Shypula and Bailin Wang and Derry Wijaya and Jie Chen and Yoon Kim},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=uyUO80sbm0}\n}", "github": "", "project": "", "reviewers": "XyFJ;CiwV;pfnB", "site": "https://openreview.net/forum?id=uyUO80sbm0", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;3;3", "excitement": "3;4;3", "reproducibility": "4;5;4", "correctness": "4;3;3", "rating_avg": 3.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.333333333333333, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0002-0848-4703;;;;", "linkedin": "https://linkedin.com/in/mayank312/;alexander-shypula-4831a281/;derry-wijaya-577b80178/;;;;peter-tang-83802495/", 
"aff_unique_index": "0;1;2;0;3;3;2", "aff_unique_norm": "International Business Machines Corporation;University of Pennsylvania;Boston University;Massachusetts Institute of Technology", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.ibm.com;https://www.upenn.edu;https://www.bu.edu;https://web.mit.edu", "aff_unique_abbr": "IBM;UPenn;BU;MIT", "aff_campus_unique_index": "1", "aff_campus_unique": ";Boston", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "uyl1O2LkAF", "title": "TaTA: A Multilingual Table-to-Text Dataset for African Languages", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Existing data-to-text generation datasets are mostly limited to English. To address this lack of data, we create Table-to-Text in African languages (TaTA), the first large multilingual table-to-text dataset with a focus on African languages. We created TaTA by transcribing figures and accompanying text in bilingual reports by the Demographic and Health Surveys Program, followed by professional translation to make the dataset fully parallel. TaTA includes 8,700 examples in nine languages including four African languages (Hausa, Igbo, Swahili, and Yor\u00f9b\u00e1) and a zero-shot test language (Russian). We additionally release screenshots of the original figures for future research on multilingual multi-modal approaches. Through an in-depth human evaluation, we show that TaTA is challenging for current models and that less than half the outputs from an mT5-XXL-based model are understandable and attributable to the source data. Our results highlight a) the need for validating metrics; and b) the importance of domain-specific metrics.", "keywords": "table-to-text;multilingual;NLG;African languages", "primary_area": "", "supplementary_material": "", "author": "Sebastian Gehrmann;Sebastian Ruder;Vitaly Nikolaev;Jan A. Botha;Michael Chavinda;Ankur P Parikh;Clara E. Rivera", "authorids": "~Sebastian_Gehrmann1;~Sebastian_Ruder2;~Vitaly_Nikolaev1;~Jan_A._Botha1;~Michael_Chavinda1;~Ankur_P_Parikh1;~Clara_E._Rivera1", "gender": "M;;M;M;M;;M", "homepage": "https://sebastiangehrmann.com;;http://bothameister.github.io/;https://mchav.github.io;;https://scholar.google.co.uk/citations?user=K7JKtEkAAAAJ&hl=en;http://sebastianruder.com/", "dblp": "131/1378;;98/11425;;80/8411;;186/7066", "google_scholar": "R401sNwAAAAJ;m2UQEwwAAAAJ;https://scholar.google.co.uk/citations?user=EklJdhYAAAAJ;;bRpjhycAAAAJ;https://scholar.google.co.uk/citations?user=K7JKtEkAAAAJ;https://scholar.google.de/citations?user=8ONXPV8AAAAJ", "or_profile": "~Sebastian_Gehrmann1;~Vitaly_Nikolaev1;~Jan_A._Botha1;~Michael_Chavinda1;~Ankur_P_Parikh1;~Clara_E._Rivera1;~Sebastian_Ruder1", "aff": "Bloomberg;Research, Google;Google;;Google;Research, Google;Google", "aff_domain": "bloomberg.com;research.google.com;google.com;;google.com;research.google.com;google.com", "position": "Researcher;Researcher;Researcher;;Research Scientist;Researcher;Research scientist", "bibtex": "@inproceedings{\ngehrmann2023tata,\ntitle={Ta{TA}: A Multilingual Table-to-Text Dataset for African Languages},\nauthor={Sebastian Gehrmann and Sebastian Ruder and Vitaly Nikolaev and Jan A. Botha and Michael Chavinda and Ankur P Parikh and Clara E. 
Rivera},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=uyl1O2LkAF}\n}", "github": "", "project": "", "reviewers": "KJRS;qzSY;RmMM", "site": "https://openreview.net/forum?id=uyl1O2LkAF", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;1;3", "excitement": "3;4;3", "reproducibility": "2;4;3", "correctness": "3;4;3", "rating_avg": 3.0, "confidence_avg": 2.3333333333333335, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;;", "linkedin": ";;;;ankur-parikh-2a240979;;sebastianruder", "aff_unique_index": "0;1;1;1;1;1", "aff_unique_norm": "Bloomberg;Google", "aff_unique_dep": ";Google Research", "aff_unique_url": "https://www.bloomberg.com;https://research.google", "aff_unique_abbr": "Bloomberg;Google", "aff_campus_unique_index": "1;1;1;1;1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "uz4OrlHDA8", "title": "Predict the Future from the Past? On the Temporal Data Distribution Shift in Financial Sentiment Classifications", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Temporal data distribution shift is prevalent in the financial text. How can a financial sentiment analysis system be trained in a volatile market environment that can accurately infer sentiment and be robust to temporal data distribution shifts? In this paper, we conduct an empirical study on the financial sentiment analysis system under temporal data distribution shifts using a real-world financial social media dataset that spans three years. We find that the fine-tuned models suffer from general performance degradation in the presence of temporal distribution shifts. Furthermore, motivated by the unique temporal nature of the financial text, we propose a novel method that combines out-of-distribution detection with time series modeling for temporal financial sentiment analysis. Experimental results show that the proposed method enhances the model's capability to adapt to evolving temporal shifts in a volatile financial market.", "keywords": "Distribution Shift;Financial Sentiment Classifications;Language Models Robustness", "primary_area": "", "supplementary_material": "", "author": "Yue Guo;Chenxi Hu;Yi Yang", "authorids": "~Yue_Guo4;~Chenxi_Hu2;~Yi_Yang7", "gender": "F;;M", "homepage": "https://irenehere.github.io/;http://yya518.github.io/;", "dblp": ";;", "google_scholar": "ZhBvjJUAAAAJ;https://scholar.google.com.hk/citations?user=Prh_dHkAAAAJ;", "or_profile": "~Yue_Guo4;~Yi_Yang7;~Bruce_HU1", "aff": "Hong Kong University of Science and Technology;Hong Kong University of Science and Technology;Hong Kong University of Science and Technology", "aff_domain": "ust.hk;ust.hk;ust.hk", "position": "PhD student;Assistant Professor;Undergrad student", "bibtex": "@inproceedings{\nguo2023predict,\ntitle={Predict the Future from the Past? 
On the Temporal Data Distribution Shift in Financial Sentiment Classifications},\nauthor={Yue Guo and Chenxi Hu and Yi Yang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=uz4OrlHDA8}\n}", "github": "", "project": "", "reviewers": "EjqB;mkMy;MD99", "site": "https://openreview.net/forum?id=uz4OrlHDA8", "pdf_size": 0, "rating": "5;5;5", "confidence": "3;3;2", "excitement": "3;4;4", "reproducibility": "3;4;2", "correctness": "3;4;3", "rating_avg": 5.0, "confidence_avg": 2.6666666666666665, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0001-8603-8904;0000-0001-8863-112X;", "linkedin": ";;bruce-hu-4172181b7", "aff_unique_index": "0;0;0", "aff_unique_norm": "Hong Kong University of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.ust.hk", "aff_unique_abbr": "HKUST", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Hong Kong SAR", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "uz89EXE540", "title": "Self-Supervised Rule Learning to Link Text Segments to Relational Elements of Structured Knowledge", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "We present a neuro-symbolic approach to self-learn rules that serve as interpretable knowledge to perform relation linking in knowledge base question answering systems. These rules define natural language text predicates as a weighted mixture of knowledge base paths. The weights learned during training effectively serve the mapping needed to perform relation linking. We use popular masked training strategy to self-learn the rules. A key distinguishing aspect of our work is that the masked training operate over logical forms of the sentence instead of their natural language text form. This offers opportunity to extract extended context information from the structured knowledge source and use that to build robust and human readable rules. We evaluate accuracy and usefulness of such learned rules by utilizing them for prediction of missing kinship relation in CLUTRR dataset and relation linking in a KBQA system using SWQ-WD dataset. Results demonstrate the effectiveness of our approach - its generalizability, interpretability and ability to achieve an average performance gain of 17% on CLUTRR dataset.", "keywords": "self-supervised learning;rule learning;relation linking;question answering", "primary_area": "", "supplementary_material": "", "author": "Shajith Ikbal;Udit Sharma;Hima Karanam;Sumit Neelam;Ronny Luss;Dheeraj Sreedhar;Pavan Kapanipathi;Naweed Khan;Kyle Erwin;Ndivhuwo Makondo;Ibrahim Abdelaziz;Achille Fokoue;Alexander G. 
Gray;Maxwell Crouse;Subhajit Chaudhury;Chitra K Subramanian", "authorids": "~Shajith_Ikbal1;~Udit_Sharma1;~Hima_Karanam1;~Sumit_Neelam1;~Ronny_Luss1;~Dheeraj_Sreedhar1;~Pavan_Kapanipathi1;~Naweed_Khan1;~Kyle_Erwin1;~Ndivhuwo_Makondo1;~Ibrahim_Abdelaziz2;~Achille_Fokoue1;~Alexander_G._Gray1;~Maxwell_Crouse1;~Subhajit_Chaudhury1;~Chitra_K_Subramanian1", "gender": "M;M;M;M;;M;M;M;M;M;M;M;;M;;", "homepage": "https://sites.google.com/site/shajithikbal/;;;;;;https://researcher.watson.ibm.com/researcher/view.php?person=us-kapanipa;;;https://researcher.watson.ibm.com/researcher/view.php?person=ibm-Ndivhuwo.Makondo;https://researcher.watson.ibm.com/researcher/view.php?person=us-achille;;;https://subhajitchaudhury.github.io/;https://researcher.watson.ibm.com/researcher/view.php?person=us-cksubram;", "dblp": "66/4370;96/892;230/8590;149/5874.html;80/75;;27/8503;234/1513.html;;173/7534;13/2150;85/110.html;218/7208;http://dblp2.uni-trier.de/pers/hd/c/Chaudhury:Subhajit;279/2973;153/1958.html", "google_scholar": "yYN1agcAAAAJ;dkEtWZwAAAAJ;8HycjgoAAAAJ;K_mOIxsAAAAJ;lBPWZdAAAAAJ;tg910CkAAAAJ;ZnHk2x8AAAAJ;CXaTRZUAAAAJ;BV2U-GQAAAAJ;https://scholar.google.co.uk/citations?user=A-zYKGYAAAAJ;sf_8K8gAAAAJ;https://scholar.google.com/citations?hl=en;dagUw4MAAAAJ;https://scholar.google.co.jp/citations?user=EBTpFrQAAAAJ;;_kKCBlYAAAAJ", "or_profile": "~Shajith_Ikbal1;~Udit_Sharma1;~Hima_Karanam1;~Sumit_Neelam1;~Ronny_Luss1;~Dheeraj_Sreedhar1;~Pavan_Kapanipathi1;~Naweed_Khan1;~Kyle_Erwin1;~Ndivhuwo_Makondo1;~Achille_Fokoue1;~Alexander_G._Gray1;~Maxwell_Crouse1;~Subhajit_Chaudhury1;~Chitra_K_Subramanian1;~Ibrahim_Abdelaziz1", "aff": "IBM Research AI, India;International Business Machines;International Business Machines;International Business Machines;IBM;International Business Machines;International Business Machines;International Business Machines;International Business Machines;University of the Witwatersrand;International Business Machines;International Business Machines;International Business Machines;International Business Machines;International Business Machines;International Business Machines (IBM)", "aff_domain": "ibm.com;ibm.com;ibm.com;ibm.com;us.ibm.com;ibm.com;ibm.com;ibm.com;ibm.com;wits.ac.za;ibm.com;ibm.com;ibm.com;ibm.com;ibm.com;ibm.com", "position": "Senior Research Scientist;Software Engineer;Researcher;Researcher;Research Scientist;Researcher;Principal Researcher;Researcher;Researcher;Postdoc;Principal Researcher;VP, Foundations of AI;Researcher;Research Scientist;Researcher;Researcher", "bibtex": "@inproceedings{\nikbal2023selfsupervised,\ntitle={Self-Supervised Rule Learning to Link Text Segments to Relational Elements of Structured Knowledge},\nauthor={Shajith Ikbal and Udit Sharma and Hima Karanam and Sumit Neelam and Ronny Luss and Dheeraj Sreedhar and Pavan Kapanipathi and Naweed Khan and Kyle Erwin and Ndivhuwo Makondo and Ibrahim Abdelaziz and Achille Fokoue and Alexander G. 
Gray and Maxwell Crouse and Subhajit Chaudhury and Chitra K Subramanian},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=uz89EXE540}\n}", "github": "", "project": "", "reviewers": "aFLf;CrYn;d5MH", "site": "https://openreview.net/forum?id=uz89EXE540", "pdf_size": 0, "rating": "2;2;2", "confidence": "3;2;3", "excitement": "3;4;4", "reproducibility": "3;1;3", "correctness": "3;2;2", "rating_avg": 2.0, "confidence_avg": 2.6666666666666665, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 2.3333333333333335, "correctness_avg": 2.3333333333333335, "replies_avg": 11, "authors#_avg": 16, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;;;0000-0002-5525-1128;;0000-0002-4147-3328;;0000-0003-0337-7359;;;0009-0009-8745-9693;", "linkedin": "shajithikbal/;uditsharma7/;;;;;;naweedaghmad/;;makondond/;;alexander-gray-b554b64/;;subhajit-chaudhury-24955455/;;", "aff_unique_index": "0;1;1;1;1;1;1;1;1;2;1;1;1;1;1;3", "aff_unique_norm": "IBM;International Business Machines Corporation;University of the Witwatersrand;International Business Machines", "aff_unique_dep": "AI;;;", "aff_unique_url": "https://www.ibm.com/research;https://www.ibm.com;https://www.wits.ac.za;https://www.ibm.com", "aff_unique_abbr": "IBM;IBM;Wits;IBM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1;1;1;1;1;1;2;1;1;1;1;1;1", "aff_country_unique": "India;United States;South Africa" }, { "id": "v15z3FzZGu", "title": "Adapter Pruning using Tropical Characterization", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "Adapters are widely popular parameter-efficient transfer learning approaches in natural language processing that insert trainable modules in between layers of a pre-trained language model. Apart from several heuristics, however, there has been a lack of studies analyzing the optimal number of adapter parameters needed for downstream applications. Thus, we propose an adapter pruning approach by studying the tropical characteristics of trainable modules. We cast it as an optimization problem that aims to prune parameters from the adapter layers without changing the orientation of underlying tropical hypersurfaces. 
Our experiments on five NLP datasets show that tropical geometry tends to identify more relevant parameters to prune when compared with the magnitude-based baseline, while a combined approach works best across the tasks.", "keywords": "Pruning;Adapters;Parameter Efficient Transfer Learning;Domain Adaptation", "primary_area": "", "supplementary_material": "", "author": "Rishabh Bhardwaj;Tushar Vaidya;Soujanya Poria", "authorids": "~Rishabh_Bhardwaj1;~Tushar_Vaidya1;~Soujanya_Poria1", "gender": "M;M;M", "homepage": "https://www.rishabh.ai/;https://sites.google.com/view/tusharvfm;https://soujanyaporia.github.io", "dblp": "245/1413.html;199/1756.html;116/4904", "google_scholar": "nomHn1sAAAAJ;shwb3OMAAAAJ;https://scholar.google.co.in/citations?user=oS6gRc4AAAAJ", "or_profile": "~Rishabh_Bhardwaj1;~Tushar_Vaidya1;~Soujanya_Poria1", "aff": "Singapore University of Technology and Design;Nanyang Technological University;Singapore University of Technology and Design", "aff_domain": "sutd.edu.sg;ntu.edu.sg;sutd.edu.sg", "position": "PhD student;Researcher;Associate Professor", "bibtex": "@inproceedings{\nbhardwaj2023adapter,\ntitle={Adapter Pruning using Tropical Characterization},\nauthor={Rishabh Bhardwaj and Tushar Vaidya and Soujanya Poria},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=v15z3FzZGu}\n}", "github": "", "project": "", "reviewers": "pb9m;rtfk;28EZ;2vRX", "site": "https://openreview.net/forum?id=v15z3FzZGu", "pdf_size": 0, "rating": "2;2;2;2", "confidence": "4;4;3;2", "excitement": "3;3;4;4", "reproducibility": "3;3;3;3", "correctness": "3;4;3;3", "rating_avg": 2.0, "confidence_avg": 3.25, "excitement_avg": 3.5, "reproducibility_avg": 3.0, "correctness_avg": 3.25, "replies_avg": 13, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0003-3833-4754;0000-0002-2264-2595;", "linkedin": "rishabh-bhardwaj-nlp/;tushar-vaidya-730a9713/;", "aff_unique_index": "0;1;0", "aff_unique_norm": "Singapore University of Technology and Design;Nanyang Technological University", "aff_unique_dep": ";", "aff_unique_url": "https://www.sutd.edu.sg;https://www.ntu.edu.sg", "aff_unique_abbr": "SUTD;NTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Singapore" }, { "id": "v2wbkddf52", "title": "Quality Estimation-Assisted Automatic Post-Editing", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Automatic Post-Editing (APE) systems are prone to over-correction of the Machine Translation (MT) outputs. While Word-level Quality Estimation (QE) system can provide a way to curtail the over-correction, a significant performance gain has not been observed thus far by utilizing existing APE and QE combination strategies. In this paper, we propose joint training of a model on APE and QE tasks to improve the APE. Our proposed approach utilizes a multi-task learning (MTL) methodology, which shows significant improvement while treating both tasks as a 'bargaining game' during training. Moreover, we investigate various existing combination strategies and show that our approach achieves state-of-the-art performance for a 'distant' language pair, viz., English-Marathi. We observe an improvement of 1.09 TER and 1.37 BLEU points over a baseline QE-Unassisted APE system for English-Marathi, while also observing 0.46 TER and 0.62 BLEU points for English-German. 
Further, we discuss the results qualitatively and show how our approach helps reduce over-correction, thereby improving the APE performance. We also observe that the degree of integration between QE and APE directly correlates with the APE performance gain. We release our code and models publicly.", "keywords": "Automaric Post-Editing;Quality Estimation;Multi-task Learning", "primary_area": "", "supplementary_material": "", "author": "Sourabh Dattatray Deoghare;Diptesh Kanojia;Fred Blain;Tharindu Ranasinghe;Pushpak Bhattacharyya", "authorids": "~Sourabh_Dattatray_Deoghare1;~Diptesh_Kanojia1;~Fred_Blain1;~Tharindu_Ranasinghe1;~Pushpak_Bhattacharyya1", "gender": "M;M;M;M;M", "homepage": ";http://dipteshkanojia.github.io;https://tharindu.co.uk/;https://www.cse.iitb.ac.in/~pb/;https://fredblain.org", "dblp": "299/7314;127/0183;242/4755;p/PushpakBhattacharyya;97/11423", "google_scholar": "BpqJ8mYAAAAJ;https://scholar.google.co.in/citations?user=UNCgCAEAAAAJ;https://scholar.google.co.uk/citations?user=9t7WhIIAAAAJ;https://scholar.google.com.tw/citations?user=vvg-pAkAAAAJ;https://scholar.google.fr/citations?user=sgBANaQAAAAJ", "or_profile": "~Sourabh_Dattatray_Deoghare1;~Diptesh_Kanojia1;~Tharindu_Ranasinghe1;~Pushpak_Bhattacharyya1;~Frederic_Blain1", "aff": "Indian Institute of Technology, Bombay;University of Surrey;Aston University;Indian Institute of Technology, Bombay, Dhirubhai Ambani Institute Of Information and Communication Technology;Tilburg University", "aff_domain": "iitb.ac.in;surrey.ac.uk;aston.ac.uk;iitb.ac.in;tilburguniversity.edu", "position": "PhD student;Lecturer;Lecturer;Full Professor;Assistant Professor", "bibtex": "@inproceedings{\ndeoghare2023quality,\ntitle={Quality Estimation-Assisted Automatic Post-Editing},\nauthor={Sourabh Dattatray Deoghare and Diptesh Kanojia and Fred Blain and Tharindu Ranasinghe and Pushpak Bhattacharyya},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=v2wbkddf52}\n}", "github": "", "project": "", "reviewers": "aQPt;WKNZ;R7xZ", "site": "https://openreview.net/forum?id=v2wbkddf52", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;2;4", "excitement": "3;4;3", "reproducibility": "4;3;4", "correctness": "4;4;3", "rating_avg": 3.0, "confidence_avg": 3.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0001-8814-0080;0000-0003-3207-3821;;0000-0003-3017-3722", "linkedin": ";dipteshkanojia/;tharinduranasinghe/;pushpakbh/?originalSubdomain=in;https://linkedin.com/in/fredblain", "aff_unique_index": "0;1;2;3;4", "aff_unique_norm": "Indian Institute of Technology Bombay;University of Surrey;Aston University;Indian Institute of Technology, Bombay;Tilburg University", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.iitb.ac.in;https://www.surrey.ac.uk;https://www.aston.ac.uk;https://www.iitb.ac.in;https://www.tilburguniversity.edu/", "aff_unique_abbr": "IIT Bombay;Surrey;Aston;IIT Bombay;Tilburg U", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Bombay;", "aff_country_unique_index": "0;1;1;0;2", "aff_country_unique": "India;United Kingdom;Netherlands" }, { "id": "v6VbokqzvP", "title": "R$^3$ Prompting: Review, Rephrase and Resolve for Chain-of-Thought Reasoning in Large Language Models under Noisy Context", "track": "main", "status": "Long Findings", "tldr": "", 
"abstract": "With the help of Chain-of-Thought (CoT) prompting, Large Language Models (LLMs) have achieved remarkable performance on various reasoning tasks.\nHowever, most of them have been evaluated under noise-free context and the dilemma for LLMs to produce inaccurate results under the noisy context has not been fully investigated.\nExisting studies utilize trigger sentences to encourage LLMs to concentrate on the relevant information but the trigger has limited effect on final answer prediction.\nInspired by interactive CoT method, where intermediate reasoning steps are promoted by multiple rounds of interaction between users and LLMs, we propose a novel prompting method, namely R$^3$ prompting, for CoT reasoning under noisy context.\nSpecifically, R$^3$ prompting interacts with LLMs to perform key sentence extraction, variable declaration and answer prediction, which corresponds to a thought process of reviewing, rephrasing and resolving.\nThe responses generated at the last interaction will perform as hints to guide toward the responses of the next interaction.\nOur experiments show that R$^3$ prompting significantly outperforms existing CoT prompting methods on five reasoning tasks under noisy context.\nWith GPT-3.5-turbo, we observe 3.7\\% accuracy improvement on average on the reasoning tasks under noisy context compared to the most competitive prompting baseline.\nMore analyses and ablation studies show the robustness and generalization of R$^3$ prompting method in solving reasoning tasks in LLMs under noisy context.", "keywords": "Chain-of-Thought;Large Language Models;Arithmetic Reasoning;Prompt Learning", "primary_area": "", "supplementary_material": "", "author": "Qingyuan Tian;Hanlun Zhu;Lei Wang;Yang Li;Yunshi Lan", "authorids": "~Qingyuan_Tian1;~Hanlun_Zhu1;~Lei_Wang28;~Yang_Li45;~Yunshi_Lan1", "gender": "M;M;M;M;F", "homepage": "https://mattian7.github.io/about/;https://github.com/timberflow;https://demoleiwang.github.io/HomePage/;;https://lanyunshi.github.io", "dblp": ";359/0727;;;185/6830.html", "google_scholar": ";-yKPeckAAAAJ;VidA02oAAAAJ;AeCTbv8AAAAJ;Q0F92XIAAAAJ", "or_profile": "~Qingyuan_Tian1;~Hanlun_Zhu1;~Lei_Wang28;~Yang_Li45;~Yunshi_Lan1", "aff": ";East China Normal University;Singapore Management University;Alibaba Group;East China Normal University", "aff_domain": ";stu.ecnu.edu.cn;smu.edu.sg;alibaba-inc.com;ecnu.edu.cn", "position": ";Undergrad student;PhD student;Researcher;Associate Professor", "bibtex": "@inproceedings{\ntian2023r,\ntitle={R\\${\\textasciicircum}3\\$ Prompting: Review, Rephrase and Resolve for Chain-of-Thought Reasoning in Large Language Models under Noisy Context},\nauthor={Qingyuan Tian and Hanlun Zhu and Lei Wang and Yang Li and Yunshi Lan},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=v6VbokqzvP}\n}", "github": "", "project": "", "reviewers": "FVdJ;56UP;k3pf", "site": "https://openreview.net/forum?id=v6VbokqzvP", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;3;3", "excitement": "3;2;3", "reproducibility": "5;4;4", "correctness": "3;4;4", "rating_avg": 3.0, "confidence_avg": 3.0, "excitement_avg": 2.6666666666666665, "reproducibility_avg": 4.333333333333333, "correctness_avg": 3.6666666666666665, "replies_avg": 10, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;0000-0002-0192-8498", "linkedin": ";;;;", "aff_unique_index": "0;1;2;0", "aff_unique_norm": "East China Normal University;Singapore 
Management University;Alibaba Group", "aff_unique_dep": ";;", "aff_unique_url": "http://www.ecnu.edu.cn;https://www.smu.edu.sg;https://www.alibaba.com", "aff_unique_abbr": "ECNU;SMU;Alibaba", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "China;Singapore" }, { "id": "v6hcCtzAWz", "title": "Simple Hardware-Efficient PCFGs with Independent Left and Right Productions", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "Scaling dense PCFGs to thousands of nonterminals via low-rank parameterizations of the rule probability tensor has been shown to be beneficial for unsupervised parsing. However, PCFGs scaled this way still perform poorly as a language model, and even underperform similarly-sized HMMs. This work introduces $\\emph{SimplePCFG}$, a simple PCFG formalism with independent left and right productions. Despite imposing a stronger independence assumption than the low-rank approach, we find that this formalism scales more effectively both as a language model and as an unsupervised parser. We further introduce $\\emph{FlashInside}$, a hardware IO-aware implementation of the inside algorithm for efficiently scaling simple PCFGs. Through extensive experiments on multiple grammar induction benchmarks, we validate the effectiveness of simple PCFGs over low-rank baselines.", "keywords": "grammar induction;unsupervised parsing;latent variable models", "primary_area": "", "supplementary_material": "", "author": "Wei Liu;Songlin Yang;Yoon Kim;Kewei Tu", "authorids": "~Wei_Liu25;~Songlin_Yang1;~Yoon_Kim1;~Kewei_Tu1", "gender": "M;F;;M", "homepage": "https://vpeterv.github.io/;https://sustcsonglin.github.io;https://people.csail.mit.edu/yoonkim/;https://faculty.sist.shanghaitech.edu.cn/faculty/tukw/", "dblp": "49/3283-131;;;22/918", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;1chlis0AAAAJ;n_ts4eYAAAAJ;5gi3Pm0AAAAJ", "or_profile": "~Wei_Liu25;~Songlin_Yang1;~Yoon_Kim1;~Kewei_Tu1", "aff": "ShanghaiTech University;ShanghaiTech University;Massachusetts Institute of Technology;ShanghaiTech University", "aff_domain": "shanghaitech.edu.cn;shanghaitech.edu.cn;mit.edu;shanghaitech.edu.cn", "position": "MS student;MS student;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nliu2023simple,\ntitle={Simple Hardware-Efficient {PCFG}s with Independent Left and Right Productions},\nauthor={Wei Liu and Songlin Yang and Yoon Kim and Kewei Tu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=v6hcCtzAWz}\n}", "github": "", "project": "", "reviewers": "ZpL2;KFL6;vYhR", "site": "https://openreview.net/forum?id=v6hcCtzAWz", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;3;3", "excitement": "2;4;2", "reproducibility": "4;4;4", "correctness": "3;4;3", "rating_avg": 3.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 2.6666666666666665, "reproducibility_avg": 4.0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0003-2195-2310;;;", "linkedin": ";;;", "aff_unique_index": "0;0;1;0", "aff_unique_norm": "ShanghaiTech University;Massachusetts Institute of Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.shanghaitech.edu.cn;https://web.mit.edu", "aff_unique_abbr": "ShanghaiTech;MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0", 
"aff_country_unique": "China;United States" }, { "id": "v6iM1bO78t", "title": "Incorporating Worker Perspectives into MTurk Annotation Practices for NLP", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Current practices regarding data collection for natural language processing on Amazon Mechanical Turk (MTurk) often rely on a combination of studies on data quality and heuristics shared among NLP researchers. However, without considering the perspectives of MTurk workers, these approaches are susceptible to issues regarding workers' rights and poor response quality. We conducted a critical literature review and a survey of MTurk workers aimed at addressing open questions regarding best practices for fair payment, worker privacy, data quality, and considering worker incentives. We found that worker preferences are often at odds with received wisdom among NLP researchers. Surveyed workers preferred reliable, reasonable payments over uncertain, very high payments; reported frequently lying on demographic questions; and expressed frustration at having work rejected with no explanation. We also found that workers view some quality control methods, such as requiring minimum response times or Master's qualifications, as biased and largely ineffective. Based on the survey results, we provide recommendations on how future NLP studies may better account for MTurk workers' experiences in order to respect workers' rights and improve data quality.", "keywords": "data collection;annotators;mechanical turk;AI ethics;data quality", "primary_area": "", "supplementary_material": "", "author": "Olivia Huang;Eve Fleisig;Dan Klein", "authorids": "~Olivia_Huang1;~Eve_Fleisig1;~Dan_Klein1", "gender": "F;F;", "homepage": ";https://www.efleisig.com;http://people.eecs.berkeley.edu/~klein/", "dblp": ";276/0223;", "google_scholar": ";NHlxXzwAAAAJ;", "or_profile": "~Olivia_Huang1;~Eve_Fleisig1;~Dan_Klein1", "aff": "University of California, Berkeley;University of California, Berkeley;University of California, Berkeley", "aff_domain": "berkeley.edu;berkeley.edu;berkeley.edu", "position": "Undergrad student;PhD student;Full Professor", "bibtex": "@inproceedings{\nhuang2023incorporating,\ntitle={Incorporating Worker Perspectives into {MT}urk Annotation Practices for {NLP}},\nauthor={Olivia Huang and Eve Fleisig and Dan Klein},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=v6iM1bO78t}\n}", "github": "", "project": "", "reviewers": "sUkx;NbBA;PCxw", "site": "https://openreview.net/forum?id=v6iM1bO78t", "pdf_size": 0, "rating": "5;5;5", "confidence": "2;1;4", "excitement": "3;3;4", "reproducibility": "3;0;5", "correctness": "3;3;5", "rating_avg": 5.0, "confidence_avg": 2.3333333333333335, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 2.6666666666666665, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": "olivia-huang-30/;eve-fleisig/;dan-klein/", "aff_unique_index": "0;0;0", "aff_unique_norm": "University of California, Berkeley", "aff_unique_dep": "", "aff_unique_url": "https://www.berkeley.edu", "aff_unique_abbr": "UC Berkeley", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Berkeley", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "v7JgI9dny2", "title": "Simpler neural networks prefer subregular languages", "track": "main", "status": 
"Long Findings", "tldr": "", "abstract": "We apply a continuous relaxation of $L_0$ regularization (Louizos et al., 2017), which induces sparsity, to study the inductive biases of LSTMs. In particular, we are interested in the patterns of formal languages which are readily learned and expressed by LSTMs. Across a wide range of tests we find sparse LSTMs prefer subregular languages over regular languages and the strength of this preference increases as we increase the pressure for sparsity. Furthermore LSTMs which are trained on subregular languages have fewer non-zero parameters. We conjecture that this subregular bias in LSTMs is related to the cognitive bias for subregular language observed in human phonology which are both downstream of a simplicity bias in a suitable description language.", "keywords": "subregularity;formal language theory;minimum description length;inductive biases", "primary_area": "", "supplementary_material": "", "author": "Charles John Torres;Richard Futrell", "authorids": "~Charles_John_Torres1;~Richard_Futrell2", "gender": "M;Not Specified", "homepage": "https://www.charles-torres.com;http://socsci.uci.edu/~rfutrell", "dblp": ";169/3172", "google_scholar": "fDE8LUcAAAAJ;BzI4ynUAAAAJ", "or_profile": "~Charles_John_Torres1;~Richard_Futrell2", "aff": "University of California, Irvine;University of California, Irvine", "aff_domain": "uci.edu;uci.edu", "position": "PhD student;Associate Professor", "bibtex": "@inproceedings{\ntorres2023simpler,\ntitle={Simpler neural networks prefer subregular languages},\nauthor={Charles John Torres and Richard Futrell},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=v7JgI9dny2}\n}", "github": "", "project": "", "reviewers": "eZgg;ABqy;Vqqg", "site": "https://openreview.net/forum?id=v7JgI9dny2", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;3;2", "excitement": "4;4;4", "reproducibility": "4;4;4", "correctness": "4;3;4", "rating_avg": 4.0, "confidence_avg": 2.6666666666666665, "excitement_avg": 4.0, "reproducibility_avg": 4.0, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";", "linkedin": ";", "aff_unique_index": "0;0", "aff_unique_norm": "University of California, Irvine", "aff_unique_dep": "", "aff_unique_url": "https://www.uci.edu", "aff_unique_abbr": "UCI", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Irvine", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "v8fRIzqeob", "title": "Exploring the Cognitive Knowledge Structure of Large Language Models: An Educational Diagnostic Assessment Approach", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "Large Language Models (LLMs) have not only exhibited exceptional performance across various tasks, but also demonstrated sparks of intelligence. Recent studies have focused on assessing their capabilities on human exams and revealed their impressive competence in different domains. However, cognitive research on the overall knowledge structure of LLMs is still lacking. In this paper, based on educational diagnostic assessment method, we conduct an evaluation using MoocRadar, a meticulously annotated human test dataset based on Bloom Taxonomy. We aim to reveal the knowledge structures of LLMs and gain insights of their cognitive capabilities. 
This research emphasizes the significance of investigating LLMs' knowledge and understanding the disparate cognitive patterns of LLMs. By shedding light on models' knowledge, researchers can advance development and utilization of LLMs in a more informed and effective manner.", "keywords": "large language models;diagnostic assessment;knowledge structure", "primary_area": "", "supplementary_material": "", "author": "Zheyuan Zhang;Jifan Yu;Juanzi Li;Lei Hou", "authorids": "~Zheyuan_Zhang3;~Jifan_Yu2;~Juanzi_Li1;~Lei_Hou2", "gender": "M;M;;M", "homepage": "https://sparrowzheyuan18.github.io/;https://yujifan0326.github.io/;;https://www.cs.tsinghua.edu.cn/csen/info/1305/4466.htm", "dblp": ";239/6130.html;;32/5685-1", "google_scholar": "https://scholar.google.com/citations?hl=en;https://scholar.google.com.tw/citations?hl=zh-CN;;YnIq4hsAAAAJ", "or_profile": "~Zheyuan_Zhang3;~Jifan_Yu2;~Juanzi_Li1;~Lei_Hou2", "aff": "Tsinghua University;;;Tsinghua University", "aff_domain": "mails.tsinghua.edu.cn;;;tsinghua.edu.cn", "position": "MS student;;;Assistant Professor", "bibtex": "@inproceedings{\nzhang2023exploring,\ntitle={Exploring the Cognitive Knowledge Structure of Large Language Models: An Educational Diagnostic Assessment Approach},\nauthor={Zheyuan Zhang and Jifan Yu and Juanzi Li and Lei Hou},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=v8fRIzqeob}\n}", "github": "", "project": "", "reviewers": "rWPM;Hzpw;XMhD", "site": "https://openreview.net/forum?id=v8fRIzqeob", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;3;3", "excitement": "4;4;4", "reproducibility": "3;3;3", "correctness": "3;3;4", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 4.0, "reproducibility_avg": 3.0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0003-3471-0572;0000-0003-3430-4048;;0000-0002-8907-3526", "linkedin": ";;;", "aff_unique_index": "0;0", "aff_unique_norm": "Tsinghua University", "aff_unique_dep": "", "aff_unique_url": "https://www.tsinghua.edu.cn", "aff_unique_abbr": "THU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "id": "v9CVjuNlDI", "title": "Breaking Boundaries in Retrieval Systems: Unsupervised Domain Adaptation with Denoise-Finetuning", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Dense retrieval models have exhibited remarkable effectiveness, but they rely on abundant labeled data and face challenges when applied to different domains. Previous domain adaptation methods have employed generative models to generate pseudo queries, creating pseudo datasets to enhance the performance of dense retrieval models. However, these approaches typically use unadapted rerank models, leading to potentially imprecise labels. In this paper, we demonstrate the significance of adapting the rerank model to the target domain prior to utilizing it for label generation. This adaptation process enables us to obtain more accurate labels, thereby improving the overall performance of the dense retrieval model. Additionally, by combining the adapted retrieval model with the adapted rerank model, we achieve significantly better domain adaptation results across three retrieval datasets. 
We release our code for future research.", "keywords": "Information Retrieval;Domain adaptation;Denoise-finetuning;Unsupervised", "primary_area": "", "supplementary_material": "", "author": "Che Wei Chen;Ching Wen Yang;Chun-Yi Lin;Hung-Yu Kao", "authorids": "~Che_Wei_Chen1;~Ching_Wen_Yang1;~Chun-Yi_Lin1;~Hung-Yu_Kao1", "gender": "M;F;M;M", "homepage": "https://eric88525.github.io/;;http://140.116.245.107/advisor.html;", "dblp": ";71/4577;64/5833.html;", "google_scholar": ";https://scholar.google.com/citations?view_op=list_works;https://scholar.google.com.tw/citations?user=X5Is2lAAAAAJ;https://scholar.google.com.tw/citations?user=8I_uKDAAAAAJ", "or_profile": "~Che_Wei_Chen1;~Ching_Wen_Yang1;~Hung-Yu_Kao1;~Chun_Yi_Lin1", "aff": "National Cheng Kung University;National Cheng Kung University;CSIE;National Cheng Kung University", "aff_domain": "ncku.edu.tw;ncku.edu.tw;csie.ncku.edu.tw;ncku.edu.tw", "position": "MS student;MS student;Full Professor;MS student", "bibtex": "@inproceedings{\nchen2023breaking,\ntitle={Breaking Boundaries in Retrieval Systems: Unsupervised Domain Adaptation with Denoise-Finetuning},\nauthor={Che Wei Chen and Ching Wen Yang and Chun-Yi Lin and Hung-Yu Kao},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=v9CVjuNlDI}\n}", "github": "", "project": "", "reviewers": "XVNc;kWur;X5Zx;zViP", "site": "https://openreview.net/forum?id=v9CVjuNlDI", "pdf_size": 0, "rating": "2;2;2;2", "confidence": "2;1;4;2", "excitement": "3;2;3;4", "reproducibility": "2;2;4;4", "correctness": "2;3;3;4", "rating_avg": 2.0, "confidence_avg": 2.25, "excitement_avg": 3.0, "reproducibility_avg": 3.0, "correctness_avg": 3.0, "replies_avg": 14, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-3825-9638;0000-0002-8890-8544;0000-0002-2582-2356", "linkedin": "chenchewei/;chingwenyang-06102020/;;%E6%9E%97-%E5%B3%BB%E6%AF%85-7ba67b268/", "aff_unique_index": "0;0;1;0", "aff_unique_norm": "National Cheng Kung University;College of Computer Science and Information Engineering", "aff_unique_dep": ";", "aff_unique_url": "https://www.ncku.edu.tw;", "aff_unique_abbr": "NCKU;CSIE", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Taiwan;", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China;" }, { "id": "vBZ5eBdrgH", "title": "$\\textit{SelectNoise:}$ Unsupervised Noise Injection to Enable Zero-Shot Machine Translation for Extremely Low-resource Languages", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "In this work, we focus on the task of machine translation (MT) from extremely low-resource language (ELRLs) to English. The unavailability of parallel data, lack of representation from large multilingual pre-trained models, and limited monolingual data hinder the development of MT systems for ELRLs. However, many ELRLs often share lexical similarities with high-resource languages (HRLs) due to factors such as dialectical variations, geographical proximity, and language structure. We utilize this property to improve cross-lingual signals from closely related HRL to enable MT for ELRLs. Specifically, we propose a novel unsupervised approach, $\\textit{SelectNoise}$, based on $\\textit{selective candidate extraction}$ and $\\textit{noise injection}$ to generate noisy HRLs training data. 
The noise injection acts as a regularizer, and the model trained with noisy data learns to handle lexical variations such as spelling, grammar, and vocabulary changes, leading to improved cross-lingual transfer to ELRLs. The selective candidates are extracted using BPE merge operations and edit operations, and noise injection is performed using greedy, top-p, and top-k sampling strategies. We evaluate the proposed model on 12 ELRLs from the FLORES-200 benchmark in a zero-shot setting across two language families. The proposed model outperformed all the strong baselines, demonstrating its efficacy. It has comparable performance with the supervised noise injection model. Our code and model are publicly available.", "keywords": "Machine Translation;Low resource languages;Noise Injection;Zero-shot;BPE", "primary_area": "", "supplementary_material": "", "author": "Maharaj Brahma;Kaushal Kumar Maurya;Maunendra Sankar Desarkar", "authorids": "~Maharaj_Brahma2;~Kaushal_Kumar_Maurya1;~Maunendra_Sankar_Desarkar1", "gender": ";M;M", "homepage": ";https://kaushal0494.github.io/;https://www.iith.ac.in/~maunendra/", "dblp": ";276/5025;46/8779", "google_scholar": ";eMb2l_kAAAAJ;https://scholar.google.co.in/citations?user=W8LJ-tEAAAAJ", "or_profile": "~Maharaj_Brahma2;~Kaushal_Kumar_Maurya1;~Maunendra_Sankar_Desarkar1", "aff": ";Indian Institute of Technology Hyderabad;Indian Institute of Technology, Hyderabad,", "aff_domain": ";iith.ac.in;iith.ac.in", "position": ";PhD student;Associate Professor", "bibtex": "@inproceedings{\nbrahma2023textitselectnoise,\ntitle={\\${\\textbackslash}textit\\{SelectNoise:\\}\\$ Unsupervised Noise Injection to Enable Zero-Shot Machine Translation for Extremely Low-resource Languages},\nauthor={Maharaj Brahma and Kaushal Kumar Maurya and Maunendra Sankar Desarkar},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=vBZ5eBdrgH}\n}", "github": "", "project": "", "reviewers": "2AWf;eEMd;4tcT", "site": "https://openreview.net/forum?id=vBZ5eBdrgH", "pdf_size": 0, "rating": "3;3;3", "confidence": "5;4;3", "excitement": "2;3;3", "reproducibility": "4;4;3", "correctness": "4;3;4", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 2.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0003-1963-7338", "linkedin": ";kaushal-kumar-maurya-73016773/;https://in.linkedin.com/in/maunendra-sankar-desarkar-6a89907", "aff_unique_index": "0;0", "aff_unique_norm": "Indian Institute of Technology Hyderabad", "aff_unique_dep": "", "aff_unique_url": "https://www.iith.ac.in", "aff_unique_abbr": "IIT Hyderabad", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Hyderabad", "aff_country_unique_index": "0;0", "aff_country_unique": "India" }, { "id": "vDvFT7IX4O", "title": "Tree of Clarifications: Answering Ambiguous Questions with Retrieval-Augmented Large Language Models", "track": "main", "status": "Short Main", "tldr": "", "abstract": "Questions in open-domain question answering are often ambiguous, allowing multiple interpretations. \nOne approach to handling them is to identify all possible interpretations of the ambiguous question (AQ) and to generate a long-form answer addressing them all, as suggested by Stelmakh et al., (2022). 
While it provides a comprehensive response without bothering the user for clarification, considering multiple dimensions of ambiguity and gathering corresponding knowledge remains a challenge.\nTo cope with the challenge, we propose a novel framework, Tree of Clarifications (ToC):\nIt recursively constructs a tree of disambiguations for the AQ---via few-shot prompting leveraging external knowledge---and uses it to generate a long-form answer.\nToC outperforms existing baselines on ASQA in a few-shot setup across the metrics, while surpassing fully-supervised baselines trained on the whole training set in terms of Disambig-F1 and Disambig-ROUGE. Code is available at https://github.com/gankim/tree-of-clarifications.", "keywords": "Question Answering;Large Language Model;Ambiguous QA;Open-domain QA", "primary_area": "", "supplementary_material": "", "author": "Gangwoo Kim;Sungdong Kim;Byeongguk Jeon;Joonsuk Park;Jaewoo Kang", "authorids": "~Gangwoo_Kim1;~Sungdong_Kim1;~Byeongguk_Jeon1;~Joonsuk_Park1;~Jaewoo_Kang1", "gender": "M;;M;M;M", "homepage": "https://gankim.github.io/;;http://www.joonsuk.org;https://dmis.korea.ac.kr;https://github.com/byeongGuks", "dblp": "264/0044;118/1568;50/9717;k/JaewooKang;", "google_scholar": "TmWGEFgAAAAJ;xKrSnDoAAAAJ;3SPMM3oAAAAJ;https://scholar.google.co.kr/citations?user=RaBZafQAAAAJ;", "or_profile": "~Gangwoo_Kim1;~Sungdong_Kim1;~Joonsuk_Park1;~Jaewoo_Kang1;~Byeong_Guk_Jeon1", "aff": "Microsoft;NAVER;University of Richmond;Korea University;Korea University", "aff_domain": "microsoft.com;navercorp.com;richmond.edu;korea.ac.kr;korea.ac.kr", "position": "Intern;Researcher;Assistant Professor;Full Professor;Undergrad student", "bibtex": "@inproceedings{\nkim2023tree,\ntitle={Tree of Clarifications: Answering Ambiguous Questions with Retrieval-Augmented Large Language Models},\nauthor={Gangwoo Kim and Sungdong Kim and Byeongguk Jeon and Joonsuk Park and Jaewoo Kang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=vDvFT7IX4O}\n}", "github": "", "project": "", "reviewers": "GVet;tfvE;UCkq;T734", "site": "https://openreview.net/forum?id=vDvFT7IX4O", "pdf_size": 0, "rating": "5;5;5;5", "confidence": "3;4;4;3", "excitement": "4;4;4;4", "reproducibility": "3;4;4;4", "correctness": "4;4;3;4", "rating_avg": 5.0, "confidence_avg": 3.5, "excitement_avg": 4.0, "reproducibility_avg": 3.75, "correctness_avg": 3.75, "replies_avg": 14, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0002-1182-4836;0000-0001-6798-9106;", "linkedin": "gangwoo-kim-96a699183/;;;;byeong-guk-jeon-4b62b627a/", "aff_unique_index": "0;1;2;3;3", "aff_unique_norm": "Microsoft;NAVER Corporation;University of Richmond;Korea University", "aff_unique_dep": "Microsoft Corporation;;;", "aff_unique_url": "https://www.microsoft.com;https://www.naver.com;https://www.richmond.edu;https://www.korea.ac.kr", "aff_unique_abbr": "Microsoft;NAVER;UR;KU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;1;1", "aff_country_unique": "United States;South Korea" }, { "id": "vLoSutEAJM", "title": "The neural dynamics of word recognition and integration", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Listeners recognize and integrate words in rapid and noisy everyday speech by combining expectations about upcoming content with incremental sensory evidence. 
We present a computational model of word recognition which formalizes this perceptual process in Bayesian decision theory. We fit this model to explain scalp EEG signals recorded as subjects passively listened to a fictional story, revealing both the dynamics of the online auditory word recognition process and the neural correlates of the recognition and integration of words.\n\nThe model reveals distinct neural processing of words depending on whether or not they can be quickly recognized. While all words trigger a neural response characteristic of probabilistic integration \u2014 voltage modulations predicted by a word's surprisal in context \u2014 these modulations are amplified for words which require more than roughly 150 ms of input to be recognized. We observe no difference in the latency of these neural responses according to words' recognition times. Our results support a two-part model of speech comprehension, combining an eager and rapid process of word recognition with a temporally independent process of word integration.\nHowever, we also developed alternative models of the scalp EEG signal not incorporating word recognition dynamics which showed similar performance improvements. We discuss potential future modeling steps which may help to separate these hypotheses.", "keywords": "processing;word recognition;speech comprehension;EEG;neuroscience;cognitive neuroscience;cognitive science;psychology;time series", "primary_area": "", "supplementary_material": "", "author": "Jon Gauthier;Roger P. Levy", "authorids": "~Jon_Gauthier1;~Roger_P._Levy1", "gender": "M;M", "homepage": "http://foldl.me;http://www.mit.edu/~rplevy", "dblp": "177/8885;23/90", "google_scholar": "n7Z2vI4AAAAJ;i86O0SAAAAAJ", "or_profile": "~Jon_Gauthier1;~Roger_Levy1", "aff": "Massachusetts Institute of Technology;Massachusetts Institute of Technology", "aff_domain": "mit.edu;mit.edu", "position": "PhD student;Full Professor", "bibtex": "@inproceedings{\ngauthier2023the,\ntitle={The neural dynamics of word recognition and integration},\nauthor={Jon Gauthier and Roger P. 
Levy},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=vLoSutEAJM}\n}", "github": "", "project": "", "reviewers": "twWy;FAWT;oXeZ", "site": "https://openreview.net/forum?id=vLoSutEAJM", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;3;3", "excitement": "4;3;4", "reproducibility": "2;3;3", "correctness": "4;3;4", "rating_avg": 5.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 2.6666666666666665, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-6118-5833;0000-0002-4493-8864", "linkedin": ";roger-levy-502a6011/", "aff_unique_index": "0;0", "aff_unique_norm": "Massachusetts Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "https://web.mit.edu", "aff_unique_abbr": "MIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "vMpmabFTFw", "title": "Learning to Compose Representations of Different Encoder Layers towards Improving Compositional Generalization", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Recent studies have shown that sequence-to-sequence (seq2seq) models struggle with compositional generalization (CG), i.e., the ability to systematically generalize to unseen compositions of seen components. There is mounting evidence that one of the reasons hindering CG is the representation of the encoder uppermost layer is entangled, i.e., the syntactic and semantic representations of sequences are entangled. However, we consider that the previously identified representation entanglement problem is not comprehensive enough. Additionally, we hypothesize that the source keys and values representations passing into different decoder layers are also entangled.\nStarting from this intuition, we propose \\textsc{CompoSition} (\\textbf{Compo}se \\textbf{S}yntactic and Semant\\textbf{i}c Representa\\textbf{tion}s), an extension to seq2seq models which learns to compose representations of different encoder layers dynamically for different tasks, since recent studies reveal that the bottom layers of the Transformer encoder contain more syntactic information and the top ones contain more semantic information. Specifically, we introduce a \\textit{composed layer} between the encoder and decoder to compose different encoder layers' representations to generate specific keys and values passing into different decoder layers. \\textsc{CompoSition} achieves competitive results on two comprehensive and realistic benchmarks, which empirically demonstrates the effectiveness of our proposal. 
Codes are available at~\\url{https://github.com/thinkaboutzero/COMPOSITION}.", "keywords": "Compositional Generalization;Seq2Seq Models;Machine Translation;Semantic Parsing", "primary_area": "", "supplementary_material": "", "author": "Lei Lin;Shuangtao Li;Yafang Zheng;Biao Fu;shan liu;Yidong Chen;Xiaodong Shi", "authorids": "~Lei_Lin4;~Shuangtao_Li1;~Yafang_Zheng1;~Biao_Fu1;~shan_liu4;~Yidong_Chen2;~Xiaodong_Shi2", "gender": "M;M;F;M;F;M;M", "homepage": ";;https://scholar.google.com/citations?user=-q7ksUcAAAAJ&hl=zh-CN&oi=sra;;http://nlp.xmu.edu.cn/;http://nlp.xmu.edu.cn/teachers/ydchen/index_en.html;", "dblp": ";;;144/8117;;11/1492;73/5055", "google_scholar": "https://scholar.google.com/citations?hl=en;;;;;;", "or_profile": "~Lei_Lin4;~Shuangtao_Li1;~Yafang_Zheng1;~Biao_Fu1;~shan_liu4;~Yidong_Chen2;~Xiaodong_Shi2", "aff": "Xiamen University;Giant Network;Xiamen University;Xiamen University;Xiamen University;Xiamen University;Xiamen University, Tsinghua University", "aff_domain": "xmu.edu.cn;ztgame.com;xmu.edu.cn;xmu.edu.cn;xmu.edu.cn;xmu.edu.cn;xmu.edu.cn", "position": "MS student;Researcher;MS student;MS student;MS student;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nlin2023learning,\ntitle={Learning to Compose Representations of Different Encoder Layers towards Improving Compositional Generalization},\nauthor={Lei Lin and Shuangtao Li and Yafang Zheng and Biao Fu and shan liu and Yidong Chen and Xiaodong Shi},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=vMpmabFTFw}\n}", "github": "", "project": "", "reviewers": "XNv7;5kLD;VRuK;DCYU", "site": "https://openreview.net/forum?id=vMpmabFTFw", "pdf_size": 0, "rating": "3;3;3;3", "confidence": "4;4;3;4", "excitement": "4;3;2;3", "reproducibility": "4;4;3;4", "correctness": "4;3;3;3", "rating_avg": 3.0, "confidence_avg": 3.75, "excitement_avg": 3.0, "reproducibility_avg": 3.75, "correctness_avg": 3.25, "replies_avg": 14, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;;", "linkedin": ";;;;;;", "aff_unique_index": "0;1;0;0;0;0;0", "aff_unique_norm": "Xiamen University;Giant Network", "aff_unique_dep": ";", "aff_unique_url": "https://www.xmu.edu.cn;", "aff_unique_abbr": "XMU;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "vOX7Dfwo3v", "title": "Symbol tuning improves in-context learning in language models", "track": "main", "status": "Long Main", "tldr": "", "abstract": "We present symbol tuning - finetuning language models on in-context input-label pairs where natural language labels (e.g., \"positive/negative sentiment\") are replaced with arbitrary symbols (e.g., \"foo/bar\"). Symbol tuning leverages the intuition that when a model cannot use instructions or natural language labels to figure out a task, it must instead do so by learning the input-label mappings.\n\nWe experiment with symbol tuning across PaLM models up to 540B parameters and observe benefits across various settings. First, symbol tuning boosts performance on unseen in-context learning tasks and is much more robust to underspecified prompts, such as those without instructions or without natural language labels. 
Second, symbol-tuned models are much stronger at algorithmic reasoning tasks, with up to 18.2% better performance on the List Functions benchmark and up to 15.3% better performance on the Simple Turing Concepts benchmark. Finally, symbol-tuned models show large improvements in following flipped-labels presented in-context, meaning that they are more capable of using in-context information to override prior knowledge.", "keywords": "in-context learning;large language models;natural language processing", "primary_area": "", "supplementary_material": "", "author": "Jerry Wei;Le Hou;Andrew Kyle Lampinen;Xiangning Chen;Da Huang;Yi Tay;Xinyun Chen;Yifeng Lu;Denny Zhou;Tengyu Ma;Quoc V Le", "authorids": "~Jerry_Wei1;~Le_Hou1;~Andrew_Kyle_Lampinen1;~Xiangning_Chen1;~Da_Huang2;~Yi_Tay1;~Xinyun_Chen1;~Yifeng_Lu1;~Denny_Zhou1;~Tengyu_Ma1;~Quoc_V_Le1", "gender": "M;M;M;;M;M;M;M;;M;F", "homepage": "http://vision.cs.stonybrook.edu/~lehhou/home/index.html;https://github.com/google/BIG-bench;;;http://yitay.net;;http://ai.stanford.edu/~tengyuma/;;https://dennyzhou.github.io/;https://www.jerrywei.net;https://jungyhuk.github.io/", "dblp": "161/9892;https://dblp.uni-trier.de/pers/hd/l/Lampinen:Andrew_K=;56/7393;;;69/8051;54/9061;29/6166;178/3277;234/9076;", "google_scholar": "kQ0HeQIAAAAJ;_N44XxAAAAAJ;vNcBx1sAAAAJ;ZjuMpLoAAAAJ;VBclY_cAAAAJ;CM4o-cgAAAAJ;i38QlUwAAAAJ;;UwLsYw8AAAAJ;Y4sk3aMAAAAJ;d4W1UT0AAAAJ", "or_profile": "~Le_Hou1;~Andrew_Kyle_Lampinen1;~Xiangning_Chen1;~Da_Huang2;~Yi_Tay1;~Yifeng_Lu1;~Tengyu_Ma1;~Quoc_V_Le1;~Dengyong_Zhou2;~Jerry_Weng_Wei1;~Xinyun_Chen2", "aff": "Google Research;Google DeepMind;University of California, Los Angeles;Google;Google;Google Deepmind;Facebook AI Research;Google;Google DeepMind;Google;Google", "aff_domain": "google.com;google.com;cs.ucla.edu;google.com;google.com;google.com;fb.com;google.com;google.com;google.com;google.com", "position": "Software Engineer;Research Scientist;PhD student;Researcher;Research Scientist;Researcher;Visiting Scientist;Scientist;Research Scientist;Student Researcher;Researcher", "bibtex": "@inproceedings{\nwei2023symbol,\ntitle={Symbol tuning improves in-context learning in language models},\nauthor={Jerry Wei and Le Hou and Andrew Kyle Lampinen and Xiangning Chen and Da Huang and Yi Tay and Xinyun Chen and Yifeng Lu and Denny Zhou and Tengyu Ma and Quoc V Le},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=vOX7Dfwo3v}\n}", "github": "", "project": "", "reviewers": "MTeF;F3QV;43HS", "site": "https://openreview.net/forum?id=vOX7Dfwo3v", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;3;3", "excitement": "4;4;4", "reproducibility": "3;3;2", "correctness": "4;4;5", "rating_avg": 5.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 4.0, "reproducibility_avg": 2.6666666666666665, "correctness_avg": 4.333333333333333, "replies_avg": 11, "authors#_avg": 11, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0001-7323-5300;;;;;;;;;0009-0001-5279-0177;", "linkedin": ";;;;;;;;;jerryweiai/;", "aff_unique_index": "0;0;1;0;0;2;3;0;0;0;0", "aff_unique_norm": "Google;University of California, Los Angeles;DeepMind;Meta", "aff_unique_dep": "Google Research;;DeepMind;Facebook AI Research", "aff_unique_url": "https://research.google;https://www.ucla.edu;https://deepmind.com;https://research.facebook.com", "aff_unique_abbr": "Google Research;UCLA;DeepMind;FAIR", "aff_campus_unique_index": "0;2;0;0;0;0;0", "aff_campus_unique": 
"Mountain View;;Los Angeles", "aff_country_unique_index": "0;1;0;0;0;1;0;0;1;0;0", "aff_country_unique": "United States;United Kingdom" }, { "id": "vR1yERC0Wd", "title": "Global Structure Knowledge-Guided Relation Extraction Method for Visually-Rich Document", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Visual Relation Extraction (VRE) is a powerful means of discovering relationships between entities within visually-rich documents. Existing methods often focus on manipulating entity features to find pairwise relations, yet neglect the more fundamental structural information that links disparate entity pairs together. The absence of global structure information may make the model struggle to learn long-range relations and easily predict conflicted results. To alleviate such limitations, we propose a GlObal Structure knowledge-guided relation Extraction (GOSE) framework. GOSE initiates by generating preliminary relation predictions on entity pairs extracted from a scanned image of the document. Subsequently, global structural knowledge is captured from the preceding iterative predictions, which are then incorporated into the representations of the entities. This ``generate-capture-incorporate'' cycle is repeated multiple times, allowing entity representations and global structure knowledge to be mutually reinforced. Extensive experiments validate that GOSE not only outperforms existing methods in the standard fine-tuning setting but also reveals superior cross-lingual learning capabilities; indeed, even yields stronger data-efficient performance in the low-resource setting.", "keywords": "Visually-Rich Document;Visual relation extraction", "primary_area": "", "supplementary_material": "", "author": "Xiangnan Chen;Qian Xiao;Juncheng Li;Duo Dong;Jun Lin;Xiaozhong Liu;Siliang Tang", "authorids": "~Xiangnan_Chen1;~Qian_Xiao3;~Juncheng_Li3;~Duo_Dong1;~Jun_Lin2;~Xiaozhong_Liu2;~Siliang_Tang1", "gender": "M;M;M;M;M;M;M", "homepage": ";https://scholar.google.com/citations?hl=zh-CN&user=TFQfT-EAAAAJ&view_op=list_works&gmla=AHoSzlX_Flghn3lyXVVaeWvVxGWROTdNoql9k8caZnWlFkoPnO7c2xF3vv7u_AIaDi6uXXMIcxG8Y1zC77qMEiqytMtPlnvKVGDC06YEJzY;;;https://scholar.google.com/citations?user=DvAsN5QAAAAJ&hl=zh-CN;https://www.wpi.edu/people/faculty/xliu14;https://person.zju.edu.cn/en/siliang", "dblp": ";;182/7674-6;;;11/6389.html;44/5693", "google_scholar": "https://scholar.google.com.hk/citations?user=7tTO3b8AAAAJ;https://scholar.google.com/citations?hl=zh-CN;lm9s-QgAAAAJ;xPuqtm4AAAAJ;DvAsN5QAAAAJ;1BUByMcAAAAJ;8e7H3PcAAAAJ", "or_profile": "~Xiangnan_Chen1;~Qian_Xiao3;~Juncheng_Li3;~Duo_Dong1;~Jun_Lin2;~Xiaozhong_Liu2;~Siliang_Tang1", "aff": "Zhejiang University;Alibaba Group;Zhejiang University;Zhejiang University;Alibaba Group;Worcester Polytechnic Institute;Zhejiang University", "aff_domain": "zju.edu.cn;alibaba-inc.com;zju.edu.cn;zju.edu.cn;alibaba-inc.com;wpi.edu;zju.edu.cn", "position": "PhD student;Researcher;PhD student;MS student;Researcher;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nchen2023global,\ntitle={Global Structure Knowledge-Guided Relation Extraction Method for Visually-Rich Document},\nauthor={Xiangnan Chen and Qian Xiao and Juncheng Li and Duo Dong and Jun Lin and Xiaozhong Liu and Siliang Tang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=vR1yERC0Wd}\n}", "github": "", "project": "", "reviewers": "5V4t;ZM3X;5vV3", "site": 
"https://openreview.net/forum?id=vR1yERC0Wd", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;4;3", "excitement": "3;3;3", "reproducibility": "3;3;4", "correctness": "3;3;3", "rating_avg": 3.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.0, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0003-2258-1291;;;;0000-0002-7356-9711", "linkedin": ";;;;;;siliang-tang-4734272a/", "aff_unique_index": "0;1;0;0;1;2;0", "aff_unique_norm": "Zhejiang University;Alibaba Group;Worcester Polytechnic Institute", "aff_unique_dep": ";;", "aff_unique_url": "https://www.zju.edu.cn;https://www.alibaba.com;https://www.wpi.edu", "aff_unique_abbr": "ZJU;Alibaba;WPI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;1;0", "aff_country_unique": "China;United States" }, { "id": "vU0KbvQ91x", "title": "Learning to Abstract with Nonparametric Variational Information Bottleneck", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "Learned representations at the level of characters, sub-words, words, and sentences, have each contributed to advances in understanding different NLP tasks and linguistic phenomena. However, learning textual embeddings is costly as they are tokenization specific and require different models to be trained for each level of abstraction. We introduce a novel language representation model which can learn to compress to different levels of abstraction at different layers of the same model. We apply Nonparametric Variational Information Bottleneck (NVIB) to stacked Transformer self-attention layers in the encoder, which encourages an information-theoretic compression of the representations through the model. \nWe find that the layers within the model correspond to increasing levels of abstraction\nand that their representations are more linguistically informed. 
Finally, we show that NVIB compression results in a model which is more robust to adversarial perturbations.", "keywords": "Representation Learning;Analysis of Neural Networks;Nonparametric Variational Information Bottleneck;Deep Learning", "primary_area": "", "supplementary_material": "", "author": "Melika Behjati;Fabio James Fehr;James Henderson", "authorids": "~Melika_Behjati1;~Fabio_James_Fehr1;~James_Henderson1", "gender": "F;M;M", "homepage": "https://www.idiap.ch/~mbehjati/;https://fjfehr.github.io/;http://idiap.ch/~jhenderson/", "dblp": "243/6574;315/4886.html;h/JamesHenderson.html", "google_scholar": "02sCdLEAAAAJ;WaZWY0wAAAAJ;CSib0ooAAAAJ", "or_profile": "~Melika_Behjati1;~Fabio_James_Fehr1;~James_Henderson1", "aff": "Idiap Research Institute;Idiap Research Institute;Idiap Research Institute", "aff_domain": "idiap.ch;idiap.ch;idiap.ch", "position": "PhD student;PhD student;Senior Researcher", "bibtex": "@inproceedings{\nbehjati2023learning,\ntitle={Learning to Abstract with Nonparametric Variational Information Bottleneck},\nauthor={Melika Behjati and Fabio James Fehr and James Henderson},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=vU0KbvQ91x}\n}", "github": "", "project": "", "reviewers": "bGq2;fPkp;2hkB;gprF;LT26;YN7E", "site": "https://openreview.net/forum?id=vU0KbvQ91x", "pdf_size": 0, "rating": "4;4;4;4;4;4", "confidence": "1;3;2;3;2;3", "excitement": "3;2;3;4;3;2", "reproducibility": "3;3;4;5;5;4", "correctness": "3;3;3;4;3;3", "rating_avg": 4.0, "confidence_avg": 2.3333333333333335, "excitement_avg": 2.8333333333333335, "reproducibility_avg": 4.0, "correctness_avg": 3.1666666666666665, "replies_avg": 20, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0003-3714-4799", "linkedin": ";fabio-j-fehr;james-henderson-3b68346b/", "aff_unique_index": "0;0;0", "aff_unique_norm": "Idiap Research Institute", "aff_unique_dep": "", "aff_unique_url": "https://www.idiap.ch", "aff_unique_abbr": "Idiap", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Switzerland" }, { "id": "vVdRgpC1Oh", "title": "An Empirical Study of Multimodal Model Merging", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Model merging (e.g., via interpolation or task arithmetic) fuses multiple models trained on different tasks to generate a multi-task solution. The technique has been proven successful in previous studies, where the models are trained on similar tasks and with the same initialization. In this paper, we expand on this concept to a multimodal setup by merging transformers trained on different modalities. Furthermore, we conduct our study for a novel goal where we can merge vision, language, and cross-modal transformers of a modality-specific architecture to create a parameter-efficient modality-agnostic architecture. Through comprehensive experiments, we systematically investigate the key factors impacting model performance after merging, including initialization, merging mechanisms, and model architectures. We also propose two metrics that assess the distance between weights to be merged and can serve as an indicator of the merging outcomes. Our analysis leads to an effective training recipe for matching the performance of the modality-agnostic baseline (i.e., pre-trained from scratch) via model merging. 
Our method also outperforms naive merging significantly on various tasks, with improvements of 3% on VQA, 7% on COCO retrieval, 25% on NLVR2, 14% on Flickr30k and 3% on ADE20k.", "keywords": "model merging; vision-and-language", "primary_area": "", "supplementary_material": "", "author": "Yi-Lin Sung;Linjie Li;Kevin Lin;Zhe Gan;Mohit Bansal;Lijuan Wang", "authorids": "~Yi-Lin_Sung1;~Linjie_Li1;~Kevin_Lin3;~Zhe_Gan1;~Mohit_Bansal2;~Lijuan_Wang1", "gender": "F;;M;M;F;M", "homepage": ";https://sites.google.com/site/kevinlin311tw/;http://zhegan27.github.io/;https://www.cs.unc.edu/~mbansal/;https://www.microsoft.com/en-us/research/people/lijuanw/;https://ylsung.github.io/", "dblp": "200/8256;;41/7845;32/5243.html;51/2527.html;212/7264", "google_scholar": "WR875gYAAAAJ;https://scholar.google.com.tw/citations?user=LKSy1kwAAAAJ;E64XWyMAAAAJ;DN8QtscAAAAJ;cDcWXuIAAAAJ;aW2XnF0AAAAJ", "or_profile": "~Linjie_Li1;~Kevin_Lin3;~Zhe_Gan1;~Mohit_Bansal2;~Lijuan_Wang1;~Yi_Lin_Sung1", "aff": "Microsoft;Microsoft;Apple;University of North Carolina at Chapel Hill;Microsoft;Department of Computer Science, University of North Carolina, Chapel Hill", "aff_domain": "microsoft.com;microsoft.com;apple.com;unc.edu;microsoft.com;cs.unc.edu", "position": "Researcher;Principal Researcher;Principal Researcher;Full Professor;Principal Researcher;PhD student", "bibtex": "@inproceedings{\nsung2023an,\ntitle={An Empirical Study of Multimodal Model Merging},\nauthor={Yi-Lin Sung and Linjie Li and Kevin Lin and Zhe Gan and Mohit Bansal and Lijuan Wang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=vVdRgpC1Oh}\n}", "github": "", "project": "", "reviewers": "Ms1f;v68G;KA3D", "site": "https://openreview.net/forum?id=vVdRgpC1Oh", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;4;5", "excitement": "3;3;3", "reproducibility": "5;4;4", "correctness": "3;4;3", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 3.0, "reproducibility_avg": 4.333333333333333, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0001-8944-1336;;;;", "linkedin": ";;zhe-gan-a2229a78/;;;yi-lin-sung-41a427120/", "aff_unique_index": "0;0;1;2;0;2", "aff_unique_norm": "Microsoft;Apple;University of North Carolina", "aff_unique_dep": "Microsoft Corporation;Apple Inc.;", "aff_unique_url": "https://www.microsoft.com;https://www.apple.com;https://www.unc.edu", "aff_unique_abbr": "Microsoft;Apple;UNC", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Chapel Hill", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "vVrwnY76W1", "title": "Remember what you did so you know what to do next", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "We explore using the 6B parameter GPT-J language model to create a plan for a simulated robot to achieve 30 classes of goals in ScienceWorld, a text game simulator for elementary science experiments and for which previously published empirical work has shown large language models (LLM)s to be a poor fit (Wang et al., 2022). Using the Markov assumption, the LLM outperforms the state-of-the-art based on reinforcement learning by a factor of 1.4. When we fill the LLM\u2019s input buffer with as many prior steps as will fit, improvement rises to 3.3x. Even when training on only 6.5% of the training data, we observe a 2.3x improvement over the state-of-the-art. 
Our experiments show that performance varies widely across the 30 classes of actions, indicating that averaging over tasks can hide significant performance issues.", "keywords": "large language models;text games", "primary_area": "", "supplementary_material": "", "author": "Manuel Rafael Ciosici;Alex Hedges;Yash Kankanampati;Justin Andrew Martin;Marjorie Freedman;Ralph M. Weischedel", "authorids": "~Manuel_Rafael_Ciosici1;~Alex_Hedges1;~Yash_Kankanampati1;~Justin_Andrew_Martin1;~Marjorie_Freedman1;~Ralph_M._Weischedel1", "gender": "M;Not Specified;M;M;;M", "homepage": ";;;https://www.isi.edu/directory/jmartin/;https://www.isi.edu/people/mrf/about;", "dblp": "185/1013;283/5929;280/0847;;93/4232;15/4714", "google_scholar": "vseIg5YAAAAJ;;;;bVN5VwEAAAAJ;guhccUcAAAAJ", "or_profile": "~Manuel_Rafael_Ciosici1;~Alex_Hedges1;~Yash_Kankanampati1;~Justin_Andrew_Martin1;~Marjorie_Freedman1;~Ralph_M._Weischedel1", "aff": "USC/ISI;USC Information Sciences Institute;University of Southern California;USC ISI;USC/ISI;USC/ISI", "aff_domain": "isi.edu;isi.edu;usc.edu;isi.edu;isi.edu;isi.edu", "position": "Computer Scientist;Researcher;Research Engineer;Researcher;Principal Researcher;Researcher", "bibtex": "@inproceedings{\nciosici2023remember,\ntitle={Remember what you did so you know what to do next},\nauthor={Manuel Rafael Ciosici and Alex Hedges and Yash Kankanampati and Justin Andrew Martin and Marjorie Freedman and Ralph M. Weischedel},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=vVrwnY76W1}\n}", "github": "", "project": "", "reviewers": "bA1q;4kDw;4gZk", "site": "https://openreview.net/forum?id=vVrwnY76W1", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;4", "excitement": "4;3;3", "reproducibility": "4;4;4", "correctness": "3;4;3", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0001-7768-738X;;;;;", "linkedin": "manuelciosici/;alexphedges;;;marjorie-freedman-37799722/;", "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "University of Southern California", "aff_unique_dep": "", "aff_unique_url": "https://isi.usc.edu", "aff_unique_abbr": "USC", "aff_campus_unique_index": "0;2;2;0;0", "aff_campus_unique": "ISI;;Los Angeles", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "vW3TFDUKWl", "title": "Mitigating Backdoor Poisoning Attacks through the Lens of Spurious Correlation", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Modern NLP models are often trained over large untrusted datasets, raising the potential for a malicious adversary to compromise model\nbehaviour. For instance, backdoors can be implanted through crafting training instances with a specific textual trigger and a target label. This paper posits that backdoor poisoning attacks exhibit a spurious correlation between simple text features and classification labels, and accordingly, proposes methods for mitigating spurious correlation as means of defence. Our empirical study reveals that the malicious triggers are highly correlated to their target labels; therefore such correlations are extremely distinguishable compared to those scores of benign features, and can be used to filter out potentially problematic instances. 
Compared with several existing defences, our defence method significantly reduces attack success rates across backdoor attacks, and in the case of insertion-based attacks, our method provides a near-perfect defence.", "keywords": "backdoor attack;backdoor defence;spurious correlation", "primary_area": "", "supplementary_material": "", "author": "Xuanli He;Qiongkai Xu;Jun Wang;Benjamin I. P. Rubinstein;Trevor Cohn", "authorids": "~Xuanli_He2;~Qiongkai_Xu1;~Jun_Wang29;~Benjamin_I._P._Rubinstein1;~Trevor_Cohn1", "gender": "M;M;M;M;M", "homepage": ";https://xuqiongkai.github.io;;http://www.bipr.net/;https://people.eng.unimelb.edu.au/tcohn/", "dblp": "182/1859;127/0174;125/8189;90/1092;66/4613", "google_scholar": "TU8t0iAAAAAJ;https://scholar.google.com.au/citations?user=wCer2WUAAAAJ;pW78ZCUAAAAJ;https://scholar.google.com.au/citations?user=hMG_gR4AAAAJ;https://scholar.google.com.au/citations?user=FCom398AAAAJ", "or_profile": "~Xuanli_He2;~Qiongkai_Xu1;~Jun_Wang29;~Benjamin_I._P._Rubinstein1;~Trevor_Cohn1", "aff": "University College London, University of London;University of Melbourne;University of Melbourne;The University of Melbourne;The University of Melbourne", "aff_domain": "ucl.ac.uk;unimelb.edu;unimelb.edu.au;unimelb.edu.au;unimelb.edu.au", "position": "Postdoc;Postdoc;PhD student;Associate Professor;Professor", "bibtex": "@inproceedings{\nhe2023mitigating,\ntitle={Mitigating Backdoor Poisoning Attacks through the Lens of Spurious Correlation},\nauthor={Xuanli He and Qiongkai Xu and Jun Wang and Benjamin I. P. Rubinstein and Trevor Cohn},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=vW3TFDUKWl}\n}", "github": "", "project": "", "reviewers": "Z6Ee;fRrP;Sdi4", "site": "https://openreview.net/forum?id=vW3TFDUKWl", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;5", "excitement": "3;4;3", "reproducibility": "5;5;4", "correctness": "4;3;2", "rating_avg": 4.0, "confidence_avg": 4.333333333333333, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.666666666666667, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0003-3312-6825;;0000-0002-2947-6980;", "linkedin": ";;;benjaminrubinstein/;", "aff_unique_index": "0;1;1;1;1", "aff_unique_norm": "University College London;University of Melbourne", "aff_unique_dep": ";", "aff_unique_url": "https://www.ucl.ac.uk;https://www.unimelb.edu.au", "aff_unique_abbr": "UCL;UniMelb", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;1;1", "aff_country_unique": "United Kingdom;Australia" }, { "id": "vWol8k64op", "title": "A Dataset for Investigating the Impact of Context for Offensive Language Detection in Tweets", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "Offensive language detection is crucial in natural language processing (NLP). We investigated the importance of context for detecting such language in reply tweets on Twitter, where the use of offensive language is widespread. We collected a Turkish tweet dataset where the target group was unvaccinated people during the Covid period. Tweets in the dataset were enriched with contextual information by adding the original tweet to which a particular tweet was posted as a reply. The dataset, which includes over 28,000 tweet-reply pairs, was manually labeled by human annotators and made publicly available. 
In addition, we compared the performance of different machine learning models with and without contextual information. Our results show that this type of contextual information was not very useful in improving the performance of the models in general, although it slightly increased the macro-averaged F1-score of certain models.", "keywords": "Offensive Language Detection;Twitter Dataset;Language Resources and Evaluation", "primary_area": "", "supplementary_material": "", "author": "Musa Nuri \u0130htiyar;\u00d6mer \u00d6zdemir;Mustafa Emre Ereng\u00fcl;Arzucan \u00d6zg\u00fcr", "authorids": "~Musa_Nuri_\u0130htiyar1;~\u00d6mer_\u00d6zdemir1;~Mustafa_Emre_Ereng\u00fcl1;~Arzucan_\u00d6zg\u00fcr1", "gender": "M;M;M;", "homepage": ";https://github.com/omerozdemir1;https://github.com/MustafaEmreErengul;", "dblp": ";;;26/6952", "google_scholar": ";;;https://scholar.google.com.tr/citations?user=8Kn4-EsAAAAJ", "or_profile": "~Musa_Nuri_\u0130htiyar1;~\u00d6mer_\u00d6zdemir1;~Mustafa_Emre_Ereng\u00fcl1;~Arzucan_\u00d6zg\u00fcr1", "aff": "Bo\u011fazi\u00e7i University;Bo\u011fazi\u00e7i University;Bo\u011fazi\u00e7i University;Bogazici University", "aff_domain": "boun.edu.tr;boun.edu.tr;boun.edu.tr;bogazici.edu.tr", "position": "MS student;Undergrad student;Undergrad student;Associate Professor", "bibtex": "@inproceedings{\ni{\\ensuremath{\\dot{}}}htiyar2023a,\ntitle={A Dataset for Investigating the Impact of Context for Offensive Language Detection in Tweets},\nauthor={Musa Nuri {\\.I}htiyar and {\\\"O}mer {\\\"O}zdemir and Mustafa Emre Ereng{\\\"u}l and Arzucan {\\\"O}zg{\\\"u}r},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=vWol8k64op}\n}", "github": "", "project": "", "reviewers": "UEvi;tdPF;xKL1;xHbP", "site": "https://openreview.net/forum?id=vWol8k64op", "pdf_size": 0, "rating": "2;2;2;2", "confidence": "4;4;4;4", "excitement": "4;3;2;3", "reproducibility": "2;4;3;5", "correctness": "4;3;2;3", "rating_avg": 2.0, "confidence_avg": 4.0, "excitement_avg": 3.0, "reproducibility_avg": 3.5, "correctness_avg": 3.0, "replies_avg": 13, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0009-0006-1271-8863;;;", "linkedin": ";https://www.linkedin.com/mwlite/in/omer-ozdemir1;mustafa-emre-erengul/;", "aff_unique_index": "0;0;0;1", "aff_unique_norm": "Bo\u011fazi\u00e7i University;Bogazici University", "aff_unique_dep": ";", "aff_unique_url": "https://www.boun.edu.tr;https://www.boun.edu.tr", "aff_unique_abbr": "BU;BU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "T\u00fcrkiye" }, { "id": "vWy66avGPR", "title": "Perceptual Structure in the absence of grounding: the impact of abstractedness and subjectivity in color language for LLMs", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "The need for grounding in language understanding is an active research topic. Previous work has suggested that color perception and color language appear as a suitable test bed to empirically study the problem, given its cognitive significance and showing that there is considerable alignment between a defined color space and the feature space defined by a language model. 
To further study this issue, we collect a large scale source of colors and their descriptions, containing almost a 1 million examples\n, and perform an empirical analysis to compare two kinds of alignments: (i) inter-space, by learning a mapping between embedding space and color space, and (ii) intra-space, by means of prompting comparatives between color descriptions. Our results show that while color space alignment holds for monolexemic, highly pragmatic color descriptions, this alignment drops considerably in the presence of examples that exhibit elements of real linguistic usage such as subjectivity and abstractedness, suggesting that grounding may be required in such cases.", "keywords": "Color language;grounding;language models", "primary_area": "", "supplementary_material": "", "author": "Pablo Loyola;Edison Marrese-Taylor;Andres Hoyos-Idrobo", "authorids": "~Pablo_Loyola2;~Edison_Marrese-Taylor2;~Andres_Hoyos-Idrobo1", "gender": ";;M", "homepage": ";;", "dblp": ";;", "google_scholar": ";;J3344dQAAAAJ", "or_profile": "~Pablo_Loyola2;~Edison_Marrese-Taylor2;~Andres_Hoyos-Idrobo1", "aff": ";;", "aff_domain": ";;", "position": ";;", "bibtex": "@inproceedings{\nloyola2023perceptual,\ntitle={Perceptual Structure in the absence of grounding: the impact of abstractedness and subjectivity in color language for {LLM}s},\nauthor={Pablo Loyola and Edison Marrese-Taylor and Andres Hoyos-Idrobo},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=vWy66avGPR}\n}", "github": "", "project": "", "reviewers": "T8is;H3n7;tgng", "site": "https://openreview.net/forum?id=vWy66avGPR", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;4;4", "excitement": "3;4;4", "reproducibility": "4;4;4", "correctness": "2;4;4", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0003-1729-1927", "linkedin": ";;" }, { "id": "va7nzRsbA4", "title": "What Makes Chain-of-Thought Prompting Effective? A Counterfactual Study", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "The effectiveness of Chain-of-thought prompting (CoT) has been widely recognized, but the underlying mechanisms behind its success, the reason why it just works for a wide range of tasks, remains an open question. To investigate this, we employ a counterfactual prompting approach, systematically manipulating elements of examples used in a few-shot prompt, and testing the consequences on model behavior. This allows us to understand the relative contributions of prompt elements such as symbols (digits, entities) and patterns (equations, sentence structure) on in-context learning. Our experiments with three different large language models (LLMs) reveal several key findings. First, the specific symbols used in the prompt do not significantly impact the model's performance. However, consistent patterns in examples and specifying text in style frequently found on the web are crucial. Second, our findings suggest that the necessity of accurate few-shot examples depends on their role in communicating task understanding. We identify tasks where inaccurate few-shot examples hurt and, surprisingly, tasks where they improve performance. 
Additionally, we find that the intermediate steps in CoT may not necessarily facilitate learning how to solve a task, but instead efficiently convey task understanding (what) to the model. Furthermore, CoT leverages LLMs to fill in missing commonsense information, particularly helping difficult reasoning problems and long-tail questions.", "keywords": "LLMs;Analysis;Chain-of-thought;Reasoning;Prompting;Few-shot Reasoning", "primary_area": "", "supplementary_material": "", "author": "Aman Madaan;Katherine Hermann;Amir Yazdanbakhsh", "authorids": "~Aman_Madaan1;~Katherine_Hermann1;~Amir_Yazdanbakhsh1", "gender": ";F;M", "homepage": "https://madaan.github.io;;https://www.ayazdan.com/", "dblp": "138/1043;254/1923;44/8745", "google_scholar": "jW9ts2cAAAAJ;owcAYmEAAAAJ;Vdu_sqwAAAAJ", "or_profile": "~Aman_Madaan1;~Katherine_Hermann1;~Amir_Yazdanbakhsh1", "aff": "Carnegie Mellon University;Google;Google Brain", "aff_domain": "cmu.edu;google.com;google.com", "position": "PhD student;Researcher;Researcher", "bibtex": "@inproceedings{\nmadaan2023what,\ntitle={What Makes Chain-of-Thought Prompting Effective? A Counterfactual Study},\nauthor={Aman Madaan and Katherine Hermann and Amir Yazdanbakhsh},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=va7nzRsbA4}\n}", "github": "", "project": "", "reviewers": "jgj8;xMnP;sLkz", "site": "https://openreview.net/forum?id=va7nzRsbA4", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;4", "excitement": "4;3;4", "reproducibility": "2;4;4", "correctness": "3;3;4", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0001-8199-7671", "linkedin": "amnmadaan/;;ayazdanb/", "aff_unique_index": "0;1;1", "aff_unique_norm": "Carnegie Mellon University;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.cmu.edu;https://www.google.com", "aff_unique_abbr": "CMU;Google", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "vaKgq549Dy", "title": "FactKB: Generalizable Factuality Evaluation using Language Models Enhanced with Factual Knowledge", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Evaluating the factual consistency of automatically generated summaries is essential for the progress and adoption of reliable summarization systems. Despite recent advances, existing factuality evaluation models are not robust, being especially prone to entity and relation errors in new domains. We propose FactKB---a simple new approach to factuality evaluation that is generalizable across domains, in particular with respect to entities and relations. FactKB is based on language models pretrained using facts extracted from external knowledge bases. We introduce three types of complementary factuality pretraining objectives based on entity-specific facts, facts extracted from auxiliary knowledge about entities, and facts constructed compositionally through knowledge base walks. \nThe resulting factuality evaluation model achieves state-of-the-art performance on two in-domain news summarization benchmarks as well as on three out-of-domain scientific literature datasets. 
Further analysis shows that FactKB has an improved ability to detect erroneous entities and relations in summaries and is robust and easily generalizable across domains.", "keywords": "factuality evaluation;knowledge bases;summarization", "primary_area": "", "supplementary_material": "", "author": "Shangbin Feng;Vidhisha Balachandran;Yuyang Bai;Yulia Tsvetkov", "authorids": "~Shangbin_Feng1;~Vidhisha_Balachandran1;~Yuyang_Bai1;~Yulia_Tsvetkov1", "gender": "M;F;M;F", "homepage": "https://bunsenfeng.github.io/;https://vidhishanair.github.io/;https://leopoldwhite.github.io/;https://homes.cs.washington.edu/~yuliats/", "dblp": "295/9571;234/4867;261/0192;75/8157", "google_scholar": "Y3rLP9UAAAAJ;LgitgaIAAAAJ;J2O6M1AAAAAJ;SEDPkrsAAAAJ", "or_profile": "~Shangbin_Feng1;~Vidhisha_Balachandran1;~Yuyang_Bai1;~Yulia_Tsvetkov1", "aff": "University of Washington;Carnegie Mellon University;Xi'an Jiaotong University;Department of Computer Science, University of Washington", "aff_domain": "cs.washington.edu;cmu.edu;xjtu.edu.cn;cs.washington.edu", "position": "PhD student;PhD student;Undergrad student;Assistant Professor", "bibtex": "@inproceedings{\nfeng2023factkb,\ntitle={Fact{KB}: Generalizable Factuality Evaluation using Language Models Enhanced with Factual Knowledge},\nauthor={Shangbin Feng and Vidhisha Balachandran and Yuyang Bai and Yulia Tsvetkov},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=vaKgq549Dy}\n}", "github": "", "project": "", "reviewers": "BoeU;S3Ep;ttnU", "site": "https://openreview.net/forum?id=vaKgq549Dy", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;3", "excitement": "4;3;3", "reproducibility": "4;4;4", "correctness": "4;4;4", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.0, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-4133-1987;;;0000-0002-4634-7128", "linkedin": ";;;", "aff_unique_index": "0;1;2;0", "aff_unique_norm": "University of Washington;Carnegie Mellon University;Xi'an Jiao Tong University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.washington.edu;https://www.cmu.edu;https://www.xjtu.edu.cn", "aff_unique_abbr": "UW;CMU;XJTU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Seattle", "aff_country_unique_index": "0;0;1;0", "aff_country_unique": "United States;China" }, { "id": "vdLFYqupHA", "title": "Enhancing Uncertainty-Based Hallucination Detection with Stronger Focus", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Large Language Models (LLMs) have gained significant popularity for their impressive performance across diverse fields. However, LLMs are prone to hallucinate untruthful or nonsensical outputs that fail to meet user expectations in many real-world applications. Existing works for detecting hallucinations in LLMs either rely on external knowledge for reference retrieval or require sampling multiple responses from the LLM for consistency verification, making these methods costly and inefficient. In this paper, we propose a novel reference-free, uncertainty-based method for detecting hallucinations in LLMs. 
Our approach imitates human focus in factuality checking from three aspects: 1) focus on the most informative and important keywords in the given text; 2) focus on the unreliable tokens in historical context which may lead to a cascade of hallucinations; and 3) focus on the token properties such as token type and token frequency. Experimental results on relevant datasets demonstrate the effectiveness of our proposed method, which achieves state-of-the-art performance across all the evaluation metrics and eliminates the need for additional information.", "keywords": "hallucination detection;large language model;text generation", "primary_area": "", "supplementary_material": "", "author": "Tianhang Zhang;Lin Qiu;Qipeng Guo;Cheng Deng;Yue Zhang;Zheng Zhang;Chenghu Zhou;Xinbing Wang;Luoyi Fu", "authorids": "~Tianhang_Zhang1;~Lin_Qiu2;~Qipeng_Guo1;~Cheng_Deng4;~Yue_Zhang7;~Zheng_Zhang1;~Chenghu_Zhou3;~Xinbing_Wang1;~Luoyi_Fu1", "gender": "M;M;M;M;M;M;M;M;F", "homepage": ";;;https://www.cdeng.net/;http://frcchang.github.io;https://shanghai.nyu.edu/academics/faculty/directory/zheng-zhang;http://www.igsnrr.cas.cn/gkjj/ysfc/ysfc_zhouchenghu/;http://www.cs.sjtu.edu.cn/~wang-xb/;http://www.cs.sjtu.edu.cn/~fu-ly/index.html", "dblp": "173/9526;;172/1046;;47/722-4;;85/1324.html;96/1149.html;", "google_scholar": "xYyNYs8AAAAJ;U4GJuPIAAAAJ;k3mPGKgAAAAJ;0VFxZy0AAAAJ;;https://scholar.google.com.hk/citations?user=k0KiE4wAAAAJ;;https://scholar.google.com.tw/citations?user=CT5yZbwAAAAJ;https://scholar.google.com.tw/citations?user=xHs9mCUAAAAJ", "or_profile": "~Tianhang_Zhang1;~Lin_Qiu2;~Qipeng_Guo1;~Cheng_Deng4;~Yue_Zhang7;~Zheng_Zhang1;~Chenghu_Zhou3;~Xinbing_Wang1;~Luoyi_Fu1", "aff": "Shanghai Jiaotong University;Amazon;Amazon;Shanghai Jiaotong University;Westlake University;Amazon;IGSNRR, Chinese Academy of Sciences, Beijing, China;Shanghai Jiaotong University;Shanghai Jiaotong University", "aff_domain": "sjtu.edu.cn;amazon.com;amazon.com;sjtu.edu.cn;westlake.edu.cn;amazon.com;lreis.ac.cn;cs.sjtu.edu.cn;sjtu.edu.cn", "position": "MS student;Researcher;Researcher;PhD student;Full Professor;Senior Principal Scientist;Full Professor;Full Professor;Associate Professor", "bibtex": "@inproceedings{\nzhang2023enhancing,\ntitle={Enhancing Uncertainty-Based Hallucination Detection with Stronger Focus},\nauthor={Tianhang Zhang and Lin Qiu and Qipeng Guo and Cheng Deng and Yue Zhang and Zheng Zhang and Chenghu Zhou and Xinbing Wang and Luoyi Fu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=vdLFYqupHA}\n}", "github": "", "project": "", "reviewers": "2xZc;3BQy;4M4D", "site": "https://openreview.net/forum?id=vdLFYqupHA", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;3;3", "excitement": "4;4;3", "reproducibility": "4;4;3", "correctness": "4;4;3", "rating_avg": 5.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 9, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;0000-0002-5214-2268;;;0000-0002-0357-8356;", "linkedin": ";;;;;;;;", "aff_unique_index": "0;1;1;0;2;1;3;0;0", "aff_unique_norm": "Shanghai Jiao Tong University;Amazon;Westlake University;Chinese Academy of Sciences", "aff_unique_dep": ";Amazon.com, Inc.;;IGSNRR", "aff_unique_url": "https://www.sjtu.edu.cn;https://www.amazon.com;https://www.westlake.edu.cn;http://www.cas.cn", "aff_unique_abbr": 
"SJTU;Amazon;WU;CAS", "aff_campus_unique_index": "1", "aff_campus_unique": ";Beijing", "aff_country_unique_index": "0;1;1;0;0;1;0;0;0", "aff_country_unique": "China;United States" }, { "id": "vexCLJO7vo", "title": "MenatQA: A New Dataset for Testing the Temporal Comprehension and Reasoning Abilities of Large Language Models", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Large language models (LLMs) have shown nearly saturated performance on many natural language processing (NLP) tasks. As a result, it is natural for people to believe that LLMs have also mastered abilities such as time understanding and reasoning. However, research on the temporal sensitivity of LLMs has been insufficiently emphasized. To fill this gap, this paper constructs Multiple Sensitive Factors Time QA (MenatQA), which encompasses three temporal factors (scope factor, order factor, counterfactual factor) with total 2,853 samples for evaluating the time comprehension and reasoning abilities of LLMs. This paper tests current mainstream LLMs with different parameter sizes, ranging from billions to hundreds of billions. The results show most LLMs fall behind smaller temporal reasoning models with different degree on these factors. In specific, LLMs show a significant vulnerability to temporal biases and depend heavily on the temporal information provided in questions. Furthermore, this paper undertakes a preliminary investigation into potential improvement strategies by devising specific prompts and leveraging external tools. These approaches serve as valuable baselines or references for future research endeavors.", "keywords": "Temporal Reasoning;Large Language Models", "primary_area": "", "supplementary_material": "", "author": "Yifan Wei;Yisong Su;Huanhuan Ma;Xiaoyan Yu;Fangyu Lei;Yuanzhe Zhang;Jun Zhao;Kang Liu", "authorids": "~Yifan_Wei1;~Yisong_Su1;~Huanhuan_Ma1;~Xiaoyan_Yu1;~Fangyu_Lei1;~Yuanzhe_Zhang1;~Jun_Zhao4;~Kang_Liu1", "gender": ";M;M;F;M;M;M;M", "homepage": ";https://github.com/Thewillman;https://www.huanhuanma.top/;;https://lfy79001.github.io;https://yuanzhe-zhang.github.io/;http://nlpr-web.ia.ac.cn/cip/english/~junzhao/index.html;http://www.nlpr.ia.ac.cn/cip/~liukang/index.html", "dblp": ";;;48/6513.html;329/5621;141/4448;https://dblp.uni-trier.de/pid/47/2026-1.html;42/4903.html", "google_scholar": ";;B4ZlbH8AAAAJ;MgsexesAAAAJ;1WzAOSkAAAAJ;H4GYRx8AAAAJ;https://scholar.google.com.hk/citations?user=HljRttwAAAAJ;DtZCfl0AAAAJ", "or_profile": "~Yifan_Wei1;~Yisong_Su1;~Huanhuan_Ma1;~Xiaoyan_Yu1;~Fangyu_Lei1;~Yuanzhe_Zhang1;~Jun_Zhao4;~Kang_Liu1", "aff": ";Fuzhou University;Institute of Automation, Chinese Academy of Sciences;Beijing Institute of Technology;Institute of Automation, Chinese Academy of Sciences;Institute of Automation, Chinese Academy of Sciences;Institute of automation, Chinese academy of science;Institute of Automation, Chinese Academy of Sciences", "aff_domain": ";fzu.edu.cn;ia.ac.cn;bit.edu.cn;ia.ac.cn;ia.ac.cn;nlpr.ia.ac.cn;ia.ac.cn", "position": ";MS student;MS student;PhD student;PhD student;Associate Professor;Full Professor;Professor", "bibtex": "@inproceedings{\nwei2023menatqa,\ntitle={Menat{QA}: A New Dataset for Testing the Temporal Comprehension and Reasoning Abilities of Large Language Models},\nauthor={Yifan Wei and Yisong Su and Huanhuan Ma and Xiaoyan Yu and Fangyu Lei and Yuanzhe Zhang and Jun Zhao and Kang Liu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language 
Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=vexCLJO7vo}\n}", "github": "", "project": "", "reviewers": "7Azk;2UnL;LWDP", "site": "https://openreview.net/forum?id=vexCLJO7vo", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;4;3", "excitement": "3;2;3", "reproducibility": "4;2;4", "correctness": "4;2;3", "rating_avg": 3.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 2.6666666666666665, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 8, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0002-7151-9550;;;;;", "linkedin": ";;;;;;;", "aff_unique_index": "0;1;2;1;1;1;1", "aff_unique_norm": "Fuzhou University;Chinese Academy of Sciences;Beijing Institute of Technology", "aff_unique_dep": ";Institute of Automation;", "aff_unique_url": "https://www.fznu.edu.cn;http://www.ia.cas.cn;http://www.bit.edu.cn/", "aff_unique_abbr": "FZU;CAS;BIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "vg55TCMjbC", "title": "Reading Books is Great, But Not if You Are Driving! Visually Grounded Reasoning about Defeasible Commonsense Norms", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Commonsense norms are defeasible by context: reading books is usually great, but not when driving a car. While contexts can be explicitly described in language, in embodied scenarios, contexts are often provided visually. This type of visually grounded reasoning about defeasible commonsense norms is generally easy for humans, but (as we show) poses a challenge for machines, as it necessitates both visual understanding and reasoning about commonsense norms. \n\nWe construct a new multimodal benchmark for studying commonsense norms: NormLens. NormLens consists of 10K human judgments accompanied by free-form explanations covering 2K multimodal situations, and serves as a probe to address two questions: (1) to what extent can models align with average human judgment? and (2) how well can models explain their predicted judgments? We find that state-of-the-art model judgments and explanations are not well-aligned with human annotation. Additionally, we present a simple yet effective approach to better align models with humans by distilling social commonsense knowledge from large language models. 
The data and code will be released.", "keywords": "Multimodal;Commonsense;Dataset;Social Norm;Morality", "primary_area": "", "supplementary_material": "", "author": "Seungju Han;Junhyeok Kim;Jack Hessel;Liwei Jiang;Jiwan Chung;Yejin Son;Yejin Choi;Youngjae Yu", "authorids": "~Seungju_Han2;~Junhyeok_Kim1;~Jack_Hessel1;~Liwei_Jiang2;~Jiwan_Chung1;~Yejin_Son3;~Yejin_Choi1;~Youngjae_Yu1", "gender": "M;M;M;F;M;F;F;M", "homepage": "https://seungjuhan.me;https://junhyeok.kim;https://www.jmhessel.com;https://liweijiang.me;https://jiwanchung.github.io/;https://github.com/ozzaney?tab=repositories;https://yejinc.github.io/;https://yj-yu.github.io/home/", "dblp": ";;https://dblp.uni-trier.de/pid/132/5250.html;;277/2798;359/0753;89/579-1;188/6210", "google_scholar": "g_anRqAAAAAJ;lvN7dQEAAAAJ;SxQQ1msAAAAJ;lcPsDgUAAAAJ;https://scholar.google.co.kr/citations?user=l4UBOZAAAAAJ;;vhP-tlcAAAAJ;https://scholar.google.co.kr/citations?user=WDO24ZYAAAAJ", "or_profile": "~Seungju_Han2;~Junhyeok_Kim1;~Jack_Hessel1;~Liwei_Jiang2;~Jiwan_Chung1;~Yejin_Son3;~Yejin_Choi1;~Youngjae_Yu1", "aff": "Seoul National University;Yonsei University;Allen Institute for Artificial Intelligence;University of Washington;Seoul National University;Yonsei University;Department of Computer Science, University of Washington;Allen Institute for Artificial Intelligence", "aff_domain": "snu.ac.kr;yonsei.ac.kr;allenai.org;washington.edu;snu.ac.kr;yonsei.ac.kr;cs.washington.edu;allenai.org", "position": "Undergrad student;MS student;Researcher;PhD student;MS student;MS student;Full Professor;Postdoc", "bibtex": "@inproceedings{\nhan2023reading,\ntitle={Reading Books is Great, But Not if You Are Driving! Visually Grounded Reasoning about Defeasible Commonsense Norms},\nauthor={Seungju Han and Junhyeok Kim and Jack Hessel and Liwei Jiang and Jiwan Chung and Yejin Son and Yejin Choi and Youngjae Yu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=vg55TCMjbC}\n}", "github": "", "project": "", "reviewers": "aeyz;wNre;SVtk", "site": "https://openreview.net/forum?id=vg55TCMjbC", "pdf_size": 0, "rating": "4;4;4", "confidence": "5;3;4", "excitement": "4;3;4", "reproducibility": "4;4;4", "correctness": "4;3;4", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.0, "correctness_avg": 3.6666666666666665, "replies_avg": 9, "authors#_avg": 8, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0002-4012-8979;;;;;", "linkedin": "seungju-han-66b85017a/;;;;chung-jiwan-81231b245/;;;", "aff_unique_index": "0;1;2;3;0;1;3;2", "aff_unique_norm": "Seoul National University;Yonsei University;Allen Institute for Artificial Intelligence;University of Washington", "aff_unique_dep": ";;;", "aff_unique_url": "https://www.snu.ac.kr;https://www.yonsei.ac.kr;https://allenai.org;https://www.washington.edu", "aff_unique_abbr": "SNU;Yonsei;AI2;UW", "aff_campus_unique_index": "1", "aff_campus_unique": ";Seattle", "aff_country_unique_index": "0;0;1;1;0;0;1;1", "aff_country_unique": "South Korea;United States" }, { "id": "vgaJRhYVje", "title": "Coarse-to-Fine Contrastive Learning in Image-Text-Graph Space for Improved Vision-Language Compositionality", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Contrastively trained vision-language models have achieved remarkable progress in vision and language representation learning. 
However, recent research has highlighted severe limitations of these models in their ability to perform compositional reasoning over objects, attributes, and relations. Scene graphs have emerged as an effective way to understand images compositionally. These are graph-structured semantic representations of images that contain objects, their attributes, and relations with other objects in a scene. In this work, we consider the scene graph parsed from text as a proxy for the image scene graph and propose a graph decomposition and augmentation framework along with a coarse-to-fine contrastive learning objective between images and text that aligns sentences of various complexities to the same image. We also introduce novel negative mining techniques in the scene graph space for improving attribute binding and relation understanding. Through extensive experiments, we demonstrate the effectiveness of our approach that significantly improves attribute binding, relation understanding, systematic generalization, and productivity on multiple recently proposed benchmarks (For example, improvements \nup to $\\mathbf{18}$% for systematic generalization, $\\mathbf{16.5}$% for relation understanding over a strong baseline), while achieving similar or better performance than CLIP on various general multimodal tasks.", "keywords": "Vision-Language Compositionality;Systematic Generalization;Vision-Language Contrastive Learning;Multimodal Foundation Models", "primary_area": "", "supplementary_material": "", "author": "Harman Singh;Pengchuan Zhang;Qifan Wang;Mengjiao Wang;Wenhan Xiong;Jingfei Du;Yu Chen", "authorids": "~Harman_Singh1;~Pengchuan_Zhang1;~Qifan_Wang2;~Mengjiao_Wang1;~Wenhan_Xiong1;~Jingfei_Du1;~Yu_Chen5", "gender": "M;M;M;F;M;M;M", "homepage": ";https://pzzhang.github.io/pzzhang/;https://wqfcr.github.io/;;https://xwhan.github.io;;http://academic.hugochan.net", "dblp": "162/5054.html;;33/8610;93/9728-2;203/8542;137/3917;87/1254-22", "google_scholar": "BanlVLYAAAAJ;3VZ_E64AAAAJ;LrSyLosAAAAJ;https://scholar.google.co.uk/citations?user=98J-rNMAAAAJ;;;m6Sj1yoAAAAJ", "or_profile": "~Harman_Singh1;~Pengchuan_Zhang1;~Qifan_Wang2;~Mengjiao_Wang1;~Wenhan_Xiong1;~Jingfei_Du1;~Yu_Chen5", "aff": "Meta;Meta AI;Meta AI;Meta;Meta Facebook;;Meta AI", "aff_domain": "fb.com;meta.com;fb.com;meta.com;fb.com;;fb.com", "position": "AI Resident;Researcher;Principal Researcher;Scientist;Researcher;;Research Scientist", "bibtex": "@inproceedings{\nsingh2023coarsetofine,\ntitle={Coarse-to-Fine Contrastive Learning in Image-Text-Graph Space for Improved Vision-Language Compositionality},\nauthor={Harman Singh and Pengchuan Zhang and Qifan Wang and Mengjiao Wang and Wenhan Xiong and Jingfei Du and Yu Chen},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=vgaJRhYVje}\n}", "github": "", "project": "", "reviewers": "oZZx;JAaq;Ut7T", "site": "https://openreview.net/forum?id=vgaJRhYVje", "pdf_size": 0, "rating": "5;5;5", "confidence": "3;3;4", "excitement": "4;4;3", "reproducibility": "4;4;4", "correctness": "4;4;3", "rating_avg": 5.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.0, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-3970-6276;;0000-0002-7570-5756;0000-0002-4873-5677;;;", "linkedin": "harman-singh-4243ab180/;;;;;;", "aff_unique_index": "0;0;0;0;0;0", 
"aff_unique_norm": "Meta", "aff_unique_dep": "Meta Platforms, Inc.", "aff_unique_url": "https://meta.com", "aff_unique_abbr": "Meta", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "vgg3dKoyDH", "title": "Analyzing Norm Violations in Live-Stream Chat", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Toxic language, such as hate speech, can deter users from participating in online communities and enjoying popular platforms.\nPrevious approaches to detecting toxic language and norm violations have been primarily concerned with conversations from online forums and social media, such as Reddit and Twitter. \nThese approaches are less effective when applied to conversations on live-streaming platforms, such as Twitch and YouTube Live, as each comment is only visible for a limited time and lacks a thread structure that establishes its relationship with other comments. \nIn this work, we share the first NLP study dedicated to detecting norm violations in conversations on live-streaming platforms.\nWe define norm violation categories in live-stream chats and annotate 4,583 moderated comments from Twitch. \nWe articulate several facets of live-stream data that differ from other forums, and demonstrate that existing models perform poorly in this setting. \nBy conducting a user study, we identify the informational context humans use in live-stream moderation, and train models leveraging context to identify norm violations. Our results show that appropriate contextual information can boost moderation performance by 35\\%.", "keywords": "Norm Violation;Toxicity Detection;Live Streaming", "primary_area": "", "supplementary_material": "", "author": "Jihyung Moon;Dong-Ho Lee;Hyundong Justin Cho;Woojeong Jin;Chan Young Park;Minwoo Kim;Jonathan May;Jay Pujara;Sungjoon Park", "authorids": "~Jihyung_Moon1;~Dong-Ho_Lee1;~Hyundong_Justin_Cho1;~Woojeong_Jin1;~Chan_Young_Park1;~Minwoo_Kim5;~Jonathan_May1;~Jay_Pujara1;~Sungjoon_Park1", "gender": "F;M;M;;F;M;M;;M", "homepage": ";https://danny-lee.info;https://justin-cho.com;https://woojeongjin.github.io;https://chan0park.github.io;https://programs.sigchi.org/chi/2019/authors/21440;http://jonmay.net;https://www.jaypujara.org;https://sungjoonpark.github.io", "dblp": ";;263/6759;194/4234;15/480;;00/4758;65/10103;63/1326", "google_scholar": "w5idTSEAAAAJ;oei2TXwAAAAJ;https://scholar.google.com/citations?hl=en;;https://scholar.google.com/citations?hl=en;;tmK5EPEAAAAJ;yvdSr4AAAAAJ;bDihJCQAAAAJ", "or_profile": "~Jihyung_Moon1;~Dong-Ho_Lee1;~Hyundong_Justin_Cho1;~Woojeong_Jin1;~Chan_Young_Park1;~Minwoo_Kim5;~Jonathan_May1;~Jay_Pujara1;~Sungjoon_Park1", "aff": ";Snap Inc.;USC/ISI;University of Southern California;School of Computer Science, Carnegie Mellon University;DATUMO Inc.;USC/ISI;University of Southern California;Korea Advanced Institute of Science & Technology", "aff_domain": ";snapchat.com;isi.edu;usc.edu;cs.cmu.edu;selectstar.ai;isi.edu;usc.edu;kaist.ac.kr", "position": ";Intern;PhD student;PhD student;PhD student;Researcher;Research Scientist;Assistant Professor;PhD student", "bibtex": "@inproceedings{\nmoon2023analyzing,\ntitle={Analyzing Norm Violations in Live-Stream Chat},\nauthor={Jihyung Moon and Dong-Ho Lee and Hyundong Justin Cho and Woojeong Jin and Chan Young Park and Minwoo Kim and Jonathan May and Jay Pujara and Sungjoon Park},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language 
Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=vgg3dKoyDH}\n}", "github": "", "project": "", "reviewers": "9tw6;YGch;HcKC", "site": "https://openreview.net/forum?id=vgg3dKoyDH", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;3;5", "excitement": "4;4;4", "reproducibility": "4;4;4", "correctness": "4;4;5", "rating_avg": 5.0, "confidence_avg": 4.0, "excitement_avg": 4.0, "reproducibility_avg": 4.0, "correctness_avg": 4.333333333333333, "replies_avg": 9, "authors#_avg": 9, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;;0000-0002-5284-477X;0000-0001-6921-1744;", "linkedin": ";;;;;;jonmayjonmay/;pujara;sungjoon-park-815b6456/", "aff_unique_index": "0;1;1;2;3;1;1;4", "aff_unique_norm": "Snap Inc.;University of Southern California;Carnegie Mellon University;DATUMO Inc.;Korea Advanced Institute of Science and Technology", "aff_unique_dep": ";;School of Computer Science;;", "aff_unique_url": "https://www.snapinc.com;https://isi.usc.edu;https://www.cmu.edu;;https://www.kaist.ac.kr", "aff_unique_abbr": "Snap;USC;CMU;;KAIST", "aff_campus_unique_index": "1;2;3;1;2", "aff_campus_unique": ";ISI;Los Angeles;Pittsburgh", "aff_country_unique_index": "0;0;0;0;0;0;0;1", "aff_country_unique": "United States;South Korea" }, { "id": "vjTnfxbkaL", "title": "Hierarchical Enhancement Framework for Aspect-based Argument Mining", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Aspect-Based Argument Mining (ABAM) is a critical task in computational argumentation. Existing methods have primarily treated ABAM as a nested named entity recognition problem, overlooking the need for tailored strategies to effectively address the specific challenges of ABAM tasks. To this end, we propose a layer-based Hierarchical Enhancement Framework (HEF) for ABAM, and introduce three novel components: the Semantic and Syntactic Fusion (SSF) component, the Batch-level Heterogeneous Graph Attention Network (BHGAT) component, and the Span Mask Interactive Attention (SMIA) component. These components serve the purposes of optimizing underlying representations, detecting argument unit stances, and constraining aspect term recognition boundaries, respectively. By incorporating these components, our framework enables better handling of the challenges and improves the performance and accuracy in argument unit and aspect term recognition. 
Experiments on multiple datasets and various tasks verify the effectiveness of the proposed framework and components.", "keywords": "Aspect-based Argument Mining;Nested Named Entity Recognition;Argument Unit Recognition and Classification;Aspect Term Extraction", "primary_area": "", "supplementary_material": "", "author": "Yujie Fu;Yang Li;Suge Wang;Xiaoli Li;Deyu Li;Jian Liao;JianXing Zheng", "authorids": "~Yujie_Fu2;~Yang_Li75;~Suge_Wang1;~Xiaoli_Li1;~Deyu_Li2;~Jian_Liao1;~JianXing_Zheng1", "gender": ";F;F;M;M;M;", "homepage": ";https://jr.sxufe.edu.cn/info/1555/3072.htm;https://cs.sxu.edu.cn/faculty/professor/1430/index.htm;https://personal.ntu.edu.sg/xlli/;https://cs.sxu.edu.cn/faculty/professor/1093/index.htm;http://cs.sxu.edu.cn/faculty/associate_professor/4749/index.htm;", "dblp": ";37/4190-74;56/963.html;l/XiaoliLi.html;;;", "google_scholar": "u9vfyKoAAAAJ;;;E3yQKloAAAAJ;;;", "or_profile": "~Yujie_Fu2;~Yang_Li75;~Suge_Wang1;~Xiaoli_Li1;~Deyu_Li2;~Jian_Liao1;~JianXing_Zheng1", "aff": "Shanxi University;Shanxi University of Finance and Economics;Shanxi University;A*STAR;Shanxi University;Shanxi University;", "aff_domain": "sxu.edu.cn;sxufe.edu.cn;sxu.edu.cn;a-star.edu.sg;sxu.edu.cn;sxu.edu.cn;", "position": "PhD student;Associate Professor;Full Professor;Principal Researcher;Full Professor;Associate Professor;", "bibtex": "@inproceedings{\nfu2023hierarchical,\ntitle={Hierarchical Enhancement Framework for Aspect-based Argument Mining},\nauthor={Yujie Fu and Yang Li and Suge Wang and Xiaoli Li and Deyu Li and Jian Liao and JianXing Zheng},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=vjTnfxbkaL}\n}", "github": "", "project": "", "reviewers": "qcN8;3LjC;1g4G", "site": "https://openreview.net/forum?id=vjTnfxbkaL", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;4", "excitement": "3;3;3", "reproducibility": "4;4;3", "correctness": "3;3;3", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 3.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.0, "replies_avg": 10, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-1837-4970;0000-0002-1553-2937;0000-0002-0762-6562;0000-0003-2489-9404;0000-0002-9385-6873;", "linkedin": ";;;li-xiaoli-41027ba/;;;", "aff_unique_index": "0;1;0;2;0;0", "aff_unique_norm": "Shanxi University;Shanxi University of Finance and Economics;Agency for Science, Technology and Research", "aff_unique_dep": ";;", "aff_unique_url": "http://www.sxu.edu.cn;http://www.sxufe.edu.cn;https://www.a-star.edu.sg", "aff_unique_abbr": "SXU;;A*STAR", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;0;0", "aff_country_unique": "China;Singapore" }, { "id": "vkEYzLIdLX", "title": "Dolphin: A Challenging and Diverse Benchmark for Arabic NLG", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "We present Dolphin, a novel benchmark that addresses the need for a natural language generation (NLG) evaluation framework dedicated to the wide collection of Arabic languages and varieties. The proposed benchmark encompasses a broad range of 13 different NLG tasks, including dialogue generation, question answering, machine translation, summarization, among others. Dolphin comprises a substantial corpus of 40 diverse and representative public datasets across 50 test splits, carefully curated to reflect real-world scenarios and the linguistic richness of Arabic. 
It sets a new standard for evaluating the performance and generalization capabilities of Arabic and multilingual models, promising to enable researchers to push the boundaries of current methodologies. We provide an extensive analysis of Dolphin, highlighting its diversity and identifying gaps in current Arabic NLG research. We also offer a public leaderboard that is both interactive and modular and evaluate several Arabic and multilingual models on our benchmark, allowing us to set strong baselines against which researchers can compare.", "keywords": "Arabic language;Dialectal Arabic;NLG benchmark.", "primary_area": "", "supplementary_material": "", "author": "El Moatez Billah Nagoudi;AbdelRahim A. Elmadany;Ahmed Oumar El-Shangiti;Muhammad Abdul-Mageed", "authorids": "~El_Moatez_Billah_Nagoudi1;~AbdelRahim_A._Elmadany1;~Ahmed_Oumar_El-Shangiti2;~Muhammad_Abdul-Mageed2", "gender": ";;M;", "homepage": ";;https://github.com/ahmedoumar;", "dblp": ";;346/4936;", "google_scholar": ";;on0NSNMAAAAJ;", "or_profile": "~El_Moatez_Billah_Nagoudi1;~AbdelRahim_A._Elmadany1;~Ahmed_Oumar_El-Shangiti2;~Muhammad_Abdul-Mageed2", "aff": ";;Mohamed bin Zayed University of Artificial Intelligence;", "aff_domain": ";;mbzuai.ac.ae;", "position": ";;Researcher;", "bibtex": "@inproceedings{\nnagoudi2023dolphin,\ntitle={Dolphin: A Challenging and Diverse Benchmark for Arabic {NLG}},\nauthor={El Moatez Billah Nagoudi and AbdelRahim A. Elmadany and Ahmed Oumar El-Shangiti and Muhammad Abdul-Mageed},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=vkEYzLIdLX}\n}", "github": "", "project": "", "reviewers": "cCB7;5EDb;rLjw", "site": "https://openreview.net/forum?id=vkEYzLIdLX", "pdf_size": 0, "rating": "4;4;4", "confidence": "5;4;4", "excitement": "3;4;4", "reproducibility": "3;4;5", "correctness": "3;3;4", "rating_avg": 4.0, "confidence_avg": 4.333333333333333, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;", "linkedin": ";;;", "aff_unique_index": "0", "aff_unique_norm": "Mohamed bin Zayed University of Artificial Intelligence", "aff_unique_dep": "", "aff_unique_url": "https://mbzuai.ac.ae", "aff_unique_abbr": "MBZUAI", "aff_country_unique_index": "0", "aff_country_unique": "United Arab Emirates" }, { "id": "voBhcwDyPt", "title": "On the Risk of Misinformation Pollution with Large Language Models", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "We investigate the potential misuse of modern Large Language Models (LLMs) for generating credible-sounding misinformation and its subsequent impact on information-intensive applications, particularly Open-Domain Question Answering (ODQA) systems. We establish a threat model and simulate potential misuse scenarios, both unintentional and intentional, to assess the extent to which LLMs can be utilized to produce misinformation. Our study reveals that LLMs can act as effective misinformation generators, leading to a significant degradation (up to 87%) in the performance of ODQA systems. Moreover, we uncover disparities in the attributes associated with persuading humans and machines, presenting an obstacle to current human-centric approaches to combat misinformation. 
To mitigate the harm caused by LLM-generated misinformation, we propose three defense strategies: misinformation detection, vigilant prompting, and reader ensemble. These approaches have demonstrated promising results, albeit with certain associated costs. Lastly, we discuss the practicality of utilizing LLMs as automatic misinformation generators and provide relevant resources and code to facilitate future research in this area.", "keywords": "Large Language Models;Misinformation;Question Answering", "primary_area": "", "supplementary_material": "", "author": "Yikang Pan;Liangming Pan;Wenhu Chen;Preslav Nakov;Min-Yen Kan;William Yang Wang", "authorids": "~Yikang_Pan1;~Liangming_Pan1;~Wenhu_Chen3;~Preslav_Nakov2;~Min-Yen_Kan1;~William_Yang_Wang2", "gender": "Not Specified;M;M;M;M;M", "homepage": ";https://liangmingpan.bio;https://mbzuai.ac.ae/study/faculty/preslav-nakov/;https://www.comp.nus.edu.sg/~kanmy/;https://wenhuchen.github.io/;https://www.cs.ucsb.edu/~william/", "dblp": ";186/9707;https://dblp.uni-trier.de/pid/19/1947;k/MinYenKan;136/0957.html;08/9282", "google_scholar": ";JcjjOTUAAAAJ;DfXsKZ4AAAAJ;https://scholar.google.com.tw/citations?user=aNVcd3EAAAAJ;https://scholar.google.co.jp/citations?user=U8ShbhUAAAAJ;gf8Ms_8AAAAJ", "or_profile": "~Yikang_Pan1;~Liangming_Pan1;~Preslav_Nakov2;~Min-Yen_Kan1;~wenhu_chen1;~William_Wang1", "aff": "Zhejiang University;University of California, Santa Barbara;Mohamed bin Zayed University of Artificial Intelligence;National University of Singapore;University of Waterloo;UC Santa Barbara", "aff_domain": "zju.edu.cn;ucsb.edu;mbzuai.ac.ae;nus.edu.sg;uwaterloo.ca;ucsb.edu", "position": "Undergrad student;Postdoc;Full Professor;Associate Professor;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\npan2023on,\ntitle={On the Risk of Misinformation Pollution with Large Language Models},\nauthor={Yikang Pan and Liangming Pan and Wenhu Chen and Preslav Nakov and Min-Yen Kan and William Yang Wang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=voBhcwDyPt}\n}", "github": "", "project": "", "reviewers": "k9JU;UJMA;Qkxs", "site": "https://openreview.net/forum?id=voBhcwDyPt", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;4", "excitement": "4;3;3", "reproducibility": "2;4;2", "correctness": "4;3;3", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 2.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0002-3600-1510;;;", "linkedin": "yikang-pan-1759b3262/;;preslavnakov/;;;", "aff_unique_index": "0;1;2;3;4;1", "aff_unique_norm": "Zhejiang University;University of California, Santa Barbara;Mohamed bin Zayed University of Artificial Intelligence;National University of Singapore;University of Waterloo", "aff_unique_dep": ";;;;", "aff_unique_url": "https://www.zju.edu.cn;https://www.ucsb.edu;https://mbzuai.ac.ae;https://www.nus.edu.sg;https://uwaterloo.ca", "aff_unique_abbr": "ZJU;UCSB;MBZUAI;NUS;UW", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Santa Barbara", "aff_country_unique_index": "0;1;2;3;4;1", "aff_country_unique": "China;United States;United Arab Emirates;Singapore;Canada" }, { "id": "vooJHgn1Gm", "title": "Fidelity-Enriched Contrastive Search: Reconciling the Faithfulness-Diversity Trade-Off in Text Generation", "track": "main", "status": "Short Main", "tldr": 
"", "abstract": "In this paper, we address the hallucination problem commonly found in natural language generation tasks. Language models often generate fluent and convincing content but can lack consistency with the provided source, resulting in potential inaccuracies. We propose a new decoding method called Fidelity-Enriched Contrastive Search (FECS), which augments the contrastive search framework with context-aware regularization terms. FECS promotes tokens that are semantically similar to the provided source while penalizing repetitiveness in the generated text. We demonstrate its effectiveness across two tasks prone to hallucination: abstractive summarization and dialogue generation. Results show that FECS consistently enhances faithfulness across various language model sizes while maintaining output diversity comparable to well-performing decoding algorithms.", "keywords": "hallucination;faithfulness;decoding", "primary_area": "", "supplementary_material": "", "author": "Wei-Lin Chen;Cheng-Kuang Wu;Hsin-Hsi Chen;Chung-Chi Chen", "authorids": "~Wei-Lin_Chen1;~Cheng-Kuang_Wu1;~Hsin-Hsi_Chen2;~Chung-Chi_Chen1", "gender": ";M;M;M", "homepage": "https://wlchen0206.github.io/;https://brian-ckwu.github.io/;http://nlg.csie.ntu.edu.tw/advisor.php;https://nlpfin.github.io/", "dblp": "72/7187;88/415;84/3130.html;177/6602", "google_scholar": "https://scholar.google.com.tw/citations?user=Hrbne1wAAAAJ;hc_e7rsAAAAJ;CRth4q4AAAAJ;sJwWSg8AAAAJ", "or_profile": "~Wei-Lin_Chen1;~Cheng-Kuang_Wu1;~Hsin-Hsi_Chen2;~Chung-Chi_Chen1", "aff": "National Taiwan University;National Taiwan University;National Taiwan University;AIST, National Institute of Advanced Industrial Science and Technology", "aff_domain": "ntu.edu.tw;csie.ntu.edu.tw;ntu.edu.tw;aist.go.jp", "position": "MS student;MS student;Full Professor;Researcher", "bibtex": "@inproceedings{\nchen2023fidelityenriched,\ntitle={Fidelity-Enriched Contrastive Search: Reconciling the Faithfulness-Diversity Trade-Off in Text Generation},\nauthor={Wei-Lin Chen and Cheng-Kuang Wu and Hsin-Hsi Chen and Chung-Chi Chen},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=vooJHgn1Gm}\n}", "github": "", "project": "", "reviewers": "cpWD;evYM;2F8i", "site": "https://openreview.net/forum?id=vooJHgn1Gm", "pdf_size": 0, "rating": "4;4;4", "confidence": "5;4;4", "excitement": "3;4;4", "reproducibility": "4;5;4", "correctness": "3;4;4", "rating_avg": 4.0, "confidence_avg": 4.333333333333333, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.333333333333333, "correctness_avg": 3.6666666666666665, "replies_avg": 9, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-0740-0846;0000-0001-9757-9423;0000-0003-3680-9277", "linkedin": ";cheng-kuang-wu-062214219/;;chungchichen/", "aff_unique_index": "0;0;0;1", "aff_unique_norm": "National Taiwan University;National Institute of Advanced Industrial Science and Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.ntu.edu.tw;https://www.aist.go.jp", "aff_unique_abbr": "NTU;AIST", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Taiwan;", "aff_country_unique_index": "0;0;0;1", "aff_country_unique": "China;Japan" }, { "id": "vpkEJM9qYR", "title": "Unveiling the Multi-Annotation Process: Examining the Influence of Annotation Quantity and Instance Difficulty on Model Performance", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "The NLP community 
has long advocated for the construction of multi-annotator datasets to better capture the nuances of language interpretation, subjectivity, and ambiguity. This paper conducts a retrospective study to show how performance scores can vary when a dataset expands from a single annotation per instance to multiple annotations. We propose a novel multi-annotator simulation process to generate datasets with varying annotation budgets. We show that similar datasets with the same annotation budget can lead to varying performance gains. Our findings challenge the popular belief that models trained on multi-annotation examples always lead to better performance than models trained on single or few-annotation examples.", "keywords": "multi-annotation;label-distribution;PVI;annotator-set;$\\mathcal{V}$-Information;entropy;annotation budget;datamaps;cartography", "primary_area": "", "supplementary_material": "", "author": "Pritam Kadasi;Mayank Singh", "authorids": "~Pritam_Kadasi2;~Mayank_Singh1", "gender": "M;M", "homepage": "https://pskadasi.github.io/;https://mayank4490.github.io/", "dblp": "266/8282;96/4770", "google_scholar": "Jwe7-lgAAAAJ;U2NUj90AAAAJ", "or_profile": "~Pritam_Kadasi2;~Mayank_Singh1", "aff": "Indian Institute of Technology, Gandhinagar;Indian Institute of Technology Gandhinagar", "aff_domain": "iitgn.ac.in;iitgn.ac.in", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\nkadasi2023unveiling,\ntitle={Unveiling the Multi-Annotation Process: Examining the Influence of Annotation Quantity and Instance Difficulty on Model Performance},\nauthor={Pritam Kadasi and Mayank Singh},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=vpkEJM9qYR}\n}", "github": "", "project": "", "reviewers": "qogc;KTZY;hZk1", "site": "https://openreview.net/forum?id=vpkEJM9qYR", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;5;4", "excitement": "4;4;3", "reproducibility": "4;4;3", "correctness": "3;3;2", "rating_avg": 4.0, "confidence_avg": 4.333333333333333, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 2.6666666666666665, "replies_avg": 11, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0001-8757-2623", "linkedin": ";mayank-singh-b591a818/", "aff_unique_index": "0;0", "aff_unique_norm": "Indian Institute of Technology Gandhinagar", "aff_unique_dep": "", "aff_unique_url": "https://www.iitgn.ac.in", "aff_unique_abbr": "IITGN", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Gandhinagar", "aff_country_unique_index": "0;0", "aff_country_unique": "India" }, { "id": "vq4BnrPyPb", "title": "Knowledge is a Region in Weight Space for Fine-tuned Language Models", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Research on neural networks has focused on understanding a single model trained on a single dataset. However, relatively little is known about the relationships between different models, particularly those trained or tested on different datasets. We address this by studying how the weight space and the underlying loss landscape of different models are interconnected.\n\nSpecifically, we demonstrate that finetuned models that were optimized for high performance, reside in well-defined regions in weight space, and vice versa -- that any model that resides anywhere in those regions also exhibits high performance. 
Notably, we show that language models that have been finetuned on the same dataset form a tight cluster in the weight space, while models finetuned on different datasets from the same underlying task form a looser cluster. Moreover, traversing around the region between the models leads to new models that perform comparably or even better than models obtained via finetuning, even on tasks that the original models were not finetuned on.\n\nOur findings provide insight into the relationships between models, demonstrating that a model positioned between two similar models can acquire the knowledge of both. We leverage this and design a method for selecting a better model for efficient finetuning. Specifically, we show that starting from the center of the region is as effective, if not more, than using the pretrained model in 11 out of 12 datasets, resulting in an average accuracy improvement of 3.06.", "keywords": "Weight space;loss landscape;loss space;finetuning;fine-tune;loss connectivity;basin;minima", "primary_area": "", "supplementary_material": "", "author": "Almog Gueta;Elad Venezian;Colin Raffel;Noam Slonim;Yoav Katz;Leshem Choshen", "authorids": "~Almog_Gueta1;~Elad_Venezian1;~Colin_Raffel1;~Noam_Slonim1;~Yoav_Katz1;~Leshem_Choshen1", "gender": ";M;;M;M;Not Specified", "homepage": ";;http://colinraffel.com;https://researcher.watson.ibm.com/researcher/view.php?person=il-NOAMS;https://researcher.watson.ibm.com/researcher/view.php?person=il-KATZ;https://ktilana.wixsite.com/leshem-choshen", "dblp": ";206/6812;149/0082;62/7001;40/21;218/5237", "google_scholar": ";;I66ZBYwAAAAJ;https://scholar.google.co.il/citations?user=KjvrNGMAAAAJ;EfW-wnAAAAAJ;https://scholar.google.com/citations?hl=en", "or_profile": "~Almog_Gueta1;~Elad_Venezian1;~Colin_Raffel1;~Noam_Slonim1;~Yoav_Katz1;~Leshem_Choshen1", "aff": "Technion - Israel Institute of Technology, Technion - Israel Institute of Technology;International Business Machines;University of North Carolina, Chapel Hill;International Business Machines;International Business Machines;International Business Machines", "aff_domain": "campus.technion.ac.il;ibm.com;unc.edu;ibm.com;ibm.com;ibm.com", "position": "MS student;Researcher;Assistant Professor;Principal Researcher;IBM;Researcher", "bibtex": "@inproceedings{\ngueta2023knowledge,\ntitle={Knowledge is a Region in Weight Space for Fine-tuned Language Models},\nauthor={Almog Gueta and Elad Venezian and Colin Raffel and Noam Slonim and Yoav Katz and Leshem Choshen},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=vq4BnrPyPb}\n}", "github": "", "project": "", "reviewers": "TEP2;xwZQ;JyVr;UZHb", "site": "https://openreview.net/forum?id=vq4BnrPyPb", "pdf_size": 0, "rating": "4;4;4;4", "confidence": "4;3;2;4", "excitement": "4;4;3;4", "reproducibility": "3;4;4;4", "correctness": "3;3;3;4", "rating_avg": 4.0, "confidence_avg": 3.25, "excitement_avg": 3.75, "reproducibility_avg": 3.75, "correctness_avg": 3.25, "replies_avg": 13, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;0000-0002-0085-6496", "linkedin": "almog-gueta/;;;noam-slonim-28a80b63/;yoav-katz-0326b74/?originalSubdomain=il;leshemchoshen/", "aff_unique_index": "0;1;2;1;1;1", "aff_unique_norm": "Technion - Israel Institute of Technology;International Business Machines Corporation;University of North Carolina", "aff_unique_dep": ";;", "aff_unique_url": 
"https://www.technion.ac.il/en/;https://www.ibm.com;https://www.unc.edu", "aff_unique_abbr": "Technion;IBM;UNC", "aff_campus_unique_index": "1", "aff_campus_unique": ";Chapel Hill", "aff_country_unique_index": "0;1;1;1;1;1", "aff_country_unique": "Israel;United States" }, { "id": "vscmppXqXE", "title": "GEMINI: Controlling The Sentence-Level Summary Style in Abstractive Text Summarization", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Human experts write summaries using different techniques, including extracting a sentence from the document and rewriting it, or fusing various information from the document to abstract it. These techniques are flexible and thus difficult to be imitated by any single method. To address this issue, we propose an adaptive model, GEMINI, that integrates a rewriter and a generator to mimic the sentence rewriting and abstracting techniques, respectively. GEMINI adaptively chooses to rewrite a specific document sentence or generate a summary sentence from scratch. Experiments demonstrate that our adaptive approach outperforms the pure abstractive and rewriting baselines on three benchmark datasets, achieving the best results on WikiHow. Interestingly, empirical results show that the human summary styles of summary sentences are consistently predictable given their context. We release our code and model at \\url{https://github.com/baoguangsheng/gemini}.", "keywords": "Text Summarization;Summary Style", "primary_area": "", "supplementary_material": "", "author": "Guangsheng Bao;Zebin Ou;Yue Zhang", "authorids": "~Guangsheng_Bao1;~Zebin_Ou1;~Yue_Zhang7", "gender": "M;;M", "homepage": "https://baoguangsheng.github.io/;;http://frcchang.github.io", "dblp": "276/0515;195/1849;47/722-4", "google_scholar": "cxPJx2kAAAAJ;znJISzAAAAAJ;", "or_profile": "~Guangsheng_Bao1;~Zebin_Ou1;~Yue_Zhang7", "aff": "Westlake University;Westlake University;Westlake University", "aff_domain": "westlake.edu.cn;westlake.edu.cn;westlake.edu.cn", "position": "PhD student;Researcher;Full Professor", "bibtex": "@inproceedings{\nbao2023gemini,\ntitle={{GEMINI}: Controlling The Sentence-Level Summary Style in Abstractive Text Summarization},\nauthor={Guangsheng Bao and Zebin Ou and Yue Zhang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=vscmppXqXE}\n}", "github": "", "project": "", "reviewers": "okNv;nZqP;EuEc", "site": "https://openreview.net/forum?id=vscmppXqXE", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;4", "excitement": "4;3;4", "reproducibility": "4;4;4", "correctness": "5;3;3", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.0, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0003-3815-3988;;0000-0002-5214-2268", "linkedin": ";;", "aff_unique_index": "0;0;0", "aff_unique_norm": "Westlake University", "aff_unique_dep": "", "aff_unique_url": "https://www.westlake.edu.cn", "aff_unique_abbr": "WU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "vtC3sLXjDY", "title": "How Reliable Are AI-Generated-Text Detectors? 
An Assessment Framework Using Evasive Soft Prompts", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "In recent years, there has been a rapid proliferation of AI-generated text, primarily driven by the release of powerful pre-trained language models (PLMs). To address the issue of misuse associated with AI-generated text, various high-performing detectors have been developed, including the OpenAI detector and the Stanford DetectGPT. In our study, we ask how reliable these detectors are. We answer the question by designing a novel approach that can prompt any PLM to generate text that evades these high-performing detectors. The proposed approach suggests a universal evasive prompt, a novel type of soft prompt, which guides PLMs in producing \"human-like\" text that can mislead the detectors. The novel universal evasive prompt is achieved in two steps: First, we create an evasive soft prompt tailored to a specific PLM through prompt tuning; and then, we leverage the transferability of soft prompts to transfer the learned evasive soft prompt from one PLM to another. Employing multiple PLMs in various writing tasks, we conduct extensive experiments to evaluate the efficacy of the evasive soft prompts in their evasion of state-of-the-art detectors.", "keywords": "AI-generated-text detection;soft prompts;Large language models", "primary_area": "", "supplementary_material": "", "author": "Tharindu Sandaruwan Kumarage;Paras Sheth;Raha Moraffah;Joshua Garland;huan liu", "authorids": "~Tharindu_Sandaruwan_Kumarage1;~Paras_Sheth1;~Raha_Moraffah1;~Joshua_Garland1;~huan_liu1", "gender": "M;M;F;M;", "homepage": ";;https://rmoraffa.github.io/;https://joshuagarland.com/;", "dblp": "217/2805;;200/2411;43/10316;", "google_scholar": "esDfeWQAAAAJ;pNWixdQAAAAJ;https://scholar.google.com/citations?hl=en;xAwia0YAAAAJ;", "or_profile": "~Tharindu_Sandaruwan_Kumarage1;~Paras_Sheth1;~Raha_Moraffah1;~Joshua_Garland1;~huan_liu1", "aff": "Arizona State University;Arizona State University;Arizona State University;Arizona State University;", "aff_domain": "asu.edu;asu.edu;asu.edu;asu.edu;", "position": "PhD student;PhD student;PhD student;Associate Professor;", "bibtex": "@inproceedings{\nkumarage2023how,\ntitle={How Reliable Are {AI}-Generated-Text Detectors? 
An Assessment Framework Using Evasive Soft Prompts},\nauthor={Tharindu Sandaruwan Kumarage and Paras Sheth and Raha Moraffah and Joshua Garland and huan liu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=vtC3sLXjDY}\n}", "github": "", "project": "", "reviewers": "wSah;FphE;u6Zn", "site": "https://openreview.net/forum?id=vtC3sLXjDY", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;3;3", "excitement": "3;4;3", "reproducibility": "4;4;4", "correctness": "3;4;3", "rating_avg": 3.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-9148-0710;;;0000-0002-6724-2755;", "linkedin": "tskumarage/;;;;", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Arizona State University", "aff_unique_dep": "", "aff_unique_url": "https://www.asu.edu", "aff_unique_abbr": "ASU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "vtqfPW6OSm", "title": "Linear-Time Modeling of Linguistic Structure: An Order-Theoretic Perspective", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Tasks that model the relation between pairs of tokens in a string are a vital part of understanding natural language.\nSuch tasks, in general, require exhaustive pair-wise comparisons of tokens, thus having a quadratic runtime complexity in the length of the string.\nWe show that these exhaustive comparisons can be avoided, and, moreover, the complexity of such tasks can be reduced to linear by casting the relation between tokens as a partial order over the string.\nOur method predicts real numbers for each token in a string in parallel and sorts the tokens accordingly, resulting in total orders of the tokens in the string.\nEach total order implies a set of arcs oriented from smaller to greater tokens, sorted by their predicted numbers.\nThe intersection of total orders results in a partial order over the set of tokens in the string, which is then decoded into a directed graph representing the desired linguistic structure.\nOur experiments on dependency parsing and coreference resolution show that our method achieves state-of-the-art or comparable performance.\nMoreover, the linear complexity and parallelism of our method double the speed of graph-based coreference resolution models, and bring a 10-times speed-up over graph-based dependency parsers.", "keywords": "structured prediction;dependency parsing;coreference resolution", "primary_area": "", "supplementary_material": "", "author": "Tianyu Liu;Afra Amini;Mrinmaya Sachan;Ryan Cotterell", "authorids": "~Tianyu_Liu5;~Afra_Amini1;~Mrinmaya_Sachan3;~Ryan_Cotterell1", "gender": ";F;;", "homepage": ";;;", "dblp": ";270/4959;;", "google_scholar": ";;;", "or_profile": "~Tianyu_Liu5;~Afra_Amini1;~Mrinmaya_Sachan3;~Ryan_Cotterell1", "aff": ";Research, Google;;", "aff_domain": ";research.google.com;;", "position": ";Intern;;", "bibtex": "@inproceedings{\nliu2023lineartime,\ntitle={Linear-Time Modeling of Linguistic Structure: An Order-Theoretic Perspective},\nauthor={Tianyu Liu and Afra Amini and Mrinmaya Sachan and Ryan Cotterell},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language 
Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=vtqfPW6OSm}\n}", "github": "", "project": "", "reviewers": "uTtf;esuW;JosL", "site": "https://openreview.net/forum?id=vtqfPW6OSm", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;4;3", "excitement": "5;5;5", "reproducibility": "4;4;4", "correctness": "5;5;5", "rating_avg": 5.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 5.0, "reproducibility_avg": 4.0, "correctness_avg": 5.0, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;", "linkedin": ";afraamini;;", "aff_unique_index": "0", "aff_unique_norm": "Google", "aff_unique_dep": "Google Research", "aff_unique_url": "https://research.google", "aff_unique_abbr": "Google", "aff_campus_unique_index": "0", "aff_campus_unique": "Mountain View", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "id": "vuabr8zbCq", "title": "Improving the Robustness of Summarization Models by Detecting and Removing Input Noise", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "The evaluation of abstractive summarization models typically uses test data that is identically distributed as training data. In real-world practice, documents to be summarized may contain input noise caused by text extraction artifacts or data pipeline bugs. The robustness of model performance under distribution shift caused by such noise is relatively under studied. We present a large empirical study quantifying the sometimes severe loss in performance \u2013 up to 12 ROUGE-1 points \u2013 from different types of input noise for a range of datasets and model sizes. We then propose a light-weight method for detecting and removing such noise in the input during model inference without requiring any extra training, auxiliary models, or even prior knowledge of the type of noise. 
Our proposed approach effectively mitigates the loss in performance, recovering a large fraction of the performance drop, sometimes as large as 11 ROUGE-1 points.", "keywords": "summarization;robustness to noise;safety in ML", "primary_area": "", "supplementary_material": "", "author": "Kundan Krishna;Yao Zhao;Jie Ren;Balaji Lakshminarayanan;Jiaming Luo;Mohammad Saleh;Peter J Liu", "authorids": "~Kundan_Krishna1;~Yao_Zhao5;~Jie_Ren2;~Balaji_Lakshminarayanan1;~Jiaming_Luo2;~Mohammad_Saleh1;~Peter_J_Liu1", "gender": "M;;F;M;;;", "homepage": "https://kkrishna.in/;;;http://www.gatsby.ucl.ac.uk/~balaji/;;;http://www.peterjliu.com", "dblp": "207/7773.html;;;71/8324;41/4878;;190/7667", "google_scholar": "0d59fEcAAAAJ;p7L3HrMAAAAJ;https://scholar.google.com/citations?hl=en;QYn8RbgAAAAJ;;MmX7K38AAAAJ;", "or_profile": "~Kundan_Krishna1;~Yao_Zhao5;~Jie_Ren2;~Balaji_Lakshminarayanan1;~Jiaming_Luo2;~Mohammad_Saleh1;~Peter_J_Liu1", "aff": "Carnegie Mellon University;Google;Google;Google Brain;Google;;Google Brain", "aff_domain": "cmu.edu;google.com;google.com;google.com;google.com;;google.com", "position": "PhD student;Researcher;Research Scientist;Research Scientist;Researcher;;Research Scientist", "bibtex": "@inproceedings{\nkrishna2023improving,\ntitle={Improving the Robustness of Summarization Models by Detecting and Removing Input Noise},\nauthor={Kundan Krishna and Yao Zhao and Jie Ren and Balaji Lakshminarayanan and Jiaming Luo and Mohammad Saleh and Peter J Liu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=vuabr8zbCq}\n}", "github": "", "project": "", "reviewers": "hpax;PApz;vmhZ", "site": "https://openreview.net/forum?id=vuabr8zbCq", "pdf_size": 0, "rating": "2;2;2", "confidence": "4;4;4", "excitement": "3;4;4", "reproducibility": "2;4;3", "correctness": "3;4;2", "rating_avg": 2.0, "confidence_avg": 4.0, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.0, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;;", "linkedin": ";;;;;mohammad-saleh-18a56b155;p3t3rliu", "aff_unique_index": "0;1;1;1;1;1", "aff_unique_norm": "Carnegie Mellon University;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.cmu.edu;https://www.google.com", "aff_unique_abbr": "CMU;Google", "aff_campus_unique_index": "1;1;1;1;1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "vvnUi75U9i", "title": "Is Explanation the Cure? Misinformation Mitigation in the Short Term and Long Term", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "With advancements in natural language processing (NLP) models, automatic explanation generation has been proposed to mitigate misinformation on social media platforms in addition to adding warning labels to identified fake news. While many researchers have focused on generating good explanations, how these explanations can really help humans combat fake news is under-explored. In this study, we compare the effectiveness of a warning label and the state-of-the-art counterfactual explanations generated by GPT-4 in debunking misinformation. 
In a two-wave, online human-subject study, participants (N = 215) were randomly assigned to a control group in which false contents are shown without any intervention, a warning tag group in which the false claims were labeled, or an explanation group in which the false contents were accompanied by GPT-4 generated explanations. Our results show that both interventions significantly decrease participants\u2019 self-reported belief in fake claims in an equivalent manner for the short-term and long-term. We discuss the implications of our findings and directions for future NLP-based misinformation debunking strategies.", "keywords": "Fake news debunking strategy;misinformation;Counterfactual Explanation;Natural Language Generation;Warning Tag;Longterm study", "primary_area": "", "supplementary_material": "", "author": "Yi-Li Hsu;Shih-Chieh Dai;Aiping Xiong;Lun-Wei Ku", "authorids": "~Yi-Li_Hsu1;~Shih-Chieh_Dai2;~Aiping_Xiong1;~Lun-Wei_Ku1", "gender": ";M;;F", "homepage": ";https://sjdai.github.io;;http://www.lunweiku.com/", "dblp": ";179/8789;;82/2054", "google_scholar": ";4ze3U6AAAAAJ;;SzcLXlkAAAAJ", "or_profile": "~Yi-Li_Hsu1;~Shih-Chieh_Dai2;~Aiping_Xiong1;~Lun-Wei_Ku1", "aff": ";University of Texas at Austin;;Academia Sinica", "aff_domain": ";utexas.edu;;sinica.edu.tw", "position": ";MS student;;Researcher", "bibtex": "@inproceedings{\nhsu2023is,\ntitle={Is Explanation the Cure? Misinformation Mitigation in the Short Term and Long Term},\nauthor={Yi-Li Hsu and Shih-Chieh Dai and Aiping Xiong and Lun-Wei Ku},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=vvnUi75U9i}\n}", "github": "", "project": "", "reviewers": "1tRC;fdhr;whUc", "site": "https://openreview.net/forum?id=vvnUi75U9i", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;3;4", "excitement": "2;4;3", "reproducibility": "1;3;3", "correctness": "2;3;4", "rating_avg": 3.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.0, "reproducibility_avg": 2.3333333333333335, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-5439-3917;;0000-0003-2691-5404", "linkedin": ";scdai;;lun-wei-ku/", "aff_unique_index": "0;1", "aff_unique_norm": "University of Texas at Austin;Academia Sinica", "aff_unique_dep": ";", "aff_unique_url": "https://www.utexas.edu;https://www.sinica.edu.tw", "aff_unique_abbr": "UT Austin;Academia Sinica", "aff_campus_unique_index": "0;1", "aff_campus_unique": "Austin;Taiwan", "aff_country_unique_index": "0;1", "aff_country_unique": "United States;China" }, { "id": "w3hL7wFgb3", "title": "We're Afraid Language Models Aren't Modeling Ambiguity", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Ambiguity is an intrinsic feature of natural language. Managing ambiguity is a key part of human language understanding, allowing us to anticipate misunderstanding as communicators and revise our interpretations as listeners. As language models are increasingly employed as dialogue interfaces and writing aids, handling ambiguous language is critical to their success. We capture ambiguity in a sentence through its effect on entailment relations with another sentence, and collect AmbiEnt, a linguist-annotated benchmark of 1,645 examples with diverse kinds of ambiguity. We design a suite of tests based on AmbiEnt, presenting the first evaluation of pretrained LMs to recognize ambiguity and disentangle possible meanings. 
We find that the task remains extremely challenging, including for GPT-4, whose generated disambiguations are considered correct only 32% of the time in crowdworker evaluation, compared to 90% for disambiguations in our dataset. Finally, to illustrate the value of ambiguity-sensitive tools, we show that a multilabel NLI model can flag political claims in the wild that are misleading due to ambiguity. We encourage the field to rediscover the importance of ambiguity for NLP.", "keywords": "evaluation;semantics;ambiguity", "primary_area": "", "supplementary_material": "", "author": "Alisa Liu;Zhaofeng Wu;Julian Michael;Alane Suhr;Peter West;Alexander Koller;Swabha Swayamdipta;Noah A. Smith;Yejin Choi", "authorids": "~Alisa_Liu1;~Zhaofeng_Wu1;~Julian_Michael1;~Alane_Suhr1;~Peter_West1;~Alexander_Koller2;~Swabha_Swayamdipta1;~Noah_A._Smith2;~Yejin_Choi1", "gender": "F;;M;Not Specified;M;;F;;F", "homepage": "https://alisawuffles.github.io/;https://zhaofengwu.github.io/;https://julianmichael.org;http://www.alanesuhr.com;https://peterwestai.notion.site/;;http://swabhs.com/;;https://yejinc.github.io/", "dblp": ";168/7994.html;185/0981;203/9306;179/4587;;121/2036;;89/579-1", "google_scholar": "3-lTFAwAAAAJ;53baCywAAAAJ;9DDOHR8AAAAJ;daslsUkAAAAJ;https://scholar.google.ca/citations?user=9ubCBYwAAAAJ;;3uTVQt0AAAAJ;;vhP-tlcAAAAJ", "or_profile": "~Alisa_Liu1;~Zhaofeng_Wu1;~Julian_Michael1;~Alane_Suhr1;~Peter_West1;~Alexander_Koller2;~Swabha_Swayamdipta1;~Noah_A._Smith2;~Yejin_Choi1", "aff": "Google;Massachusetts Institute of Technology;New York University;Allen Institute for Artificial Intelligence;Allen Institute for Artificial Intelligence;;University of Southern California;;Department of Computer Science, University of Washington", "aff_domain": "google.com;mit.edu;nyu.edu;allenai.org;allenai.org;;usc.edu;;cs.washington.edu", "position": "Intern;PhD student;Postdoc;Postdoc;Intern;;Assistant Professor;;Full Professor", "bibtex": "@inproceedings{\nliu2023were,\ntitle={We're Afraid Language Models Aren't Modeling Ambiguity},\nauthor={Alisa Liu and Zhaofeng Wu and Julian Michael and Alane Suhr and Peter West and Alexander Koller and Swabha Swayamdipta and Noah A. 
Smith and Yejin Choi},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=w3hL7wFgb3}\n}", "github": "", "project": "", "reviewers": "LLmw;CZrE;Eef3;4NEm", "site": "https://openreview.net/forum?id=w3hL7wFgb3", "pdf_size": 0, "rating": "4;4;4;4", "confidence": "2;4;3;4", "excitement": "4;5;4;4", "reproducibility": "4;4;5;5", "correctness": "4;5;3;4", "rating_avg": 4.0, "confidence_avg": 3.25, "excitement_avg": 4.25, "reproducibility_avg": 4.5, "correctness_avg": 4.0, "replies_avg": 14, "authors#_avg": 9, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0002-5358-3102;;;;0000-0002-5851-8254;;", "linkedin": ";zhaofengwu/;;;;;swabhaswayamdipta;;", "aff_unique_index": "0;1;2;3;3;4;5", "aff_unique_norm": "Google;Massachusetts Institute of Technology;New York University;Allen Institute for Artificial Intelligence;University of Southern California;University of Washington", "aff_unique_dep": "Google;;;;;Department of Computer Science", "aff_unique_url": "https://www.google.com;https://web.mit.edu;https://www.nyu.edu;https://allenai.org;https://www.usc.edu;https://www.washington.edu", "aff_unique_abbr": "Google;MIT;NYU;AI2;USC;UW", "aff_campus_unique_index": "0;2;3", "aff_campus_unique": "Mountain View;;Los Angeles;Seattle", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "w4FwmICSHZ", "title": "Multitask Multimodal Prompted Training for Interactive Embodied Task Completion", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Interactive and embodied tasks pose at least two fundamental challenges to existing Vision \\& Language (VL) models, including 1) grounding language in trajectories of actions and observations, and 2) referential disambiguation. To tackle these challenges, we propose an Embodied MultiModal Agent (EMMA): a unified encoder-decoder model that reasons over images and trajectories, and casts action prediction as multimodal text generation. By unifying all tasks as text generation, EMMA learns a language of actions which facilitates transfer across tasks. Different to previous modular approaches with independently trained components, we use a single multitask model where each task contributes to goal completion. 
EMMA performs on par with similar models on several VL benchmarks and sets a new state-of-the-art performance (36.81\\% success rate) on the Dialog-guided Task Completion (DTC), a benchmark to evaluate dialog-guided agents in the Alexa Arena.", "keywords": "Vision and Language;Embodied AI;Natural Language Interaction", "primary_area": "", "supplementary_material": "", "author": "Georgios Pantazopoulos;Malvina Nikandrou;Amit Parekh;Bhathiya Hemanthage;Arash Eshghi;Ioannis Konstas;Verena Rieser;Oliver Lemon;Alessandro Suglia", "authorids": "~Georgios_Pantazopoulos1;~Malvina_Nikandrou1;~Amit_Parekh1;~Bhathiya_Hemanthage1;~Arash_Eshghi1;~Ioannis_Konstas1;~Verena_Rieser1;~Oliver_Lemon1;~Alessandro_Suglia1", "gender": ";F;;M;M;M;F;M;M", "homepage": ";;;https://www.edinburgh-robotics.org/students/supun-bhathiya-hemanthage;https://sites.google.com/site/araesh81/;;https://sites.google.com/site/verenateresarieser/home;https://sites.google.com/site/olemon/;https://alesuglia.github.io/", "dblp": ";263/4644;;;58/3222;69/241;75/5602;36/6352;184/4588", "google_scholar": "AUFTexwAAAAJ;f0Wu__cAAAAJ;;https://scholar.google.co.uk/citations?hl=en;https://scholar.google.co.uk/citations?user=yCku-o8AAAAJ;FAJSqSkjAoIC;https://scholar.google.co.uk/citations?hl=en;https://scholar.google.com/citations?hl=en;429MAoUAAAAJ", "or_profile": "~Georgios_Pantazopoulos1;~Malvina_Nikandrou1;~Amit_Parekh1;~Bhathiya_Hemanthage1;~Arash_Eshghi1;~Ioannis_Konstas1;~Verena_Rieser1;~Oliver_Lemon1;~Alessandro_Suglia1", "aff": "Heriot-Watt University;Heriot-Watt University;;University of Edinburgh, University of Edinburgh;Heriot-Watt University;Heriot-Watt University;Heriot-Watt University;Heriot-Watt University;Heriot-Watt University", "aff_domain": "hw.ac.uk;hw.ac.uk;;ed.ac.uk;hw.ac.uk;hw.ac.uk;hw.ac.uk;hw.ac.uk;hw.ac.uk", "position": "PhD student;PhD student;;PhD student;Assistant Professor;Associate Professor;Full Professor;Full Professor;Postdoc", "bibtex": "@inproceedings{\npantazopoulos2023multitask,\ntitle={Multitask Multimodal Prompted Training for Interactive Embodied Task Completion},\nauthor={Georgios Pantazopoulos and Malvina Nikandrou and Amit Parekh and Bhathiya Hemanthage and Arash Eshghi and Ioannis Konstas and Verena Rieser and Oliver Lemon and Alessandro Suglia},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=w4FwmICSHZ}\n}", "github": "", "project": "", "reviewers": "eEZM;GWnB;GKE9", "site": "https://openreview.net/forum?id=w4FwmICSHZ", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;3;3", "excitement": "4;4;3", "reproducibility": "5;3;2", "correctness": "4;4;3", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 9, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0000-0001-9516-2026;;;;0000-0001-9497-4743;0000-0002-3177-5197", "linkedin": ";;;https://www.linkedin.com/mwlite/in/bhathiyasupun;;;verena-rieser-3590b86/;olemon/;alessandrosuglia/", "aff_unique_index": "0;0;1;0;0;0;0;0", "aff_unique_norm": "Heriot-Watt University;University of Edinburgh", "aff_unique_dep": ";", "aff_unique_url": "https://www.hw.ac.uk;https://www.ed.ac.uk", "aff_unique_abbr": "HWU;Edinburgh", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "United Kingdom" }, { "id": "w4YwLzuD29", 
"title": "Selecting Key Views for Zero-Shot Entity Linking", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Entity linking, which aligns mentions in the text to entities in knowledge bases, is essential for many natural language processing tasks. Considering the real-world scenarios, recent research hotspot of entity linking has focused on the zero-shot setting, where mentions need to link to unseen entities and only the description of each entity is provided. This task challenges the language understanding ability of models to capture the coherence evidence between the mention context and entity description. However, entity descriptions often contain rich information from multiple views, and a mention with context only relates to a small part of the information. Other irrelevant information will introduce noise, which interferes with models to make the right judgments. Furthermore, the existence of these information also makes it difficult to synthesize key information. To solve these problems, we select key views from descriptions and propose a KVZEL framework for zero-shot entity linking. Specifically, our KVZEL first adopts unsupervised clustering to form sub views. Then, it employs a mention-aware key views selection module to iteratively accumulate mention-focused views. This puts emphasis on capturing mention-related information and allows long-range key information integration. Finally, we aggregate key views to make the final decision. Experimental results show the effectiveness of our KVZEL and it achieves the new state-of-the-art on the zero-shot entity linking dataset.", "keywords": "Zero-shot entity linking;Multi-view", "primary_area": "", "supplementary_material": "", "author": "Xuhui Sui;Ying Zhang;Kehui Song;Baohang Zhou;Xiaojie Yuan;Wensheng Zhang", "authorids": "~Xuhui_Sui1;~Ying_Zhang7;~Kehui_Song1;~Baohang_Zhou1;~Xiaojie_Yuan1;~Wensheng_Zhang5", "gender": ";F;F;M;;M", "homepage": "https://www.linkedin.com/in/%E6%97%AD%E8%BE%89-%E9%9A%8B-0305b334b/;https://dbis.nankai.edu.cn/2023/0322/c12139a506904/page.htm;;https://scholar.google.com/citations?user=U_-raXAAAAAJ;https://dbis.nankai.edu.cn/2023/0322/c12139a506919/page.htm;https://people.ucas.ac.cn/~wenshengzhang", "dblp": "321/6900.html;13/6769-15;197/1051.html;284/1471.html;79/2280;94/6627-2.html/", "google_scholar": ";;;U_-raXAAAAAJ;;", "or_profile": "~Xuhui_Sui1;~Ying_Zhang7;~Kehui_Song1;~Baohang_Zhou1;~Xiaojie_Yuan1;~Wensheng_Zhang5", "aff": "Nankai University;Nankai University;Nankai University;Nankai University;Nankai University;Institute of Automation, Chinese Academy of Sciences", "aff_domain": "nankai.edu.cn;nankai.edu.cn;nankai.edu.cn;nankai.edu.cn;nankai.edu.cn;ia.ac.cn", "position": "PhD student;Full Professor;Postdoc;PhD student;Full Professor;Full Professor", "bibtex": "@inproceedings{\nsui2023selecting,\ntitle={Selecting Key Views for Zero-Shot Entity Linking},\nauthor={Xuhui Sui and Ying Zhang and Kehui Song and Baohang Zhou and Xiaojie Yuan and Wensheng Zhang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=w4YwLzuD29}\n}", "github": "", "project": "", "reviewers": "zpya;EoeX;7XKJ", "site": "https://openreview.net/forum?id=w4YwLzuD29", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;3;4", "excitement": "3;3;4", "reproducibility": "3;3;4", "correctness": "3;3;4", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 
3.3333333333333335, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0001-5386-9912;0000-0003-4906-5828;;;0000-0002-5876-6856;0000-0003-0752-941X", "linkedin": ";;;;;", "aff_unique_index": "0;0;0;0;0;1", "aff_unique_norm": "Nankai University;Chinese Academy of Sciences", "aff_unique_dep": ";Institute of Automation", "aff_unique_url": "http://www.nankai.edu.cn;http://www.ia.cas.cn", "aff_unique_abbr": "NKU;CAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "w8LoOWsbU7", "title": "Learning Language-guided Adaptive Hyper-modality Representation for Multimodal Sentiment Analysis", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Though Multimodal Sentiment Analysis (MSA) proves effective by utilizing rich information from multiple sources (*e.g.,* language, video, and audio), the potential sentiment-irrelevant and conflicting information across modalities may hinder the performance from being further improved. To alleviate this, we present Adaptive Language-guided Multimodal Transformer (ALMT), which incorporates an Adaptive Hyper-modality Learning (AHL) module to learn an irrelevance/conflict-suppressing representation from visual and audio features under the guidance of language features at different scales. With the obtained hyper-modality representation, the model can obtain a complementary and joint representation through multimodal fusion for effective MSA. In practice, ALMT achieves state-of-the-art performance on several popular datasets (*e.g.,* MOSI, MOSEI and CH-SIMS) and an abundance of ablation demonstrates the validity and necessity of our irrelevance/conflict suppression mechanism.", "keywords": "multimodal sentiment analysis;multimodal representation learning", "primary_area": "", "supplementary_material": "", "author": "Haoyu Zhang;Yu Wang;Guanghao Yin;Kejun Liu;Yuanyuan Liu;Tianshu Yu", "authorids": "~Haoyu_Zhang5;~Yu_Wang64;~Guanghao_Yin2;~Kejun_Liu1;~Yuanyuan_Liu4;~Tianshu_Yu2", "gender": ";F;;F;F;M", "homepage": ";;;;;https://mypage.cuhk.edu.cn/academics/yutianshu/", "dblp": ";02/5889;247/1052;;97/2119-4;152/6675", "google_scholar": ";;;https://scholar.google.com.tw/citations?hl=zh-TW;o8hLiIcAAAAJ;MTHO7DsAAAAJ", "or_profile": "~Haoyu_Zhang5;~Yu_Wang64;~Guanghao_Yin2;~Kejun_Liu1;~Yuanyuan_Liu4;~Tianshu_Yu2", "aff": ";China University of Geosciences Wuhan;China University of Geosciences Wuhan;China University of Geosciences Wuhan;China University of Geosciences (Wuhan);Chinese University of Hong Kong (Shenzhen)", "aff_domain": ";cug.edu.cn;cug.edu.cn;cug.edu.cn;cug.edu.cn;cuhk.edu.cn", "position": ";PhD student;MS student;MS student;Associate Professor;Assistant Professor", "bibtex": "@inproceedings{\nzhang2023learning,\ntitle={Learning Language-guided Adaptive Hyper-modality Representation for Multimodal Sentiment Analysis},\nauthor={Haoyu Zhang and Yu Wang and Guanghao Yin and Kejun Liu and Yuanyuan Liu and Tianshu Yu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=w8LoOWsbU7}\n}", "github": "", "project": "", "reviewers": "PJLs;WX81;JSNF", "site": "https://openreview.net/forum?id=w8LoOWsbU7", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;3;4", "excitement": "4;4;4", "reproducibility": "4;4;4", "correctness": "4;4;4", "rating_avg": 4.0, "confidence_avg": 
3.3333333333333335, "excitement_avg": 4.0, "reproducibility_avg": 4.0, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0001-8873-4778;0000-0001-5036-6439;;0000-0002-1913-0089;0000-0002-6537-1924", "linkedin": ";;;;;", "aff_unique_index": "0;0;0;0;1", "aff_unique_norm": "China University of Geosciences;Chinese University of Hong Kong", "aff_unique_dep": ";", "aff_unique_url": "http://www.cug.edu.cn/;https://www.cuhk.edu.cn", "aff_unique_abbr": "CUG;CUHK", "aff_campus_unique_index": "0;0;0;0;1", "aff_campus_unique": "Wuhan;Shenzhen", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "wDfXP6uAkR", "title": "Towards LLM-driven Dialogue State Tracking", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Dialogue State Tracking (DST) is of paramount importance in ensuring accurate tracking of user goals and system actions within task-oriented dialogue systems. The emergence of large language models (LLMs) such as GPT3 and ChatGPT has sparked considerable interest in assessing their efficacy across diverse applications. In this study, we conduct an initial examination of ChatGPT's capabilities in DST. Our evaluation uncovers the exceptional performance of ChatGPT in this task, offering valuable insights to researchers regarding its capabilities and providing useful directions for designing and enhancing dialogue systems. Despite its impressive performance, ChatGPT has significant limitations including its closed-source nature, request restrictions, raising data privacy concerns, and lacking local deployment capabilities. To address these concerns, we present LDST, an LLM-driven DST framework based on smaller, open-source foundation models. By utilizing a novel domain-slot instruction tuning method, LDST achieves performance on par with ChatGPT. Comprehensive evaluations across three distinct experimental settings, we find that LDST exhibits remarkable performance improvements in both zero-shot and few-shot setting compared to previous SOTA methods. 
The source code is provided for reproducibility.", "keywords": "dialogue state tracking;large language models", "primary_area": "", "supplementary_material": "", "author": "Yujie Feng;ZEXIN LU;Bo LIU;Li-Ming Zhan;Xiao-Ming Wu", "authorids": "~Yujie_Feng1;~ZEXIN_LU1;~Bo_LIU28;~Li-Ming_Zhan1;~Xiao-Ming_Wu1", "gender": "M;M;M;M;F", "homepage": ";;;https://github.com/xiaojimi;http://www4.comp.polyu.edu.hk/~csxmwu/", "dblp": "116/3380;;;255/5615;98/2898-3", "google_scholar": "https://scholar.google.com.hk/citations?hl=zh-CN;https://scholar.google.com/citations?hl=zh-CN;UVwZ_6YAAAAJ;RrYbWIYAAAAJ;3KbaUFkAAAAJ", "or_profile": "~Yujie_Feng1;~ZEXIN_LU1;~Bo_LIU28;~Li-Ming_Zhan1;~Xiao-Ming_Wu1", "aff": "Hong Kong Polytechnic University;Hong Kong Polytechnic University;Hong Kong Polytechnic University;The Hong Kong Polytechnic University;Hong Kong Polytechnic University", "aff_domain": "polyu.edu.hk;polyu.edu.hk;polyu.edu.hk;polyu.edu.hk;polyu.edu.hk", "position": "PhD student;Postdoc;PhD student;PhD student;Associate Professor", "bibtex": "@inproceedings{\nfeng2023towards,\ntitle={Towards {LLM}-driven Dialogue State Tracking},\nauthor={Yujie Feng and ZEXIN LU and Bo LIU and Li-Ming Zhan and Xiao-Ming Wu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=wDfXP6uAkR}\n}", "github": "", "project": "", "reviewers": "GUBv;Qbz6;1Gie", "site": "https://openreview.net/forum?id=wDfXP6uAkR", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;4;4", "excitement": "4;4;4", "reproducibility": "4;4;5", "correctness": "4;4;3", "rating_avg": 5.0, "confidence_avg": 4.0, "excitement_avg": 4.0, "reproducibility_avg": 4.333333333333333, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-6819-525X;0000-0002-2205-7812;;;", "linkedin": ";;;;", "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Hong Kong Polytechnic University", "aff_unique_dep": "", "aff_unique_url": "https://www.polyu.edu.hk", "aff_unique_abbr": "PolyU", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Hong Kong SAR", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "wFILOtxmxU", "title": "Syntax-Aware Retrieval Augmented Code Generation", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Neural code generation models are nowadays widely adopted to generate code from natural language descriptions automatically. Recently, pre-trained neural models equipped with token-level retrieval capabilities have exhibited great potentials in neural machine translation. However, applying them directly to code generation experience challenges: the use of the retrieval-based mechanism inevitably introduces extraneous noise to the generation process, resulting in even syntactically incorrect code. Computationally, such models necessitate frequent searches of the cached datastore, which turns out to be time-consuming. To address these issues, we propose $k$NN-TRANX, a token-level retrieval augmented code generation method. $k$NN-TRANX allows for searches in smaller datastores tailored for the code generation task. It leverages syntax constraints for the retrieval of datastores, which reduces the impact of retrieve noise. 
We evaluate $k$NN-TRANX on two public datasets and the experimental results confirm the effectiveness of our approach.", "keywords": "Code Generation;Retrieval Augmented Generation;Neural-Symbolic", "primary_area": "", "supplementary_material": "", "author": "Xiangyu Zhang;Yu Zhou;Guang Yang;Taolue Chen", "authorids": "~Xiangyu_Zhang12;~Yu_Zhou19;~Guang_Yang14;~Taolue_Chen2", "gender": ";M;;", "homepage": "https://github.com/NUAAZXY;https://csyuzhou.github.io/;https://ntdxyg.github.io/;", "dblp": ";36/2728-10.html;25/5712-19;", "google_scholar": "j2XgDxUAAAAJ;Wuq5F8MAAAAJ;JFoOXQwAAAAJ;", "or_profile": "~Xiangyu_Zhang12;~Yu_Zhou19;~Guang_Yang14;~Taolue_Chen2", "aff": "Nanjing University of Aeronautics and Astronautics;;Nanjing University of Aeronautics and Astronautics;", "aff_domain": "nuaa.edu.cn;;nuaa.edu.cn;", "position": "MS student;;PhD student;", "bibtex": "@inproceedings{\nzhang2023syntaxaware,\ntitle={Syntax-Aware Retrieval Augmented Code Generation},\nauthor={Xiangyu Zhang and Yu Zhou and Guang Yang and Taolue Chen},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=wFILOtxmxU}\n}", "github": "", "project": "", "reviewers": "Av9a;VgbA;dTew", "site": "https://openreview.net/forum?id=wFILOtxmxU", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;4", "excitement": "2;3;4", "reproducibility": "3;2;4", "correctness": "3;3;4", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.0, "reproducibility_avg": 3.0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0002-3374-6680;", "linkedin": ";;;", "aff_unique_index": "0;0", "aff_unique_norm": "Nanjing University of Aeronautics and Astronautics", "aff_unique_dep": "", "aff_unique_url": "http://www.nuaa.edu.cn", "aff_unique_abbr": "NUAA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "id": "wFaBjgGqaL", "title": "Conceptual structure coheres in human cognition but not in large language models", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Neural network models of language have long been used as a tool for developing hypotheses about conceptual representation in the mind and brain. For many years, such use involved extracting vector-space representations of words and using distances among these to predict or understand human behavior in various semantic tasks. In contemporary language models, however, it is possible to interrogate the latent structure of conceptual representations using methods nearly identical to those commonly used with human participants. The current work uses three common techniques borrowed from cognitive psychology to estimate and compare lexical-semantic structure in both humans and a well-known large language model, the DaVinci variant of GPT-3. In humans, we show that conceptual structure is robust to differences in culture, language, and method of estimation. Structures estimated from the LLM behavior, while individually fairly consistent with those estimated from human behavior, depend much more upon the particular task used to generate behavior responses\u2013responses generated by the very same model in the three tasks yield estimates of conceptual structure that cohere less with one another than do human structure estimates. 
The results suggest one important way that knowledge inhering in contemporary LLMs can differ from human cognition.", "keywords": "Large Language Models;Cognitive Science;Semantic Norms;Human conceptual structure;AI conceptual structure", "primary_area": "", "supplementary_material": "", "author": "Siddharth Suresh;Kushin Mukherjee;Xizheng Yu;Wei-Chun Huang;Lisa Padua;Timothy T. Rogers", "authorids": "~Siddharth_Suresh1;~Kushin_Mukherjee1;~Xizheng_Yu1;~Wei-Chun_Huang1;~Lisa_Padua1;~Timothy_T._Rogers1", "gender": ";M;M;M;F;M", "homepage": "https://www.sidsuresh.com/;https://kushinm.github.io;;https://alex-weichun-huang.github.io/;;http://concepts.psych.wisc.edu/", "dblp": "262/0748;;;;;25/7229", "google_scholar": "xsyrntwAAAAJ;MSVqlWEAAAAJ;o74_zz0AAAAJ;;;7u_uyOsAAAAJ", "or_profile": "~Siddharth_Suresh1;~Kushin_Mukherjee1;~Xizheng_Yu1;~Wei-Chun_Huang1;~Lisa_Padua1;~Timothy_T._Rogers1", "aff": "University of Wisconsin - Madison;University of Wisconsin, Madison;University of Wisconsin - Madison;University of Wisconsin - Madison;Albany State University;University of Wisconsin - Madison", "aff_domain": "wisc.edu;wisc.edu;wisc.edu;wisc.edu;asurams.edu;wisc.edu", "position": "MS student;PhD student;Undergrad student;Undergrad student;Undergrad student;Full Professor", "bibtex": "@inproceedings{\nsuresh2023conceptual,\ntitle={Conceptual structure coheres in human cognition but not in large language models},\nauthor={Siddharth Suresh and Kushin Mukherjee and Xizheng Yu and Wei-Chun Huang and Lisa Padua and Timothy T. Rogers},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=wFaBjgGqaL}\n}", "github": "", "project": "", "reviewers": "P15m;qZ7X;DWHV", "site": "https://openreview.net/forum?id=wFaBjgGqaL", "pdf_size": 0, "rating": "5;5;5", "confidence": "5;4;3", "excitement": "4;4;4", "reproducibility": "5;5;4", "correctness": "5;5;4", "rating_avg": 5.0, "confidence_avg": 4.0, "excitement_avg": 4.0, "reproducibility_avg": 4.666666666666667, "correctness_avg": 4.666666666666667, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;0000-0001-6304-755X", "linkedin": "siddsuresh97/;;xzyu;;lisa-padua-0a08a0263;", "aff_unique_index": "0;1;0;0;2;0", "aff_unique_norm": "University of Wisconsin-Madison;University of Wisconsin;Albany State University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.wisc.edu;https://www.wisc.edu;https://www.albany.edu", "aff_unique_abbr": "UW-Madison;UW;ASU", "aff_campus_unique_index": "0;0;0;0;0", "aff_campus_unique": "Madison;", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "wKqdk1sOMY", "title": "Execution-Based Evaluation for Open-Domain Code Generation", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "To extend the scope of coding queries to more realistic settings, we propose ODEX, the first Open-Domain EXecution-based natural language (NL) to Python code generation dataset. ODEX has 945 NL-Code pairs spanning 79 diverse libraries, along with 1,707 human-written test cases for execution. Our NL-Code pairs are harvested from StackOverflow forums to encourage natural and practical coding queries. Moreover, ODEX supports four natural languages as intents, in English, Spanish, Japanese, and Russian. ODEX unveils intriguing behavioral differences among top-performing code language models (LM). 
While CODEX achieves better overall results, CODEGEN improves effectively via scaling \u2013 CODEGEN 6.1B performs comparably with CODEX 12B. Both models show substantial gaps between open and closed domains, but CODEGEN gaps tend to decrease with model size while CODEX gaps increase. We release ODEX to facilitate research into open-domain problems for the code generation community.", "keywords": "code generation;open domain;execution-based evaluation", "primary_area": "", "supplementary_material": "", "author": "Zhiruo Wang;Shuyan Zhou;Daniel Fried;Graham Neubig", "authorids": "~Zhiruo_Wang1;~Shuyan_Zhou1;~Daniel_Fried1;~Graham_Neubig1", "gender": "F;Non-Binary;M;M", "homepage": "https://zorazrw.github.io;https://shuyanzhou.github.io/;https://dpfried.github.io/;http://phontron.com", "dblp": "249/2286;;117/4804;03/8155", "google_scholar": "https://scholar.google.com/citations?hl=en;t6YzEpgAAAAJ;sJDqACEAAAAJ;wlosgkoAAAAJ", "or_profile": "~Zhiruo_Wang1;~Shuyan_Zhou1;~Daniel_Fried1;~Graham_Neubig1", "aff": "Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University", "aff_domain": "cmu.edu;cs.cmu.edu;cmu.edu;cmu.edu", "position": "MS student;PhD student;Assistant Professor;Associate Professor", "bibtex": "@inproceedings{\nwang2023executionbased,\ntitle={Execution-Based Evaluation for Open-Domain Code Generation},\nauthor={Zhiruo Wang and Shuyan Zhou and Daniel Fried and Graham Neubig},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=wKqdk1sOMY}\n}", "github": "", "project": "", "reviewers": "d6KA;rKvX;FbaY", "site": "https://openreview.net/forum?id=wKqdk1sOMY", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;3;4", "excitement": "2;4;4", "reproducibility": "3;4;3", "correctness": "2;4;4", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;", "linkedin": ";;;", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "Carnegie Mellon University", "aff_unique_dep": "", "aff_unique_url": "https://www.cmu.edu", "aff_unique_abbr": "CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "wRwbv3aWzN", "title": "VLIS: Unimodal Language Models Guide Multimodal Language Generation", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Multimodal language generation, which leverages the synergy of language and vision, is a rapidly expanding field. However, existing vision-language models face challenges in tasks that require complex linguistic understanding. To address this issue, we introduce Visual-Language models as Importance Sampling weights (VLIS), a novel framework that combines the visual conditioning capability of vision-language models with the language understanding of unimodal text-only language models without further training. It extracts pointwise mutual information of each image and text from a visual-language model and uses the value as an importance sampling weight to adjust the token likelihood from a text-only model. 
VLIS improves vision-language models on diverse tasks, including commonsense understanding (WHOOPS, OK-VQA, and ScienceQA) and complex text generation (Concadia, Image Paragraph Captioning, and ROCStories). Our results suggest that VLIS represents a promising new direction for multimodal language generation.", "keywords": "visual-language models;image captioning;multimodal understanding;language model decoding", "primary_area": "", "supplementary_material": "", "author": "Jiwan Chung;Youngjae Yu", "authorids": "~Jiwan_Chung1;~Youngjae_Yu1", "gender": "M;M", "homepage": "https://jiwanchung.github.io/;https://yj-yu.github.io/home/", "dblp": "277/2798;188/6210", "google_scholar": "https://scholar.google.co.kr/citations?user=l4UBOZAAAAAJ;https://scholar.google.co.kr/citations?user=WDO24ZYAAAAJ", "or_profile": "~Jiwan_Chung1;~Youngjae_Yu1", "aff": "Seoul National University;Allen Institute for Artificial Intelligence", "aff_domain": "snu.ac.kr;allenai.org", "position": "MS student;Postdoc", "bibtex": "@inproceedings{\nchung2023vlis,\ntitle={{VLIS}: Unimodal Language Models Guide Multimodal Language Generation},\nauthor={Jiwan Chung and Youngjae Yu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=wRwbv3aWzN}\n}", "github": "", "project": "", "reviewers": "z5TT;Vw57;TDrJ", "site": "https://openreview.net/forum?id=wRwbv3aWzN", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;4", "excitement": "4;4;2", "reproducibility": "3;3;2", "correctness": "4;3;3", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 2.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";", "linkedin": "chung-jiwan-81231b245/;", "aff_unique_index": "0;1", "aff_unique_norm": "Seoul National University;Allen Institute for Artificial Intelligence", "aff_unique_dep": ";", "aff_unique_url": "https://www.snu.ac.kr;https://allenai.org", "aff_unique_abbr": "SNU;AI2", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "South Korea;United States" }, { "id": "wV44qtTJ61", "title": "Prompt-based Logical Semantics Enhancement for Implicit Discourse Relation Recognition", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Implicit Discourse Relation Recognition (IDRR), which infers discourse relations without the help of explicit connectives, is still a crucial and challenging task for discourse parsing. Recent works tend to exploit the hierarchical structure information from the annotated senses, which demonstrate enhanced discourse relation representations can be obtained by integrating sense hierarchy. Nevertheless, the performance and robustness for IDRR are significantly constrained by the availability of annotated data. Fortunately, there is a wealth of unannotated utterances with explicit connectives, that can be utilized to acquire enriched discourse relation features. In light of such motivation, we propose a $\\textbf{P}$rompt-based $\\textbf{L}$ogical $\\textbf{S}$emantics $\\textbf{E}$nhancement (PLSE) method for IDRR. Essentially, our method seamlessly injects knowledge relevant to discourse relation into pre-trained language models through prompt-based connective prediction. 
Furthermore, considering the prompt-based connective prediction exhibits local dependencies due to the deficiency of masked language model (MLM) in capturing global semantics, we design a novel self-supervised learning objective based on mutual information maximization to derive enhanced representations of logical semantics for IDRR. Experimental results on PDTB 2.0 and CoNLL16 datasets demonstrate that our method achieves outstanding and consistent performance against the current state-of-the-art models.", "keywords": "implicit discourse relation recognition;logical semantics enhancement;prompt-based connective prediction;mutual information maximization", "primary_area": "", "supplementary_material": "", "author": "Chenxu Wang;Ping Jian;Mu Huang", "authorids": "~Chenxu_Wang5;~Ping_Jian1;~Mu_Huang1", "gender": "M;F;M", "homepage": "https://github.com/lalalamdbf;;https://wrioste.github.io/", "dblp": ";64/8451;", "google_scholar": "g-mCh_sAAAAJ;fpyIDJUAAAAJ;", "or_profile": "~Chenxu_Wang5;~Ping_Jian1;~Mu_Huang1", "aff": "Beijing Institute of Technology;Beijing Institute of Technology;Beijing Institute of Technology", "aff_domain": "bit.edu.cn;bit.edu.cn;bit.edu.cn", "position": "MS student;Lecturer;Undergrad student", "bibtex": "@inproceedings{\nwang2023promptbased,\ntitle={Prompt-based Logical Semantics Enhancement for Implicit Discourse Relation Recognition},\nauthor={Chenxu Wang and Ping Jian and Mu Huang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=wV44qtTJ61}\n}", "github": "", "project": "", "reviewers": "7gCC;q9JC;skmg", "site": "https://openreview.net/forum?id=wV44qtTJ61", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;4", "excitement": "4;4;5", "reproducibility": "4;4;5", "correctness": "4;4;5", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 4.333333333333333, "reproducibility_avg": 4.333333333333333, "correctness_avg": 4.333333333333333, "replies_avg": 10, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0001-7236-2922;0009-0003-9383-592X", "linkedin": ";;", "aff_unique_index": "0;0;0", "aff_unique_norm": "Beijing Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "http://www.bit.edu.cn/", "aff_unique_abbr": "BIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "wWFWwyXElN", "title": "LLM-powered Data Augmentation for Enhanced Cross-lingual Performance", "track": "main", "status": "Long Main", "tldr": "", "abstract": "This paper explores the potential of leveraging Large Language Models (LLMs) for data augmentation in multilingual commonsense reasoning datasets where the available training data is extremely limited.\nTo achieve this, we utilise several LLMs, namely Dolly-v2, StableVicuna, ChatGPT, and GPT-4, to augment three datasets: XCOPA, XWinograd, and XStoryCloze.\nSubsequently, we evaluate the effectiveness of fine-tuning smaller multilingual models, mBERT and XLMR, using the synthesised data.\nWe compare the performance of training with data generated in English and target languages, as well as translated English-generated data, revealing the overall advantages of incorporating data generated by LLMs, e.g. 
a notable 13.4 accuracy score improvement for the best case.\nFurthermore, we conduct a human evaluation by asking native speakers to assess the naturalness and logical coherence of the generated examples across different languages.\nThe results of the evaluation indicate that LLMs such as ChatGPT and GPT-4 excel at producing natural and coherent text in most languages, however, they struggle to generate meaningful text in certain languages like Tamil. We also observe that ChatGPT falls short in generating plausible alternatives compared to the original dataset, whereas examples from GPT-4 exhibit competitive logical consistency.", "keywords": "Large Language Models;Data Augmentation;Multilingual Commonsense Reasoning", "primary_area": "", "supplementary_material": "", "author": "Chenxi Whitehouse;Monojit Choudhury;Alham Fikri Aji", "authorids": "~Chenxi_Whitehouse1;~Monojit_Choudhury1;~Alham_Fikri_Aji1", "gender": ";M;M", "homepage": "https://chenxwh.github.io/;https://mbzuai.ac.ae/study/faculty/monojit-choudhury/;", "dblp": ";29/5841;188/8762", "google_scholar": "MxJqtPIAAAAJ;WR1ImCMAAAAJ;0Cyfqv4AAAAJ", "or_profile": "~Chenxi_Whitehouse1;~Monojit_Choudhury1;~Alham_Fikri_Aji1", "aff": "City University;Microsoft;Amazon", "aff_domain": "city.ac.uk;microsoft.com;amazon.com", "position": "PhD student;Principal Researcher;Researcher", "bibtex": "@inproceedings{\nwhitehouse2023llmpowered,\ntitle={{LLM}-powered Data Augmentation for Enhanced Cross-lingual Performance},\nauthor={Chenxi Whitehouse and Monojit Choudhury and Alham Fikri Aji},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=wWFWwyXElN}\n}", "github": "", "project": "", "reviewers": "7m4W;AwiM;4SYH", "site": "https://openreview.net/forum?id=wWFWwyXElN", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;4;4", "excitement": "3;3;4", "reproducibility": "4;4;5", "correctness": "4;3;4", "rating_avg": 5.0, "confidence_avg": 4.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.333333333333333, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": "chenxwh;monojit-choudhury-54225898/;", "aff_unique_index": "0;1;2", "aff_unique_norm": "City University;Microsoft;Amazon", "aff_unique_dep": ";Microsoft Corporation;Amazon.com, Inc.", "aff_unique_url": "https://www.cityuniversity.edu;https://www.microsoft.com;https://www.amazon.com", "aff_unique_abbr": "CityU;Microsoft;Amazon", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "wWT51dSyBj", "title": "Gradient-based Gradual Pruning for Language-Specific Multilingual Neural Machine Translation", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Multilingual neural machine translation (MNMT) offers the convenience of translating between multiple languages with a single model. However, MNMT often suffers from performance degradation in high-resource languages compared to bilingual counterparts. This degradation is commonly attributed to parameter interference, which occurs when parameters are fully shared across all language pairs. In this work, to tackle this issue we propose a gradient-based gradual pruning technique for MNMT. 
Our approach aims to identify an optimal sub-network for each language pair within the multilingual model by leveraging gradient-based information as pruning criterion and gradually increasing the pruning ratio as schedule. Our approach allows for partial parameter sharing across language pairs to alleviate interference, and each pair preserves its unique parameters to capture language-specific information. Comprehensive experiments on IWSLT and WMT datasets show that our approach yields a notable performance gain on both datasets.", "keywords": "Multilingual neural machine translation;language-specific sub-network extraction;model pruning;gradual pruning schedule;gradient-based pruning criterion", "primary_area": "", "supplementary_material": "", "author": "Dan He;Minh-Quang PHAM;Thanh-Le Ha;Marco Turchi", "authorids": "~Dan_He2;~Minh-Quang_PHAM1;~Thanh-Le_Ha1;~Marco_Turchi2", "gender": "F;M;M;M", "homepage": ";;;http://marcoturchi.com", "dblp": ";228/5646;142/8606;96/4886", "google_scholar": ";vXX_GLwAAAAJ;dRCJXysAAAAJ;loHH3HcAAAAJ", "or_profile": "~Dan_He2;~Minh-Quang_PHAM1;~Thanh-Le_Ha1;~Marco_Turchi2", "aff": "Zoom Video Communications ;Zoom Video Communications;Zoom Video Communications;Zoom", "aff_domain": "zoom.us;zoom.us;zoom.us;zoom.us", "position": "Researcher;Researcher;Researcher;Principal Researcher", "bibtex": "@inproceedings{\nhe2023gradientbased,\ntitle={Gradient-based Gradual Pruning for Language-Specific Multilingual Neural Machine Translation},\nauthor={Dan He and Minh-Quang PHAM and Thanh-Le Ha and Marco Turchi},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=wWT51dSyBj}\n}", "github": "", "project": "", "reviewers": "29ZV;KqQ9;ro9B", "site": "https://openreview.net/forum?id=wWT51dSyBj", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;3;3", "excitement": "3;3;4", "reproducibility": "4;4;3", "correctness": "3;4;4", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0003-3618-481X;0000-0003-4463-6978;0000-0002-5899-4496", "linkedin": "dan-he-487a73228/;minh-quang-pham-a18600a8/;;", "aff_unique_index": "0;0;0;1", "aff_unique_norm": "Zoom Video Communications;Zoom Video Communications Inc.", "aff_unique_dep": ";", "aff_unique_url": "https://zoom.us;https://zoom.us", "aff_unique_abbr": "Zoom;Zoom", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "wYdA8CF94e", "title": "HalOmi: A Manually Annotated Benchmark for Multilingual Hallucination and Omission Detection in Machine Translation", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Hallucinations in machine translation are translations that contain information completely unrelated to the input. Omissions are translations that do not include some of the input information. While both cases tend to be catastrophic errors undermining user trust, annotated data with these types of pathologies is extremely scarce and is limited to a few high-resource languages. In this work, we release an annotated dataset for the hallucination and omission phenomena covering 18 translation directions with varying resource levels and scripts. 
Our annotation covers different levels of partial and full hallucinations as well as omissions both at the sentence and at the word level. \nAdditionally, we revisit previous methods for hallucination and omission detection, show that conclusions made based on a single language pair largely do not hold for a large-scale evaluation, and establish new solid baselines.", "keywords": "Hallucination;Omissions;Dataset;Machine Translation", "primary_area": "", "supplementary_material": "", "author": "David Dale;Elena Voita;Janice Lam;Prangthip Hansanti;Christophe Ropers;Elahe Kalbassi;Cynthia Gao;Loic Barrault;Marta R. Costa-juss\u00e0", "authorids": "~David_Dale1;~Elena_Voita1;~Janice_Lam1;~Prangthip_Hansanti1;~Christophe_Ropers1;~Elahe_Kalbassi1;~Cynthia_Gao1;~Loic_Barrault1;~Marta_R._Costa-juss\u00e01", "gender": "M;F;F;;;F;F;M;F", "homepage": "https://daviddale.ru/en;https://lena-voita.github.io;;;http://www.chrisropers.net;;https://www.linkedin.com/in/cynthiarfgao/;https://loicbarrault.github.io/;https://www.costa-jussa.com", "dblp": "293/7322;220/4162;;;324/2505;;;86/7823;17/2183", "google_scholar": "4GB_6AcAAAAJ;EcN9o7kAAAAJ;;;;;;https://scholar.google.fr/citations?user=S6Xj1BYAAAAJ;ESqQ7FoAAAAJ", "or_profile": "~David_Dale1;~Elena_Voita1;~Janice_Lam1;~Prangthip_Hansanti1;~Christophe_Ropers1;~Elahe_Kalbassi1;~Cynthia_Gao1;~Loic_Barrault1;~Marta_R._Costa-juss\u00e01", "aff": "FAIR at Meta;University of Edinburgh;;;Syntexys Inc;;;Meta AI;Meta", "aff_domain": "meta.com;ed.ac.uk;;;syntexys.com;;;meta.com;fb.com", "position": "Research Engineer;PhD student;;;Linguist, CRO;;;Researcher;Research Scientist", "bibtex": "@inproceedings{\ndale2023halomi,\ntitle={HalOmi: A Manually Annotated Benchmark for Multilingual Hallucination and Omission Detection in Machine Translation},\nauthor={David Dale and Elena Voita and Janice Lam and Prangthip Hansanti and Christophe Ropers and Elahe Kalbassi and Cynthia Gao and Loic Barrault and Marta R. 
Costa-juss{\\`a}},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=wYdA8CF94e}\n}", "github": "", "project": "", "reviewers": "XBnn;Kt9P;TKvD", "site": "https://openreview.net/forum?id=wYdA8CF94e", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;5;4", "excitement": "4;4;4", "reproducibility": "3;3;4", "correctness": "4;4;4", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 4.0, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 9, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0003-2045-6833;;;;;;;0000-0002-0634-6147;", "linkedin": "dale-david/;elena-voita/;janice-lam-5aa51142/;prangthip-hansanti-ba477913/;;ekalbassi;;;", "aff_unique_index": "0;1;2;0;0", "aff_unique_norm": "Meta;University of Edinburgh;Syntexys Inc", "aff_unique_dep": "AI Research;;", "aff_unique_url": "https://ai.facebook.com;https://www.ed.ac.uk;", "aff_unique_abbr": "FAIR;Edinburgh;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "United States;United Kingdom;" }, { "id": "wZKRStVJJe", "title": "Toxicity in chatgpt: Analyzing persona-assigned language models", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Large language models (LLMs) have shown incredible capabilities and transcended the natural language processing (NLP) community, with adoption throughout many services like healthcare, therapy, education, and customer service. Since users include people with critical information needs like students or patients engaging with chatbots, the safety of these systems is of prime importance. Legislation has recognized its significance and recently drafted a \"Blueprint For An AI Bill Of Rights\" which calls for domain experts to identify risks and potential impact of AI systems. To this end, we systematically evaluate toxicity in over half a million generations of ChatGPT, a popular dialogue-based LLM. We find that setting the system parameter of ChatGPT by assigning it a persona, say that of the boxer Muhammad Ali, significantly increases the toxicity of generations. Depending on the persona assigned to ChatGPT, its toxicity can increase up to $6\\times$, with outputs engaging in incorrect stereotypes, harmful dialogue, and hurtful opinions. Furthermore, we find concerning patterns where specific entities (e.g., certain races) are targeted more than others ($3\\times$ more) irrespective of the assigned persona, reflecting discriminatory biases in the model. 
Our findings show that multiple provisions in the legislative blueprint are being violated, and we hope that the broader AI community rethinks the efficacy of current safety guardrails and develops better techniques that lead to robust, safe, and trustworthy AI.", "keywords": "AI Safety;Toxicity analysis;LLMs", "primary_area": "", "supplementary_material": "", "author": "Ameet Deshpande;Vishvak Murahari;Tanmay Rajpurohit;Ashwin Kalyan;Karthik R Narasimhan", "authorids": "~Ameet_Deshpande1;~Vishvak_Murahari1;~Tanmay_Rajpurohit1;~Ashwin_Kalyan6;~Karthik_R_Narasimhan1", "gender": "M;M;M;M;M", "homepage": "https://vishvakmurahari.com/;;http://www.karthiknarasimhan.com;https://ameet-1997.github.io;http://ashwinkalyan.com/", "dblp": "249/5621;;147/0322;220/4337;173/5217", "google_scholar": "Y_NYX7MAAAAJ;B4NztA8AAAAJ;euc0GX4AAAAJ;332L1coAAAAJ;KYHL9aIAAAAJ", "or_profile": "~Vishvak_Murahari1;~Tanmay_Rajpurohit1;~Karthik_R_Narasimhan1;~Ameet_S_Deshpande1;~Ashwin_Kalyan_Vijayakumar1", "aff": "Princeton University;Independent Researcher;Princeton University;Princeton University;Allen Institute for Artificial Intelligence", "aff_domain": "princeton.edu;tanmay.one;princeton.edu;princeton.edu;allenai.org", "position": "PhD student;Researcher;Assistant Professor;PhD student;Research Scientist", "bibtex": "@inproceedings{\ndeshpande2023toxicity,\ntitle={Toxicity in chatgpt: Analyzing persona-assigned language models},\nauthor={Ameet Deshpande and Vishvak Murahari and Tanmay Rajpurohit and Ashwin Kalyan and Karthik R Narasimhan},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=wZKRStVJJe}\n}", "github": "", "project": "", "reviewers": "tCh3;QcSQ;85RX", "site": "https://openreview.net/forum?id=wZKRStVJJe", "pdf_size": 0, "rating": "2;2;2", "confidence": "5;5;4", "excitement": "4;4;3", "reproducibility": "5;4;3", "correctness": "5;5;2", "rating_avg": 2.0, "confidence_avg": 4.666666666666667, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.0, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;", "linkedin": ";tanmay-rajpurohit-b13942125/;;;", "aff_unique_index": "0;1;0;0;2", "aff_unique_norm": "Princeton University;Independent Researcher;Allen Institute for Artificial Intelligence", "aff_unique_dep": ";;", "aff_unique_url": "https://www.princeton.edu;;https://allenai.org", "aff_unique_abbr": "Princeton;;AI2", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States;" }, { "id": "wZmgpJMdb3", "title": "DocAsRef: An Empirical Study on Repurposing Reference-based Summary Quality Metrics as Reference-free Metrics", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "Automated summary quality assessment falls into two categories: reference-based and reference-free. Reference-based metrics, historically deemed more accurate due to the additional information provided by human-written references, are limited by their reliance on human input. In this paper, we hypothesize that the comparison methodologies used by some reference-based metrics to evaluate a system summary against its corresponding reference can be effectively adapted to assess it against its source document, thereby transforming these metrics into reference-free ones. Experimental results support this hypothesis. 
After being repurposed reference-freely, the zero-shot BERTScore using the pretrained DeBERTa-large-MNLI model of $<$0.5B parameters consistently outperforms its original reference-based version across various aspects on the SummEval and Newsroom datasets. It also excels in comparison to most existing reference-free metrics and closely competes with zero-shot summary evaluators based on GPT-3.5.", "keywords": "summarization;evaluation;zero-shot", "primary_area": "", "supplementary_material": "", "author": "Forrest Sheng Bao;Ruixuan Tu;Ge Luo;Yinfei Yang;Hebi Li;Minghui Qiu;Youbiao He;Cen Chen", "authorids": "~Forrest_Sheng_Bao1;~Ruixuan_Tu1;~Ge_Luo1;~Yinfei_Yang1;~Hebi_Li1;~Minghui_Qiu1;~Youbiao_He1;~Cen_Chen1", "gender": "M;Not Specified;M;;Unspecified;M;M;F", "homepage": "https://forrestbao.github.io;https://turx.asia/acad;;;https://lihebi.com;https://sites.google.com/site/qiumh0727/;;https://sites.google.com/site/chencenpersonalwebsite/", "dblp": "98/5980.html;;131/4164-2;117/4082;241/9673.html;132/3541;191/4569;152/6215-1.html", "google_scholar": "XWpFf0IAAAAJ;V6hzHoQAAAAJ;;kvDbu90AAAAJ;klBF60oAAAAJ;https://scholar.google.com.sg/citations?user=xcqJyMgAAAAJ;1kVtkLgAAAAJ;https://scholar.google.com.sg/citations?user=3Mn4S9UAAAAJ", "or_profile": "~Forrest_Sheng_Bao1;~Ruixuan_Tu1;~Ge_Luo1;~Yinfei_Yang1;~Hebi_Li1;~Minghui_Qiu1;~Youbiao_He1;~Cen_Chen1", "aff": "Iowa State University;University of Wisconsin - Madison;Iowa State University;Apple;Iowa State University;ByteDance;Iowa State University;East China Normal University", "aff_domain": "iastate.edu;cs.wisc.edu;iastate.edu;apple.com;iastate.edu;bytedance.com;iastate.edu;dase.ecnu.edu.cn", "position": "Assistant Professor;Undergrad student;PhD student;Researcher;PhD student;Researcher;PhD student;Associate Professor", "bibtex": "@inproceedings{\nbao2023docasref,\ntitle={DocAsRef: An Empirical Study on Repurposing Reference-based Summary Quality Metrics as Reference-free Metrics},\nauthor={Forrest Sheng Bao and Ruixuan Tu and Ge Luo and Yinfei Yang and Hebi Li and Minghui Qiu and Youbiao He and Cen Chen},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=wZmgpJMdb3}\n}", "github": "", "project": "", "reviewers": "Je5A;2AuP;j2Me", "site": "https://openreview.net/forum?id=wZmgpJMdb3", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;3", "excitement": "3;3;3", "reproducibility": "5;3;3", "correctness": "3;3;3", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 8, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-0013-934X;0000-0003-2136-9774;;;;;0000-0003-0325-1705", "linkedin": ";rxtu/;;;;;;", "aff_unique_index": "0;1;0;2;0;3;0;4", "aff_unique_norm": "Iowa State University;University of Wisconsin-Madison;Apple;ByteDance;East China Normal University", "aff_unique_dep": ";;Apple Inc.;;", "aff_unique_url": "https://www.iastate.edu;https://www.wisc.edu;https://www.apple.com;https://www.bytedance.com;http://www.ecnu.edu.cn", "aff_unique_abbr": "ISU;UW-Madison;Apple;ByteDance;ECNU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Madison", "aff_country_unique_index": "0;0;0;0;0;1;0;1", "aff_country_unique": "United States;China" }, { "id": "wcgfB88Slx", "title": "Explanation Selection Using Unlabeled Data for Chain-of-Thought Prompting", "track": "main", "status": "Long Main", "tldr": "", 
"abstract": "Recent work has shown how to prompt large language models with explanations to obtain strong performance on textual reasoning tasks, i.e., the chain-of-thought paradigm. However, subtly different explanations can yield widely varying downstream task accuracy. Explanations that have not been \"tuned\" for a task, such as off-the-shelf explanations written by non-experts, may lead to mediocre performance. This paper tackles the problem of how to optimize explanation-infused prompts in a blackbox fashion. We first generate sets of candidate explanations for each example in the prompt using a leave-one-out scheme, then find an effective combination of these explanations with a two-stage framework. We first evaluate explanations for each in-context example in isolation according to two proxy metrics, log likelihood and accuracy on new examples. Then, we search over combinations of explanations to find one that yields high performance against a silver-labeled development set. Across four textual reasoning tasks spanning question answering, mathematical reasoning, and natural language inference, results show that our proxy metrics correlate with ground truth accuracy and our overall method can effectively improve prompts over crowdworker annotations and naive search strategies", "keywords": "Reasoning; In-Context Learning; Chain-of-Thought; Prompting", "primary_area": "", "supplementary_material": "", "author": "Xi Ye;Greg Durrett", "authorids": "~Xi_Ye2;~Greg_Durrett1", "gender": ";M", "homepage": "https://xiye17.github.io/;http://www.cs.utexas.edu/~gdurrett/", "dblp": ";69/7968", "google_scholar": "qH83GlAAAAAJ;https://scholar.google.com.tw/citations?user=EpQ_sDEAAAAJ", "or_profile": "~Xi_Ye2;~Greg_Durrett1", "aff": "UT Austin;University of Texas, Austin", "aff_domain": "cs.utexas.edu;utexas.edu", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\nye2023explanation,\ntitle={Explanation Selection Using Unlabeled Data for Chain-of-Thought Prompting},\nauthor={Xi Ye and Greg Durrett},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=wcgfB88Slx}\n}", "github": "", "project": "", "reviewers": "T61D;j9eL;cVLb", "site": "https://openreview.net/forum?id=wcgfB88Slx", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;4;4", "excitement": "3;4;4", "reproducibility": "4;4;4", "correctness": "4;4;4", "rating_avg": 5.0, "confidence_avg": 4.0, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.0, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";", "linkedin": ";", "aff_unique_index": "0;0", "aff_unique_norm": "University of Texas at Austin", "aff_unique_dep": "", "aff_unique_url": "https://www.utexas.edu", "aff_unique_abbr": "UT Austin", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Austin", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "wcqBfk4jv6", "title": "Improving Biomedical Abstractive Summarisation with Knowledge Aggregation from Citation Papers", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Abstracts derived from biomedical literature possess distinct domain-specific characteristics, including specialised writing styles and biomedical terminologies, which necessitate a deep understanding of the related literature. 
As a result, existing language models struggle to generate technical summaries that are on par with those produced by biomedical experts, given the absence of domain-specific background knowledge. This paper aims to enhance the performance of language models in biomedical abstractive summarisation by aggregating knowledge from external papers cited within the source article. We propose a novel attention-based citation aggregation model that integrates domain-specific knowledge from citation papers, allowing neural networks to generate summaries by leveraging both the paper content and relevant knowledge from citation papers. Furthermore, we construct and release a large-scale biomedical summarisation dataset that serves as a foundation for our research. Extensive experiments demonstrate that our model outperforms state-of-the-art approaches and achieves substantial improvements in abstractive biomedical text summarisation.", "keywords": "Biomedical Text Summarisation;Abstractive Summarisation;Knowledge Aggregation;Citation Graph", "primary_area": "", "supplementary_material": "", "author": "Chen Tang;Shun Wang;Tomas Goldsack;Chenghua Lin", "authorids": "~Chen_Tang5;~Shun_Wang1;~Tomas_Goldsack1;~Chenghua_Lin1", "gender": ";M;M;", "homepage": ";;https://tgoldsack1.github.io/;", "dblp": ";;;", "google_scholar": ";clkFQwgAAAAJ;SpGQaT0AAAAJ;", "or_profile": "~Chen_Tang5;~Shun_Wang1;~Tomas_Goldsack1;~Chenghua_Lin1", "aff": ";University of Sheffield;University of Sheffield;", "aff_domain": ";sheffield.ac.uk;sheffield.ac.uk;", "position": ";PhD student;PhD student;", "bibtex": "@inproceedings{\ntang2023improving,\ntitle={Improving Biomedical Abstractive Summarisation with Knowledge Aggregation from Citation Papers},\nauthor={Chen Tang and Shun Wang and Tomas Goldsack and Chenghua Lin},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=wcqBfk4jv6}\n}", "github": "", "project": "", "reviewers": "u9sN;bXTQ;AoVu", "site": "https://openreview.net/forum?id=wcqBfk4jv6", "pdf_size": 0, "rating": "5;5;5", "confidence": "2;3;4", "excitement": "3;3;4", "reproducibility": "4;3;5", "correctness": "4;4;5", "rating_avg": 5.0, "confidence_avg": 3.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.0, "correctness_avg": 4.333333333333333, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0003-2205-8193;", "linkedin": ";shun-wang-5454bb192/;tomas-goldsack-729190152/;", "aff_unique_index": "0;0", "aff_unique_norm": "University of Sheffield", "aff_unique_dep": "", "aff_unique_url": "https://www.sheffield.ac.uk", "aff_unique_abbr": "Sheffield", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "United Kingdom" }, { "id": "wiI8ycNfgJ", "title": "LLM-FP4: 4-Bit Floating-Point Quantized Transformers", "track": "main", "status": "Long Main", "tldr": "", "abstract": "We propose LLM-FP4 for quantizing both weights and activations in large language models (LLMs) down to 4-bit floating-point values, in a post-training manner. Existing post-training quantization (PTQ) solutions are primarily integer-based and struggle with bit widths below 8 bits. Compared to integer quantization, floating-point (FP) quantization is more flexible and can better handle long-tail or bell-shaped distributions, and it has emerged as a default choice in many hardware platforms. 
One characteristic of FP quantization is that its performance largely depends on the choice of exponent bits and clipping range. In this regard, we construct a strong FP-PTQ baseline by searching for the optimal quantization parameters. Furthermore, we observe a high inter-channel variance and low intra-channel variance pattern in activation distributions, which adds activation quantization difficulty. We recognize this pattern to be consistent across a spectrum of transformer models designed for diverse tasks such as LLMs, BERT, and Vision Transformer models. To tackle this, we propose per-channel activation quantization and show that these additional scaling factors can be reparameterized as exponential biases of weights, incurring a negligible cost. Our method, for the first time, can quantize both weights and activations in the LLaMA-13B to only 4-bit and achieves an average score of 63.1 on the common sense zero-shot reasoning tasks, which is only 5.8 lower than the full-precision model, significantly outperforming the previous state-of-the-art by 12.7 points. Code is available at: https://github.com/nbasyl/LLM-FP4.", "keywords": "Model Compression;Model Quantization", "primary_area": "", "supplementary_material": "", "author": "Shih-yang Liu;Zechun Liu;Xijie Huang;Pingcheng Dong;Kwang-Ting Cheng", "authorids": "~Shih-yang_Liu1;~Zechun_Liu1;~Xijie_Huang1;~Pingcheng_Dong2;~Kwang-Ting_Cheng1", "gender": "M;;M;M;", "homepage": "https://vsdl.hkust.edu.hk/people.html;;https://huangowen.github.io/;https://pingchengdong.github.io/;", "dblp": ";;230/4412;309/0566;", "google_scholar": "eBXRoDgAAAAJ;;nFW2mqwAAAAJ;YOUR_GOOGLE_SCHOLAR_ID;", "or_profile": "~Shih-yang_Liu1;~Zechun_Liu1;~Xijie_Huang1;~Pingcheng_Dong2;~Kwang-Ting_Cheng1", "aff": "NVIDIA;;Microsoft Research;Hong Kong University of Science and Technology;", "aff_domain": "nvidia.com;;microsoft.com;ust.hk;", "position": "Intern;;Intern;PhD student;", "bibtex": "@inproceedings{\nliu2023llmfp,\ntitle={{LLM}-{FP}4: 4-Bit Floating-Point Quantized Transformers},\nauthor={Shih-yang Liu and Zechun Liu and Xijie Huang and Pingcheng Dong and Kwang-Ting Cheng},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=wiI8ycNfgJ}\n}", "github": "", "project": "", "reviewers": "EuFY;KRMt;SAuf;v99B", "site": "https://openreview.net/forum?id=wiI8ycNfgJ", "pdf_size": 0, "rating": "5;5;5;5", "confidence": "3;4;4;2", "excitement": "4;3;3;3", "reproducibility": "4;3;2;2", "correctness": "3;4;2;3", "rating_avg": 5.0, "confidence_avg": 3.25, "excitement_avg": 3.25, "reproducibility_avg": 2.75, "correctness_avg": 3.0, "replies_avg": 14, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0003-1997-0843;;;;", "linkedin": ";;huang-xijie-4224371b8/;;", "aff_unique_index": "0;1;2", "aff_unique_norm": "NVIDIA;Microsoft;Hong Kong University of Science and Technology", "aff_unique_dep": "NVIDIA Corporation;Microsoft Research;", "aff_unique_url": "https://www.nvidia.com;https://www.microsoft.com/en-us/research;https://www.ust.hk", "aff_unique_abbr": "NVIDIA;MSR;HKUST", "aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;1", "aff_country_unique": "United States;China" }, { "id": "wirDXDQwYZ", "title": "Pragmatic Reasoning Unlocks Quantifier Semantics for Foundation Models", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Generalized quantifiers (e.g., $\\textit{few}$, 
$\\textit{most}$) are used to indicate the proportions predicates satisfy (for example, $\\textit{some}$ apples are red). One way to interpret quantifier semantics is to explicitly bind these satisfactions with percentage scopes (e.g., 30%-40% of apples are red). This approach can be helpful for tasks like logic formalization and surface-form quantitative reasoning (Gordon and Schubert, 2010; Roy et al., 2015). However, it remains unclear if recent foundation models (Bommasani et al., 2021) possess this ability due to the absence of direct training signals. To explore this, we introduce QuRe, a crowd-sourced dataset of human-annotated generalized quantifiers in Wikipedia sentences featuring percentage-equipped predicates. We explore quantifier comprehension using PRESQUE, a framework that combines natural language inference and the Rational Speech Acts framework. Experimental results on the HVD dataset (Herbelot and Vecchi, 2015) and QuRe demonstrate PRESQUE's superiority over a literal listener baseline, showing a 20% relative improvement in F1 in predicting percentage scopes for quantifiers, even with no additional training.", "keywords": "Pragmatic Reasoning;Rational Speech Act;Quantifier Understanding;Generalized Quantifiers", "primary_area": "", "supplementary_material": "", "author": "Yiyuan Li;Rakesh R Menon;Sayan Ghosh;Shashank Srivastava", "authorids": "~Yiyuan_Li1;~Rakesh_R_Menon3;~Sayan_Ghosh2;~Shashank_Srivastava1", "gender": ";M;M;M", "homepage": "https://nativeatom.github.io/;https://sgdgp.github.io/;https://www.ssriva.com/;https://cs.unc.edu/~rrmenon", "dblp": "14/5062;http://dblp.uni-trier.de/pers/hd/g/Ghosh_0002:Sayan;;206/6504.html", "google_scholar": "XdQcrwUAAAAJ;https://scholar.google.com/citations?hl=en;-vKI5s0AAAAJ;GyFb98kAAAAJ", "or_profile": "~Yiyuan_Li1;~Sayan_Ghosh2;~Shashank_Srivastava1;~Rakesh_R_Menon2", "aff": "Carnegie Mellon University;Department of Computer Science, University of North Carolina, Chapel Hill;University of North Carolina at Chapel Hill;Department of Computer Science, University of North Carolina, Chapel Hill", "aff_domain": "cmu.edu;cs.unc.edu;unc.edu;cs.unc.edu", "position": "MS student;PhD student;Assistant Professor;PhD student", "bibtex": "@inproceedings{\nli2023pragmatic,\ntitle={Pragmatic Reasoning Unlocks Quantifier Semantics for Foundation Models},\nauthor={Yiyuan Li and Rakesh R Menon and Sayan Ghosh and Shashank Srivastava},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=wirDXDQwYZ}\n}", "github": "", "project": "", "reviewers": "PLvc;VNWH;dNQH", "site": "https://openreview.net/forum?id=wirDXDQwYZ", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;2;3", "excitement": "4;4;4", "reproducibility": "4;3;3", "correctness": "4;4;4", "rating_avg": 4.0, "confidence_avg": 3.0, "excitement_avg": 4.0, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;", "linkedin": ";;;", "aff_unique_index": "0;1;1;1", "aff_unique_norm": "Carnegie Mellon University;University of North Carolina", "aff_unique_dep": ";Department of Computer Science", "aff_unique_url": "https://www.cmu.edu;https://www.unc.edu", "aff_unique_abbr": "CMU;UNC", "aff_campus_unique_index": "1;1;1", "aff_campus_unique": ";Chapel Hill", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "wnE8wDd61Z", "title": "Knowledge Graph 
Compression Enhances Diverse Commonsense Generation", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Generating commonsense explanations requires reasoning about commonsense knowledge beyond what is explicitly mentioned in the context. Existing models use commonsense knowledge graphs such as ConceptNet to extract a subgraph of relevant knowledge pertaining to concepts in the input. However, due to the large coverage and, consequently, vast scale of ConceptNet, the extracted subgraphs may contain loosely related, redundant and irrelevant information, which can introduce noise into the model. We propose to address this by applying a differentiable graph compression algorithm that focuses on the relevant knowledge for the task. The compressed subgraphs yield considerably more diverse outputs when incorporated into models for the tasks of generating commonsense and abductive explanations. Moreover, our model achieves better quality-diversity tradeoff than a large language model with 100 times the number of parameters. Our generic approach can be applied to additional NLP tasks that can benefit from incorporating external knowledge.", "keywords": "commonsense generation;knowledge graph compression", "primary_area": "", "supplementary_material": "", "author": "EunJeong Hwang;Veronika Thost;Vered Shwartz;Tengfei Ma", "authorids": "~EunJeong_Hwang1;~Veronika_Thost1;~Vered_Shwartz1;~Tengfei_Ma1", "gender": "F;F;F;M", "homepage": "https://eujhwang.github.io/;https://mitibmwatsonailab.mit.edu/people/veronika-thost/;https://www.cs.ubc.ca/~vshwartz/;https://sites.google.com/site/matf0123/", "dblp": ";132/3874;166/2038;94/9023-1", "google_scholar": "Z0TA4NEAAAAJ;TyScgJ0AAAAJ;bbe4ResAAAAJ;9OvNakkAAAAJ", "or_profile": "~EunJeong_Hwang1;~Veronika_Thost1;~Vered_Shwartz1;~Tengfei_Ma1", "aff": "University of British Columbia;IBM Research;University of British Columbia;International Business Machines", "aff_domain": "cs.ubc.ca;ibm.com;ubc.ca;ibm.com", "position": "PhD student;Research Scientist;Assistant Professor;Researcher", "bibtex": "@inproceedings{\nhwang2023knowledge,\ntitle={Knowledge Graph Compression Enhances Diverse Commonsense Generation},\nauthor={EunJeong Hwang and Veronika Thost and Vered Shwartz and Tengfei Ma},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=wnE8wDd61Z}\n}", "github": "", "project": "", "reviewers": "1hNR;BbGf;wZ5V", "site": "https://openreview.net/forum?id=wnE8wDd61Z", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;3;4", "excitement": "4;3;3", "reproducibility": "4;3;3", "correctness": "4;2;4", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0003-4984-1532;;0000-0002-1086-529X", "linkedin": ";;vered-shwartz-99548633/;", "aff_unique_index": "0;1;0;2", "aff_unique_norm": "University of British Columbia;IBM;International Business Machines Corporation", "aff_unique_dep": ";IBM Research;", "aff_unique_url": "https://www.ubc.ca;https://www.ibm.com/research;https://www.ibm.com", "aff_unique_abbr": "UBC;IBM;IBM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;1", "aff_country_unique": "Canada;United States" }, { "id": "wpjRa3d9OJ", "title": "Temporal Knowledge Graph Forecasting Without 
Knowledge Using In-Context Learning", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Temporal knowledge graph (TKG) forecasting benchmarks challenge models to predict future facts using knowledge of past facts.\nIn this paper, we develop an approach to use in-context learning (ICL) with large language models (LLMs) for TKG forecasting.\nOur extensive evaluation compares diverse baselines, including both simple heuristics and state-of-the-art (SOTA) supervised models, against pre-trained LLMs across several popular benchmarks and experimental settings.\nWe observe that naive LLMs perform on par with SOTA models, which employ carefully designed architectures and supervised training for the forecasting task, falling within the (-3.6\\%, +1.5\\%) Hits@1 margin relative to the median performance.\nTo better understand the strengths of LLMs for forecasting, we explore different approaches for selecting historical facts, constructing prompts, controlling information propagation, and parsing outputs into a probability distribution.\nA surprising finding from our experiments is that LLM performance endures ($\\pm$0.4\\% Hit@1) even when semantic information is removed by mapping entities/relations to arbitrary numbers, suggesting that prior semantic knowledge is unnecessary; rather, LLMs can leverage the symbolic patterns in the context to achieve such a strong performance. \nOur analysis also reveals that ICL enables LLMs to learn irregular patterns from the historical context, going beyond frequency and recency biases", "keywords": "Temporal Knowledge Graph;In-context Learning;Large Language Model", "primary_area": "", "supplementary_material": "", "author": "Dong-Ho Lee;Kian Ahrabian;Woojeong Jin;Fred Morstatter;Jay Pujara", "authorids": "~Dong-Ho_Lee1;~Kian_Ahrabian1;~Woojeong_Jin1;~Fred_Morstatter1;~Jay_Pujara1", "gender": "M;M;;;", "homepage": "https://danny-lee.info;;https://woojeongjin.github.io;http://fred.science;https://www.jaypujara.org", "dblp": ";211/6774;194/4234;51/9687;65/10103", "google_scholar": "oei2TXwAAAAJ;pwUdiCYAAAAJ;;;yvdSr4AAAAAJ", "or_profile": "~Dong-Ho_Lee1;~Kian_Ahrabian1;~Woojeong_Jin1;~Fred_Morstatter1;~Jay_Pujara1", "aff": "Snap Inc.;University of Southern California;University of Southern California;USC/ISI;University of Southern California", "aff_domain": "snapchat.com;usc.edu;usc.edu;isi.edu;usc.edu", "position": "Intern;PhD student;PhD student;Research Scientist;Assistant Professor", "bibtex": "@inproceedings{\nlee2023temporal,\ntitle={Temporal Knowledge Graph Forecasting Without Knowledge Using In-Context Learning},\nauthor={Dong-Ho Lee and Kian Ahrabian and Woojeong Jin and Fred Morstatter and Jay Pujara},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=wpjRa3d9OJ}\n}", "github": "", "project": "", "reviewers": "KePy;rjDk;MDqV", "site": "https://openreview.net/forum?id=wpjRa3d9OJ", "pdf_size": 0, "rating": "5;5;5", "confidence": "3;4;5", "excitement": "4;3;4", "reproducibility": "4;4;4", "correctness": "3;4;4", "rating_avg": 5.0, "confidence_avg": 4.0, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.0, "correctness_avg": 3.6666666666666665, "replies_avg": 10, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;0000-0001-6921-1744", "linkedin": ";kahrabian/;;;pujara", "aff_unique_index": "0;1;1;1;1", "aff_unique_norm": "Snap Inc.;University of Southern California", "aff_unique_dep": ";", 
"aff_unique_url": "https://www.snapinc.com;https://www.usc.edu", "aff_unique_abbr": "Snap;USC", "aff_campus_unique_index": "1;1;2;1", "aff_campus_unique": ";Los Angeles;ISI", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "wpsbUYi9nN", "title": "Large Language Models Know Your Contextual Search Intent: A Prompting Framework for Conversational Search", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Precisely understanding users' contextual search intent has been an important challenge for conversational search. As conversational search sessions are much more diverse and long-tailed, existing methods trained on limited data still show unsatisfactory effectiveness and robustness to handle real conversational search scenarios. Recently, large language models (LLMs) have demonstrated amazing capabilities for text generation and conversation understanding. In this work, we present a simple yet effective prompting framework, called LLM4CS, to leverage LLMs as a text-based search intent interpreter to help conversational search. Under this framework, we explore three prompting methods to generate multiple query rewrites and hypothetical responses, and propose to aggregate them into an integrated representation that can robustly represent the user's real contextual search intent. Extensive automatic evaluations and human evaluations on three widely used conversational search benchmarks, including CAsT-19, CAsT-20, and CAsT-21, demonstrate the remarkable performance of our simple LLM4CS framework compared with existing methods and even using human rewrites. Our findings provide important evidence to better understand and leverage LLMs for conversational search.", "keywords": "Conversational search; passage retrieval; large language models; contextual search intent understanding", "primary_area": "", "supplementary_material": "", "author": "Kelong Mao;Zhicheng Dou;Fengran Mo;Jiewen Hou;Haonan Chen;Hongjin Qian", "authorids": "~Kelong_Mao1;~Zhicheng_Dou1;~Fengran_Mo1;~Jiewen_Hou2;~Haonan_Chen5;~Hongjin_Qian1", "gender": ";;M;M;;M", "homepage": "https://kyriemao.github.io;https://playbigdata.ruc.edu.cn/dou;https://fengranmark.github.io/;https://github.com/jefferson814/;https://haon-chen.github.io/;https://qhjqhj00.github.io", "dblp": "270/6458;18/5740;278/7940;;121/7527-5;275/2898", "google_scholar": "SXAurKsAAAAJ;ChCjAAwAAAAJ;https://scholar.google.com/citations?hl=en;;;u9uPuxsAAAAJ", "or_profile": "~Kelong_Mao1;~Zhicheng_Dou1;~Fengran_Mo1;~Jiewen_Hou2;~Haonan_Chen5;~Hongjin_Qian1", "aff": "Renmin University of China;Renmin University of China;Universit\u00e9 de Montr\u00e9al;;Renmin University of China;Renmin University of China", "aff_domain": "ruc.edu.cn;ruc.edu.cn;umontreal.ca;;ruc.edu.cn;ruc.edu.cn", "position": "PhD student;Full Professor;PhD student;;PhD student;PhD student", "bibtex": "@inproceedings{\nmao2023large,\ntitle={Large Language Models Know Your Contextual Search Intent: A Prompting Framework for Conversational Search},\nauthor={Kelong Mao and Zhicheng Dou and Fengran Mo and Jiewen Hou and Haonan Chen and Hongjin Qian},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=wpsbUYi9nN}\n}", "github": "", "project": "", "reviewers": "ARYj;Un6E;Leu2", "site": "https://openreview.net/forum?id=wpsbUYi9nN", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;3;4", "excitement": "4;2;2", "reproducibility": "4;3;4", "correctness": "4;3;3", 
"rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 2.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 5, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-5648-568X;0000-0002-9781-948X;0000-0002-0838-6994;;0000-0001-9812-0438;0000-0003-4011-5673", "linkedin": ";;fengran-mo-7bb771185/;;;", "aff_unique_index": "0;0;1;0;0", "aff_unique_norm": "Renmin University of China;Universit\u00e9 de Montr\u00e9al", "aff_unique_dep": ";", "aff_unique_url": "http://www.ruc.edu.cn;https://www.umontreal.ca", "aff_unique_abbr": "RUC;UdeM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;1;0;0", "aff_country_unique": "China;Canada" }, { "id": "wrBIS6FOfV", "title": "MoqaGPT : Zero-Shot Multi-modal Open-domain Question Answering with Large Language Model", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Multi-modal open-domain question answering typically requires evidence retrieval from databases across diverse modalities, such as images, tables, passages, etc. Even Large Language Models (LLMs) like GPT-4 fall short in this task. To enable LLMs to tackle the task in a zero-shot manner, we introduce MoqaGPT, a straightforward and flexible framework. Using a divide-and-conquer strategy that bypasses intricate multi-modality ranking, our framework can accommodate new modalities and seamlessly transition to new models for the task. Built upon LLMs, MoqaGPT retrieves and extracts answers from each modality separately, then fuses this multi-modal information using LLMs to produce a final answer. Our methodology boosts performance on the MMCoQA dataset, improving F1 by +37.91 points and EM by +34.07 points over the supervised baseline. On the MultiModalQA dataset, MoqaGPT surpasses the zero-shot baseline, improving F1 by 9.5 points and EM by 10.1 points, and significantly closes the gap with supervised methods. 
Our codebase is available at https://github.com/lezhang7/MOQAGPT.", "keywords": "Large Language Model;Multimodal;Open-domain question answering", "primary_area": "", "supplementary_material": "", "author": "Le Zhang;Yihong Wu;Fengran Mo;Jian-Yun Nie;Aishwarya Agrawal", "authorids": "~Le_Zhang6;~Yihong_Wu6;~Fengran_Mo1;~Jian-Yun_Nie1;~Aishwarya_Agrawal1", "gender": "M;M;M;M;F", "homepage": "https://lezhang7.github.io/;;https://fengranmark.github.io/;http://rali.iro.umontreal.ca/nie-site/jian-yun-nie-en/;https://www.iro.umontreal.ca/~agrawal/", "dblp": "03/4043-12;;278/7940;n/JianYunNie;163/2109.html", "google_scholar": "NqbBXAsAAAAJ;MBsQnu4AAAAJ;https://scholar.google.com/citations?hl=en;W7uYg0UAAAAJ;znH6xJ8AAAAJ", "or_profile": "~Le_Zhang6;~Yihong_Wu6;~Fengran_Mo1;~Jian-Yun_Nie1;~Aishwarya_Agrawal1", "aff": "Mila - Quebec AI Institute & Universit\u00e9 de Montr\u00e9al;Universit\u00e9 de Montr\u00e9al;Universit\u00e9 de Montr\u00e9al;University of Montreal;Google DeepMind", "aff_domain": "mila.umontreal.ca;umontreal.ca;umontreal.ca;umontreal.ca;google.com", "position": "MS student;PhD student;PhD student;Full Professor;Research Scientist ", "bibtex": "@inproceedings{\nzhang2023moqagpt,\ntitle={Moqa{GPT} : Zero-Shot Multi-modal Open-domain Question Answering with Large Language Model},\nauthor={Le Zhang and Yihong Wu and Fengran Mo and Jian-Yun Nie and Aishwarya Agrawal},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=wrBIS6FOfV}\n}", "github": "", "project": "", "reviewers": "LC2A;PCcG;fDtg", "site": "https://openreview.net/forum?id=wrBIS6FOfV", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;3;3", "excitement": "3;3;4", "reproducibility": "4;4;4", "correctness": "4;4;3", "rating_avg": 3.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.0, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0009-0009-2680-4107;0000-0002-0838-6994;;", "linkedin": ";;fengran-mo-7bb771185/;;", "aff_unique_index": "0;0;0;1;2", "aff_unique_norm": "Universit\u00e9 de Montr\u00e9al;University of Montreal;Google", "aff_unique_dep": "Quebec AI Institute;;Google DeepMind", "aff_unique_url": "https://www.umontreal.ca;https://wwwumontreal.ca;https://deepmind.com", "aff_unique_abbr": "UdeM;UM;DeepMind", "aff_campus_unique_index": "0", "aff_campus_unique": "Montreal;", "aff_country_unique_index": "0;0;0;0;1", "aff_country_unique": "Canada;United Kingdom" }, { "id": "wtqb7pNL4e", "title": "Can ChatGPT Assess Human Personalities? A General Evaluation Framework", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Large Language Models (LLMs) especially ChatGPT have produced impressive results in various areas, but their potential human-like psychology is still largely unexplored. Existing works study the virtual personalities of LLMs but rarely explore the possibility of analyzing human personalities via LLMs. This paper presents a generic evaluation framework for LLMs to assess human personalities based on Myers\u2013Briggs Type Indicator (MBTI) tests. Specifically, we first devise unbiased prompts by randomly permuting options in MBTI questions and adopt the average testing result to encourage more impartial answer generation. 
Then, we propose to replace the subject in question statements to enable flexible queries and assessments on different subjects from LLMs. Finally, we re-formulate the question instructions in a manner of correctness evaluation to facilitate LLMs to generate clearer responses. The proposed framework enables LLMs to flexibly assess personalities of different groups of people. We further propose three evaluation metrics to measure the consistency, robustness, and fairness of assessment results from state-of-the-art LLMs including ChatGPT and GPT-4. Our experiments reveal ChatGPT's ability to assess human personalities, and the average results demonstrate that it can achieve more consistent and fairer assessments in spite of lower robustness against prompt biases compared with InstructGPT.", "keywords": "Human personality assessment;Large language models;ChatGPT;Myers\u2013Briggs Type Indicator", "primary_area": "", "supplementary_material": "", "author": "Haocong Rao;Cyril Leung;Chunyan Miao", "authorids": "~Haocong_Rao1;~Cyril_Leung1;~Chunyan_Miao1", "gender": ";M;F", "homepage": ";;http://www.ntulily.org/ascymiao/", "dblp": ";76/3131;m/ChunyanMiao", "google_scholar": ";;https://scholar.google.com.tw/citations?user=fmXGRJgAAAAJ", "or_profile": "~Haocong_Rao1;~Cyril_Leung1;~Chunyan_Miao1", "aff": ";University of British Columbia;School of Computer Science and Engineering, Nanyang Technological University", "aff_domain": ";ubc.ca;scse.ntu.edu.sg", "position": ";Full Professor;Full Professor", "bibtex": "@inproceedings{\nrao2023can,\ntitle={Can Chat{GPT} Assess Human Personalities? A General Evaluation Framework},\nauthor={Haocong Rao and Cyril Leung and Chunyan Miao},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=wtqb7pNL4e}\n}", "github": "", "project": "", "reviewers": "cmK8;4CLG;k717", "site": "https://openreview.net/forum?id=wtqb7pNL4e", "pdf_size": 0, "rating": "2;2;2", "confidence": "1;4;4", "excitement": "4;5;3", "reproducibility": "3;4;4", "correctness": "3;4;3", "rating_avg": 2.0, "confidence_avg": 3.0, "excitement_avg": 4.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 10, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0002-0300-3448", "linkedin": ";;", "aff_unique_index": "0;1", "aff_unique_norm": "University of British Columbia;Nanyang Technological University", "aff_unique_dep": ";School of Computer Science and Engineering", "aff_unique_url": "https://www.ubc.ca;https://www.ntu.edu.sg", "aff_unique_abbr": "UBC;NTU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "Canada;Singapore" }, { "id": "wwm55qcNdK", "title": "SoulChat: Improving LLMs' Empathy, Listening, and Comfort Abilities through Fine-tuning with Multi-turn Empathy Conversations", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "Large language models (LLMs) have been widely applied in various fields due to their excellent capability for memorizing knowledge and chain of thought (CoT). When these language models are applied in the field of psychological counseling, they often rush to provide universal advice. However, when users seek psychological support, they need to gain empathy, trust, understanding and comfort, rather than just reasonable advice. 
To this end, we constructed a multi-turn empathetic conversation dataset of more than 2 million samples, in which the input is the multi-turn conversation context, and the target is empathetic responses that cover expressions such as questioning, comfort, recognition, listening, trust, emotional support, etc. Experiments have shown that the empathy ability of LLMs can be significantly enhanced when finetuning by using multi-turn dialogue history and responses that are closer to the expression of a psychological consultant.", "keywords": "Empathy Conversation;Large Language Model;Mental Health AI;Multi-turn Empathetic Conversation Dataset;Psychological Counseling AI", "primary_area": "", "supplementary_material": "", "author": "Yirong Chen;Xiaofen Xing;Jingkai Lin;huimin zheng;Zhenyu Wang;Qi Liu;Xiangmin Xu", "authorids": "~Yirong_Chen1;~Xiaofen_Xing1;~Jingkai_Lin1;~huimin_zheng1;~Zhenyu_Wang9;~Qi_Liu19;~Xiangmin_Xu1", "gender": "M;F;M;M;M;M;M", "homepage": ";https://www2.scut.edu.cn/ft/2021/1102/c29779a449558/page.htm;;https://github.com/jackyLens;;https://drliuqi.github.io/;http://www2.scut.edu.cn/ft/2021/1102/c29779a449591/page.htm", "dblp": ";41/9939.html;;;;95/2446-5.html;28/9939.html", "google_scholar": "aG5aDKEAAAAJ;;;;;ekQx0bIAAAAJ;", "or_profile": "~Yirong_Chen1;~Xiaofen_Xing1;~Jingkai_Lin1;~huimin_zheng1;~Zhenyu_Wang9;~Qi_Liu19;~Xiangmin_Xu1", "aff": "South China University of Technology;South China University of Technology;South China University of Technology;South China University of Technology;South China University of Technology;South China University of Technology;South China University of Technology", "aff_domain": "scut.edu.cn;scut.edu.cn;scut.edu.cn;scut.edu.cn;scut.edu.cn;scut.edu.cn;scut.edu.cn", "position": "PhD student;Associate Professor;Intern;PhD student;Undergrad student;Full Professor;Full Professor", "bibtex": "@inproceedings{\nchen2023soulchat,\ntitle={SoulChat: Improving {LLM}s' Empathy, Listening, and Comfort Abilities through Fine-tuning with Multi-turn Empathy Conversations},\nauthor={Yirong Chen and Xiaofen Xing and Jingkai Lin and huimin zheng and Zhenyu Wang and Qi Liu and Xiangmin Xu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=wwm55qcNdK}\n}", "github": "", "project": "", "reviewers": "1XLJ;sDvK;3WZK", "site": "https://openreview.net/forum?id=wwm55qcNdK", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;4", "excitement": "4;3;2", "reproducibility": "3;3;3", "correctness": "3;4;3", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 3.0, "reproducibility_avg": 3.0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-0207-0067;;0000-0003-4044-1604;;0000-0002-9946-1418;0000-0001-5378-6404;", "linkedin": "chenyirong;;;;;;", "aff_unique_index": "0;0;0;0;0;0;0", "aff_unique_norm": "South China University of Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.scut.edu.cn", "aff_unique_abbr": "SCUT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "x2W2dKdNI8", "title": "Selectively Answering Ambiguous Questions", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Trustworthy language models should abstain from answering questions when they do not know the answer. 
\nHowever, the answer to a question can be unknown for a variety of reasons. \nPrior research has focused on the case in which the question is clear and the answer is unambiguous but possibly unknown.\nHowever, the answer to a question can also be unclear due to uncertainty of the questioner\u2019s intent or context.\nWe investigate question answering from this perspective, focusing on answering a subset of questions with a high degree of accuracy, from a set of questions in which many are inherently ambiguous.\nIn this setting, we find that the most reliable approach to calibration involves quantifying repetition within a set of sampled model outputs, rather than the model's likelihood or self-verification as used in prior work. \nWe find this to be the case across different types of uncertainty, varying model scales, and both with and without instruction tuning.\nOur results suggest that sampling-based confidence scores help calibrate answers to relatively unambiguous questions, with more dramatic improvements on ambiguous questions.", "keywords": "question answering;calibration;ambiguity", "primary_area": "", "supplementary_material": "", "author": "Jeremy R. Cole;Michael JQ Zhang;Daniel Gillick;Julian Martin Eisenschlos;Bhuwan Dhingra;Jacob Eisenstein", "authorids": "~Jeremy_R._Cole1;~Michael_JQ_Zhang1;~Daniel_Gillick1;~Julian_Martin_Eisenschlos1;~Bhuwan_Dhingra1;~Jacob_Eisenstein1", "gender": "M;M;M;M;M;", "homepage": "https://jrc436.github.io;;https://eisenjulian.github.io/;https://users.cs.duke.edu/~bdhingra/;https://jacobeisenstein.github.io;https://mikejqzhang.github.io/", "dblp": "189/4976;73/7157;262/3990;180/5692;82/2305;301/8020", "google_scholar": "WCzWsG0AAAAJ;LCeRsUcAAAAJ;2uAC2NQAAAAJ;2W2ttrQAAAAJ;Wb_lnjAAAAAJ;https://scholar.google.com/citations?view_op=list_works", "or_profile": "~Jeremy_R._Cole1;~Daniel_Gillick1;~Julian_Martin_Eisenschlos1;~Bhuwan_Dhingra1;~Jacob_Eisenstein1;~Michael_J_Zhang1", "aff": "Google DeepMind;;Universidad Nacional de C\u00f3rdoba;Duke University;Google;University of Texas at Austin", "aff_domain": "google.com;;unc.edu.ar;duke.edu;google.com;utexas.edu", "position": "Researcher;;PhD student;Assistant Professor;Research Scientist;PhD student", "bibtex": "@inproceedings{\ncole2023selectively,\ntitle={Selectively Answering Ambiguous Questions},\nauthor={Jeremy R. 
Cole and Michael JQ Zhang and Daniel Gillick and Julian Martin Eisenschlos and Bhuwan Dhingra and Jacob Eisenstein},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=x2W2dKdNI8}\n}", "github": "", "project": "", "reviewers": "DXG6;x5aR;RjGt", "site": "https://openreview.net/forum?id=x2W2dKdNI8", "pdf_size": 0, "rating": "5;5;5", "confidence": "1;4;3", "excitement": "4;2;4", "reproducibility": "1;3;4", "correctness": "4;3;4", "rating_avg": 5.0, "confidence_avg": 2.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 2.6666666666666665, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0001-7147-5888;;;;;", "linkedin": "jeremy-cole;;eisenjulian/;;;", "aff_unique_index": "0;1;2;0;3", "aff_unique_norm": "Google;Universidad Nacional de C\u00f3rdoba;Duke University;University of Texas at Austin", "aff_unique_dep": "Google DeepMind;;;", "aff_unique_url": "https://deepmind.com;https://www.unc.edu.ar;https://www.duke.edu;https://www.utexas.edu", "aff_unique_abbr": "DeepMind;UNC;Duke;UT Austin", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Mountain View;Austin", "aff_country_unique_index": "0;1;2;2;2", "aff_country_unique": "United Kingdom;Argentina;United States" }, { "id": "x32rlkzM69", "title": "The Past, Present, and Future of Typological Databases in NLP", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "Typological information has the potential to be beneficial in the development of NLP models, particularly for low-resource languages. \nUnfortunately, current large-scale typological databases, notably WALS and Grambank, are inconsistent both with each other and with other sources of typological information, such as linguistic grammars. \nSome of these inconsistencies stem from coding errors or linguistic variation, but many of the disagreements are due to the discrete categorical nature of these databases. 
\nWe shed light on this issue by systematically exploring disagreements across typological databases and resources, and their uses in NLP, covering the past and present.\nWe next investigate the future of such work, offering an argument that a continuous view of typological features is clearly beneficial, echoing recommendations from linguistics.\nWe propose that such a view of typology has significant potential in the future, including in language modeling in low-resource scenarios.", "keywords": "typology;typological feature prediction;large language models", "primary_area": "", "supplementary_material": "", "author": "Emi Baylor;Esther Ploeger;Johannes Bjerva", "authorids": "~Emi_Baylor1;~Esther_Ploeger1;~Johannes_Bjerva1", "gender": "F;;M", "homepage": "https://emibaylor.github.io/;;https://vbn.aau.dk/en/persons/jbjerva", "dblp": ";;148/4464", "google_scholar": ";;F9zlUBcAAAAJ", "or_profile": "~Emi_Baylor1;~Esther_Ploeger1;~Johannes_Bjerva1", "aff": "McGill University;;Aalborg University", "aff_domain": "mcgill.ca;;aau.dk", "position": "MS student;;Associate Professor", "bibtex": "@inproceedings{\nbaylor2023the,\ntitle={The Past, Present, and Future of Typological Databases in {NLP}},\nauthor={Emi Baylor and Esther Ploeger and Johannes Bjerva},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=x32rlkzM69}\n}", "github": "", "project": "", "reviewers": "wc1P;mEdM;PNkm", "site": "https://openreview.net/forum?id=x32rlkzM69", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;2;4", "excitement": "2;2;3", "reproducibility": "2;3;4", "correctness": "2;2;4", "rating_avg": 3.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 2.3333333333333335, "reproducibility_avg": 3.0, "correctness_avg": 2.6666666666666665, "replies_avg": 10, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0002-9512-0739", "linkedin": ";;bjerva/", "aff_unique_index": "0;1", "aff_unique_norm": "McGill University;Aalborg University", "aff_unique_dep": ";", "aff_unique_url": "https://www.mcgill.ca;https://www.aau.dk", "aff_unique_abbr": "McGill;AAU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "Canada;Denmark" }, { "id": "x3e1zQ1ub1", "title": "In-Context Demonstration Selection with Cross Entropy Difference", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Large language models (LLMs) can use in-context demonstrations to improve performance on zero-shot tasks. However, selecting the best in-context examples is challenging because model performance can vary widely depending on the selected examples. We present a cross-entropy difference (CED) method for selecting in-context demonstrations. Our method is based on the observation that the effectiveness of in-context demonstrations negatively correlates with the perplexity of the test example by a language model that was finetuned on that demonstration. We utilize parameter efficient finetuning to train small models on training data that are used for computing the cross-entropy difference between a test example and every candidate in-context demonstration. This metric is used to rank and select in-context demonstrations independently for each test input. 
We evaluate our method on a mix-domain dataset that combines 8 benchmarks, representing 4 text generation tasks, showing that CED for in-context demonstration selection can improve performance for a variety of LLMs over baseline selection methods.", "keywords": "in-context learning;data selection;peft", "primary_area": "", "supplementary_material": "", "author": "Dan Iter;Reid Pryzant;Ruochen Xu;Shuohang Wang;Yang Liu;Yichong Xu;Chenguang Zhu", "authorids": "~Dan_Iter1;~Reid_Pryzant1;~Ruochen_Xu2;~Shuohang_Wang1;~Yang_Liu50;~Yichong_Xu1;~Chenguang_Zhu1", "gender": "Not Specified;;M;M;M;M;M", "homepage": "https://daniter-cu.github.io/;;https://xrc10.github.io/;;https://nlp-yang.github.io/;http://xycking.wixsite.com/yichongxu;", "dblp": "63/10689.html;205/3986;188/3515;173/5469.html;;154/6421;48/7536-1.html", "google_scholar": "bg8RrSkAAAAJ;FkufKDgAAAAJ;HTp5S00AAAAJ;mN-IO6wAAAAJ;HxTr-CtMdrsC;sYza2XwAAAAJ;1b2kKWoAAAAJ", "or_profile": "~Dan_Iter1;~Reid_Pryzant1;~Ruochen_Xu2;~Shuohang_Wang1;~Yang_Liu50;~Yichong_Xu1;~Chenguang_Zhu1", "aff": "Microsoft;Microsoft Research;Microsoft Research;Microsoft;Microsoft;Microsoft;Zoom", "aff_domain": "microsoft.com;research.microsoft.com;research.microsoft.com;microsoft.com;microsoft.com;microsoft.com;zoom.us", "position": "Researcher;Researcher;Researcher;Researcher;Researcher;Senior Researcher;Principal Researcher", "bibtex": "@inproceedings{\niter2023incontext,\ntitle={In-Context Demonstration Selection with Cross Entropy Difference},\nauthor={Dan Iter and Reid Pryzant and Ruochen Xu and Shuohang Wang and Yang Liu and Yichong Xu and Chenguang Zhu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=x3e1zQ1ub1}\n}", "github": "", "project": "", "reviewers": "EXPc;uof7;YS4p", "site": "https://openreview.net/forum?id=x3e1zQ1ub1", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;3;3", "excitement": "4;3;3", "reproducibility": "4;3;2", "correctness": "4;3;3", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.0, "correctness_avg": 3.3333333333333335, "replies_avg": 10, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;;", "linkedin": "daniter;;ruochenx/;;;;", "aff_unique_index": "0;0;0;0;0;0;1", "aff_unique_norm": "Microsoft;Zoom Video Communications Inc.", "aff_unique_dep": "Microsoft Corporation;", "aff_unique_url": "https://www.microsoft.com;https://zoom.us", "aff_unique_abbr": "Microsoft;Zoom", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "x6aiktiAl8", "title": "Compressing and Debiasing Vision-Language Pre-Trained Models for Visual Question Answering", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Despite the excellent performance of vision-language pre-trained models (VLPs) on conventional VQA task, they still suffer from two problems: First, VLPs tend to rely on language biases in datasets and fail to generalize to out-of-distribution (OOD) data. Second, they are inefficient in terms of memory footprint and computation. Although promising progress has been made in both problems, most existing works tackle them independently. To facilitate the application of VLP to VQA tasks, it is imperative to jointly study VLP compression and OOD robustness, which, however, has not yet been explored. 
This paper investigates whether a VLP can be compressed and debiased simultaneously by searching sparse and robust subnetworks. To this end, we systematically study the design of a training and compression pipeline to search the subnetworks, as well as the assignment of sparsity to different modality-specific modules. Our experiments involve 2 VLPs, 2 compression methods, 4 training methods, 2 datasets and a range of sparsity levels. Our results show that there indeed exist sparse and robust subnetworks, which are competitive with the debiased full VLP and clearly outperform the debiasing SoTAs with fewer parameters on OOD datasets VQA-CP v2 and VQA-VS. The codes can be found at https://github.com/PhoebusSi/Compress-Robust-VQA.", "keywords": "visual question answering;out-of-distribution;robustness;debiasing", "primary_area": "", "supplementary_material": "", "author": "Qingyi Si;Yuanxin Liu;Zheng Lin;Peng Fu;Yanan Cao;Weiping Wang", "authorids": "~Qingyi_Si1;~Yuanxin_Liu1;~Zheng_Lin5;~Peng_Fu1;~Yanan_Cao1;~Weiping_Wang4", "gender": "M;M;M;F;M;F", "homepage": "https://phoebussi.github.io/;https://llyx97.github.io/;http://fupeng.ac.cn;;https://teacher.ucas.ac.cn/~0012246;http://people.ucas.edu.cn/~linzheng", "dblp": "227/6822.html;55/5877;185/6822-8;97/5152-1;72/4134-5.html;51/3740-1.html", "google_scholar": "5oH_wMEAAAAJ;https://scholar.google.com/citations?hl=en;;;zH_wmdwAAAAJ;", "or_profile": "~Qingyi_Si1;~Yuanxin_Liu1;~Peng_Fu1;~Yanan_Cao1;~Weiping_Wang4;~zheng_Lin4", "aff": "Institute of Information Engineering, Chinese Academy of Sciences, Beijing, China;Peking University;Institute of Information Engineering, Chinese Academy of Sciences;Institute of Information Engineering, Chinese Academy of Sciences;IIE;Institute of Information Engineering, Chinese Academy of Sciences", "aff_domain": "iie.ac.cn;pku.edu.cn;iie.ac.cn;iie.ac.cn;iie.ac.cn;iie.ac.cn", "position": "PhD student;PhD student;Associate Professor;Full Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nsi2023compressing,\ntitle={Compressing and Debiasing Vision-Language Pre-Trained Models for Visual Question Answering},\nauthor={Qingyi Si and Yuanxin Liu and Zheng Lin and Peng Fu and Yanan Cao and Weiping Wang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=x6aiktiAl8}\n}", "github": "", "project": "", "reviewers": "rJ51;AWxQ;HrWM;KVsc", "site": "https://openreview.net/forum?id=x6aiktiAl8", "pdf_size": 0, "rating": "5;5;5;5", "confidence": "4;3;2;3", "excitement": "3;3;4;3", "reproducibility": "4;3;4;3", "correctness": "3;3;4;4", "rating_avg": 5.0, "confidence_avg": 3.0, "excitement_avg": 3.25, "reproducibility_avg": 3.5, "correctness_avg": 3.5, "replies_avg": 13, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0001-9899-8566;0000-0003-3534-1094;0000-0002-8618-4992;0000-0002-8432-1658", "linkedin": ";;;;;", "aff_unique_index": "0;1;0;0;2;0", "aff_unique_norm": "Chinese Academy of Sciences;Peking University;Institute of Industrial Engineers", "aff_unique_dep": "Institute of Information Engineering;;", "aff_unique_url": "http://www.cas.cn;http://www.pku.edu.cn;https://www.iie.org", "aff_unique_abbr": "CAS;Peking U;IIE", "aff_campus_unique_index": "0", "aff_campus_unique": "Beijing;", "aff_country_unique_index": "0;0;0;0;1;0", "aff_country_unique": "China;United States" }, { "id": "x7zquRQfoB", "title": "How to Enhance Causal Discrimination of Utterances: A Case on 
Affective Reasoning", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Our investigation into the Affective Reasoning in Conversation (ARC) task highlights the challenge of causal discrimination. Almost all existing models, including large language models (LLMs), excel at capturing semantic correlations within utterance embeddings but fall short in determining the specific causal relationships. To overcome this limitation, we propose the incorporation of \\textit{i.i.d.} noise terms into the conversation process, thereby constructing a structural causal model (SCM). It explores how distinct causal relationships of fitted embeddings can be discerned through independent conditions. To facilitate the implementation of deep learning, we introduce the cogn frameworks to handle unstructured conversation data, and employ an autoencoder architecture to regard the unobservable noise as learnable ``implicit causes.'' Moreover, we curate a synthetic dataset that includes i.i.d. noise. Through comprehensive experiments, we validate the effectiveness and interpretability of our approach. Our code is available in https://github.com/Zodiark-ch/mater-of-our-EMNLP2023-paper.", "keywords": "Causal Discrimination;Conversation;Independent Noise;SCM", "primary_area": "", "supplementary_material": "", "author": "Hang Chen;Xinyu Yang;Jing Luo;Wenjing Zhu", "authorids": "~Hang_Chen3;~Xinyu_Yang2;~Jing_Luo1;~Wenjing_Zhu1", "gender": "M;M;M;M", "homepage": "https://github.com/Zodiark-ch;http://gr.xjtu.edu.cn/web/xyyang;https://chinglohsiu.github.io/;https://github.com/thinkre", "dblp": ";;36/349-7;", "google_scholar": "https://scholar.google.com.hk/citations?user=8-PmU7QAAAAJ;;smEOqJYAAAAJ;", "or_profile": "~Hang_Chen3;~Xinyu_Yang2;~Jing_Luo1;~Wenjing_Zhu1", "aff": "Xi'an Jiaotong University;Xi'an Jiaotong University;;DuXiaoMan", "aff_domain": "xjtu.edu.cn;xjtu.edu.cn;;duxiaoman.com", "position": "PhD student;Full Professor;;Postdoc", "bibtex": "@inproceedings{\nchen2023how,\ntitle={How to Enhance Causal Discrimination of Utterances: A Case on Affective Reasoning},\nauthor={Hang Chen and Xinyu Yang and Jing Luo and Wenjing Zhu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=x7zquRQfoB}\n}", "github": "", "project": "", "reviewers": "28nr;2DCP;5DgJ", "site": "https://openreview.net/forum?id=x7zquRQfoB", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;3;3", "excitement": "5;3;3", "reproducibility": "3;3;4", "correctness": "4;4;4", "rating_avg": 4.0, "confidence_avg": 3.0, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-9141-174X;;0000-0001-7138-3705;", "linkedin": ";;;", "aff_unique_index": "0;0;1", "aff_unique_norm": "Xi'an Jiao Tong University;Duxiaoman", "aff_unique_dep": ";", "aff_unique_url": "https://www.xjtu.edu.cn;", "aff_unique_abbr": "XJTU;", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China;" }, { "id": "x9BmfezTvD", "title": "Debiasing Made State-of-the-art: Revisiting the Simple Seed-based Weak Supervision for Text Classification", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Recent advances in weakly supervised text classification mostly focus on designing sophisticated methods to turn high-level human heuristics into quality 
pseudo-labels. In this paper, we revisit the seed matching-based method, which is arguably the simplest way to generate pseudo-labels, and show that its power was greatly underestimated. We show that the limited performance of seed matching is largely due to the label bias injected by the simple seed-match rule, which prevents the classifier from learning reliable confidence for selecting high-quality pseudo-labels. Interestingly, simply deleting the seed words present in the matched input texts can mitigate the label bias and help learn better confidence. Subsequently, the performance achieved by seed matching can be improved significantly, making it on par with or even better than the state-of-the-art. Furthermore, to handle the case when the seed words are not made known, we propose to simply delete the word tokens in the input text randomly with a high deletion ratio. Remarkably, seed matching equipped with this random deletion method can often achieve even better performance than that with seed deletion.", "keywords": "text classification;weak supervision;label noise;label bias", "primary_area": "", "supplementary_material": "", "author": "Chengyu Dong;Zihan Wang;Jingbo Shang", "authorids": "~Chengyu_Dong1;~Zihan_Wang1;~Jingbo_Shang2", "gender": ";M;M", "homepage": "https://www.chengyu-dong.me/;https://zihanwangki.github.io/;https://shangjingbo1226.github.io/", "dblp": "14/3155;152/5077-1;151/3145.html", "google_scholar": "Ppfi7j0AAAAJ;6UWtYZQAAAAJ;0SkFI4MAAAAJ", "or_profile": "~Chengyu_Dong1;~Zihan_Wang1;~Jingbo_Shang2", "aff": "University of California, San Diego;University of California, San Diego;University of California, San Diego", "aff_domain": "ucsd.edu;ucsd.edu;ucsd.edu", "position": "PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\ndong2023debiasing,\ntitle={Debiasing Made State-of-the-art: Revisiting the Simple Seed-based Weak Supervision for Text Classification},\nauthor={Chengyu Dong and Zihan Wang and Jingbo Shang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=x9BmfezTvD}\n}", "github": "", "project": "", "reviewers": "Lv81;wbLQ;PSwo", "site": "https://openreview.net/forum?id=x9BmfezTvD", "pdf_size": 0, "rating": "5;5;5", "confidence": "2;4;2", "excitement": "4;4;4", "reproducibility": "4;3;3", "correctness": "4;4;4", "rating_avg": 5.0, "confidence_avg": 2.6666666666666665, "excitement_avg": 4.0, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": ";;", "aff_unique_index": "0;0;0", "aff_unique_norm": "University of California, San Diego", "aff_unique_dep": "", "aff_unique_url": "https://www.ucsd.edu", "aff_unique_abbr": "UCSD", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "San Diego", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "xCXlOmGimw", "title": "Diversity Enhanced Narrative Question Generation for Storybooks", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Question generation (QG) from a given context can enhance comprehension, engagement, assessment, and overall efficacy in learning or conversational environments. Despite recent advancements in QG, the challenge of enhancing or measuring the diversity of generated questions often remains unaddressed. 
In this paper, we introduce a multi-question generation model (mQG), which is capable of generating multiple, diverse, and answerable questions by focusing on context and questions. To validate the answerability of the generated questions, we employ a SQuAD 2.0 fine-tuned question answering model, classifying the questions as answerable or not. We train and evaluate mQG on the FairytaleQA dataset, a well-structured QA dataset based on storybooks, with narrative questions. We further apply a zero-shot adaptation on the TellMeWhy and SQuAD1.1 datasets. mQG shows promising results across various evaluation metrics, among strong baselines.", "keywords": "Question Generation;Natural Language Generation;NLP applications", "primary_area": "", "supplementary_material": "", "author": "Hokeun Yoon;JinYeong Bak", "authorids": "~Hokeun_Yoon1;~JinYeong_Bak2", "gender": ";M", "homepage": "https://github.com/hkyoon95;https://nosyu.kr", "dblp": ";22/11519", "google_scholar": ";https://scholar.google.co.kr/citations?user=oYK9Z_IAAAAJ", "or_profile": "~Hokeun_Yoon1;~JinYeong_Bak2", "aff": ";Sungkyunkwan University", "aff_domain": ";skku.edu", "position": ";Assistant Professor", "bibtex": "@inproceedings{\nyoon2023diversity,\ntitle={Diversity Enhanced Narrative Question Generation for Storybooks},\nauthor={Hokeun Yoon and JinYeong Bak},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=xCXlOmGimw}\n}", "github": "", "project": "", "reviewers": "t52J;V2An;h6Ka", "site": "https://openreview.net/forum?id=xCXlOmGimw", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;3", "excitement": "4;3;5", "reproducibility": "4;4;5", "correctness": "5;2;4", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 4.0, "reproducibility_avg": 4.333333333333333, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-3212-5241", "linkedin": ";jybak/", "aff_unique_index": "0", "aff_unique_norm": "Sungkyunkwan University", "aff_unique_dep": "", "aff_unique_url": "https://www.skku.edu", "aff_unique_abbr": "SKKU", "aff_country_unique_index": "0", "aff_country_unique": "South Korea" }, { "id": "xDfyOL1unK", "title": "NovaCOMET: Open Commonsense Foundation Models with Symbolic Knowledge Distillation", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "We present NovaCOMET, an open commonsense knowledge model, that combines the best aspects of knowledge and general task models. Compared to previous knowledge models, NovaCOMET allows open-format relations enabling direct application to reasoning tasks; compared to general task models like Flan-T5, it explicitly centers knowledge, enabling superior performance for commonsense reasoning. \n\nNovaCOMET leverages the knowledge of opaque proprietary models to create an open knowledge pipeline. First, knowledge is symbolically distilled into NovATOMIC, a publicly-released discrete knowledge graph which can be audited, critiqued, and filtered. Next, we train NovaCOMET on NovATOMIC by fine-tuning an open-source pretrained model. NovaCOMET uses an open-format training objective, replacing the fixed relation sets of past knowledge models, enabling arbitrary structures within the data to serve as inputs or outputs. 
\n\nThe resulting generation model, optionally augmented with human annotation, matches or exceeds comparable open task models like Flan-T5 on a range of commonsense generation tasks. NovaCOMET serves as a counterexample to the contemporary focus on instruction tuning only, demonstrating a distinct advantage to explicitly modeling commonsense knowledge as well.", "keywords": "Knowledge;Commonsense;Distillation;Model;Symbolic", "primary_area": "", "supplementary_material": "", "author": "Peter West;Ronan Le Bras;Taylor Sorensen;Bill Yuchen Lin;Liwei Jiang;Ximing Lu;Khyathi Chandu;Jack Hessel;Ashutosh Baheti;Chandra Bhagavatula;Yejin Choi", "authorids": "~Peter_West1;~Ronan_Le_Bras1;~Taylor_Sorensen1;~Bill_Yuchen_Lin1;~Liwei_Jiang2;~Ximing_Lu1;~Khyathi_Chandu1;~Jack_Hessel1;~Ashutosh_Baheti1;~Chandra_Bhagavatula1;~Yejin_Choi1", "gender": "M;M;M;M;F;F;;M;M;M;F", "homepage": "https://peterwestai.notion.site/;https://rlebras.github.io/index.html;https://tsor13.github.io;http://yuchenlin.xyz/;https://liweijiang.me;https://gloriaximinglu.github.io/;;https://www.jmhessel.com;https://abaheti95.github.io/;https://www.chandrab.page;https://yejinc.github.io/", "dblp": "179/4587;;294/0706;190/4518;;24/10879;;https://dblp.uni-trier.de/pid/132/5250.html;185/7370;151/3093;89/579-1", "google_scholar": "https://scholar.google.ca/citations?user=9ubCBYwAAAAJ;8dXLDSsAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=en;lcPsDgUAAAAJ;https://scholar.google.com/citations?hl=en;;SxQQ1msAAAAJ;36wq_hwAAAAJ;AsgHp14AAAAJ;vhP-tlcAAAAJ", "or_profile": "~Peter_West1;~Ronan_Le_Bras1;~Taylor_Sorensen1;~Bill_Yuchen_Lin1;~Liwei_Jiang2;~Ximing_Lu1;~Khyathi_Chandu1;~Jack_Hessel1;~Ashutosh_Baheti1;~Chandra_Bhagavatula1;~Yejin_Choi1", "aff": "Allen Institute for Artificial Intelligence;Allen Institute for Artificial Intelligence;University of Washington;Allen Institute for Artificial Intelligence;University of Washington;University of Washington;;Allen Institute for Artificial Intelligence;Georgia Institute of Technology;Allen Institute for Artificial Intelligence;Department of Computer Science, University of Washington", "aff_domain": "allenai.org;allenai.org;uw.edu;allenai.org;washington.edu;cs.washington.edu;;allenai.org;gatech.edu;allenai.org;cs.washington.edu", "position": "Intern;Researcher;PhD student;Researcher;PhD student;Undergrad student;;Researcher;PhD student;Researcher;Full Professor", "bibtex": "@inproceedings{\nwest2023novacomet,\ntitle={Nova{COMET}: Open Commonsense Foundation Models with Symbolic Knowledge Distillation},\nauthor={Peter West and Ronan Le Bras and Taylor Sorensen and Bill Yuchen Lin and Liwei Jiang and Ximing Lu and Khyathi Chandu and Jack Hessel and Ashutosh Baheti and Chandra Bhagavatula and Yejin Choi},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=xDfyOL1unK}\n}", "github": "", "project": "", "reviewers": "a9Hp;4pDd;eW5y", "site": "https://openreview.net/forum?id=xDfyOL1unK", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;4", "excitement": "2;4;3", "reproducibility": "4;3;4", "correctness": "3;4;2", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 3.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 11, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0002-3251-3527;;;;;0000-0002-4012-8979;0000-0003-4375-1835;;", "linkedin": 
";;sorensen-taylor/;;;;;;ashutoshbaheti/;;", "aff_unique_index": "0;0;1;0;1;1;0;2;0;1", "aff_unique_norm": "Allen Institute for Artificial Intelligence;University of Washington;Georgia Institute of Technology", "aff_unique_dep": ";;", "aff_unique_url": "https://allenai.org;https://www.washington.edu;https://www.gatech.edu", "aff_unique_abbr": "AI2;UW;Georgia Tech", "aff_campus_unique_index": "1", "aff_campus_unique": ";Seattle", "aff_country_unique_index": "0;0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "xF6ORNff2k", "title": "Adaptive Structure Induction for Aspect-based Sentiment Analysis with Spectral Perspective", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Recently, incorporating structure information (e.g. dependency syntactic tree) can enhance the performance of aspect-based sentiment analysis (ABSA). However, this structure information is obtained from off-the-shelf parsers, which is often sub-optimal and cumbersome. Thus, automatically learning adaptive structures is conducive to solving this problem. In this work, we concentrate on structure induction from pre-trained language models (PLMs) and throw the structure induction into a spectrum perspective to explore the impact of scale information in language representation on structure induction ability. Concretely, the main architecture of our model is composed of commonly used PLMs (e.g. RoBERTa, etc), and a simple yet effective graph structure learning (GSL) module (graph learner + GNNs). Subsequently, we plug in spectral filters with different bands respectively after the PLMs to produce filtered language representations and feed them into the GSL module to induce latent structures. We conduct extensive experiments on three public benchmarks for ABSA. The results and further analyses demonstrate that introducing this spectral approach can shorten Aspects-sentiment Distance (AsD) and be beneficial to structure induction. Even based on such a simple framework, the effects on three datasets can reach SOTA (state of the art) or near SOTA performance. 
Additionally, our exploration also has the potential to be generalized to other tasks or to bring inspiration to other similar domains.", "keywords": "Sentiment analysis;Spectral analysis;Structure induction", "primary_area": "", "supplementary_material": "", "author": "Hao Niu;Yun Xiong;Xiaosu Wang;Wenjing Yu;Yao Zhang;Zhonglei Guo", "authorids": "~Hao_Niu2;~Yun_Xiong1;~Xiaosu_Wang1;~Wenjing_Yu1;~Yao_Zhang6;~Zhonglei_Guo1", "gender": ";F;M;F;M;M", "homepage": "https://scholar.google.com.sg/citations?user=UHj1UuQAAAAJ&hl=zh-CN;https://dblp.org/pid/67/4330;;https://yiyayybj.github.io/;https://github.com/yzhang1918;https://github.com/gzgith", "dblp": "https://dblp.uni-trier.de/pid/06/10116.html?view=by-year;67/4330;;;57/3892-9;", "google_scholar": "https://scholar.google.com.sg/citations?user=UHj1UuQAAAAJ;;;;UwKOx_IAAAAJ;", "or_profile": "~Hao_Niu2;~Yun_Xiong1;~Xiaosu_Wang1;~Wenjing_Yu1;~Yao_Zhang6;~Zhonglei_Guo1", "aff": "Fudan University;Fudan University;Fudan University;Fudan University;Fudan University;Fudan University", "aff_domain": "fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn;fudan.edu.cn", "position": "PhD student;Full Professor;PhD student;MS student;Postdoc;PhD student", "bibtex": "@inproceedings{\nniu2023adaptive,\ntitle={Adaptive Structure Induction for Aspect-based Sentiment Analysis with Spectral Perspective},\nauthor={Hao Niu and Yun Xiong and Xiaosu Wang and Wenjing Yu and Yao Zhang and Zhonglei Guo},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=xF6ORNff2k}\n}", "github": "", "project": "", "reviewers": "sCU9;ubU3;n3xo", "site": "https://openreview.net/forum?id=xF6ORNff2k", "pdf_size": 0, "rating": "4;4;4", "confidence": "5;3;4", "excitement": "4;3;3", "reproducibility": "3;3;3", "correctness": "3;3;3", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.0, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0003-3818-5816;0000-0002-8575-5415;0000-0002-8180-8604;;0000-0003-1481-8826;", "linkedin": ";;;;;", "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "Fudan University", "aff_unique_dep": "", "aff_unique_url": "https://www.fudan.edu.cn", "aff_unique_abbr": "Fudan", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "xJ3O94DnMZ", "title": "Make Your Decision Convincing! A Unified Two-Stage Framework: Self-Attribution and Decision-Making", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Explaining black-box model behavior with natural language has achieved impressive results in various NLP tasks. Recent research has explored the utilization of subsequences from the input text as a rationale, providing users with evidence to support the model decision. Although existing frameworks excel in generating high-quality rationales while achieving high task performance, they neglect to account for the unreliable link between the generated rationale and model decision. In simpler terms, a model may make correct decisions while attributing wrong rationales, or make poor decisions while attributing correct rationales. To mitigate this issue, we propose a unified two-stage framework known as Self-Attribution and Decision-Making (SADM). 
Through extensive experiments on five reasoning datasets from the ERASER benchmark, we demonstrate that our framework not only establishes a more reliable link between the generated rationale and model decision but also achieves competitive results in task performance and the quality of rationale. Furthermore, we explore the potential of our framework in semi-supervised scenarios.", "keywords": "rationale;reliable link;two-stage framework", "primary_area": "", "supplementary_material": "", "author": "Yanrui Du;Sendong Zhao;Haochun Wang;Yuhan Chen;rui bai;Zewen Qiang;Muzhen Cai;Bing Qin", "authorids": "~Yanrui_Du1;~Sendong_Zhao2;~Haochun_Wang1;~Yuhan_Chen4;~rui_bai1;~Zewen_Qiang1;~Muzhen_Cai1;~Bing_Qin2", "gender": "M;M;M;;;M;;", "homepage": ";https://sendongzhao.github.io/;http://ir.hit.edu.cn/~hcwang/;;http://ir.hit.edu.cn/~rbai/;https://blog.csdn.net/diligentboy001;;http://ir.hit.edu.cn/~qinb", "dblp": "280/1320;119/6283.html;329/5284;;;;;86/5934.html", "google_scholar": "SpS35C8AAAAJ;ZtIhRvwAAAAJ;https://scholar.google.com.hk/citations?user=olOglGgAAAAJ;;;;;LKnCub0AAAAJ", "or_profile": "~Yanrui_Du1;~Sendong_Zhao2;~Haochun_Wang1;~Yuhan_Chen4;~rui_bai1;~Zewen_Qiang1;~Muzhen_Cai1;~Bing_Qin2", "aff": "Harbin Institute of Technology;Harbin Institute of Technology;Harbin Institute of Technology;;Harbin Institute of Technology;Harbin Institute of Technology;;Harbin Institute of Technology", "aff_domain": "hit.edu.cn;hit.edu.cn;hit.edu.cn;;hit.edu.cn;hit.edu.cn;;hit.edu.cn", "position": "PhD student;Associate Professor;PhD student;;MS student;MS student;;Full Professor", "bibtex": "@inproceedings{\ndu2023make,\ntitle={Make Your Decision Convincing! A Unified Two-Stage Framework: Self-Attribution and Decision-Making},\nauthor={Yanrui Du and Sendong Zhao and Haochun Wang and Yuhan Chen and rui bai and Zewen Qiang and Muzhen Cai and Bing Qin},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=xJ3O94DnMZ}\n}", "github": "", "project": "", "reviewers": "TZrn;WBPQ;AqvU", "site": "https://openreview.net/forum?id=xJ3O94DnMZ", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;4;4", "excitement": "3;2;3", "reproducibility": "3;3;4", "correctness": "3;3;3", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 2.6666666666666665, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 8, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;;;0000-0002-2543-5604", "linkedin": ";;;;;;;", "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "Harbin Institute of Technology", "aff_unique_dep": "", "aff_unique_url": "http://www.hit.edu.cn/", "aff_unique_abbr": "HIT", "aff_campus_unique_index": "0;0;0;0;0;0", "aff_campus_unique": "Harbin", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "xKRg5dfWyv", "title": "Bootstrapping Small \\& High Performance Language Models with Unmasking-Removal Training Policy", "track": "main", "status": "Short Main", "tldr": "", "abstract": "BabyBERTa, a language model trained on small-scale child-directed speech while none of the words are unmasked during training, has been shown to achieve a level of grammaticality comparable to that of RoBERTa-base, which is trained on 6,000 times more words and 15 times more parameters. 
Relying on this promising result, we explore in this paper the performance of BabyBERTa-based models in downstream tasks, focusing on Semantic Role Labeling (SRL) and two Extractive Question Answering tasks, with the aim of building more efficient systems that rely on less data and smaller models. We investigate the influence of these models both alone and as a starting point to larger pre-trained models, separately examining the contribution of the pre-training data, the vocabulary, and the masking policy on the downstream task performance. Our results show that BabyBERTa trained with unmasking-removal policy is a much stronger starting point for downstream tasks compared to the use of RoBERTa masking policy when 10M words are used for training and that this tendency persists, although to a lesser extent, when adding more training data.", "keywords": "language models;efficient pre-training;masking policy", "primary_area": "", "supplementary_material": "", "author": "Yahan Yang;Elior Sulem;Insup Lee;Dan Roth", "authorids": "~Yahan_Yang1;~Elior_Sulem1;~Insup_Lee1;~Dan_Roth3", "gender": "F;M;;M", "homepage": "https://www.linkedin.com/in/yahan-yang-3637021a3/;https://www.eliorsulem.com;https://www.cis.upenn.edu/~lee/;https://www.cis.upenn.edu/~danroth/", "dblp": "131/7592.html;220/2087;l/InsupLee.html;r/DanRoth", "google_scholar": "E5CWhTAAAAAJ;https://scholar.google.co.il/citations?user=FLkJI0EAAAAJ;qPlUgrgAAAAJ;E-bpPWgAAAAJ", "or_profile": "~Yahan_Yang1;~Elior_Sulem1;~Insup_Lee1;~Dan_Roth3", "aff": "School of Engineering and Applied Science, University of Pennsylvania;Ben-Gurion University of the Negev;University of Pennsylvania;Amazon", "aff_domain": "seas.upenn.edu;bgu.ac.il;upenn.edu;amazon.com", "position": "PhD student;Lecturer;Full Professor;VP and Distinguished Scientist", "bibtex": "@inproceedings{\nyang2023bootstrapping,\ntitle={Bootstrapping Small {\\textbackslash}\\& High Performance Language Models with Unmasking-Removal Training Policy},\nauthor={Yahan Yang and Elior Sulem and Insup Lee and Dan Roth},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=xKRg5dfWyv}\n}", "github": "", "project": "", "reviewers": "oY1d;rmr4;bCR8", "site": "https://openreview.net/forum?id=xKRg5dfWyv", "pdf_size": 0, "rating": "3;3;3", "confidence": "2;3;3", "excitement": "3;3;2", "reproducibility": "4;3;4", "correctness": "3;3;3", "rating_avg": 3.0, "confidence_avg": 2.6666666666666665, "excitement_avg": 2.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-9859-7313;0000-0003-2672-1132;", "linkedin": ";;;dan-roth-8667361/", "aff_unique_index": "0;1;0;2", "aff_unique_norm": "University of Pennsylvania;Ben-Gurion University of the Negev;Amazon", "aff_unique_dep": "School of Engineering and Applied Science;;Amazon.com, Inc.", "aff_unique_url": "https://www.upenn.edu;https://www.bgu.ac.il;https://www.amazon.com", "aff_unique_abbr": "UPenn;BGU;Amazon", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "United States;Israel" }, { "id": "xL8SLt02mt", "title": "An Expression Tree Decoding Strategy for Mathematical Equation Generation", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Generating mathematical equations from natural language requires an accurate understanding of the 
relations among math expressions. Existing approaches can be broadly categorized into token-level and expression-level generation. The former treats equations as a mathematical language, sequentially generating math tokens. Expression-level methods generate each expression one by one. However, each expression represents a solving step, and there naturally exist parallel or dependent relations between these steps, which are ignored by current sequential methods. Therefore, we integrate tree structure into the expression-level generation and advocate an expression tree decoding strategy. To generate a tree with expression as its node, we employ a layer-wise parallel decoding strategy: we decode multiple independent expressions (leaf nodes) in parallel at each layer and repeat parallel decoding layer by layer to sequentially generate these parent node expressions that depend on others. Besides, a bipartite matching algorithm is adopted to align multiple predictions with annotations for each layer. Experiments show our method outperforms other baselines, especially for these equations with complex structures.", "keywords": "Expression tree;Equation;Parallel Decoding;Math Word Problem", "primary_area": "", "supplementary_material": "", "author": "Wenqi Zhang;Yongliang Shen;Qingpeng Nong;Zeqi Tan;Yanna Ma;Weiming Lu", "authorids": "~Wenqi_Zhang2;~Yongliang_Shen1;~Qingpeng_Nong1;~Zeqi_Tan1;~Yanna_Ma1;~Weiming_Lu1", "gender": ";M;;M;F;", "homepage": ";;;;;", "dblp": ";221/5612-1.html;;200/9648.html;;", "google_scholar": ";UT3NzFAAAAAJ;;https://scholar.google.com/citations?hl=zh-CN;https://scholar.google.com/citations?hl=zh-CN;", "or_profile": "~Wenqi_Zhang2;~Yongliang_Shen1;~Qingpeng_Nong1;~Zeqi_Tan1;~Yanna_Ma1;~Weiming_Lu1", "aff": ";;;University of Hong Kong;Shanghai University of Science and Technology;", "aff_domain": ";;;hku.hk;usst.edu.cn;", "position": ";;;Intern;Lecturer;", "bibtex": "@inproceedings{\nzhang2023an,\ntitle={An Expression Tree Decoding Strategy for Mathematical Equation Generation},\nauthor={Wenqi Zhang and Yongliang Shen and Qingpeng Nong and Zeqi Tan and Yanna Ma and Weiming Lu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=xL8SLt02mt}\n}", "github": "", "project": "", "reviewers": "1r8v;Apu7;GxCv", "site": "https://openreview.net/forum?id=xL8SLt02mt", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;4;2", "excitement": "3;4;3", "reproducibility": "4;4;4", "correctness": "4;3;4", "rating_avg": 4.0, "confidence_avg": 3.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.0, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;", "linkedin": ";;;;;", "aff_unique_index": "0;1", "aff_unique_norm": "University of Hong Kong;Shanghai University of Science and Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.hku.hk;https://www.sustech.edu.cn", "aff_unique_abbr": "HKU;SUSTech", "aff_campus_unique_index": "0", "aff_campus_unique": "Hong Kong SAR;", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "id": "xMudYCcBum", "title": "Using Interpretation Methods for Model Enhancement", "track": "main", "status": "Long Main", "tldr": "", "abstract": "In the age of neural natural language processing, there are plenty of works trying to derive interpretations of neural models. 
\nIntuitively, when gold rationales exist during training, one can additionally train the model to match its interpretation with the rationales.\nHowever, this intuitive idea has not been fully explored. In this paper, we propose a framework of utilizing interpretation methods and gold rationales to enhance models. Our framework is very general in the sense that it can incorporate various interpretation methods. Previously proposed gradient-based methods can be shown as an instance of our framework. We also propose two novel instances utilizing two other types of interpretation methods, erasure/replace-based and extractor-based methods, for model enhancement. We conduct comprehensive experiments on a variety of tasks. Experimental results show that our framework is effective especially in low-resource settings in enhancing models with various interpretation methods, and our two newly-proposed methods outperform gradient-based methods in most settings. Code is available at https://github.com/Chord-Chen-30/UIMER.", "keywords": "interpretation methods;few-shot", "primary_area": "", "supplementary_material": "", "author": "Zhuo Chen;Chengyue Jiang;Kewei Tu", "authorids": "~Zhuo_Chen16;~Chengyue_Jiang1;~Kewei_Tu1", "gender": "M;M;M", "homepage": "https://chord-chen-30.github.io/;https://jeffchy.github.io;https://faculty.sist.shanghaitech.edu.cn/faculty/tukw/", "dblp": ";;22/918", "google_scholar": ";;5gi3Pm0AAAAJ", "or_profile": "~Zhuo_Chen16;~Chengyue_Jiang1;~Kewei_Tu1", "aff": "ShanghaiTech University;ShanghaiTech University;ShanghaiTech University", "aff_domain": "shanghaitech.edu.cn;shanghaitech.edu.cn;shanghaitech.edu.cn", "position": "MS student;PhD student;Associate Professor", "bibtex": "@inproceedings{\nchen2023using,\ntitle={Using Interpretation Methods for Model Enhancement},\nauthor={Zhuo Chen and Chengyue Jiang and Kewei Tu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=xMudYCcBum}\n}", "github": "", "project": "", "reviewers": "PcW8;fRry;xBwV", "site": "https://openreview.net/forum?id=xMudYCcBum", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;3", "excitement": "3;2;3", "reproducibility": "4;3;3", "correctness": "4;3;4", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 2.6666666666666665, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": ";%E6%89%BF%E8%B6%8A-%E8%92%8B-1a9496142/;", "aff_unique_index": "0;0;0", "aff_unique_norm": "ShanghaiTech University", "aff_unique_dep": "", "aff_unique_url": "https://www.shanghaitech.edu.cn", "aff_unique_abbr": "ShanghaiTech", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "xNzu8DivUj", "title": "Continually Improving Extractive QA via Human Feedback", "track": "main", "status": "Long Main", "tldr": "", "abstract": "We study continually improving an extractive question answering (QA) system via human user feedback. We design and deploy an iterative approach, where information-seeking users ask questions, receive model-predicted answers, and provide feedback. We conduct experiments involving thousands of user interactions under diverse setups to broaden the understanding of learning from feedback over time. 
Our experiments show effective improvement from user feedback of extractive QA models over time across different data regimes, including significant potential for domain adaptation.", "keywords": "QA;human feedback;bandit learning", "primary_area": "", "supplementary_material": "", "author": "Ge Gao;Hung-Ting Chen;Yoav Artzi;Eunsol Choi", "authorids": "~Ge_Gao1;~Hung-Ting_Chen1;~Yoav_Artzi1;~Eunsol_Choi1", "gender": "F;M;;", "homepage": "https://gao-g.github.io/;https://timchen0618.github.io/;;https://eunsol.github.io/", "dblp": ";;;116/2765", "google_scholar": "https://scholar.google.com/citations?hl=en;dApuTpsAAAAJ;;6wulN88AAAAJ", "or_profile": "~Ge_Gao1;~Hung-Ting_Chen1;~Yoav_Artzi1;~Eunsol_Choi1", "aff": "Cornell University;University of Texas, Austin;;University of Texas, Austin", "aff_domain": "cornell.edu;utexas.edu;;cs.utexas.edu", "position": "PhD student;MS student;;Assistant Professor", "bibtex": "@inproceedings{\ngao2023continually,\ntitle={Continually Improving Extractive {QA} via Human Feedback},\nauthor={Ge Gao and Hung-Ting Chen and Yoav Artzi and Eunsol Choi},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=xNzu8DivUj}\n}", "github": "", "project": "", "reviewers": "Qenp;CpKn;4d65", "site": "https://openreview.net/forum?id=xNzu8DivUj", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;4;4", "excitement": "4;4;4", "reproducibility": "4;4;4", "correctness": "4;3;4", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 4.0, "reproducibility_avg": 4.0, "correctness_avg": 3.6666666666666665, "replies_avg": 10, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0000-0003-3607-9104", "linkedin": ";hungtingchen/;;", "aff_unique_index": "0;1;1", "aff_unique_norm": "Cornell University;University of Texas at Austin", "aff_unique_dep": ";", "aff_unique_url": "https://www.cornell.edu;https://www.utexas.edu", "aff_unique_abbr": "Cornell;UT Austin", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Austin", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "xOyBEJq0O8", "title": "GATITOS: Using a New Multilingual Lexicon for Low-resource Machine Translation", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Modern machine translation models and language models are able to translate without having been trained on parallel data, greatly expanding the set of languages that they can serve. However, these models still struggle in a variety of predictable ways, a problem that cannot be overcome without at least some trusted bilingual data. This work expands on a cheap and abundant resource to combat this problem: bilingual lexica. We test the efficacy of bilingual lexica in a real-world set-up, on 200-language translation models trained on web-crawled text. We present several findings: (1) using lexical data augmentation, we demonstrate sizable performance gains for unsupervised translation; (2) we compare several families of data augmentation, demonstrating that they yield similar improvements, and can be combined for even greater improvements; (3) we demonstrate the importance of carefully curated lexica over larger, noisier ones, especially with larger models; and (4) we compare the efficacy of multilingual lexicon data versus human-translated parallel data. 
Based on results from (3), we develop and open-source GATITOS, a high-quality, curated dataset in 168 tail languages, one of the first human-translated resources to cover many of these languages.", "keywords": "machine translation;low-resource;lexicons;dictionaries;unsupervised;NMT;MT;data augmentation", "primary_area": "", "supplementary_material": "", "author": "Alexander Jones;Isaac Rayburn Caswell;Orhan Firat;Ishank Saxena", "authorids": "~Alexander_Jones1;~Isaac_Rayburn_Caswell1;~Orhan_Firat1;~Ishank_Saxena1", "gender": "M;;M;M", "homepage": ";;;http://ishank.me/", "dblp": ";236/5919.html;120/2225;", "google_scholar": "rIO9rpQAAAAJ;myh9l2AAAAAJ;https://scholar.google.com.tr/citations?user=dLaR9lgAAAAJ;", "or_profile": "~Alexander_Jones1;~Isaac_Rayburn_Caswell1;~Orhan_Firat1;~Ishank_Saxena1", "aff": "Dartmouth College;Google;Google;Google", "aff_domain": "dartmouth.edu;google.com;google.com;google.com", "position": "Undergrad student;Researcher;Research Scientist;Researcher", "bibtex": "@inproceedings{\njones2023gatitos,\ntitle={{GATITOS}: Using a New Multilingual Lexicon for Low-resource Machine Translation},\nauthor={Alexander Jones and Isaac Rayburn Caswell and Orhan Firat and Ishank Saxena},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=xOyBEJq0O8}\n}", "github": "", "project": "", "reviewers": "TqkJ;GrpG;htr1;zAXT", "site": "https://openreview.net/forum?id=xOyBEJq0O8", "pdf_size": 0, "rating": "4;4;4;4", "confidence": "4;4;3;4", "excitement": "4;2;4;3", "reproducibility": "2;2;0;2", "correctness": "3;3;4;3", "rating_avg": 4.0, "confidence_avg": 3.75, "excitement_avg": 3.25, "reproducibility_avg": 1.5, "correctness_avg": 3.25, "replies_avg": 13, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;", "linkedin": ";;;ishank-saxena/", "aff_unique_index": "0;1;1;1", "aff_unique_norm": "Dartmouth College;Google", "aff_unique_dep": ";Google", "aff_unique_url": "https://www.dartmouth.edu;https://www.google.com", "aff_unique_abbr": "Dartmouth;Google", "aff_campus_unique_index": "1;1;1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "xQbFsx8usC", "title": "Temporal Knowledge Graph Reasoning Based on N-tuple Modeling", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Reasoning over Temporal Knowledge Graphs (TKGs) that predicts temporal facts (e.g., events) in the future is crucial for many applications. The temporal facts in existing TKGs only contain their core entities (i.e., the entities playing core roles therein) and formulate them as quadruples, i.e., (subject entity, predicate, object entity, timestamp). This formulation oversimplifies temporal facts and inevitably causes information loss. Therefore, we propose to describe a temporal fact more accurately as an n-tuple, containing not only its predicate and core entities, but also its auxiliary entities, as well as the roles of all entities. By so doing, TKGs are augmented to N-tuple Temporal Knowledge Graphs (N-TKGs). To conduct reasoning over N-TKGs, we further propose N-tuple Evolutional Network (NE-Net). It recurrently learns the evolutional representations of entities and predicates in temporal facts at different timestamps in the history via modeling the relations among those entities and predicates. 
Based on the learned representations, reasoning tasks at future timestamps can be realized via task-specific decoders. Experiment results on two newly built datasets demonstrate the superiority of N-TKG and the effectiveness of NE-Net.", "keywords": "Knowledge graph;n-ary temporal knowledge graph;graph convolution network", "primary_area": "", "supplementary_material": "", "author": "Zhongni Hou;Xiaolong Jin;Zixuan Li;Long Bai;Saiping Guan;Yutao Zeng;Jiafeng Guo;Xueqi Cheng", "authorids": "~Zhongni_Hou2;~Xiaolong_Jin1;~Zixuan_Li1;~Long_Bai1;~Saiping_Guan1;~Yutao_Zeng1;~Jiafeng_Guo1;~Xueqi_Cheng1", "gender": "M;M;M;F;M;M;M;F", "homepage": "http://www.bigdatalab.ac.cn/jxl/;https://lee-zix.github.io/;https://waltbai.github.io/;;https://scholar.google.com/citations?user=4seOzHgAAAAJ&hl=zh-CN;http://www.bigdatalab.ac.cn/gjf/;https://people.ucas.ac.cn/~cxq?language=en;", "dblp": "00/1728-1.html;205/0187.html;65/7795-2.html;205/7534;260/2125;02/146;44/912;260/2086.html", "google_scholar": "5TRLpyIAAAAJ;fibOdOkAAAAJ;Zrd9pCMAAAAJ;mS2QCewAAAAJ;4seOzHgAAAAJ;https://scholar.google.com/citations?view_op=list_works;hY8aLqAAAAAJ;", "or_profile": "~Xiaolong_Jin1;~Zixuan_Li1;~Long_Bai1;~Saiping_Guan1;~Yutao_Zeng1;~Jiafeng_Guo1;~Xueqi_Cheng1;~Zhongni_HOU1", "aff": "Institute of Computing Technology, Chinese Academy of Sciences;Institute of Computing Technology, Chinese Academy of Sciences;Institute of Computing Technology, Chinese Academy of Sciences;Institute of Computing Technology, Chinese Academy of Sciences;Tencent Inc.;Institute of Computing Technolgy, Chinese Academy of Sciences;Institute of Computing Technology, Chinese Academy;,Chinese Academy of Sciences", "aff_domain": "ict.ac.cn;ict.ac.cn;ict.ac.cn;ict.ac.cn;tencent.com;ict.ac.cn;ict.ac.cn;ict.ac.cn", "position": "Full Professor;Assistant Professor;PhD student;Assistant Professor;Researcher;Researcher;Full Professor;PhD student", "bibtex": "@inproceedings{\nhou2023temporal,\ntitle={Temporal Knowledge Graph Reasoning Based on N-tuple Modeling},\nauthor={Zhongni Hou and Xiaolong Jin and Zixuan Li and Long Bai and Saiping Guan and Yutao Zeng and Jiafeng Guo and Xueqi Cheng},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=xQbFsx8usC}\n}", "github": "", "project": "", "reviewers": "ivcT;NCMC;vaso", "site": "https://openreview.net/forum?id=xQbFsx8usC", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;5", "excitement": "3;3;3", "reproducibility": "3;4;4", "correctness": "2;3;4", "rating_avg": 3.0, "confidence_avg": 4.333333333333333, "excitement_avg": 3.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 8, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;0000-0002-9051-2127;;;;", "linkedin": ";;;;;;;", "aff_unique_index": "0;0;0;0;1;0;0;0", "aff_unique_norm": "Chinese Academy of Sciences;Tencent", "aff_unique_dep": "Institute of Computing Technology;Tencent", "aff_unique_url": "http://www.ict.ac.cn;https://www.tencent.com", "aff_unique_abbr": "CAS;Tencent", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "xVMV2IYbWH", "title": "An Adaptive Prompt Generation Framework for Task-oriented Dialogue System", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "The de facto way of utilizing black-box large language models (LLMs) to perform various downstream tasks is 
prompting. However, obtaining suitable prompts for specific tasks is still a challenging problem. While existing LLM-based methods demonstrate promising performance in task-oriented dialogue (TOD) task, they often require manual adjustment in prompt selection, or focus solely on dialogue understanding or generation. To address these issues, we propose an adaptive prompt generation framework to fully unleash the potential of LLMs for the comprehensive TOD system. Firstly, we design a trainable slot generator (TSG) that can generate domain and slot information in the belief state, which serves as prior knowledge for subsequent prompt generation. Next, we propose an adaptive prompt generator (APG) that utilizes the prior knowledge to generate prompts for the LLM, deriving the belief state and system response of the dialogue for evaluation. Finally, we evaluate our framework on the MultiWOZ 2.0 dataset. Extensive experiments demonstrate that our method outperforms existing methods. Our code and data will be released.", "keywords": "adaptive prompt;LLM;task-oriented dialogue;black-box;prompt learning", "primary_area": "", "supplementary_material": "", "author": "Jun Gao;Liuyu Xiang;Huijia Wu;Han Zhao;Yiqi Tong;Zhaofeng He", "authorids": "~Jun_Gao7;~Liuyu_Xiang1;~Huijia_Wu1;~Han_Zhao4;~Yiqi_Tong1;~Zhaofeng_He1", "gender": "M;;M;;M;M", "homepage": "https://github.com/gaogaocn;;;;;https://teacher.bupt.edu.cn/zhaofenghe/zh_CN/index.htm", "dblp": ";;188/6224;;280/0464.html;13/3992", "google_scholar": ";;;;Ch8bBhIAAAAJ;https://scholar.google.com.hk/citations?hl=zh-CN", "or_profile": "~Jun_Gao7;~Liuyu_Xiang1;~Huijia_Wu1;~Han_Zhao4;~Yiqi_Tong1;~Zhaofeng_He1", "aff": "Beijing University of Posts and Telecommunications;;Beijing University of Posts and Telecommunications;;Beihang University;Beijing University of Post and Telecommunication", "aff_domain": "bupt.edu.cn;;bupt.edu.cn;;buaa.edu.cn;bupt.edu.cn", "position": "PhD student;;Postdoc;;PhD student;Full Professor", "bibtex": "@inproceedings{\ngao2023an,\ntitle={An Adaptive Prompt Generation Framework for Task-oriented Dialogue System},\nauthor={Jun Gao and Liuyu Xiang and Huijia Wu and Han Zhao and Yiqi Tong and Zhaofeng He},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=xVMV2IYbWH}\n}", "github": "", "project": "", "reviewers": "RXui;E3rq;uoyj", "site": "https://openreview.net/forum?id=xVMV2IYbWH", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;3;4", "excitement": "2;3;3", "reproducibility": "2;4;3", "correctness": "2;3;2", "rating_avg": 3.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 2.6666666666666665, "reproducibility_avg": 3.0, "correctness_avg": 2.3333333333333335, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;0000-0002-6671-9208;0000-0002-3433-8435", "linkedin": ";;;;;", "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Beijing University of Posts and Telecommunications;Beihang University", "aff_unique_dep": ";", "aff_unique_url": "http://www.bupt.edu.cn/;http://www.buaa.edu.cn/", "aff_unique_abbr": "BUPT;BUAA", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Beijing;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "xX2KjzdFPH", "title": "Improving Image Captioning via Predicting Structured Concepts", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Having the difficulty of solving the semantic gap between 
images and texts for the image captioning task, conventional studies in this area paid some attention to treating semantic concepts as a bridge between the two modalities and improved captioning performance accordingly. Although promising results on concept prediction were obtained, the aforementioned studies normally ignore the relationship among concepts, which relies not only on objects in the image but also on word dependencies in the text, and thus offers considerable potential for improving the process of generating good descriptions. In this paper, we propose a structured concept predictor (SCP) to predict concepts and their structures, then we integrate them into captioning, thereby enhancing the contribution of visual signals in this task via concepts and further using their relations to distinguish cross-modal semantics for better description generation. Particularly, we design weighted graph convolutional networks (W-GCN) to depict concept relations driven by word dependencies, and then learn differentiated contributions from these concepts for the following decoding process. Therefore, our approach captures potential relations among concepts and discriminatively learns different concepts, thereby effectively facilitating image captioning with inherited information across modalities. Extensive experiments and their results demonstrate the effectiveness of our approach as well as each proposed module in this work.", "keywords": "Image captioning;GCN", "primary_area": "", "supplementary_material": "", "author": "Ting Wang;Weidong Chen;Yuanhe Tian;Yan Song;Zhendong Mao", "authorids": "~Ting_Wang11;~Weidong_Chen1;~Yuanhe_Tian1;~Yan_Song1;~Zhendong_Mao1", "gender": "Not Specified;M;;;", "homepage": "https://github.com/wangting0;;;https://clksong.github.io;", "dblp": ";;https://dblp.uni-trier.de/pid/246/0133;09/1398;", "google_scholar": ";Z-vKGdoAAAAJ;5GCwWZ8AAAAJ;;", "or_profile": "~Ting_Wang11;~Weidong_Chen1;~Yuanhe_Tian1;~Yan_Song1;~Zhendong_Mao1", "aff": "University of Science and Technology of China;University of Science and Technology of China;University of Washington, Seattle;University of Science and Technology of China;", "aff_domain": "ustc.edu.cn;ustc.edu.cn;uw.edu;ustc.edu.cn;", "position": "Undergrad student;Postdoc;PhD student;Full Professor;", "bibtex": "@inproceedings{\nwang2023improving,\ntitle={Improving Image Captioning via Predicting Structured Concepts},\nauthor={Ting Wang and Weidong Chen and Yuanhe Tian and Yan Song and Zhendong Mao},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=xX2KjzdFPH}\n}", "github": "", "project": "", "reviewers": "y9d1;jqw3;hF5e;uJ4d", "site": "https://openreview.net/forum?id=xX2KjzdFPH", "pdf_size": 0, "rating": "4;4;4;4", "confidence": "3;4;5;5", "excitement": "3;3;3;4", "reproducibility": "4;3;4;5", "correctness": "3;3;3;4", "rating_avg": 4.0, "confidence_avg": 4.25, "excitement_avg": 3.25, "reproducibility_avg": 4.0, "correctness_avg": 3.25, "replies_avg": 14, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0003-2774-2875;;;", "linkedin": ";;;;", "aff_unique_index": "0;0;1;0", "aff_unique_norm": "University of Science and Technology of China;University of Washington", "aff_unique_dep": ";", "aff_unique_url": "http://www.ustc.edu.cn;https://www.washington.edu", "aff_unique_abbr": "USTC;UW", "aff_campus_unique_index": "1", "aff_campus_unique": ";Seattle", "aff_country_unique_index": "0;0;1;0", 
"aff_country_unique": "China;United States" }, { "id": "xapBkUt0yf", "title": "CompoundPiece: Evaluating and Improving Decompounding Performance of Language Models", "track": "main", "status": "Long Main", "tldr": "", "abstract": "While many languages possess processes of joining two or more words to create compound words, previous studies have been typically limited only to languages with excessively productive compound formation (e.g., German, Dutch) and there is no public dataset containing compound and non-compound words across a large number of languages. In this work, we systematically study decompounding, the task of splitting compound words into their constituents, at a wide scale. We first address the data gap by introducing a dataset of 255k compound and non-compound words across 56 diverse languages obtained from Wiktionary. We then use this dataset to evaluate an array of Large Language Models (LLMs) on the decompounding task. We find that LLMs perform poorly, especially on words which are tokenized unfavorably by subword tokenization. We thus introduce a novel methodology to train dedicated models for decompounding. The proposed two-stage procedure relies on a fully self-supervised objective in the first stage, while the second, supervised learning stage optionally fine-tunes the model on the annotated Wiktionary data. Our self-supervised models outperform the prior best unsupervised decompounding models by 13.9% accuracy on average. Our fine-tuned models outperform all prior (language-specific) decompounding tools. Furthermore, we use our models to leverage decompounding during the creation of a subword tokenizer, which we refer to as CompoundPiece. CompoundPiece tokenizes compound words more favorably on average, leading to improved performance on decompounding over an otherwise equivalent model using SentencePiece tokenization.", "keywords": "segmentation;multilinguality;tokenization;compound;compounds", "primary_area": "", "supplementary_material": "", "author": "Benjamin Minixhofer;Jonas Pfeiffer;Ivan Vuli\u0107", "authorids": "~Benjamin_Minixhofer1;~Jonas_Pfeiffer1;~Ivan_Vuli\u01071", "gender": "M;M;M", "homepage": "https://github.com/bminixhofer;https://pfeiffer.ai;https://sites.google.com/site/ivanvulic/", "dblp": "292/4068;222/9866.html;77/9768", "google_scholar": "P5Z2Pj0AAAAJ;https://scholar.google.com/citations?hl=en;ZX8js60AAAAJ", "or_profile": "~Benjamin_Minixhofer1;~Jonas_Pfeiffer1;~Ivan_Vuli\u01071", "aff": "Johannes Kepler University Linz;Google DeepMind;PolyAI Limited", "aff_domain": "jku.at;google.com;poly-ai.com", "position": "Undergrad student;Researcher;Senior Scientist", "bibtex": "@inproceedings{\nminixhofer2023compoundpiece,\ntitle={CompoundPiece: Evaluating and Improving Decompounding Performance of Language Models},\nauthor={Benjamin Minixhofer and Jonas Pfeiffer and Ivan Vuli{\\'c}},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=xapBkUt0yf}\n}", "github": "", "project": "", "reviewers": "8bjC;WAo7;X8iv", "site": "https://openreview.net/forum?id=xapBkUt0yf", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;4", "excitement": "4;4;3", "reproducibility": "3;4;3", "correctness": "3;4;3", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", 
"linkedin": ";jonas-pfeiffer/;ivan-vuli%C4%87-286b4a81/", "aff_unique_index": "0;1;2", "aff_unique_norm": "Johannes Kepler University;Google;PolyAI Limited", "aff_unique_dep": ";Google DeepMind;", "aff_unique_url": "https://www.jku.at;https://deepmind.com;https://www.poly.ai", "aff_unique_abbr": "JKU;DeepMind;PolyAI", "aff_campus_unique_index": "0", "aff_campus_unique": "Linz;", "aff_country_unique_index": "0;1;1", "aff_country_unique": "Austria;United Kingdom" }, { "id": "xarWXEhhdy", "title": "Self-supervised Meta-Prompt Learning with Meta-Gradient Regularization for Few-shot Generalization", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Prompt tuning is a parameter-efficient method, which learns soft prompts and conditions frozen language models to perform specific downstream tasks. Though effective, prompt tuning under few-shot settings on the one hand heavily relies on a good initialization of soft prompts. On the other hand, it can easily overfit to few-shot training samples, thereby undermining generalizability. Existing works leverage pre-training or supervised meta-learning to initialize soft prompts but they fail to data-efficiently generalize to unseen downstream tasks. To address the above problems, this paper proposes a novel Self-sUpervised meta-Prompt learning framework with MEta-gradient Regularization for few-shot generalization (SUPMER). SUPMER leverages self-supervised meta-learning with a diverse set of well-designed meta-tasks to learn a universal prompt initialization for efficient adaptation using only unlabeled data. Additionally, it jointly meta-learns a gradient regularization function to transform raw gradients into a domain-generalizable direction, thus alleviating the problem of overfitting. Extensive experiments show that SUPMER achieves better performance for different few-shot downstream tasks, and also exhibits a stronger domain generalization ability. 
The code for SUPMER will be available at https://github.com/beepkh/SUPMER.", "keywords": "prompt tuning;self-supervised meta-learning;meta-gradient regularization", "primary_area": "", "supplementary_material": "", "author": "Kaihang Pan;Juncheng Li;Hongye SONG;Jun Lin;Xiaozhong Liu;Siliang Tang", "authorids": "~Kaihang_Pan1;~Juncheng_Li3;~Hongye_SONG1;~Jun_Lin2;~Xiaozhong_Liu2;~Siliang_Tang1", "gender": "M;M;F;M;M;M", "homepage": "https://github.com/1308024507pkh;;https://cn.linkedin.com/in/%E7%BA%A2%E5%8F%B6-%E5%AE%8B-804832198;https://scholar.google.com/citations?user=DvAsN5QAAAAJ&hl=zh-CN;https://www.wpi.edu/people/faculty/xliu14;https://person.zju.edu.cn/en/siliang", "dblp": "344/0647.html;182/7674-6;;;11/6389.html;44/5693", "google_scholar": "https://scholar.google.com.hk/citations?user=lMQADDUAAAAJ;lm9s-QgAAAAJ;;DvAsN5QAAAAJ;1BUByMcAAAAJ;8e7H3PcAAAAJ", "or_profile": "~Kaihang_Pan1;~Juncheng_Li3;~Hongye_SONG1;~Jun_Lin2;~Xiaozhong_Liu2;~Siliang_Tang1", "aff": "Zhejiang University;Zhejiang University;;Alibaba Group;Worcester Polytechnic Institute;Zhejiang University", "aff_domain": "zju.edu.cn;zju.edu.cn;;alibaba-inc.com;wpi.edu;zju.edu.cn", "position": "PhD student;PhD student;;Researcher;Associate Professor;Full Professor", "bibtex": "@inproceedings{\npan2023selfsupervised,\ntitle={Self-supervised Meta-Prompt Learning with Meta-Gradient Regularization for Few-shot Generalization},\nauthor={Kaihang Pan and Juncheng Li and Hongye SONG and Jun Lin and Xiaozhong Liu and Siliang Tang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=xarWXEhhdy}\n}", "github": "", "project": "", "reviewers": "gNne;XEMq;33e1;a2Py", "site": "https://openreview.net/forum?id=xarWXEhhdy", "pdf_size": 0, "rating": "4;4;4;4", "confidence": "4;2;3;3", "excitement": "4;4;4;3", "reproducibility": "4;3;4;3", "correctness": "4;3;4;4", "rating_avg": 4.0, "confidence_avg": 3.0, "excitement_avg": 3.75, "reproducibility_avg": 3.5, "correctness_avg": 3.75, "replies_avg": 14, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0003-2258-1291;;;;0000-0002-7356-9711", "linkedin": ";;;;;siliang-tang-4734272a/", "aff_unique_index": "0;0;1;2;0", "aff_unique_norm": "Zhejiang University;Alibaba Group;Worcester Polytechnic Institute", "aff_unique_dep": ";;", "aff_unique_url": "https://www.zju.edu.cn;https://www.alibaba.com;https://www.wpi.edu", "aff_unique_abbr": "ZJU;Alibaba;WPI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;0", "aff_country_unique": "China;United States" }, { "id": "xbnNgqGefc", "title": "Discourse Structures Guided Fine-grained Propaganda Identification", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Propaganda is a form of deceptive narratives that instigate or mislead the public, usually with a political purpose. In this paper, we aim to identify propaganda in political news at two fine-grained levels: sentence-level and token-level. We observe that propaganda content is more likely to be embedded in sentences that attribute causality or assert contrast to nearby sentences, as well as seen in opinionated evaluation, speculation and discussions of future expectation. 
Hence, we propose to incorporate both local and global discourse structures for propaganda discovery and construct two teacher models for identifying PDTB-style discourse relations between nearby sentences and common discourse roles of sentences in a news article respectively. We further devise two methods to incorporate the two types of discourse structures for propaganda identification by either using teacher predicted probabilities as additional features or soliciting guidance in a knowledge distillation framework. Experiments on the benchmark dataset demonstrate that leveraging guidance from discourse structures can significantly improve both precision and recall of propaganda content identification.", "keywords": "misinformation;propaganda;discourse structure", "primary_area": "", "supplementary_material": "", "author": "Yuanyuan Lei;Ruihong Huang", "authorids": "~Yuanyuan_Lei1;~Ruihong_Huang1", "gender": ";F", "homepage": ";https://people.engr.tamu.edu/huangrh/index.html", "dblp": ";42/4811.html", "google_scholar": ";https://scholar.google.com.tw/citations?user=NU2aHWUAAAAJ", "or_profile": "~Yuanyuan_Lei1;~Ruihong_Huang1", "aff": ";Texas A&M University", "aff_domain": ";cse.tamu.edu", "position": ";Associate Professor", "bibtex": "@inproceedings{\nlei2023discourse,\ntitle={Discourse Structures Guided Fine-grained Propaganda Identification},\nauthor={Yuanyuan Lei and Ruihong Huang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=xbnNgqGefc}\n}", "github": "", "project": "", "reviewers": "qWvx;xm9p;sTTb", "site": "https://openreview.net/forum?id=xbnNgqGefc", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;5;3", "excitement": "4;3;4", "reproducibility": "3;2;4", "correctness": "3;3;4", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";", "linkedin": ";", "aff_unique_index": "0", "aff_unique_norm": "Texas A&M University", "aff_unique_dep": "", "aff_unique_url": "https://www.tamu.edu", "aff_unique_abbr": "TAMU", "aff_country_unique_index": "0", "aff_country_unique": "United States" }, { "id": "xeecFHJ4d4", "title": "IRFL: Image Recognition of Figurative Language", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Figures of speech such as metaphors, similes, and idioms are integral parts of human communication. They are ubiquitous in many forms of discourse, allowing people to convey complex, abstract ideas and evoke emotion. As figurative forms are often conveyed through multiple modalities (e.g., both text and images), understanding multimodal figurative language is an important AI challenge, weaving together profound vision, language, commonsense and cultural knowledge. \n\nIn this work, we develop the Image Recognition of Figurative Language (IRFL) dataset. We leverage human annotation and an automatic pipeline we created to generate a multimodal dataset, and introduce two novel tasks as a benchmark for multimodal figurative language understanding. We experimented with state-of-the-art vision and language models and found that the best (22%) performed substantially worse than humans (97%). 
We release our dataset, benchmark, and code in hopes of driving the development of models that can better understand figurative language.", "keywords": "Figurative Language;Multimodal Figurative Language;Resources", "primary_area": "", "supplementary_material": "", "author": "Ron Yosef;Yonatan Bitton;Dafna Shahaf", "authorids": "~Ron_Yosef1;~Yonatan_Bitton1;~Dafna_Shahaf1", "gender": "M;M;F", "homepage": ";https://yonatanbitton.github.io/;http://hyadatalab.com/", "dblp": "283/5799;277/7042;02/2672.html", "google_scholar": "https://scholar.google.com/citations?hl=en;P9Fpf4sAAAAJ;https://scholar.google.com.tw/citations?user=AgyW_90AAAAJ", "or_profile": "~Ron_Yosef1;~Yonatan_Bitton1;~Dafna_Shahaf1", "aff": ", Hebrew University of Jerusalem;Hebrew University of Jerusalem;Hebrew University of Jerusalem", "aff_domain": "mail.huji.ac.il;huji.ac.il;huji.ac.il", "position": "MS student;PhD student;Full Professor", "bibtex": "@inproceedings{\nyosef2023irfl,\ntitle={{IRFL}: Image Recognition of Figurative Language},\nauthor={Ron Yosef and Yonatan Bitton and Dafna Shahaf},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=xeecFHJ4d4}\n}", "github": "", "project": "", "reviewers": "W39D;Sk6i;nddL", "site": "https://openreview.net/forum?id=xeecFHJ4d4", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;4;4", "excitement": "4;2;4", "reproducibility": "5;5;4", "correctness": "4;3;4", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.666666666666667, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0003-3261-0818", "linkedin": "ron-yosef-85a88b170/;yonatanbitton/;", "aff_unique_index": "0;0;0", "aff_unique_norm": "Hebrew University of Jerusalem", "aff_unique_dep": "", "aff_unique_url": "https://www.huji.ac.il", "aff_unique_abbr": "HUJI", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Jerusalem", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Israel" }, { "id": "xfTQmGPPtQ", "title": "Parameter-efficient Tuning for Large Language Model without Calculating Its Gradients", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Fine-tuning all parameters of large language models (LLMs) requires significant computational resources and is time-consuming. Recent parameter-efficient tuning methods such as Adapter tuning, Prefix tuning, and LoRA allow for updating a small subset of parameters in large language models. However, they can only save approximately 30\\% of the training memory requirements, due to the problem that gradient computation and backpropagation are still necessary for these methods. This paper proposes a novel parameter-efficient tuning method for LLMs without calculating their gradients. Leveraging the discernible similarities between the parameter-efficient modules of the same task learned by both large and small language models, we put forward a strategy for transferring the parameter-efficient modules, originally derived from small language models to much larger ones. To ensure a smooth and effective adaptation process, we further introduce a Bridge model to guarantee dimensional consistency while also stimulating a dynamic interaction between the models. We demonstrate the effectiveness of our method using the T5 and GPT-2 series of language models on the SuperGLUE benchmark. 
Our method achieves comparable performance to both fine-tuning and parameter-efficient tuning on large language models without needing gradient-based optimization. Additionally, our method achieves up to 5.7x memory reduction compared to parameter-efficient tuning.", "keywords": "Parameter-efficient Tuning;Large Language Model;Gradient-free", "primary_area": "", "supplementary_material": "", "author": "Feihu Jin;Jiajun Zhang;Chengqing Zong", "authorids": "~Feihu_Jin1;~Jiajun_Zhang1;~Chengqing_Zong1", "gender": "M;M;M", "homepage": "https://github.com/jinfeihu-stan;http://www.nlpr.ia.ac.cn/cip/jjzhang.htm;http://www.nlpr.ia.ac.cn/cip/english/zong.htm", "dblp": ";71/6950-1.html;38/6093", "google_scholar": ";93zngeYAAAAJ;l8lvKOQAAAAJ", "or_profile": "~Feihu_Jin1;~Jiajun_Zhang1;~Chengqing_Zong1", "aff": "University of Chinese Academy of Sciences;Institute of Automation, Chinese Academy of Sciences;Institute of Automation, Chinese Academy of Sciences", "aff_domain": "ucas.ac.cn;ia.ac.cn;ia.ac.cn", "position": "MS student;Full Professor;Researcher", "bibtex": "@inproceedings{\njin2023parameterefficient,\ntitle={Parameter-efficient Tuning for Large Language Model without Calculating Its Gradients},\nauthor={Feihu Jin and Jiajun Zhang and Chengqing Zong},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=xfTQmGPPtQ}\n}", "github": "", "project": "", "reviewers": "ZcTW;wTP7;djcv", "site": "https://openreview.net/forum?id=xfTQmGPPtQ", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;2;4", "excitement": "3;4;2", "reproducibility": "4;4;3", "correctness": "3;4;3", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": ";;", "aff_unique_index": "0;1;1", "aff_unique_norm": "University of Chinese Academy of Sciences;Chinese Academy of Sciences", "aff_unique_dep": ";Institute of Automation", "aff_unique_url": "http://www.ucas.ac.cn;http://www.ia.cas.cn", "aff_unique_abbr": "UCAS;CAS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "xhqICRykZk", "title": "Text Augmented Spatial Aware Zero-shot Referring Image Segmentation", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "In this paper, we study a challenging task of zero-shot referring image segmentation. This task aims to identify the instance mask that is most related to a referring expression \\textbf{without} training on pixel-level annotations. Previous research takes advantage of pre-trained cross-modal models, e.g., CLIP, to align instance-level masks with referring expressions. Yet, CLIP only considers the global-level alignment of image-text pairs, neglecting fine-grained matching between the referring sentence and local image regions. To address this challenge, we introduce a Text Augmented Spatial-aware (TAS) zero-shot referring image segmentation framework that is training-free and robust to various visual encoders. TAS incorporates a mask proposal network for instance-level mask extraction, a text-augmented visual-text matching score for mining the image-text correlation, and a spatial rectifier for mask post-processing. 
Notably, the text-augmented visual-text matching score leverages a $P$-score and an $N$-score in addition to the typical visual-text matching score. The $P$-score is utilized to close the visual-text domain gap through a surrogate captioning model, where the score is computed between the surrogate model-generated texts and the referring expression. The $N$-score considers the fine-grained alignment of region-text pairs via negative phrase mining, encouraging the masked image to be repelled from the mined distracting phrases. Extensive experiments are conducted on various datasets, including RefCOCO, RefCOCO+, and RefCOCOg. The proposed method clearly outperforms state-of-the-art zero-shot referring image segmentation methods.", "keywords": "Zero-shot Referring Image Segmentation;Multi-modal Learning;Visual-text Matching", "primary_area": "", "supplementary_material": "", "author": "Yucheng Suo;Linchao Zhu;Yi Yang", "authorids": "~Yucheng_Suo1;~Linchao_Zhu1;~Yi_Yang4", "gender": "M;M;M", "homepage": ";http://ffmpbgrnn.github.io/;http://reler.net/", "dblp": "324/2048;172/1383.html;", "google_scholar": "zk-7BskAAAAJ;9ZukE28AAAAJ;https://scholar.google.com.au/citations?user=RMSuNFwAAAAJ", "or_profile": "~Yucheng_Suo1;~Linchao_Zhu1;~Yi_Yang4", "aff": "Zhejiang University;Zhejiang University;Zhejiang University", "aff_domain": "zju.edu.cn;zju.edu.cn;zju.edu.cn", "position": "PhD student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nsuo2023text,\ntitle={Text Augmented Spatial Aware Zero-shot Referring Image Segmentation},\nauthor={Yucheng Suo and Linchao Zhu and Yi Yang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=xhqICRykZk}\n}", "github": "", "project": "", "reviewers": "xQwB;deYa;VMqU", "site": "https://openreview.net/forum?id=xhqICRykZk", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;3;2", "excitement": "2;4;3", "reproducibility": "4;4;3", "correctness": "3;4;4", "rating_avg": 3.0, "confidence_avg": 3.0, "excitement_avg": 3.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0001-5358-6410;;", "linkedin": "yucheng-suo-288985192/;;", "aff_unique_index": "0;0;0", "aff_unique_norm": "Zhejiang University", "aff_unique_dep": "", "aff_unique_url": "https://www.zju.edu.cn", "aff_unique_abbr": "ZJU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "xlg5jVmPSg", "title": "Towards A Holistic Landscape of Situated Theory of Mind in Large Language Models", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Large Language Models (LLMs) have generated considerable interest and debate regarding their potential emergence of Theory of Mind (ToM). Several recent inquiries reveal a lack of robust ToM in these models and pose a pressing demand to develop new benchmarks, as current ones primarily focus on different aspects of ToM and are prone to shortcuts and data leakage. In this position paper, we seek to answer two road-blocking questions: (1) How can we taxonomize a holistic landscape of machine ToM? (2) What is a more effective evaluation protocol for machine ToM? Following psychological studies, we taxonomize machine ToM into 7 mental state categories and delineate existing benchmarks to identify under-explored aspects of ToM. 
We argue for a holistic and situated evaluation of ToM to break ToM into individual components and treat LLMs as an agent who is physically situated in environments and socially situated in interactions with humans. Such situated evaluation provides a more comprehensive assessment of mental states and potentially mitigates the risk of shortcuts and data leakage. We further present a pilot study in a grid world setup as a proof of concept. We hope this position paper can facilitate future research to integrate ToM with LLMs and offer an intuitive means for researchers to better position their work in the landscape of ToM.", "keywords": "theory of mind;large language models;mental states", "primary_area": "", "supplementary_material": "", "author": "Ziqiao Ma;Jacob Sansom;Run Peng;Joyce Chai", "authorids": "~Ziqiao_Ma1;~Jacob_Sansom1;~Run_Peng1;~Joyce_Chai2", "gender": "Not Specified;M;M;F", "homepage": "http://mars-tin.github.io/;https://roihn.github.io/;https://jhsansom.github.io/;https://web.eecs.umich.edu/~chaijy/", "dblp": "287/7595-1.html;354/3815;;c/JoyceYChai", "google_scholar": "WbybssYAAAAJ;dqTJFVcAAAAJ;bgQmqdsAAAAJ;", "or_profile": "~Ziqiao_Ma1;~Run_Peng1;~Jacob_Hoke_Sansom1;~Joyce_Y_Chai1", "aff": "Amazon Science;University of Michigan - Ann Arbor;University of Michigan - Ann Arbor;University of Michigan", "aff_domain": "amazon.com;umich.edu;umich.edu;umich.edu", "position": "Research Intern;MS student;MS student;Full Professor", "bibtex": "@inproceedings{\nma2023towards,\ntitle={Towards A Holistic Landscape of Situated Theory of Mind in Large Language Models},\nauthor={Ziqiao Ma and Jacob Sansom and Run Peng and Joyce Chai},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=xlg5jVmPSg}\n}", "github": "", "project": "", "reviewers": "92YE;HssE;myQj", "site": "https://openreview.net/forum?id=xlg5jVmPSg", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;3;4", "excitement": "4;4;3", "reproducibility": "3;4;2", "correctness": "4;3;3", "rating_avg": 3.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-0760-4638;;;0000-0002-9658-2230", "linkedin": ";;jhsansom/;", "aff_unique_index": "0;1;1;1", "aff_unique_norm": "Amazon;University of Michigan", "aff_unique_dep": "Amazon Science;", "aff_unique_url": "https://www.amazon.science;https://www.umich.edu", "aff_unique_abbr": "Amazon Science;UM", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Ann Arbor", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "xn8NKZosDV", "title": "Event Ontology Completion with Hierarchical Structure Evolution Networks", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Traditional event detection methods require predefined event schemas. However, manually defining event schemas is expensive and the coverage of schemas is limited. To this end, some works study the event type induction (ETI) task, which discovers new event types via clustering. However, the setting of ETI suffers from two limitations: event types are not linked into the existing hierarchy and have no semantic names. 
In this paper, we propose a new research task named Event Ontology Completion (EOC), which aims to simultaneously achieve event clustering, hierarchy expansion and type naming. Furthermore, we develop a Hierarchical Structure Evolution Network (HalTon) for this new task. Specifically, we first devise a Neighborhood Contrastive Clustering module to cluster unlabeled event instances. Then, we propose a Hierarchy-Aware Linking module to incorporate the hierarchical information for event expansion. Finally, we generate meaningful names for new types via an In-Context Learning-based Naming module. Extensive experiments indicate that our method achieves the best performance, outperforming the baselines by 8.23%, 8.79% and 8.10% of ARI score on three datasets.", "keywords": "Event Ontology Completion;Event Type Induction;Hierarchy Expansion;Type Naming", "primary_area": "", "supplementary_material": "", "author": "Pengfei Cao;Yupu Hao;Yubo Chen;Kang Liu;Jiexin Xu;Huaijun Li;Xiaojian Jiang;Jun Zhao", "authorids": "~Pengfei_Cao1;~Yupu_Hao1;~Yubo_Chen1;~Kang_Liu1;~Jiexin_Xu1;~Huaijun_Li1;~Xiaojian_Jiang1;~Jun_Zhao4", "gender": ";M;M;M;F;M;M;M", "homepage": "https://cpf-nlpr.github.io/;;http://www.nlpr.ia.ac.cn/cip/yubochen/index.html;http://www.nlpr.ia.ac.cn/cip/~liukang/index.html;;;;http://nlpr-web.ia.ac.cn/cip/english/~junzhao/index.html", "dblp": "182/7941;;https://dblp.uni-trier.de/pid/90/7879.html;42/4903.html;270/0739;;72/7071;https://dblp.uni-trier.de/pid/47/2026-1.html", "google_scholar": "lP5_LJIAAAAJ;G8j_yVkAAAAJ;https://scholar.google.com.hk/citations?user=9z7GPxIAAAAJ;DtZCfl0AAAAJ;;;https://scholar.google.com.hk/citations?user=s_ih2cYAAAAJ;https://scholar.google.com.hk/citations?user=HljRttwAAAAJ", "or_profile": "~Pengfei_Cao1;~Yupu_Hao1;~Yubo_Chen1;~Kang_Liu1;~Jiexin_Xu1;~Huaijun_Li1;~Xiaojian_Jiang1;~Jun_Zhao4", "aff": "Institute of Automation, Chinese Academy of Sciences;Beijing Institute of Technology;Institute of automation, Chinese academy of science;Institute of Automation, Chinese Academy of Sciences;;;;Institute of automation, Chinese academy of science", "aff_domain": "ia.ac.cn;bit.edu.cn;nlpr.ia.ac.cn;ia.ac.cn;;;;nlpr.ia.ac.cn", "position": "PhD student;Undergrad student;Associate Professor;Professor;;;;Full Professor", "bibtex": "@inproceedings{\ncao2023event,\ntitle={Event Ontology Completion with Hierarchical Structure Evolution Networks},\nauthor={Pengfei Cao and Yupu Hao and Yubo Chen and Kang Liu and Jiexin Xu and Huaijun Li and Xiaojian Jiang and Jun Zhao},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=xn8NKZosDV}\n}", "github": "", "project": "", "reviewers": "D6mY;ktxz;GXuj", "site": "https://openreview.net/forum?id=xn8NKZosDV", "pdf_size": 0, "rating": "5;5;5", "confidence": "5;2;2", "excitement": "4;4;4", "reproducibility": "4;4;3", "correctness": "4;4;4", "rating_avg": 5.0, "confidence_avg": 3.0, "excitement_avg": 4.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 8, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;;;", "linkedin": ";%E7%85%9C%E6%9C%B4-%E9%83%9D-18a25327b/;;;;%E6%80%80%E4%BF%8A-%E6%9D%8E-67240a27b/;;", "aff_unique_index": "0;1;0;0;0", "aff_unique_norm": "Chinese Academy of Sciences;Beijing Institute of Technology", "aff_unique_dep": "Institute of Automation;", "aff_unique_url": "http://www.ia.cas.cn;http://www.bit.edu.cn/", "aff_unique_abbr": "CAS;BIT", 
"aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "xozJw0kZXF", "title": "Evaluating Object Hallucination in Large Vision-Language Models", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Inspired by the superior language abilities of large language models (LLM), large vision-language models (LVLM) have been recently proposed by integrating powerful LLMs for improving the performance on complex multimodal tasks. Despite the promising progress on LVLMs, we find that they suffer from object hallucinations, i.e., they tend to generate objects inconsistent with the target images in the descriptions. To investigate it, this work presents the first systematic study on object hallucination of LVLMs. We conduct the evaluation experiments on several representative LVLMs, and show that they mostly suffer from severe object hallucination issues. We further discuss that the visual instructions may influence the hallucination, and find that: objects that frequently appear in the visual instructions or co-occur with the image objects are obviously prone to be hallucinated by LVLMs. Besides, we further design a polling-based query method called POPE for better evaluation of object hallucination. Experiment results show that our POPE can evaluate object hallucination in a more stable and flexible way.", "keywords": "Large Vision-Language Model;Object Hallucination", "primary_area": "", "supplementary_material": "", "author": "Yifan Li;Yifan Du;Kun Zhou;Jinpeng Wang;Xin Zhao;Ji-Rong Wen", "authorids": "~Yifan_Li7;~Yifan_Du1;~Kun_Zhou2;~Jinpeng_Wang1;~Xin_Zhao10;~Ji-Rong_Wen1", "gender": "M;M;M;;M;M", "homepage": ";https://richar-du.github.io/;https://lancelot39.github.io/;;https://gsai.ruc.edu.cn/addons/teacher/index/info.html?user_id=5&ruccode=20140041&ln=cn;https://gsai.ruc.edu.cn/english/jrwen", "dblp": "43/5611-9;221/5969-2;48/3927-2.html;;https://dblp.uni-trier.de/pid/52/8700.html;w/JRWen", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;YJf-45cAAAAJ;bmRJVjwAAAAJ;;JNhNacoAAAAJ;tbxCHJgAAAAJ", "or_profile": "~Yifan_Li7;~Yifan_Du1;~Kun_Zhou2;~Jinpeng_Wang1;~Xin_Zhao10;~Ji-Rong_Wen1", "aff": "Beijing Institute of Technology;Renmin University of China;Renmin University of China;;Renmin University of China;Renmin University of China", "aff_domain": "bit.edu.cn;ruc.edu.cn;ruc.edu.cn;;ruc.edu.cn;ruc.edu.cn", "position": "Undergrad student;PhD student;PhD student;;Full Professor;Full Professor", "bibtex": "@inproceedings{\nli2023evaluating,\ntitle={Evaluating Object Hallucination in Large Vision-Language Models},\nauthor={Yifan Li and Yifan Du and Kun Zhou and Jinpeng Wang and Xin Zhao and Ji-Rong Wen},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=xozJw0kZXF}\n}", "github": "", "project": "", "reviewers": "gyaq;Dmv1;tq5W", "site": "https://openreview.net/forum?id=xozJw0kZXF", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;4;5", "excitement": "4;4;3", "reproducibility": "3;4;4", "correctness": "4;4;3", "rating_avg": 5.0, "confidence_avg": 4.333333333333333, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;0000-0002-8333-6196;0000-0002-9777-9676", "linkedin": ";;;;;", "aff_unique_index": "0;1;1;1;1", 
"aff_unique_norm": "Beijing Institute of Technology;Renmin University of China", "aff_unique_dep": ";", "aff_unique_url": "http://www.bit.edu.cn/;http://www.ruc.edu.cn", "aff_unique_abbr": "BIT;RUC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "xp4wEivhM9", "title": "Is a Prestigious Job the same as a Prestigious Country? A Case Study on Multilingual Sentence Embeddings and European Countries", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "We study how multilingual sentence representations capture European countries and occupations and how this differs across European languages. We prompt the models with templated sentences that we machine-translate into 12 European languages and analyze the most prominent dimensions in the embeddings. Our analysis reveals that the most prominent feature in the embedding is the political distinction between Eastern and Western Europe and the country's economic strength in terms of GDP. When prompted specifically for job prestige, the embedding space clearly distinguishes high and low-prestige jobs. The occupational dimension is uncorrelated with the most dominant country dimensions in three out of four studied models. The exception is a small distilled model that exhibits a connection between occupational prestige and country of origin, which is a potential source of nationality-based discrimination. Our findings are consistent across languages.", "keywords": "multilngual language models;nationality bias;sentence representation", "primary_area": "", "supplementary_material": "", "author": "Jind\u0159ich Libovick\u00fd", "authorids": "~Jind\u0159ich_Libovick\u00fd1", "gender": "M", "homepage": "https://ufal.mff.cuni.cz/jindrich-libovicky", "dblp": "160/8774", "google_scholar": "47pkcSAAAAAJ", "or_profile": "~Jind\u0159ich_Libovick\u00fd1", "aff": "Charles University Prague", "aff_domain": "cuni.cz", "position": "Researcher", "bibtex": "@inproceedings{\nlibovick{\\'y}2023is,\ntitle={Is a Prestigious Job the same as a Prestigious Country? 
A Case Study on Multilingual Sentence Embeddings and European Countries},\nauthor={Jind{\\v{r}}ich Libovick{\\'y}},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=xp4wEivhM9}\n}", "github": "", "project": "", "reviewers": "QesP;os6q;3DFr", "site": "https://openreview.net/forum?id=xp4wEivhM9", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;3;3", "excitement": "4;3;3", "reproducibility": "4;3;4", "correctness": "3;3;3", "rating_avg": 3.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 1, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0001-7717-4090", "linkedin": "jind%C5%99ich-libovick%C3%BD-13281046/", "aff_unique_index": "0", "aff_unique_norm": "Charles University", "aff_unique_dep": "", "aff_unique_url": "https://www.cuni.cz", "aff_unique_abbr": "Charles University", "aff_campus_unique_index": "0", "aff_campus_unique": "Prague", "aff_country_unique_index": "0", "aff_country_unique": "Czech Republic" }, { "id": "xxTtwEuOpS", "title": "Understanding Compositional Data Augmentation in Typologically Diverse Morphological Inflection", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Data augmentation techniques are widely used in low-resource automatic morphological inflection to address the issue of data sparsity. However, the full implications of these techniques remain poorly understood. In this study, we aim to shed light on the theoretical aspects of the data augmentation strategy StemCorrupt, a method that generates synthetic examples by randomly substituting stem characters in existing gold standard training examples. Our analysis uncovers that StemCorrupt brings about fundamental changes in the underlying data distribution, revealing inherent compositional concatenative structure. To complement our theoretical analysis, we investigate the data-efficiency of StemCorrupt. Through evaluation across a diverse set of seven typologically distinct languages, we demonstrate that selecting a subset of datapoints with both high diversity \textit{and} high predictive uncertainty significantly enhances the data-efficiency of StemCorrupt compared to competitive baselines. Furthermore, we explore the impact of typological features on the choice of augmentation strategy and find that languages incorporating non-concatenativity, such as morphonological alternations, derive less benefit from synthetic examples with high predictive uncertainty. 
We attribute this effect to phonotactic violations induced by StemCorrupt, emphasizing the need for further research to ensure optimal performance across the entire spectrum of natural language morphology.", "keywords": "morphological inflection;computational morphology;data augmentation", "primary_area": "", "supplementary_material": "", "author": "Farhan Samir;Miikka Silfverberg", "authorids": "~Farhan_Samir1;~Miikka_Silfverberg1", "gender": "M;M", "homepage": "https://smfsamir.github.io/;https://mpsilfve.github.io", "dblp": "284/4690;99/671k", "google_scholar": "YQOEOXYAAAAJ;0ey1PKYAAAAJ", "or_profile": "~Farhan_Samir1;~Miikka_Silfverberg1", "aff": "University of British Columbia;University of British Columbia", "aff_domain": "mail.ubc.ca;ubc.ca", "position": "PhD student;Assistant Professor", "bibtex": "@inproceedings{\nsamir2023understanding,\ntitle={Understanding Compositional Data Augmentation in Typologically Diverse Morphological Inflection},\nauthor={Farhan Samir and Miikka Silfverberg},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=xxTtwEuOpS}\n}", "github": "", "project": "", "reviewers": "CMbW;AGZF;xBw7", "site": "https://openreview.net/forum?id=xxTtwEuOpS", "pdf_size": 0, "rating": "5;5;5", "confidence": "3;4;3", "excitement": "4;4;4", "reproducibility": "4;4;5", "correctness": "4;3;4", "rating_avg": 5.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 4.0, "reproducibility_avg": 4.333333333333333, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";", "linkedin": ";", "aff_unique_index": "0;0", "aff_unique_norm": "University of British Columbia", "aff_unique_dep": "", "aff_unique_url": "https://www.ubc.ca", "aff_unique_abbr": "UBC", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Canada" }, { "id": "xyvTFX7hDs", "title": "Cultural Concept Adaptation on Multimodal Reasoning", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Developing cultural adaptation methods is important, which can improve the model performance on the low-resource ones and provide more equitable opportunities for everyone to benefit from advanced technology. Past methods primarily focused on multilingual and multimodal capabilities, and the improvement of multicultural competence is still an unexplored problem. This is largely due to the difficulty of data scarcity and expensive annotation. In this paper, we navigate this uncharted territory by leveraging high-resource cultures to facilitate comprehension of low-resource ones. We first introduce an annotation-free method for cultural-concept adaptation and construct a concept mapping set. To facilitate the model's comprehension of cultural-concept mappings, we propose a new multimodal data augmentation called CultureMixup. This approach employs a three-tier code-switching strategy on textual sentences. Additionally, it uses a cultural concept-based mixup method for the images. This combination effectively generates new data instances across culture, phrase, word, and image levels. 
For visually grounded reasoning across languages and cultures, experimental results on five languages show that our method consistently improves performance for four existing multilingual and multimodal models on both zero-shot and few-shot settings.", "keywords": "Cross-cultural;Adaptation;Low-resource;Multi-modal;Data augmentation", "primary_area": "", "supplementary_material": "", "author": "Zhi Li;Yin Zhang", "authorids": "~Zhi_Li6;~Yin_Zhang3", "gender": ";M", "homepage": "https://person.zju.edu.cn/en/zhangyin;", "dblp": "91/3045-6;", "google_scholar": "vCoh6tYAAAAJ;https://scholar.google.com.hk/citations?hl=zh-CN", "or_profile": "~Yin_Zhang3;~Zhi_Ii1", "aff": "Zhejiang University;Zhejiang University", "aff_domain": "zju.edu.cn;zju.edu.cn", "position": "Associate Professor;PhD student", "bibtex": "@inproceedings{\nli2023cultural,\ntitle={Cultural Concept Adaptation on Multimodal Reasoning},\nauthor={Zhi Li and Yin Zhang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=xyvTFX7hDs}\n}", "github": "", "project": "", "reviewers": "bqm3;sDGb;eiJt", "site": "https://openreview.net/forum?id=xyvTFX7hDs", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;3;4", "excitement": "3;4;4", "reproducibility": "3;4;4", "correctness": "3;4;3", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";", "linkedin": ";", "aff_unique_index": "0;0", "aff_unique_norm": "Zhejiang University", "aff_unique_dep": "", "aff_unique_url": "https://www.zju.edu.cn", "aff_unique_abbr": "ZJU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "id": "xzveggFhiQ", "title": "Multi-Modal Knowledge Graph Transformer Framework for Multi-Modal Entity Alignment", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Multi-Modal Entity Alignment (MMEA) is a critical task that aims to identify equivalent entity pairs across multi-modal knowledge graphs (MMKGs). However, this task faces challenges due to the presence of different types of information, including neighboring entities, multi-modal attributes, and entity types. Directly incorporating the above information (e.g., concatenation or attention) can lead to an unaligned information space. To address these challenges, we propose a novel MMEA transformer, called Meaformer, that hierarchically introduces neighbor features, multi-modal attributes, and entity types to enhance the alignment task. Taking advantage of the transformer's ability to better integrate multiple information, we design a hierarchical modifiable self-attention block in a transformer encoder to preserve the unique semantics of different information. 
Furthermore, we design two entity-type prefix injection methods to redintegrate entity-type information using type prefixes, which help to restrict the global information of entities not present in the MMKGs.", "keywords": "Multi-Modal Entity Alignment;Multi-Modal Knowledge Graph;Transformer", "primary_area": "", "supplementary_material": "", "author": "Qian Li;Cheng Ji;Shu Guo;Zhaoji Liang;Lihong Wang;Jianxin Li", "authorids": "~Qian_Li8;~Cheng_Ji1;~Shu_Guo1;~Zhaoji_Liang1;~Lihong_Wang2;~Jianxin_Li3", "gender": "M;F;M;F;M;F", "homepage": "https://scholar.google.com/citations?hl=en&user=fRAeIZAAAAAJ;;https://github.com/NatsusakiYomi;;http://myjianxin.github.io;https://xiaoqian19940510.github.io/", "dblp": "32/598-1.html;121/2156;358/8865;;l/JianxinLi-2.html;69/5902-9.html", "google_scholar": "https://scholar.google.com/citations?hl=en;;jL2SALkAAAAJ;;EY2lqD0AAAAJ;AHg-JGIAAAAJ", "or_profile": "~Cheng_Ji1;~Shu_Guo1;~Zhaoji_Liang1;~Lihong_Wang2;~Jianxin_Li3;~qian_li6", "aff": "Beihang University;CNCERT;Beihang University;cncert;Beihang University ;Beihang University", "aff_domain": "buaa.edu.cn;cert.org.cn;buaa.edu.cn;cert.org;buaa.edu.cn;buaa.edu.cn", "position": "PhD student;Associate Professor;Undergrad student;Senior Engineer;Full Professor;PhD student", "bibtex": "@inproceedings{\nli2023multimodal,\ntitle={Multi-Modal Knowledge Graph Transformer Framework for Multi-Modal Entity Alignment},\nauthor={Qian Li and Cheng Ji and Shu Guo and Zhaoji Liang and Lihong Wang and Jianxin Li},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=xzveggFhiQ}\n}", "github": "", "project": "", "reviewers": "Up9k;adWR;h5aT", "site": "https://openreview.net/forum?id=xzveggFhiQ", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;3;5", "excitement": "4;3;4", "reproducibility": "4;3;4", "correctness": "4;3;4", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0003-2513-3822;;0009-0000-3333-4056;0000-0003-0179-2364;0000-0001-5152-0055;0000-0002-1612-4644", "linkedin": ";;;;;", "aff_unique_index": "0;1;0;2;0;0", "aff_unique_norm": "Beihang University;China National Cyber Emergency Response Team;China National Certification and Accreditation Administration", "aff_unique_dep": ";;", "aff_unique_url": "http://www.buaa.edu.cn/;http://www.cncert.org.cn/;http://www.sac.gov.cn/cncert/", "aff_unique_abbr": "BUAA;CNCERT;CNCERT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "y0P5KXN5X1", "title": "Factual Relation Discrimination for Factuality-oriented Abstractive Summarization", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Most neural abstractive summarization models are capable of producing high-quality summaries. However, they still frequently contain factual errors. Existing factuality-oriented abstractive summarization models only consider the integration of factual information and ignore the causes of factual errors. To address this issue, we propose a factuality-oriented abstractive summarization model DASum, which is based on a new task factual relation discrimination that is able to identify the causes of factual errors. 
First, we use data augmentation methods to construct counterfactual summaries (i.e., negative samples), and build a factual summarization dataset. Then, we propose the factual relation discrimination task, which determines the factuality of the dependency relations in summaries during summary generation and guides our DASum to generate factual relations, thereby improving the factuality of summaries. Experimental results on the CNN/DM and XSUM datasets show that our DASum outperforms several state-of-the-art benchmarks in terms of the factual metrics.", "keywords": "Factuality-oriented Abstractive Summarization;Factual Relation Discrimination", "primary_area": "", "supplementary_material": "", "author": "Zhiguang Gao;PEIFENG LI;Feng Jiang;Xiaomin Chu;Qiaoming Zhu", "authorids": "~Zhiguang_Gao1;~PEIFENG_LI2;~Feng_Jiang4;~Xiaomin_Chu1;~Qiaoming_Zhu1", "gender": "M;M;M;F;M", "homepage": "https://github.com/gaozhiguang;http://web.suda.edu.cn/pfli/;;;https://scst.suda.edu.cn/0f/a2/c11250a528290/page.htm", "dblp": "329/6163;00/1996.html;75/1693-7;178/7275;28/1279", "google_scholar": ";NY3GrVIAAAAJ;zrxpiWYAAAAJ;;6BXGJK8AAAAJ", "or_profile": "~Zhiguang_Gao1;~PEIFENG_LI2;~Feng_Jiang4;~Xiaomin_Chu1;~Qiaoming_Zhu1", "aff": "Department of Computer Science and Technology, Soochow University;Soochow University, China;The Chinese University of Hong Kong, Shenzhen;;Soochow University", "aff_domain": "cs.umass.edu;suda.edu.cn;cuhk.edu.cn;;suda.edu.cn", "position": "MS student;Full Professor;Postdoc;;Full Professor", "bibtex": "@inproceedings{\ngao2023factual,\ntitle={Factual Relation Discrimination for Factuality-oriented Abstractive Summarization},\nauthor={Zhiguang Gao and PEIFENG LI and Feng Jiang and Xiaomin Chu and Qiaoming Zhu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=y0P5KXN5X1}\n}", "github": "", "project": "", "reviewers": "FTBC;QpAJ;FEBY", "site": "https://openreview.net/forum?id=y0P5KXN5X1", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;3;3", "excitement": "3;3;3", "reproducibility": "4;3;3", "correctness": "3;4;3", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.0, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0003-4850-3128;0000-0002-3465-311X;;0000-0002-2708-8976", "linkedin": ";;;;", "aff_unique_index": "0;0;1;0", "aff_unique_norm": "Soochow University;Chinese University of Hong Kong", "aff_unique_dep": "Department of Computer Science and Technology;", "aff_unique_url": "https://www.soochow.edu.cn;https://www.cuhk.edu.cn", "aff_unique_abbr": "Soochow U;CUHK", "aff_campus_unique_index": "1", "aff_campus_unique": ";Shenzhen", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "y2V6YgLaW7", "title": "The Internal State of an LLM Knows When It's Lying", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "While Large Language Models (LLMs) have shown exceptional performance in various tasks, one of their most prominent drawbacks is generating inaccurate or false information with a confident tone. In this paper, we provide evidence that the LLM's internal state can be used to reveal the truthfulness of statements. 
This includes both statements provided to the LLM, and statements that the LLM itself generates.\nOur approach is to train a classifier that outputs the probability that a statement is truthful, based on the hidden layer activations of the LLM as it reads or generates the statement. Experiments demonstrate that given a set of test sentences, of which half are true and half false, our trained classifier achieves an average of 71\\% to 83\\% accuracy labeling which sentences are true versus false, depending on the LLM base model. \nFurthermore, we explore the relationship between our classifier's performance and approaches based on the probability assigned to the sentence by the LLM. We show that while LLM-assigned sentence probability is related to sentence truthfulness, this probability is also dependent on sentence length and the frequencies of words in the sentence, resulting in our trained classifier providing a more reliable approach to detecting truthfulness, highlighting its potential to enhance the reliability of LLM-generated content and its practical applicability in real-world scenarios.", "keywords": "Large Language Models;hallucination in LLM;LLM veracity;LLM activations", "primary_area": "", "supplementary_material": "", "author": "Amos Azaria;Tom Mitchell", "authorids": "~Amos_Azaria1;~Tom_Mitchell2", "gender": "Not Specified;M", "homepage": "http://azariaa.com;http://www.cs.cmu.edu/~tom", "dblp": "18/9923;", "google_scholar": "https://scholar.google.com.tw/citations?user=sdfKs_sAAAAJ;", "or_profile": "~Amos_Azaria1;~Tom_Mitchell2", "aff": "Ariel University;School of Computer Science, Carnegie Mellon University", "aff_domain": "ariel.ac.il;cs.cmu.edu", "position": "Associate Professor;Full Professor", "bibtex": "@inproceedings{\nazaria2023the,\ntitle={The Internal State of an {LLM} Knows When It's Lying},\nauthor={Amos Azaria and Tom Mitchell},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=y2V6YgLaW7}\n}", "github": "", "project": "", "reviewers": "ayRU;UwiC;DLxR;72bu", "site": "https://openreview.net/forum?id=y2V6YgLaW7", "pdf_size": 0, "rating": "4;4;4;4", "confidence": "4;4;3;2", "excitement": "4;4;3;4", "reproducibility": "4;5;3;4", "correctness": "4;3;2;3", "rating_avg": 4.0, "confidence_avg": 3.25, "excitement_avg": 3.75, "reproducibility_avg": 4.0, "correctness_avg": 3.0, "replies_avg": 14, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-5057-1309;", "linkedin": ";", "aff_unique_index": "0;1", "aff_unique_norm": "Ariel University;Carnegie Mellon University", "aff_unique_dep": ";School of Computer Science", "aff_unique_url": "https://www.ariel.ac.il;https://www.cmu.edu", "aff_unique_abbr": "Ariel U;CMU", "aff_campus_unique_index": "1", "aff_campus_unique": ";Pittsburgh", "aff_country_unique_index": "0;1", "aff_country_unique": "Israel;United States" }, { "id": "y34lg6q50A", "title": "Fusing Temporal Graphs into Transformers for Time-Sensitive Question Answering", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Answering time-sensitive questions from long documents requires temporal reasoning over the times in questions and documents. An important open question is whether large language models can perform such reasoning solely using a provided text document, or whether they can benefit from additional temporal information extracted using other systems. 
We address this research question by applying existing temporal information extraction systems to construct temporal graphs of events, times, and temporal relations in questions and documents. We then investigate different approaches for fusing these graphs into Transformer models. Experimental results show that our proposed approach for fusing temporal graphs into input text substantially enhances the temporal reasoning capabilities of Transformer models with or without fine-tuning. Additionally, our proposed method outperforms various graph convolution-based approaches and establishes a new state-of-the-art performance on SituatedQA and three splits of TimeQA.", "keywords": "Time-sensitive Question Answering;Temporal Graph Fusion;Temporal Reasoning", "primary_area": "", "supplementary_material": "", "author": "Xin Su;Phillip Howard;Nagib Hakim;Steven Bethard", "authorids": "~Xin_Su2;~Phillip_Howard1;~Nagib_Hakim1;~Steven_Bethard1", "gender": "M;M;M;M", "homepage": "https://xinsu.name/;;;https://bethard.github.io/", "dblp": "54/3643-8.html;212/2868;;52/5246", "google_scholar": "4YlcxsoAAAAJ;EKh822gAAAAJ;;https://scholar.google.com.tw/citations?user=sXM8J5EAAAAJ", "or_profile": "~Xin_Su2;~Phillip_Howard1;~Nagib_Hakim1;~Steven_Bethard1", "aff": "University of Arizona;Intel;;University of Arizona", "aff_domain": "arizona.edu;intel.com;;arizona.edu", "position": "PhD student;Researcher;;Associate Professor", "bibtex": "@inproceedings{\nsu2023fusing,\ntitle={Fusing Temporal Graphs into Transformers for Time-Sensitive Question Answering},\nauthor={Xin Su and Phillip Howard and Nagib Hakim and Steven Bethard},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=y34lg6q50A}\n}", "github": "", "project": "", "reviewers": "ay7N;D8zG;XQnr", "site": "https://openreview.net/forum?id=y34lg6q50A", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;4", "excitement": "4;4;3", "reproducibility": "4;4;3", "correctness": "3;3;3", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.0, "replies_avg": 10, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-2712-2804;;;0000-0001-9560-6491", "linkedin": "xin-su-7a5297125/;;;bethard/", "aff_unique_index": "0;1;0", "aff_unique_norm": "University of Arizona;Intel", "aff_unique_dep": ";Intel Corporation", "aff_unique_url": "https://www.arizona.edu;https://www.intel.com", "aff_unique_abbr": "UA;Intel", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "y5UTUcTQU5", "title": "Dual-Channel Span for Aspect Sentiment Triplet Extraction", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Aspect Sentiment Triplet Extraction (ASTE) is one of the compound tasks of fine-grained aspect-based sentiment analysis (ABSA), aiming at extracting the triplets of aspect terms, corresponding opinion terms and the associated sentiment orientation. Recent efforts in exploiting span-level semantic interaction shown superior performance on ASTE task. However, most of the existing span-based approaches suffer from enumerating all possible spans, since it can introduce too much noise in sentiment triplet extraction. To ease this burden, we propose a dual-channel span generation method to coherently constrain the search space of span candidates. 
Specifically, we leverage the syntactic relations among aspect/opinion terms and the associated part-of-speech characteristics in those terms to generate span candidates, which reduces span enumeration by nearly half. Besides, feature representations are learned from syntactic and part-of-speech correlation among terms, which enriches the span representations with fruitful linguistic information. Extensive experiments on two versions of public datasets demonstrate both the effectiveness of our design and the superiority on ASTE/ATE/OTE tasks~\\footnote{We release our code at \\url{https://github.com/bert-ply/Dual_Span}}.", "keywords": "aspect sentiment triplet extraction;dual-channel;span generation;noise reduction", "primary_area": "", "supplementary_material": "", "author": "Pan Li;Ping Li;Kai Zhang", "authorids": "~Pan_Li10;~Ping_Li12;~Kai_Zhang5", "gender": "M;F;", "homepage": "https://github.com/bert-ply;;", "dblp": ";62/5860-24;", "google_scholar": ";TwSm5CUAAAAJ;", "or_profile": "~Pan_Li10;~Ping_Li12;~Kai_Zhang5", "aff": "Southwest Petroleum University;Southwest Petroleum University;", "aff_domain": "edu.cn;swpu.edu.cn;", "position": "MS student;Full Professor;", "bibtex": "@inproceedings{\nli2023dualchannel,\ntitle={Dual-Channel Span for Aspect Sentiment Triplet Extraction},\nauthor={Pan Li and Ping Li and Kai Zhang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=y5UTUcTQU5}\n}", "github": "", "project": "", "reviewers": "V7Ee;khna;vjaB", "site": "https://openreview.net/forum?id=y5UTUcTQU5", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;3;4", "excitement": "4;4;2", "reproducibility": "3;4;2", "correctness": "3;4;3", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.0, "correctness_avg": 3.3333333333333335, "replies_avg": 10, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-8391-6510;", "linkedin": ";;", "aff_unique_index": "0;0", "aff_unique_norm": "Southwest Petroleum University", "aff_unique_dep": "", "aff_unique_url": "https://www.swpu.edu.cn", "aff_unique_abbr": "SWPU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "China" }, { "id": "y5ctUSk99X", "title": "GPTAraEval: A Comprehensive Evaluation of ChatGPT on Arabic NLP", "track": "main", "status": "Long Main", "tldr": "", "abstract": "ChatGPT's emergence heralds a transformative phase in NLP, particularly demonstrated through its excellent performance on many English benchmarks. However, the model's efficacy across diverse linguistic contexts remains largely uncharted territory. This work aims to bridge this knowledge gap, with a primary focus on assessing ChatGPT's capabilities on Arabic languages and dialectal varieties. Our comprehensive study conducts a large-scale automated and human evaluation of ChatGPT, encompassing 44 distinct language understanding and generation tasks on over 60 different datasets. To our knowledge, this marks the first extensive performance analysis of ChatGPT's deployment in Arabic NLP. Our findings indicate that, despite its remarkable performance in English, ChatGPT is consistently surpassed by smaller models that have undergone finetuning on Arabic. 
We further undertake a meticulous comparison of ChatGPT and GPT-4's Modern Standard Arabic (MSA) and Dialectal Arabic (DA), unveiling the relative shortcomings of both models in handling Arabic dialects compared to MSA. Although we further explore and confirm the utility of employing GPT-4 as a potential alternative for human evaluation, our work adds to a growing body of research underscoring the limitations of ChatGPT.", "keywords": "Arabic NLP;Arabic Dialects;ChatGPT;GPT4", "primary_area": "", "supplementary_material": "", "author": "Md Tawkat Islam Khondaker;Abdul Waheed;El Moatez Billah Nagoudi;Muhammad Abdul-Mageed", "authorids": "~Md_Tawkat_Islam_Khondaker1;~Abdul_Waheed1;~El_Moatez_Billah_Nagoudi1;~Muhammad_Abdul-Mageed2", "gender": ";M;;", "homepage": "https://sites.google.com/view/tawkat;https://macabdul9.github.io/;;", "dblp": "241/5971.html;;;", "google_scholar": "https://scholar.google.ca/citations?user=koKhlhwAAAAJ;I0hRiBYAAAAJ;;", "or_profile": "~Md_Tawkat_Islam_Khondaker1;~Abdul_Waheed1;~El_Moatez_Billah_Nagoudi1;~Muhammad_Abdul-Mageed2", "aff": "University of British Columbia;Mohamed bin Zayed University of Artificial Intelligence;;", "aff_domain": "ubc.ca;mbzuai.ac.ae;;", "position": "MS student;Researcher;;", "bibtex": "@inproceedings{\nkhondaker2023gptaraeval,\ntitle={{GPTA}raEval: A Comprehensive Evaluation of Chat{GPT} on Arabic {NLP}},\nauthor={Md Tawkat Islam Khondaker and Abdul Waheed and El Moatez Billah Nagoudi and Muhammad Abdul-Mageed},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=y5ctUSk99X}\n}", "github": "", "project": "", "reviewers": "Utir;YeJa;ikv4", "site": "https://openreview.net/forum?id=y5ctUSk99X", "pdf_size": 0, "rating": "5;5;5", "confidence": "5;4;3", "excitement": "3;4;3", "reproducibility": "3;3;3", "correctness": "4;4;4", "rating_avg": 5.0, "confidence_avg": 4.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.0, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0001-5335-0723;;;", "linkedin": "md-tawkat-islam-khondaker-781962149;;;", "aff_unique_index": "0;1", "aff_unique_norm": "University of British Columbia;Mohamed bin Zayed University of Artificial Intelligence", "aff_unique_dep": ";", "aff_unique_url": "https://www.ubc.ca;https://mbzuai.ac.ae", "aff_unique_abbr": "UBC;MBZUAI", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "Canada;United Arab Emirates" }, { "id": "y6Ej5BZkrR", "title": "Let's Think Frame by Frame with VIP: A Video Infilling and Prediction Dataset for Evaluating Video Chain-of-Thought", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Despite exciting recent results showing vision-language systems\u2019 capacity to reason about images using natural language, their capacity for video reasoning remains underexplored. We motivate framing video reasoning as the sequential understanding of a small number of keyframes, thereby leveraging the power and robustness of vision-language while alleviating the computational complexities of processing videos. To evaluate this novel application, we introduce VIP, an inference-time challenge dataset designed to explore models\u2019 reasoning capabilities through video chain-of-thought. 
Inspired by visually descriptive scene plays, we propose two formats for keyframe description: unstructured dense captions and structured scene descriptions that identify the focus, action, mood, objects, and setting (FAMOuS) of the keyframe. To evaluate video reasoning, we propose two tasks: Video Infilling and Video Prediction, which test abilities to generate multiple intermediate keyframes and predict future keyframes, respectively. We benchmark GPT-4, GPT-3, and VICUNA on VIP, demonstrate the performance gap in these complex video reasoning tasks, and encourage future work to prioritize language models for efficient and generalized video reasoning.", "keywords": "chain of thought;video reasoning;large language models;dataset;vision and language", "primary_area": "", "supplementary_material": "", "author": "Vaishnavi Himakunthala;Andy Ouyang;Daniel Philip Rose;Ryan He;Alex Mei;Yujie Lu;Chinmay Sonar;Michael Saxon;William Yang Wang", "authorids": "~Vaishnavi_Himakunthala1;~Andy_Ouyang1;~Daniel_Philip_Rose1;~Ryan_He1;~Alex_Mei1;~Yujie_Lu1;~Chinmay_Sonar1;~Michael_Saxon1;~William_Yang_Wang2", "gender": "F;M;M;;;;M;M;M", "homepage": ";;;;http://sites.cs.ucsb.edu/~alexmei/;https://yujielu10.github.io/;https://chinmaysonar.github.io/;https://saxon.me;https://www.cs.ucsb.edu/~william/", "dblp": ";;;;;;207/0890;222/6656;08/9282", "google_scholar": ";;;;GOrfNGAAAAAJ;pcmr6GMAAAAJ;-6Rg0WcAAAAJ;pAlwjdgAAAAJ;gf8Ms_8AAAAJ", "or_profile": "~Vaishnavi_Himakunthala1;~Andy_Ouyang1;~Daniel_Philip_Rose1;~Ryan_He1;~Alex_Mei1;~Yujie_Lu1;~Chinmay_Sonar1;~Michael_Saxon1;~William_Wang1", "aff": ", University of California, Santa Barbara;University of California, Santa Barbara;, University of California, Santa Barbara;University of California, Santa Barbara;UC Santa Barbara;UC Santa Barbara;University of California, Santa Barbara;UC Santa Barbara;UC Santa Barbara", "aff_domain": "cs.ucsb.edu;ucsb.edu;cs.ucsb.edu;ucsb.edu;ucsb.edu;ucsb.edu;ucsb.edu;ucsb.edu;ucsb.edu", "position": "Undergrad student;Undergrad student;Undergrad student;Intern;MS student;PhD student;PhD student;PhD student;Full Professor", "bibtex": "@inproceedings{\nhimakunthala2023lets,\ntitle={Let's Think Frame by Frame with {VIP}: A Video Infilling and Prediction Dataset for Evaluating Video Chain-of-Thought},\nauthor={Vaishnavi Himakunthala and Andy Ouyang and Daniel Philip Rose and Ryan He and Alex Mei and Yujie Lu and Chinmay Sonar and Michael Saxon and William Yang Wang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=y6Ej5BZkrR}\n}", "github": "", "project": "", "reviewers": "VoFV;6krL;JM3S", "site": "https://openreview.net/forum?id=y6Ej5BZkrR", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;2", "excitement": "4;4;4", "reproducibility": "4;4;4", "correctness": "4;3;4", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 4.0, "reproducibility_avg": 4.0, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 9, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;;;;", "linkedin": "vaishnavihimakunthala/;andy--ouyang/;danny-rose-2075651a7/;ryanhe02/;alexmeigz/;;;;", "aff_unique_index": "0;0;0;0;0;0;0;0;0", "aff_unique_norm": "University of California, Santa Barbara", "aff_unique_dep": "", "aff_unique_url": "https://www.ucsb.edu", "aff_unique_abbr": "UCSB", "aff_campus_unique_index": "0;0;0;0;0;0;0;0;0", "aff_campus_unique": "Santa Barbara", "aff_country_unique_index": 
"0;0;0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "y8ebFPsyET", "title": "TESTA: Temporal-Spatial Token Aggregation for Long-form Video-Language Understanding", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Large-scale video-language pre-training has made remarkable strides in advancing video-language understanding tasks. However, the heavy computational burden of video encoding remains a formidable efficiency bottleneck, particularly for long-form videos. These videos contain massive visual tokens due to their inherent 3D properties and spatiotemporal redundancy, making it challenging to capture complex temporal and spatial relationships. To tackle this issue, we propose an efficient method called TEmporal-Spatial Token Aggregation (TESTA). TESTA condenses video semantics by adaptively aggregating similar frames, as well as similar patches within each frame. TESTA can reduce the number of visual tokens by 75% and thus accelerate video encoding. Building upon TESTA, we introduce a pre-trained video-language model equipped with a divided space-time token aggregation module in each video encoder block. We evaluate our model on five datasets for paragraph-to-video retrieval and long-form VideoQA tasks. Experimental results show that TESTA improves computing efficiency by 1.7 times, and achieves significant performance gains from its scalability in processing longer input frames, e.g., +13.7 R@1 on QuerYD and +6.5 R@1 on Condensed Movie.", "keywords": "Video-Language Understanding;Token Aggregation;Long-form Video Understanding", "primary_area": "", "supplementary_material": "", "author": "Shuhuai Ren;Sishuo Chen;Shicheng Li;Xu Sun;Lu Hou", "authorids": "~Shuhuai_Ren1;~Sishuo_Chen1;~Shicheng_Li1;~Xu_Sun1;~Lu_Hou2", "gender": "M;M;;M;F", "homepage": "https://renshuhuai-andy.github.io/;https://pkucss.github.io/;https://lscpku.github.io/;https://xusun.org/;https://houlu369.github.io/", "dblp": "50/9511.html;279/6225;;37/1971-1;", "google_scholar": "https://scholar.google.com.hk/citations?user=3X8yS-cAAAAJ;Jn6gAIAAAAAJ;hsTCc1MAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com.hk/citations?user=rnjoL5cAAAAJ", "or_profile": "~Shuhuai_Ren1;~Sishuo_Chen1;~Shicheng_Li1;~Xu_Sun1;~LU_HOU1", "aff": "Peking University;Peking University;Peking University;Peking University;Huawei Technologies Ltd.", "aff_domain": "pku.edu.cn;pku.edu.cn;pku.edu.cn;pku.edu.cn;huawei.com", "position": "PhD student;MS student;PhD student;Associate Professor;researcher", "bibtex": "@inproceedings{\nren2023testa,\ntitle={{TESTA}: Temporal-Spatial Token Aggregation for Long-form Video-Language Understanding},\nauthor={Shuhuai Ren and Sishuo Chen and Shicheng Li and Xu Sun and Lu Hou},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=y8ebFPsyET}\n}", "github": "", "project": "", "reviewers": "YtSY;jb6J;E2NS", "site": "https://openreview.net/forum?id=y8ebFPsyET", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;4", "excitement": "2;3;4", "reproducibility": "4;4;3", "correctness": "2;3;4", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 3.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;", "linkedin": "shuhuai-ren-69580817a/;;;;", "aff_unique_index": "0;0;0;0;1", "aff_unique_norm": "Peking University;Huawei", "aff_unique_dep": 
";Huawei Technologies", "aff_unique_url": "http://www.pku.edu.cn;https://www.huawei.com", "aff_unique_abbr": "Peking U;Huawei", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "China" }, { "id": "yAZSZob2dN", "title": "Boot and Switch: Alternating Distillation for Zero-Shot Dense Retrieval", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Neural 'dense' retrieval models are state of the art for many datasets; however, these models often exhibit limited domain transfer ability.\nExisting approaches to adaptation are unwieldy, such as requiring explicit supervision, complex model architectures, or massive external models.\nWe present $\\texttt{ABEL}$, a simple but effective unsupervised method to enhance passage retrieval in zero-shot settings. \nOur technique follows a straightforward loop: a dense retriever learns from supervision signals provided by a reranker, and subsequently, the reranker is updated based on feedback from the improved retriever.\nBy iterating this loop, the two components mutually enhance one another's performance. \nExperimental results demonstrate that our unsupervised $\\texttt{ABEL}$ model outperforms both leading supervised and unsupervised retrievers on the BEIR benchmark.\nMeanwhile, it exhibits strong adaptation abilities to tasks and domains that were unseen during training.\nBy either fine-tuning $\\texttt{ABEL}$ on labelled data or integrating it with existing supervised dense retrievers, we achieve state-of-the-art results.\\footnote{Source code is available at \\url{https://github.com/Fantabulous-J/BootSwitch}.}", "keywords": "dense retrieval;iterated learning;alternating distillation;bootstrapping", "primary_area": "", "supplementary_material": "", "author": "Fan Jiang;Qiongkai Xu;Tom Drummond;Trevor Cohn", "authorids": "~Fan_Jiang2;~Qiongkai_Xu1;~Tom_Drummond1;~Trevor_Cohn1", "gender": ";M;M;M", "homepage": ";https://xuqiongkai.github.io;;https://people.eng.unimelb.edu.au/tcohn/", "dblp": ";127/0174;50/1633;66/4613", "google_scholar": ";https://scholar.google.com.au/citations?user=wCer2WUAAAAJ;https://scholar.google.com.au/citations?user=6sWGL5wAAAAJ;https://scholar.google.com.au/citations?user=FCom398AAAAJ", "or_profile": "~Fan_Jiang2;~Qiongkai_Xu1;~Tom_Drummond1;~Trevor_Cohn1", "aff": ";University of Melbourne;University of Melbourne;The University of Melbourne", "aff_domain": ";unimelb.edu;unimelb.edu.au;unimelb.edu.au", "position": ";Postdoc;Full Professor;Professor", "bibtex": "@inproceedings{\njiang2023boot,\ntitle={Boot and Switch: Alternating Distillation for Zero-Shot Dense Retrieval},\nauthor={Fan Jiang and Qiongkai Xu and Tom Drummond and Trevor Cohn},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=yAZSZob2dN}\n}", "github": "", "project": "", "reviewers": "ACNC;3ATb;jdQL", "site": "https://openreview.net/forum?id=yAZSZob2dN", "pdf_size": 0, "rating": "2;2;2", "confidence": "4;4;2", "excitement": "3;4;3", "reproducibility": "4;4;4", "correctness": "2;4;3", "rating_avg": 2.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.0, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0003-3312-6825;0000-0001-8204-5904;", "linkedin": ";;;", "aff_unique_index": "0;0;0", "aff_unique_norm": "University of Melbourne", "aff_unique_dep": 
"", "aff_unique_url": "https://www.unimelb.edu.au", "aff_unique_abbr": "UniMelb", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Australia" }, { "id": "yB8cQIICqe", "title": "EZ-STANCE: A Large Dataset for Zero-Shot Stance Detection", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Zero-shot stance detection (ZSSD) aims to determine whether the author of a text is in favor of, against, or neutral toward a target that is unseen during training. In this paper, we present EZ-STANCE, a large English ZSSD dataset with 30,606 annotated text-target pairs. In contrast to VAST, the only other existing ZSSD dataset, EZ-STANCE includes both noun-phrase targets and claim targets, covering a wide range of domains. In addition, we introduce two challenging subtasks for ZSSD: target-based ZSSD and domain-based ZSSD. We provide an in-depth description and analysis of our dataset. We evaluate EZ-STANCE using state-of-the-art deep learning models. Furthermore, we propose to transform ZSSD into the NLI task by applying two simple yet effective prompts to noun-phrase targets. Our experimental results show that EZ-STANCE is a challenging new benchmark, which provides significant research opportunities on ZSSD. We will make our dataset and code available on GitHub.", "keywords": "dataset;stance detection;zero-shot", "primary_area": "", "supplementary_material": "", "author": "Chenye Zhao;Cornelia Caragea", "authorids": "~Chenye_Zhao1;~Cornelia_Caragea2", "gender": "M;", "homepage": "https://www.linkedin.com/in/chenye-zhao-3316a4117/;https://www.cs.uic.edu/~cornelia/", "dblp": "234/8821.htmlv;69/6680.html", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;vkX6VV4AAAAJ", "or_profile": "~Chenye_Zhao1;~Cornelia_Caragea2", "aff": "University of Illinois at Chicago;University of Illinois at Chicago", "aff_domain": "uic.edu;uic.edu", "position": "PhD student;Full Professor", "bibtex": "@misc{\nanonymous2024ezstance,\ntitle={{EZ}-{STANCE}: A Large Dataset for Zero-Shot Stance Detection},\nauthor={Anonymous},\nyear={2024},\nurl={https://openreview.net/forum?id=yB8cQIICqe}\n}", "github": "", "project": "", "reviewers": "6i45;zFVb;rLV2", "site": "https://openreview.net/forum?id=yB8cQIICqe", "pdf_size": 0, "rating": "4;4;4", "confidence": "3;4;5", "excitement": "3;4;4", "reproducibility": "3;4;4", "correctness": "3;3;4", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 10, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-3904-345X;", "linkedin": "chenye-zhao-3316a4117/;", "aff_unique_index": "0;0", "aff_unique_norm": "University of Illinois at Chicago", "aff_unique_dep": "", "aff_unique_url": "https://www.uic.edu", "aff_unique_abbr": "UIC", "aff_campus_unique_index": "0;0", "aff_campus_unique": "Chicago", "aff_country_unique_index": "0;0", "aff_country_unique": "United States" }, { "id": "yBd2UREDNL", "title": "MixTEA: Semi-supervised Entity Alignment with Mixture Teaching", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Semi-supervised entity alignment (EA) is a practical and challenging task because of the lack of adequate labeled mappings as training data. Most works address this problem by generating pseudo mappings for unlabeled entities. 
However, they either suffer from the erroneous (noisy) pseudo mappings or largely ignore the uncertainty of pseudo mappings. In this paper, we propose a novel semi-supervised EA method, termed as MixTEA, which guides the model learning with an end-to-end mixture teaching of manually labeled mappings and probabilistic pseudo mappings. We firstly train a student model using few labeled mappings as standard. More importantly, in pseudo mapping learning, we propose a bi-directional voting (BDV) strategy that fuses the alignment decisions in different directions to estimate the uncertainty via the joint matching confidence score. Meanwhile, we also design a matching diversity-based rectification (MDR) module to adjust the pseudo mapping learning, thus reducing the negative influence of noisy mappings. Extensive results on benchmark datasets as well as further analyses demonstrate the superiority and the effectiveness of our proposed method.", "keywords": "Knowledge Graph;Entity Alignment;Knowledge Representation", "primary_area": "", "supplementary_material": "", "author": "Feng Xie;Xin Song;Xiang Zeng;Xuechen Zhao;Lei Tian;Bin Zhou;Yusong Tan", "authorids": "~Feng_Xie3;~Xin_Song2;~Xiang_Zeng3;~Xuechen_Zhao1;~Lei_Tian3;~Bin_Zhou7;~Yusong_Tan2", "gender": "M;F;;;M;M;M", "homepage": "https://xiefeng69.github.io/;https://github.com/songxin0318;https://github.com/rain-in-night;;;;", "dblp": ";;;;;66/3973-4.html;42/1274", "google_scholar": "nF5tqy4AAAAJ;;;;;;", "or_profile": "~Feng_Xie3;~Xin_Song2;~Xiang_Zeng3;~Xuechen_Zhao1;~Lei_Tian3;~Bin_Zhou7;~Yusong_Tan2", "aff": "National University of Defense Technology;National University of Defense Technology;National University of Defense Technology;;National University of Defense Technology;National University of Defense Technology;National University of Defense Technology", "aff_domain": "nudt.edu.cn;nudt.edu.cn;nudt.edu.cn;;nudt.edu.cn;nudt.edu.cn;nudt.edu.cn", "position": "MS student;PhD student;MS student;;PhD student;Full Professor;Full Professor", "bibtex": "@inproceedings{\nxie2023mixtea,\ntitle={Mix{TEA}: Semi-supervised Entity Alignment with Mixture Teaching},\nauthor={Feng Xie and Xin Song and Xiang Zeng and Xuechen Zhao and Lei Tian and Bin Zhou and Yusong Tan},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=yBd2UREDNL}\n}", "github": "", "project": "", "reviewers": "KV6A;ce7N;8XC3", "site": "https://openreview.net/forum?id=yBd2UREDNL", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;3;5", "excitement": "3;2;3", "reproducibility": "4;4;3", "correctness": "3;2;3", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 2.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 2.6666666666666665, "replies_avg": 11, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0003-3944-236X;0000-0001-8883-7857;;;0000-0002-3353-0951;;", "linkedin": ";;;;;;", "aff_unique_index": "0;0;0;0;0;0", "aff_unique_norm": "National University of Defense Technology", "aff_unique_dep": "", "aff_unique_url": "http://www.nudt.edu.cn/", "aff_unique_abbr": "NUDT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "yDeIWA7ICp", "title": "Social Commonsense-Guided Search Query Generation for Open-Domain Knowledge-Powered Conversations", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "Open-domain dialog 
involves generating search queries that help obtain relevant knowledge for holding informative conversations. However, it can be challenging to determine what information to retrieve when the user is passive and does not express a clear need or request. To tackle this issue, we present a novel approach that focuses on generating internet search queries that are guided by social commonsense. Specifically, we leverage a commonsense dialog system to establish connections related to the conversation topic, which subsequently guides our query generation. Our proposed framework addresses passive user interactions by integrating topic tracking, commonsense response generation and instruction-driven query generation. Through extensive evaluations, we show that our approach overcomes limitations of existing query generation techniques that rely solely on explicit dialog information, and produces search queries that are more relevant, specific, and compelling, ultimately resulting in more engaging responses.", "keywords": "Conversational AI;Knowledge-Powered Dialog;Commonsense Knowledge", "primary_area": "", "supplementary_material": "", "author": "Revanth Gangi Reddy;Hao Bai;Wentao Yao;Sharath Chandra Etagi Suresh;Heng Ji;ChengXiang Zhai", "authorids": "~Revanth_Gangi_Reddy1;~Hao_Bai1;~Wentao_Yao2;~Sharath_Chandra_Etagi_Suresh1;~Heng_Ji3;~ChengXiang_Zhai1", "gender": "M;M;M;M;F;M", "homepage": "https://gangiswag.github.io;https://www.jackgethome.com;https://github.com/Wentaoy-19;;http://blender.cs.illinois.edu/hengji.html;http://czhai.cs.illinois.edu/", "dblp": ";53/8975;;;;z/ChengXiangZhai", "google_scholar": "SXP5Ej0AAAAJ;https://scholar.google.com/citations?hl=zh-CN;;;z7GCqT4AAAAJ;YU-baPIAAAAJ", "or_profile": "~Revanth_Gangi_Reddy1;~Hao_Bai1;~Wentao_Yao2;~Sharath_Chandra_Etagi_Suresh1;~Heng_Ji3;~ChengXiang_Zhai1", "aff": "University of Illinois, Urbana Champaign;University of Illinois, Urbana Champaign;Department of Computer Science, UIUC;University of Illinois, Urbana Champaign;University of Illinois, Urbana-Champaign;University of Illinois, Urbana Champaign", "aff_domain": "illinois.edu;illinois.edu;cs.illinois.edu;uiuc.edu;uiuc.edu;illinois.edu", "position": "PhD student;MS student;MS student;MS student;Full Professor;Full Professor", "bibtex": "@inproceedings{\nreddy2023social,\ntitle={Social Commonsense-Guided Search Query Generation for Open-Domain Knowledge-Powered Conversations},\nauthor={Revanth Gangi Reddy and Hao Bai and Wentao Yao and Sharath Chandra Etagi Suresh and Heng Ji and ChengXiang Zhai},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=yDeIWA7ICp}\n}", "github": "", "project": "", "reviewers": "XjaA;Vk7b;TgDw", "site": "https://openreview.net/forum?id=yDeIWA7ICp", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;4", "excitement": "4;3;3", "reproducibility": "4;4;4", "correctness": "3;3;3", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.0, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0001-9723-7490;;;;0000-0002-6434-3702", "linkedin": "revanth-gangi-reddy-5b7257ba/;jackgethome/;;sharath07chandra/;;", "aff_unique_index": "0;0;0;0;1;0", "aff_unique_norm": "University of Illinois Urbana-Champaign;University of Illinois", "aff_unique_dep": ";", "aff_unique_url": "https://illinois.edu;https://illinois.edu", "aff_unique_abbr": "UIUC;UIUC", 
"aff_campus_unique_index": "0;0;0;0;0;0", "aff_campus_unique": "Urbana-Champaign", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "yE44WcphJY", "title": "Dissecting In-Context Learning of Translations in GPT-3", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "Most of the recent work in leveraging Large Language Models (LLMs) such as GPT-3 for Machine Translation (MT) has focused on selecting the few-shot samples for prompting. In this work, we try to better understand the role of demonstration attributes for the in-context learning of translations through perturbations of high-quality, in-domain demonstrations. We find that asymmetric perturbation of the source-target mappings yield vastly different results. We show that the perturbation of the source side has surprisingly little impact, while target perturbation can drastically reduce translation quality, suggesting that it is the output text distribution that provides the most important learning signal during in-context learning of translations. We propose a method named Zero-Shot-Context to add this signal automatically in Zero-Shot prompting. We demonstrate that it improves upon the zero-shot translation performance of GPT-3, even making it competitive with few-shot prompted translations.", "keywords": "translation;large language models;in context learning", "primary_area": "", "supplementary_material": "", "author": "Vikas Raunak;Arul Menezes;Hany Hassan Awadalla", "authorids": "~Vikas_Raunak2;~Arul_Menezes1;~Hany_Hassan_Awadalla1", "gender": "M;M;M", "homepage": "https://vyraun.github.io/;https://www.linkedin.com/in/arulmenezes;", "dblp": "205/2388;89/2869;83/64", "google_scholar": "25Tjnq4AAAAJ;DnhOg3YAAAAJ;", "or_profile": "~Vikas_Raunak2;~Arul_Menezes1;~Hany_Hassan1", "aff": "Microsoft;Microsoft Research;Microsoft", "aff_domain": "microsoft.com;research.microsoft.com;microsoft.com", "position": "Researcher;Distinguished Engineer;Research Scientist", "bibtex": "@inproceedings{\nraunak2023dissecting,\ntitle={Dissecting In-Context Learning of Translations in {GPT}-3},\nauthor={Vikas Raunak and Arul Menezes and Hany Hassan Awadalla},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=yE44WcphJY}\n}", "github": "", "project": "", "reviewers": "999s;uUav;gFZk", "site": "https://openreview.net/forum?id=yE44WcphJY", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;5;4", "excitement": "3;4;3", "reproducibility": "4;1;4", "correctness": "3;4;3", "rating_avg": 3.0, "confidence_avg": 4.333333333333333, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;", "linkedin": "vraunak;arulmenezes;", "aff_unique_index": "0;0;0", "aff_unique_norm": "Microsoft", "aff_unique_dep": "Microsoft Corporation", "aff_unique_url": "https://www.microsoft.com", "aff_unique_abbr": "Microsoft", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "United States" }, { "id": "yF3lSXb82y", "title": "InvGC: Robust Cross-Modal Retrieval by Inverse Graph Convolution", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Over recent decades, significant advancements in cross-modal retrieval is mainly driven by breakthroughs in visual and linguistic modeling. 
However, a recent study shows that multi-modal data representations tend to cluster within a limited convex cone (known as the representation degeneration problem), which hinders retrieval performance due to the inseparability of these representations. In our study, we first empirically validate the presence of the representation degeneration problem across multiple cross-modal benchmarks and methods. Next, to address it, we introduce a novel method, called InvGC, a post-processing technique inspired by graph convolution and average pooling. Specifically, InvGC defines the graph topology within the datasets and then applies graph convolution in a subtractive manner. This method effectively separates representations by increasing the distances between data points. To improve the efficiency and effectiveness of InvGC, we propose an advanced graph topology, LocalAdj, which only aims to increase the distances between each data point and its nearest neighbors. To understand why InvGC works, we present a detailed theoretical analysis, proving that the lower bound of recall will be improved after deploying InvGC. Extensive empirical results show that InvGC and InvGC w/LocalAdj significantly mitigate the representation degeneration problem, thereby enhancing retrieval performance.", "keywords": "Cross-Modal Retrieval;Data Degeneration;Graph Convolution", "primary_area": "", "supplementary_material": "", "author": "Xiangru Jian;Yimu Wang", "authorids": "~Xiangru_Jian1;~Yimu_Wang1", "gender": "M;M", "homepage": "https://edward-jianqaq.github.io/;https://yimuwangcs.github.io", "dblp": "326/8022;140/7766", "google_scholar": "kq17trAAAAAJ;TV2vnN8AAAAJ", "or_profile": "~Xiangru_Jian1;~Yimu_Wang1", "aff": "University of Waterloo;University of Waterloo", "aff_domain": "uwaterloo.ca;uwaterloo.ca", "position": "PhD student;PhD student", "bibtex": "@inproceedings{\njian2023invgc,\ntitle={Inv{GC}: Robust Cross-Modal Retrieval by Inverse Graph Convolution},\nauthor={Xiangru Jian and Yimu Wang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=yF3lSXb82y}\n}", "github": "", "project": "", "reviewers": "LxR3;6sSa;2BeH", "site": "https://openreview.net/forum?id=yF3lSXb82y", "pdf_size": 0, "rating": "3;3;3", "confidence": "5;4;4", "excitement": "4;3;3", "reproducibility": "4;4;0", "correctness": "3;3;3", "rating_avg": 3.0, "confidence_avg": 4.333333333333333, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 2.6666666666666665, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";", "linkedin": ";yimu-wang-854743151/", "aff_unique_index": "0;0", "aff_unique_norm": "University of Waterloo", "aff_unique_dep": "", "aff_unique_url": "https://uwaterloo.ca", "aff_unique_abbr": "UW", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Canada" }, { "id": "yKLUvxMCQ3", "title": "Establishing Trustworthiness: Rethinking Tasks and Model Evaluation", "track": "main", "status": "Short Main", "tldr": "", "abstract": "Language understanding is a multi-faceted cognitive capability, which the Natural Language Processing (NLP) community has striven to model computationally for decades. Traditionally, facets of linguistic intelligence have been compartmentalized into tasks with specialized model architectures and corresponding evaluation protocols. 
With the advent of large language models (LLMs) the community has witnessed a dramatic shift towards general purpose, task-agnostic approaches powered by generative models. As a consequence, the traditional compartmentalized notion of language tasks is breaking down, followed by an increasing challenge for evaluation and analysis. At the same time, LLMs are being deployed in more real-world scenarios, including previously unforeseen zero-shot setups, increasing the need for trustworthy and reliable systems. Therefore, we argue that it is time to rethink what constitutes tasks and model evaluation in NLP, and pursue a more holistic view on language, placing trustworthiness at the center. Towards this goal, we review existing compartmentalized approaches for understanding the origins of a model's functional capacity, and provide recommendations for more multi-faceted evaluation protocols.", "keywords": "Trustworthiness;Tasks;Evaluation;Skills;Trust", "primary_area": "", "supplementary_material": "", "author": "Robert Litschko;Max M\u00fcller-Eberstein;Rob van der Goot;Leon Weber-Genzel;Barbara Plank", "authorids": "~Robert_Litschko1;~Max_M\u00fcller-Eberstein1;~Rob_van_der_Goot1;~Leon_Weber-Genzel1;~Barbara_Plank2", "gender": ";;M;;M", "homepage": "https://rlitschk.github.io/;https://mxij.me;https://robvanderg.github.io/;https://bplank.github.io/;https://www.leonweber.me", "dblp": "220/3207;301/9477;184/8526;46/521;209/7969", "google_scholar": "https://scholar.google.de/citations?user=LFKL_o8AAAAJ;mI392-4AAAAJ;lU4zpOEAAAAJ;;https://scholar.google.de/citations?user=OKbS2VAAAAAJ", "or_profile": "~Robert_Litschko1;~Max_M\u00fcller-Eberstein1;~Rob_van_der_Goot1;~Barbara_Plank2;~Leon_Weber1", "aff": "Ludwig-Maximilians-Universit\u00e4t M\u00fcnchen;Apple;IT University of Copenhagen;IT University of Copenhagen;Ludwig-Maximilians-Universit\u00e4t M\u00fcnchen", "aff_domain": "lmu.de;apple.com;itu.dk;itu.dk;lmu.de", "position": "Postdoc;Intern;Assistant Professor;Full Professor;Postdoc", "bibtex": "@inproceedings{\nlitschko2023establishing,\ntitle={Establishing Trustworthiness: Rethinking Tasks and Model Evaluation},\nauthor={Robert Litschko and Max M{\\\"u}ller-Eberstein and Rob van der Goot and Leon Weber-Genzel and Barbara Plank},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=yKLUvxMCQ3}\n}", "github": "", "project": "", "reviewers": "8KkL;kEVc;H6uV", "site": "https://openreview.net/forum?id=yKLUvxMCQ3", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;2;4", "excitement": "4;3;4", "reproducibility": "0;2;0", "correctness": "4;3;4", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 0.6666666666666666, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0003-4637-983X;0000-0002-0006-0658;;;", "linkedin": "robertlitschko/;;;;", "aff_unique_index": "0;1;2;2;0", "aff_unique_norm": "Ludwig-Maximilians-Universit\u00e4t M\u00fcnchen;Apple;IT University of Copenhagen", "aff_unique_dep": ";Apple Inc.;", "aff_unique_url": "https://www.lmu.de;https://www.apple.com;https://itu.dk", "aff_unique_abbr": "LMU;Apple;ITU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;2;2;0", "aff_country_unique": "Germany;United States;Denmark" }, { "id": "yO4cAfFjlp", "title": "Theory of Mind for Multi-Agent Collaboration via 
Large Language Models", "track": "main", "status": "Long Main", "tldr": "", "abstract": "While Large Language Models (LLMs) have demonstrated impressive accomplishments in both reasoning and planning, their abilities in multi-agent collaborations remains largely unexplored. This study evaluates LLM-based agents in a multi-agent cooperative text game with Theory of Mind (ToM) inference tasks, comparing their performance with Multi-Agent Reinforcement Learning (MARL) and planning-based baselines. We observed evidence of emergent collaborative behaviors and high-order Theory of Mind capabilities among LLM-based agents. Our results reveal limitations in LLM-based agents' planning optimization due to systematic failures in managing long-horizon contexts and hallucination about the task state. We explore the use of explicit belief state representations to mitigate these issues, finding that it enhances task performance and the accuracy of ToM inferences for LLM-based agents.", "keywords": "Large Language Models;Multi-Agent Reinforcement Learning;Theory of Mind", "primary_area": "", "supplementary_material": "", "author": "Huao Li;Yu Quan Chong;Simon Stepputtis;Joseph Campbell;Dana Hughes;Charles Michael Lewis;Katia P. Sycara", "authorids": "~Huao_Li1;~Yu_Quan_Chong1;~Simon_Stepputtis1;~Joseph_Campbell1;~Dana_Hughes1;~Charles_Michael_Lewis1;~Katia_P._Sycara1", "gender": "M;M;;;M;M;F", "homepage": "https://www.huao-li.com;;https://simonstepputtis.com/;;http://danathughes.com;http://www.pitt.edu/~cmlewis;", "dblp": "30/783;;192/7092;179/2732;;;s/KatiaPSycara", "google_scholar": "7YPYztQAAAAJ;https://scholar.google.com/citations?hl=en;WUQgzsAAAAAJ;1NmM6OUAAAAJ;mv_fbkkAAAAJ;BBS25qkAAAAJ;VWv6a9kAAAAJ", "or_profile": "~Huao_Li1;~Yu_Quan_Chong1;~Simon_Stepputtis1;~Joseph_Campbell1;~Dana_Hughes1;~Charles_Michael_Lewis1;~Katia_P._Sycara1", "aff": "University of Pittsburgh;Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University;Carnegie Mellon University;University of Pittsburgh;Carnegie Mellon University", "aff_domain": "pitt.edu;cmu.edu;cmu.edu;cmu.edu;cmu.edu;pitt.edu;cmu.edu", "position": "PhD student;MS student;Postdoc;Postdoc;Postdoc;Full Professor;Full Professor", "bibtex": "@inproceedings{\nli2023theory,\ntitle={Theory of Mind for Multi-Agent Collaboration via Large Language Models},\nauthor={Huao Li and Yu Quan Chong and Simon Stepputtis and Joseph Campbell and Dana Hughes and Charles Michael Lewis and Katia P. 
Sycara},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=yO4cAfFjlp}\n}", "github": "", "project": "", "reviewers": "sosq;tQsZ;Bhvf", "site": "https://openreview.net/forum?id=yO4cAfFjlp", "pdf_size": 0, "rating": "5;5;5", "confidence": "2;3;4", "excitement": "4;4;4", "reproducibility": "4;4;3", "correctness": "3;4;4", "rating_avg": 5.0, "confidence_avg": 3.0, "excitement_avg": 4.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-0027-615X;;0009-0003-0519-3454;;;0000-0002-1013-9482;", "linkedin": ";cyuquan8/;simon-stepputtis/;;;;", "aff_unique_index": "0;1;1;1;1;0;1", "aff_unique_norm": "University of Pittsburgh;Carnegie Mellon University", "aff_unique_dep": ";", "aff_unique_url": "https://www.pitt.edu;https://www.cmu.edu", "aff_unique_abbr": "Pitt;CMU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "yThuxNysaJ", "title": "DelucionQA: Detecting Hallucinations in Domain-specific Question Answering", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Hallucination is a well-known phenomenon in text generated by large language models (LLMs). The existence of hallucinatory responses is found in almost all application scenarios e.g., summarization, question-answering (QA) etc. For applications requiring high reliability (e.g., customer-facing assistants), the potential existence of hallucination in LLM-generated text is a critical problem. The amount of hallucination can be reduced by leveraging information retrieval to provide relevant background information to the LLM. However, LLMs can still generate hallucinatory content for various reasons (e.g., prioritizing its parametric knowledge over the context, failure to capture the relevant information from the context, etc.). Detecting hallucinations through automated methods is thus paramount. To facilitate research in this direction, we introduce a sophisticated dataset, DelucionQA, that captures hallucinations made by retrieval-augmented LLMs for a domain-specific QA task. Furthermore, we propose a set of hallucination detection methods to serve as baselines for future works from the research community. 
Analysis and case study are also provided to share valuable insights on hallucination phenomena in the target scenario.", "keywords": "hallucination;LLM;large-language-model;natural-language-generation;question-answering", "primary_area": "", "supplementary_material": "", "author": "Mobashir Sadat;Zhengyu Zhou;Lukas Lange;Jun Araki;Arsalan Gundroo;Bingqing Wang;Rakesh R Menon;Md Rizwan Parvez;Zhe Feng", "authorids": "~Mobashir_Sadat1;~Zhengyu_Zhou2;~Lukas_Lange1;~Jun_Araki1;~Arsalan_Gundroo2;~Bingqing_Wang1;~Rakesh_R_Menon3;~Md_Rizwan_Parvez1;~Zhe_Feng2", "gender": "M;F;M;;M;M;;M;M", "homepage": ";;;;;;;https://rizwan09.github.io/;", "dblp": "315/9523;69/7824;219/5288;;;75/2793;;180/3830.html;36/1508-3", "google_scholar": "wej9T2YAAAAJ;KRdnthgAAAAJ;https://scholar.google.co.in/citations?user=yBM4CMcAAAAJ;;;EQjfn0EAAAAJ;;KhC8rtcAAAAJ;zXta31UAAAAJ", "or_profile": "~Mobashir_Sadat1;~Zhengyu_Zhou2;~Lukas_Lange1;~Jun_Araki1;~Arsalan_Gundroo2;~Bingqing_Wang1;~Rakesh_R_Menon3;~Md_Rizwan_Parvez1;~Zhe_Feng2", "aff": "University of Illinois Chicago;Bosch Research and Technology Center;Robert Bosch GmbH, Bosch;;Bosch Research;Bosch Research Center North America;;Bosch;Fudan University", "aff_domain": "uic.edu;bosch.com;de.bosch.com;;us.bosch.com;bosch.com;;bosch.com;fudan.edu.cn", "position": "PhD student;Researcher;Researcher;;Researcher;Researcher;;Researcher;PhD student", "bibtex": "@inproceedings{\nsadat2023delucionqa,\ntitle={Delucion{QA}: Detecting Hallucinations in Domain-specific Question Answering},\nauthor={Mobashir Sadat and Zhengyu Zhou and Lukas Lange and Jun Araki and Arsalan Gundroo and Bingqing Wang and Rakesh R Menon and Md Rizwan Parvez and Zhe Feng},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=yThuxNysaJ}\n}", "github": "", "project": "", "reviewers": "Q4DD;1Jdn;6jrh", "site": "https://openreview.net/forum?id=yThuxNysaJ", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;4", "excitement": "3;3;3", "reproducibility": "4;4;3", "correctness": "4;3;3", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 3.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 9, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;;0000-0001-8715-3859;;0000-0002-3708-7803;", "linkedin": "mobashir-sadat-2b32a3112/;zhengyu-zhou-3071801a/;;;arsalangundroo/;;;rizwanparvez/;", "aff_unique_index": "0;1;2;3;4;2;5", "aff_unique_norm": "University of Illinois at Chicago;Bosch Research and Technology Center;Robert Bosch GmbH;Bosch Research;Bosch Research Center;Fudan University", "aff_unique_dep": ";;;;;", "aff_unique_url": "https://www.uic.edu;https://research.bosch.com;https://www.bosch.com;https://research.bosch.com;https://research.bosch.com;https://www.fudan.edu.cn", "aff_unique_abbr": "UIC;;Bosch;Bosch;Bosch RC;Fudan", "aff_campus_unique_index": "0", "aff_campus_unique": "Chicago;", "aff_country_unique_index": "0;0;1;1;2;1;3", "aff_country_unique": "United States;Germany;Unknown;China" }, { "id": "yVoLLzLwdp", "title": "Is ChatGPT a Financial Expert? Evaluating Language Models on Financial Natural Language Processing", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "The emergence of Large Language Models (LLMs), such as ChatGPT, has revolutionized general natural language processing (NLP) tasks. However, their expertise in the financial domain lacks a comprehensive evaluation. 
To assess the ability of LLMs to solve financial NLP tasks, we present FinLMEval, a framework for Financial Language Model Evaluation, comprising nine datasets designed to evaluate the performance of language models.\nThis study compares the performance of fine-tuned auto-encoding language models (BERT, RoBERTa, FinBERT) and the LLM ChatGPT. Our findings reveal that while ChatGPT demonstrates notable performance across most financial tasks, it generally lags behind the fine-tuned expert models, especially when dealing with proprietary datasets. We hope this study builds foundation evaluation benchmarks for continuing efforts to build more advanced LLMs in the financial domain.", "keywords": "Financial Natural Language Processing;Large Language Models;ChatGPT", "primary_area": "", "supplementary_material": "", "author": "Yue Guo;Zian Xu;Yi Yang", "authorids": "~Yue_Guo4;~Zian_Xu1;~Yi_Yang7", "gender": "F;F;", "homepage": "https://irenehere.github.io/;;http://yya518.github.io/", "dblp": ";;", "google_scholar": "ZhBvjJUAAAAJ;;https://scholar.google.com.hk/citations?user=Prh_dHkAAAAJ", "or_profile": "~Yue_Guo4;~Zian_Xu1;~Yi_Yang7", "aff": "Hong Kong University of Science and Technology;Hong Kong University of Science and Technology;Hong Kong University of Science and Technology", "aff_domain": "ust.hk;ust.hk;ust.hk", "position": "PhD student;Undergrad student;Assistant Professor", "bibtex": "@inproceedings{\nguo2023is,\ntitle={Is Chat{GPT} a Financial Expert? Evaluating Language Models on Financial Natural Language Processing},\nauthor={Yue Guo and Zian Xu and Yi Yang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=yVoLLzLwdp}\n}", "github": "", "project": "", "reviewers": "VqpE;nbqJ;Fsxu", "site": "https://openreview.net/forum?id=yVoLLzLwdp", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;4", "excitement": "3;3;3", "reproducibility": "3;4;5", "correctness": "2;4;4", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 3.0, "reproducibility_avg": 4.0, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0001-8603-8904;;0000-0001-8863-112X", "linkedin": ";https://linkedin.com/in/zian-xu-8712a021b;", "aff_unique_index": "0;0;0", "aff_unique_norm": "Hong Kong University of Science and Technology", "aff_unique_dep": "", "aff_unique_url": "https://www.ust.hk", "aff_unique_abbr": "HKUST", "aff_campus_unique_index": "0;0;0", "aff_campus_unique": "Hong Kong SAR", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" }, { "id": "yXVLsdvyg9", "title": "Improving Question Generation with Multi-level Content Planning", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "This paper addresses the problem of generating questions from a given context and an answer, specifically focusing on questions that require multi-hop reasoning across an extended context. Previous studies have suggested that key phrase selection is essential for question generation (QG), yet it is still challenging to connect such disjointed phrases into meaningful questions, particularly for long context. To mitigate this issue, we propose MultiFactor, a novel QG framework based on multi-level content planning. 
Specifically, MultiFactor includes two components: FA-Model, which simultaneously selects key phrases and generates full answers, and Q-Model, which takes the generated full answer as an additional input to generate questions. Here, full answer generation is introduced to connect the short answer with the selected key phrases, thus forming an answer-aware summary to facilitate QG. Both FA-Model and Q-Model are formalized as simple-yet-effective Phrase-Enhanced Transformers, our joint model for phrase selection and text generation. Experimental results show that our method outperforms strong baselines on two popular QG datasets. Our code is available at https://github.com/zeaver/MultiFactor.", "keywords": "question generation;multi-level content planning", "primary_area": "", "supplementary_material": "", "author": "Zehua Xia;Qi Gou;Bowen Yu;Haiyang Yu;Fei Huang;Yongbin Li;Nguyen Cam-Tu", "authorids": "~Zehua_Xia1;~Qi_Gou1;~Bowen_Yu3;~Haiyang_Yu3;~Fei_Huang2;~Yongbin_Li2;~Nguyen_Cam-Tu2", "gender": "M;M;M;M;M;M;F", "homepage": "https://dbm1.github.io/;https://gouqi666.github.io/;https://yubowen-ph.github.io/;;https://sites.google.com/view/fei-huang;https://yongbin-li.github.io/;https://ai.nju.edu.cn/main.htm", "dblp": ";;95/10266-2.html;90/6643-3;h/FeiHuang.html;;14/5079.html", "google_scholar": ";;oHoEp34AAAAJ;VhWV-1wAAAAJ;9r98PpoAAAAJ;xF5VrokAAAAJ;https://scholar.google.com/citations?hl=en", "or_profile": "~Zehua_Xia1;~Qi_Gou1;~Bowen_Yu3;~Haiyang_Yu3;~Fei_Huang2;~Yongbin_Li2;~Nguyen_Cam-Tu2", "aff": "Nanjing University;Nanjing University;Alibaba Group;Alibaba Group;Alibaba Group US;Alibaba Group;Nanjing University", "aff_domain": "nju.edu.cn;nju.edu.cn;alibaba-inc.com;alibaba-inc.com;alibaba-inc.com;alibaba-inc.com;nju.edu.cn", "position": "MS student;MS student;Researcher;Researcher;Senior Research Director;Researcher;Associate Professor", "bibtex": "@inproceedings{\nxia2023improving,\ntitle={Improving Question Generation with Multi-level Content Planning},\nauthor={Zehua Xia and Qi Gou and Bowen Yu and Haiyang Yu and Fei Huang and Yongbin Li and Nguyen Cam-Tu},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=yXVLsdvyg9}\n}", "github": "", "project": "", "reviewers": "SaFE;uHqT;kFf7;MLhS", "site": "https://openreview.net/forum?id=yXVLsdvyg9", "pdf_size": 0, "rating": "3;3;3;3", "confidence": "3;4;5;4", "excitement": "3;4;3;3", "reproducibility": "4;4;4;5", "correctness": "2;3;3;3", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 3.25, "reproducibility_avg": 4.25, "correctness_avg": 2.75, "replies_avg": 13, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0002-6804-1859;;;;", "linkedin": ";;;;fei-huang-cas-cmu;;", "aff_unique_index": "0;0;1;1;1;1;0", "aff_unique_norm": "Nanjing University;Alibaba Group", "aff_unique_dep": ";", "aff_unique_url": "https://www.nju.edu.cn;https://www.alibaba.com", "aff_unique_abbr": "Nanjing U;Alibaba", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;1;0;0", "aff_country_unique": "China;United States" }, { "id": "yXYJPAlLqn", "title": "Sparse Universal Transformer", "track": "main", "status": "Long Main", "tldr": "", "abstract": "The Universal Transformer (UT) is a variant of the Transformer that shares parameters across its layers and is Turing-complete under certain assumptions. 
\nEmpirical evidence also shows that UTs have better compositional generalization than Vanilla Transformers (VTs) in formal language tasks.\nThe parameter-sharing also affords it better parameter efficiency than VTs.\nDespite its many advantages, most state-of-the-art NLP systems use VTs as their backbone model instead of UTs. \nThis is mainly because scaling UT parameters is more compute and memory intensive than scaling up a VT.\nThis paper proposes the Sparse Universal Transformer (SUT), which leverages Sparse Mixture of Experts (SMoE) to reduce UT's computation complexity while retaining its parameter efficiency and generalization ability. \nExperiments show that SUT combines the best of both worlds, achieving strong generalization results on formal language tasks (Logical inference and CFQ) and impressive parameter and computation efficiency on standard natural language benchmarks like WMT'14.", "keywords": "efficient UT;transformers;sparse moe;conditional computation;NLP;wmt14;cfq;compositional generalization;natural language processing;sparse", "primary_area": "", "supplementary_material": "", "author": "Shawn Tan;Yikang Shen;Zhenfang Chen;Aaron Courville;Chuang Gan", "authorids": "~Shawn_Tan1;~Yikang_Shen1;~Zhenfang_Chen1;~Aaron_Courville3;~Chuang_Gan1", "gender": "M;M;M;;M", "homepage": "https://blog.wtf.sg;;https://zfchenunique.github.io;;http://people.csail.mit.edu/ganchuang/", "dblp": ";152/8226;207/5321;56/1688;139/6993", "google_scholar": "57Nf7EYAAAAJ;qff5rRYAAAAJ;QSRdIzAAAAAJ;https://scholar.google.ca/citations?user=km6CP8cAAAAJ;PTeSCbIAAAAJ", "or_profile": "~Shawn_Tan1;~Yikang_Shen1;~Zhenfang_Chen1;~Aaron_Courville3;~Chuang_Gan1", "aff": "Universit\u00e9 de Montr\u00e9al;International Business Machines;MIT-IBM Watson AI lab;Universit\u00e9 de Montr\u00e9al;MIT-IBM Watson AI Lab", "aff_domain": "umontreal.ca;ibm.com;ibm.com; ;ibm.com", "position": "PhD student;Researcher;Researcher;Assistant Professor;PhD student", "bibtex": "@inproceedings{\ntan2023sparse,\ntitle={Sparse Universal Transformer},\nauthor={Shawn Tan and Yikang Shen and Zhenfang Chen and Aaron Courville and Chuang Gan},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=yXYJPAlLqn}\n}", "github": "", "project": "", "reviewers": "PDdV;EnEq;T1no", "site": "https://openreview.net/forum?id=yXYJPAlLqn", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;3;4", "excitement": "3;4;4", "reproducibility": "4;4;4", "correctness": "4;5;3", "rating_avg": 5.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.0, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;", "linkedin": "tanshawn/;;\u632f\u65b9-\u9648-512011bb/;;", "aff_unique_index": "0;1;2;0;2", "aff_unique_norm": "Universit\u00e9 de Montr\u00e9al;International Business Machines Corporation;Massachusetts Institute of Technology", "aff_unique_dep": ";;IBM Watson AI lab", "aff_unique_url": "https://www.umontreal.ca;https://www.ibm.com;https://www.mitibmwatsonailab.org", "aff_unique_abbr": "UdeM;IBM;MIT-IBM AI Lab", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;1;0;1", "aff_country_unique": "Canada;United States" }, { "id": "ybc9V6Cbq2", "title": "Better Quality Pre-training Data and T5 Models for African Languages", "track": "main", "status": "Short Main", "tldr": "", "abstract": "In this study, we 
highlight the importance of enhancing the quality of pretraining data in multilingual language models. \nExisting web crawls have demonstrated quality issues, particularly in the context of low-resource languages.\nConsequently, we introduce a new multilingual pretraining corpus for $16$ African languages, designed by carefully auditing existing pretraining corpora to understand and rectify prevalent quality issues. To compile this dataset, we undertake a rigorous examination of current data sources for thirteen languages within one of the most extensive multilingual web crawls, mC4, and extract cleaner data through meticulous auditing and improved web crawling strategies. Subsequently, we pretrain a new T5-based model on this dataset and evaluate its performance on multiple downstream tasks. \nOur model demonstrates better downstream effectiveness over existing pretrained models across four NLP tasks, underscoring the critical role data quality plays in pretraining language models in low-resource scenarios. Specifically, on cross-lingual QA evaluation, our new model is more than twice as effective as multilingual T5. All code, data and models are publicly available at https://github.com/castorini/AfriTeVa-keji.", "keywords": "multilingual;low-resource languages;african languages", "primary_area": "", "supplementary_material": "", "author": "Akintunde Oladipo;Mofetoluwa Adeyemi;Orevaoghene Ahia;Abraham Toluwase Owodunni;Odunayo Ogundepo;David Ifeoluwa Adelani;Jimmy Lin", "authorids": "~Akintunde_Oladipo1;~Mofetoluwa_Adeyemi1;~Orevaoghene_Ahia1;~Abraham_Toluwase_Owodunni1;~Odunayo_Ogundepo1;~David_Ifeoluwa_Adelani1;~Jimmy_Lin2", "gender": "M;F;;M;M;M;", "homepage": "https://theyorubayesian.github.io/;;;;;https://dadelani.github.io/;https://cs.uwaterloo.ca/~jimmylin/", "dblp": "341/4148;276/0211;;;;230/6973;00/7739", "google_scholar": "QXGZ_yQAAAAJ;nqj3mJYAAAAJ;;yW-2hooAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.ca/citations?user=W9sTkS0AAAAJ;", "or_profile": "~Akintunde_Oladipo1;~Mofetoluwa_Adeyemi1;~Orevaoghene_Ahia1;~Abraham_Toluwase_Owodunni1;~Odunayo_Ogundepo1;~David_Ifeoluwa_Adelani1;~Jimmy_Lin2", "aff": "University of Waterloo;University of Waterloo;;;University of Waterloo;University College London, University of London;University of Waterloo", "aff_domain": "uwaterloo.ca;uwaterloo.ca;;;cs.uwaterloo.ca;ucl.ac.uk;waterloo.ca", "position": "MS student;MS student;;;MS student;Postdoc;Full Professor", "bibtex": "@inproceedings{\noladipo2023better,\ntitle={Better Quality Pre-training Data and T5 Models for African Languages},\nauthor={Akintunde Oladipo and Mofetoluwa Adeyemi and Orevaoghene Ahia and Abraham Toluwase Owodunni and Odunayo Ogundepo and David Ifeoluwa Adelani and Jimmy Lin},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=ybc9V6Cbq2}\n}", "github": "", "project": "", "reviewers": "ckvB;A7Gh;Rqmg", "site": "https://openreview.net/forum?id=ybc9V6Cbq2", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;3;4", "excitement": "3;3;5", "reproducibility": "2;2;4", "correctness": "3;2;4", "rating_avg": 3.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 2.6666666666666665, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0009-0000-2630-8167;;;0000-0002-2561-256X;;0000-0002-0193-2083;", "linkedin": 
"olasakins/;;;abraham-owodunni;ogundepo-odunayo-b69191111/;david-adelani-7557b337/;", "aff_unique_index": "0;0;0;1;0", "aff_unique_norm": "University of Waterloo;University College London", "aff_unique_dep": ";", "aff_unique_url": "https://uwaterloo.ca;https://www.ucl.ac.uk", "aff_unique_abbr": "UW;UCL", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;1;0", "aff_country_unique": "Canada;United Kingdom" }, { "id": "yjqgHcTLnP", "title": "ARKitSceneRefer: Text-based Localization of Small Objects in Diverse Real-World 3D Indoor Scenes", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "3D referring expression comprehension is a task to ground text representations onto objects in 3D scenes. It is a crucial task for indoor household robots or augmented reality devices to localize objects referred to in user instructions. However, existing indoor 3D referring expression comprehension datasets typically cover larger object classes that are easy to localize, such as chairs, tables, or doors, and often overlook small objects, such as cooking tools or office supplies. Based on the recently proposed diverse and high-resolution 3D scene dataset of ARKitScenes, we construct the ARKitSceneRefer dataset focusing on small daily-use objects that frequently appear in real-world indoor scenes.\nARKitSceneRefer contains 15k objects of 1,605 indoor scenes, which are significantly larger than those of the existing 3D referring datasets, and covers diverse object classes of 583 from the LVIS dataset.\nIn empirical experiments with both 2D and 3D state-of-the-art referring expression comprehension models, we observed the task difficulty of the localization in the diverse small object classes.", "keywords": "3D;Dataset;Visual Grounding;Referring Expression Comprehension", "primary_area": "", "supplementary_material": "", "author": "Shunya Kato;Shuhei Kurita;Chenhui Chu;Sadao Kurohashi", "authorids": "~Shunya_Kato1;~Shuhei_Kurita1;~Chenhui_Chu1;~Sadao_Kurohashi1", "gender": "M;;M;M", "homepage": ";;http://researchmap.jp/chu/?lang=en;https://nlp.ist.i.kyoto-u.ac.jp/member/kuro/index.html", "dblp": ";;126/8755;42/2149", "google_scholar": "https://scholar.google.co.jp/citations?user=Jrsg3jMAAAAJ;;https://scholar.google.co.jp/citations?user=6ef0qbgAAAAJ;https://scholar.google.co.jp/citations?user=gpKS5P0AAAAJ", "or_profile": "~Shunya_Kato1;~Shuhei_Kurita1;~Chenhui_Chu1;~Sadao_Kurohashi1", "aff": "Kyoto University;;Kyoto University;Kyoto University", "aff_domain": "kyoto-u.ac.jp;;kyoto-u.ac.jp;kyoto-u.ac.jp", "position": "MS student;;Associate Professor;Full Professor", "bibtex": "@inproceedings{\nkato2023arkitscenerefer,\ntitle={{ARK}itSceneRefer: Text-based Localization of Small Objects in Diverse Real-World 3D Indoor Scenes},\nauthor={Shunya Kato and Shuhei Kurita and Chenhui Chu and Sadao Kurohashi},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=yjqgHcTLnP}\n}", "github": "", "project": "", "reviewers": "2zzf;E4hY;R12s", "site": "https://openreview.net/forum?id=yjqgHcTLnP", "pdf_size": 0, "rating": "4;4;4", "confidence": "2;5;4", "excitement": "3;3;3", "reproducibility": "4;4;3", "correctness": "3;3;3", "rating_avg": 4.0, "confidence_avg": 3.6666666666666665, "excitement_avg": 3.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": 
";;0000-0001-9848-6384;0000-0001-5398-8399", "linkedin": "shunya-kato-ba674b225/;;;", "aff_unique_index": "0;0;0", "aff_unique_norm": "Kyoto University", "aff_unique_dep": "", "aff_unique_url": "https://www.kyoto-u.ac.jp", "aff_unique_abbr": "Kyoto U", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Japan" }, { "id": "ytQFU2XsBR", "title": "Automatic Model Selection with Large Language Models for Reasoning", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Chain-of-Thought (CoT) and Program-Aided Language Models (PAL) represent two distinct reasoning methods, each with its own strengths. CoT employs natural language, offering flexibility and interpretability, while PAL utilizes programming language, yielding more structured and rigorous logic. We introduce a model selection method to combine the best of both worlds by employing a large language model (LLM) to dynamically select between them. Our theoretical analysis underscores the feasibility of this method, which is further corroborated by empirical results. Our proposed method demonstrates significant performance improvements across eight reasoning datasets with Codex, ChatGPT, and GPT-4. Additionally, our method is complementary to self-consistency; when integrated, it can further enhance performance while significantly reducing computation costs. Moreover, we achieve new state-of-the-art results on GSM8K and SVAMP, with respective accuracies of 96.8% and 93.7%.", "keywords": "Large Language Models;In-Context Learning;Reasoning", "primary_area": "", "supplementary_material": "", "author": "James Xu Zhao;Yuxi Xie;Kenji Kawaguchi;Junxian He;Michael Qizhe Xie", "authorids": "~James_Xu_Zhao1;~Yuxi_Xie1;~Kenji_Kawaguchi1;~Junxian_He1;~Michael_Qizhe_Xie1", "gender": ";F;;M;", "homepage": ";https://yuxixie.github.io/;https://ml.comp.nus.edu.sg/#members;https://jxhe.github.io;", "dblp": ";;;188/6127.html;", "google_scholar": ";LNLECx0AAAAJ;aLl3rYoAAAAJ;BIFGeoUAAAAJ;", "or_profile": "~James_Xu_Zhao1;~Yuxi_Xie1;~Kenji_Kawaguchi1;~Junxian_He1;~Michael_Qizhe_Xie1", "aff": ";National University of Singapore;National University of Singapore;Hong Kong University of Science and Technology;", "aff_domain": ";u.nus.edu;nus.edu;ust.hk;", "position": ";PhD student;Presidential Young Professor;Assistant Professor;", "bibtex": "@inproceedings{\nzhao2023automatic,\ntitle={Automatic Model Selection with Large Language Models for Reasoning},\nauthor={James Xu Zhao and Yuxi Xie and Kenji Kawaguchi and Junxian He and Michael Qizhe Xie},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=ytQFU2XsBR}\n}", "github": "", "project": "", "reviewers": "eeSr;wWYq;C5gG;iYd8", "site": "https://openreview.net/forum?id=ytQFU2XsBR", "pdf_size": 0, "rating": "3;3;3;3", "confidence": "3;4;4;3", "excitement": "4;3;3;4", "reproducibility": "3;3;3;4", "correctness": "4;2;2;3", "rating_avg": 3.0, "confidence_avg": 3.5, "excitement_avg": 3.5, "reproducibility_avg": 3.25, "correctness_avg": 2.75, "replies_avg": 14, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;", "linkedin": ";yuxi-xie-494265181;;;", "aff_unique_index": "0;0;1", "aff_unique_norm": "National University of Singapore;Hong Kong University of Science and Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.nus.edu.sg;https://www.ust.hk", "aff_unique_abbr": "NUS;HKUST", 
"aff_campus_unique_index": "1", "aff_campus_unique": ";Hong Kong SAR", "aff_country_unique_index": "0;0;1", "aff_country_unique": "Singapore;China" }, { "id": "z1RYLqEpuP", "title": "Evaluating and Modeling Attribution for Cross-Lingual Question Answering", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Trustworthy answer content is abundant in many high-resource languages and is instantly accessible through question answering systems \u2014 yet this content can be hard to access for those that do not speak these languages. The leap forward in cross-lingual modeling quality offered by generative language models offers much promise, yet their raw generations often fall short in factuality. To improve trustworthiness in these systems, a promising direction is to attribute the answer to a retrieved source, possibly in a content-rich language different from the query. Our work is the first to study attribution for cross-lingual question answering. First, we collect data in 5 languages to assess the attribution level of a state-of-the-art cross-lingual QA system. To our surprise, we find that a substantial portion of the answers is not attributable to any retrieved passages (up to 50% of answers exactly matching a gold reference) despite the system being able to attend directly to the retrieved text. Second, to address this poor attribution level, we experiment with a wide range of attribution detection techniques. We find that Natural Language Inference models and PaLM 2 fine-tuned on a very small amount of attribution data can accurately detect attribution. With these models, we improve the attribution level of a cross-lingual QA system. Overall, we show that current academic generative cross-lingual QA systems have substantial shortcomings in attribution and we build tooling to mitigate these issues.", "keywords": "Attribution;Cross-Lingual Question Answering;Multilingual Modeling;Open-Retrieval Question Answering;Attribution Detection", "primary_area": "", "supplementary_material": "", "author": "Benjamin Muller;John Frederick Wieting;Jonathan H. 
Clark;Tom Kwiatkowski;Sebastian Ruder;Livio Baldini Soares;Roee Aharoni;Jonathan Herzig;Xinyi Wang", "authorids": "~Benjamin_Muller1;~John_Frederick_Wieting1;~Jonathan_H._Clark1;~Tom_Kwiatkowski1;~Sebastian_Ruder2;~Livio_Baldini_Soares2;~Roee_Aharoni1;~Jonathan_Herzig2;~Xinyi_Wang1", "gender": "M;M;M;M;M;M;F;M;M", "homepage": "https://scholar.google.com/citations?user=Ecl07CkAAAAJ&hl=en;;;https://research.google.com/pubs/105075.html;http://www.roeeaharoni.com;https://jonathanherzig.github.io/;;https://liviosoares.github.io/;http://sebastianruder.com/", "dblp": ";156/0158;02/786;33/9012;148/9506;133/3687.html;;178/3562;186/7066", "google_scholar": ";;WfWxwlIAAAAJ;https://scholar.google.no/citations?user=MpZ6dTEAAAAJ;https://scholar.google.co.il/citations?user=wV0mHWgAAAAJ;https://scholar.google.co.il/citations?view_op=list_works;https://scholar.google.com/citations?view_op=list_works;C3s1jqIAAAAJ;https://scholar.google.de/citations?user=8ONXPV8AAAAJ", "or_profile": "~Benjamin_Muller1;~John_Frederick_Wieting1;~Jonathan_H._Clark1;~Tom_Kwiatkowski1;~Roee_Aharoni1;~Jonathan_Herzig2;~Xinyi_Wang1;~Livio_Baldini_Soares1;~Sebastian_Ruder1", "aff": "Meta;Google DeepMind;Google DeepMind;;Google;Research, Google;Google;Google Deepmind;Google", "aff_domain": "meta.com;google.com;google.com;;google.com;research.google.com;google.com;google.com;google.com", "position": "Postdoc;Researcher;Researcher;;Researcher;Researcher;Researcher;Software Engineer;Research scientist", "bibtex": "@inproceedings{\nmuller2023evaluating,\ntitle={Evaluating and Modeling Attribution for Cross-Lingual Question Answering},\nauthor={Benjamin Muller and John Frederick Wieting and Jonathan H. Clark and Tom Kwiatkowski and Sebastian Ruder and Livio Baldini Soares and Roee Aharoni and Jonathan Herzig and Xinyi Wang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=z1RYLqEpuP}\n}", "github": "", "project": "", "reviewers": "zM5q;TzMb;PwG7", "site": "https://openreview.net/forum?id=z1RYLqEpuP", "pdf_size": 0, "rating": "5;5;5", "confidence": "2;4;4", "excitement": "4;3;4", "reproducibility": "3;4;2", "correctness": "4;4;3", "rating_avg": 5.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.0, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 9, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0009-0008-2036-2777;;;;;;", "linkedin": ";;;;roeeaharoni;;;;sebastianruder", "aff_unique_index": "0;1;1;1;1;1;2;1", "aff_unique_norm": "Meta;Google;DeepMind", "aff_unique_dep": "Meta Platforms, Inc.;Google DeepMind;DeepMind", "aff_unique_url": "https://meta.com;https://deepmind.com;https://deepmind.com", "aff_unique_abbr": "Meta;DeepMind;DeepMind", "aff_campus_unique_index": "1;1;1;1", "aff_campus_unique": ";Mountain View", "aff_country_unique_index": "0;1;1;0;0;0;1;0", "aff_country_unique": "United States;United Kingdom" }, { "id": "z2JVmJ6Tlq", "title": "Self-supervised Post-processing Method to Enrich Pretrained Word Vectors", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "Retrofitting techniques, which inject external resources into word representations, have compensated for the weakness of distributed representations in semantic and relational knowledge between words. However, the previous methods require additional external resources and strongly depend on the lexicon. 
To address the issues, we propose a simple extension of extrofitting, self-supervised extrofitting: extrofitting by its own word vector distribution. Our methods improve the vanilla embeddings on all word similarity tasks without any external resources. Moreover, the method is also effective in various languages, which implies that our method will be useful in lexicon-scarce languages. As downstream tasks, we show its benefits in dialogue state tracking and text classification tasks, reporting better and generalized results compared to other word vector specialization methods.", "keywords": "Retrofitting;Word Embedding;Word Semantics", "primary_area": "", "supplementary_material": "", "author": "Hwiyeol Jo", "authorids": "~Hwiyeol_Jo1", "gender": "", "homepage": "", "dblp": "", "google_scholar": "", "or_profile": "", "aff": "", "aff_domain": "", "position": "", "bibtex": "@inproceedings{\njo2023selfsupervised,\ntitle={Self-supervised Post-processing Method to Enrich Pretrained Word Vectors},\nauthor={Hwiyeol Jo},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=z2JVmJ6Tlq}\n}", "github": "", "project": "", "reviewers": "dUyB;fe3o;FCUs", "site": "https://openreview.net/forum?id=z2JVmJ6Tlq", "pdf_size": 0, "rating": "3;3;3", "confidence": "2;4;4", "excitement": "4;4;3", "reproducibility": "2;5;4", "correctness": "3;4;3", "rating_avg": 3.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.3333333333333335, "replies_avg": 11, "authors#_avg": 1, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0 }, { "id": "z69tlSxAwf", "title": "Novel Slot Detection With an Incremental Setting", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Current dialogue systems face diverse user requests and rapidly changing domains, making it a major challenge to quickly adapt to scenarios with previously unseen slot types.\nRecently, researchers have introduced novel slot detection (NSD) to discover potential new types. 
However, a dialogue system with NSD does not bring practical improvements, since the system still cannot handle novel slots in subsequent interactions.\nIn this paper, we define incremental novel slot detection (INSD), which separates the dialogue system's handling of novel types into two major phases: 1) the model discovers unknown slots, and 2) the model is trained to handle the new classes.\nWe provide an effective model to extract novel slots with a set prediction strategy and propose a query-enhanced approach to overcome catastrophic forgetting during the process of INSD.\nWe construct two INSD datasets to evaluate our method and experimental results show that our approach exhibits superior performance.", "keywords": "dialog system; novel slot detection; incremental learning", "primary_area": "", "supplementary_material": "", "author": "Chen Liang;Hongliang Li;Changhao Guan;Qingbin Liu;Jian Liu;Jinan Xu;Zhe Zhao", "authorids": "~Chen_Liang15;~Hongliang_Li5;~Changhao_Guan1;~Qingbin_Liu1;~Jian_Liu7;~Jinan_Xu1;~Zhe_Zhao1", "gender": "M;M;M;M;M;M;M", "homepage": ";https://github.com/HLiang-Lee;https://github.com/jingtian11/guanchanghao.github.io/blob/master/_pages/about.md;https://scholar.google.com.hk/citations?user=FGxyOtYAAAAJ&hl=zh-CN;http://jianliu-ml.github.io;;http://faculty.bjtu.edu.cn/8300/", "dblp": ";;;137/6023.html;;28/6429-6.html;67/3124", "google_scholar": "https://scholar.google.com.hk/citations?hl=zh-CN;p_l_X-EAAAAJ;;https://scholar.google.com.hk/citations?user=FGxyOtYAAAAJ;https://scholar.google.de/citations?hl=en;https://scholar.google.com.hk/citations?hl=zh-CN;wMuW0W4AAAAJ", "or_profile": "~Chen_Liang15;~Hongliang_Li5;~Changhao_Guan1;~Qingbin_Liu1;~Jian_Liu7;~Zhe_Zhao1;~Xu_Jinan1", "aff": "Beijing Jiaotong University;Beijing Jiaotong University;Beijing Jiaotong University;Tencent;Beijing Jiaotong University;Tencent AI Lab;Beijing Jiaotong University", "aff_domain": "bjtu.edu.cn;bjtu.edu.cn;bjtu.edu.cn;tencent.com;bjtu.edu.cn;tencent.com;bjtu.edu.cn", "position": "MS student;Undergrad student;MS student;Researcher;Lecturer;Researcher;Full Professor", "bibtex": "@inproceedings{\nliang2023novel,\ntitle={Novel Slot Detection With an Incremental Setting},\nauthor={Chen Liang and Hongliang Li and Changhao Guan and Qingbin Liu and Jian Liu and Jinan Xu and Zhe Zhao},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=z69tlSxAwf}\n}", "github": "", "project": "", "reviewers": "Mmoy;Eu35;J6xJ;XgD7", "site": "https://openreview.net/forum?id=z69tlSxAwf", "pdf_size": 0, "rating": "3;3;3;3", "confidence": "4;4;4;3", "excitement": "3;3;4;3", "reproducibility": "4;3;4;3", "correctness": "3;3;4;2", "rating_avg": 3.0, "confidence_avg": 3.75, "excitement_avg": 3.25, "reproducibility_avg": 3.5, "correctness_avg": 3.0, "replies_avg": 14, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0009-0007-7950-5712;;;;;", "linkedin": ";;;;;;jinan-xu-3544b137/", "aff_unique_index": "0;0;0;1;0;1;0", "aff_unique_norm": "Beijing Jiao Tong University;Tencent", "aff_unique_dep": ";Tencent Holdings Limited", "aff_unique_url": "http://www.njtu.edu.cn/en;https://www.tencent.com", "aff_unique_abbr": "BJTU;Tencent", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "z8gM4ZfK8l", "title": "Improving Cross-lingual Transfer through Subtree-aware Word Reordering", "track": "main", "status": 
"Long Findings", "tldr": "", "abstract": "Despite the impressive growth of the abilities of multilingual language models, such as XLM-R and mT5, it has been shown that they still face difficulties when tackling typologically-distant languages, particularly in the low-resource setting. One obstacle for effective cross-lingual transfer is variability in word-order patterns. It can be potentially mitigated via source- or target-side word reordering, and numerous approaches to reordering have been proposed. \nHowever, they rely on language-specific rules, work on the level of POS tags, or only target the main clause, leaving subordinate clauses intact.\nTo address these limitations, we present a new powerful reordering method, defined in terms of Universal Dependencies, that is able to learn fine-grained word-order patterns conditioned on the syntactic context from a small amount of annotated data and can be applied at all levels of the syntactic tree.\nWe conduct experiments on a diverse set of tasks and show that our method consistently outperforms strong baselines over different language pairs and model architectures. This performance advantage holds true in both zero-shot and few-shot scenarios.", "keywords": "multilingual;cross lingual;cross lingual transfer;reordering;syntax", "primary_area": "", "supplementary_material": "", "author": "Ofir Arviv;Dmitry Nikolaev;Taelin Karidi;Omri Abend", "authorids": "~Ofir_Arviv1;~Dmitry_Nikolaev1;~Taelin_Karidi1;~Omri_Abend1", "gender": "M;M;F;M", "homepage": ";https://dnikolaev.com;;http://www.cs.huji.ac.il/~oabend/", "dblp": ";264/5979;237/9894.html;30/8159", "google_scholar": "vMC7k0MAAAAJ;Myl8EpkAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com.tw/citations?user=BD_hRzYAAAAJ", "or_profile": "~Ofir_Arviv1;~Dmitry_Nikolaev1;~Taelin_Karidi1;~Omri_Abend1", "aff": "International Business Machines;University of Stuttgart, Universit\u00e4t Stuttgart;Hebrew University of Jerusalem;Hebrew University of Jerusalem", "aff_domain": "ibm.com;ims.uni-stuttgart.de;huji.ac.il;huji.ac.il", "position": "Researcher;Postdoc;PhD student;Associate Professor", "bibtex": "@inproceedings{\narviv2023improving,\ntitle={Improving Cross-lingual Transfer through Subtree-aware Word Reordering},\nauthor={Ofir Arviv and Dmitry Nikolaev and Taelin Karidi and Omri Abend},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=z8gM4ZfK8l}\n}", "github": "", "project": "", "reviewers": "gucq;YA2U;W6Rf", "site": "https://openreview.net/forum?id=z8gM4ZfK8l", "pdf_size": 0, "rating": "2;2;2", "confidence": "4;2;4", "excitement": "3;4;4", "reproducibility": "4;5;4", "correctness": "3;4;4", "rating_avg": 2.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 4.333333333333333, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-3034-9794;;", "linkedin": "ofir-arviv-0523a8b9/;dmitry-nikolaev-9421405a/;;", "aff_unique_index": "0;1;2;2", "aff_unique_norm": "International Business Machines Corporation;University of Stuttgart;Hebrew University of Jerusalem", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ibm.com;https://www.uni-stuttgart.de;https://www.huji.ac.il", "aff_unique_abbr": "IBM;Uni Stuttgart;HUJI", "aff_campus_unique_index": "1;1", "aff_campus_unique": ";Jerusalem", "aff_country_unique_index": "0;1;2;2", 
"aff_country_unique": "United States;Germany;Israel" }, { "id": "z9CqYTwOiO", "title": "Solving the Right Problem is Key for Translational NLP: A Case Study in UMLS Vocabulary Insertion", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "As the immense opportunities enabled by large language models become more apparent, NLP systems will be increasingly expected to excel in real-world settings. \nHowever, in many instances, powerful models alone will not yield translational NLP solutions, especially if the formulated problem is not well aligned with the real-world task.\nIn this work, we study the case of UMLS vocabulary insertion, an important real-world task in which hundreds of thousands of new terms, referred to as atoms, are added to the UMLS, one of the most comprehensive open-source biomedical knowledge bases. \nPrevious work aimed to develop an automated NLP system to make this time-consuming, costly, and error-prone task more efficient. \nNevertheless, practical progress in this direction has been difficult to achieve due to a problem formulation and evaluation gap between research output and the real-world task. \nIn order to address this gap, we introduce a new formulation for UMLS vocabulary insertion which mirrors the real-world task, datasets which faithfully represent it and several strong baselines we developed through re-purposing existing solutions.\nAdditionally, we propose an effective rule-enhanced biomedical language model which enables important new model behavior, outperforms all strong baselines and provides measurable qualitative improvements to editors who carry out the UVI task.\nWe hope this case study provides insight into the considerable importance of problem formulation for the success of translational NLP solutions.", "keywords": "biomedical NLP;translational NLP;synonymy prediction;knowledge base construction", "primary_area": "", "supplementary_material": "", "author": "Bernal Jimenez Gutierrez;Yuqing Mao;Vinh Nguyen;KIN WAH FUNG;Yu Su;Olivier Bodenreider", "authorids": "~Bernal_Jimenez_Gutierrez1;~Yuqing_Mao1;~Vinh_Nguyen2;~KIN_WAH_FUNG1;~Yu_Su2;~Olivier_Bodenreider2", "gender": ";;;M;M;M", "homepage": ";;;http://ysu1989.github.io;https://mor.nlm.nih.gov/;https://bernaljg.github.io/", "dblp": ";64/6223;;38/1070-1;29/3051.html;264/4620", "google_scholar": "dXiyl14AAAAJ;GU60-gYAAAAJ;;rIh5OqoAAAAJ;https://scholar.google.com/citations?hl=en;https://scholar.google.com/citations?hl=en", "or_profile": "~Yuqing_Mao1;~Vinh_Nguyen2;~KIN_WAH_FUNG1;~Yu_Su2;~Olivier_Bodenreider2;~Bernal_Jimenez1", "aff": ";U.S. 
National Library of Medicine;National Institutes of Health;Microsoft;National Library of Medicine;The Ohio State University", "aff_domain": ";nlm.nih.gov;nih.gov;microsoft.com;nlm.nih.gov;osu.edu", "position": ";Postdoc;Researcher;Senior Researcher;Senior Scientist;PhD student", "bibtex": "@inproceedings{\ngutierrez2023solving,\ntitle={Solving the Right Problem is Key for Translational {NLP}: A Case Study in {UMLS} Vocabulary Insertion},\nauthor={Bernal Jimenez Gutierrez and Yuqing Mao and Vinh Nguyen and KIN WAH FUNG and Yu Su and Olivier Bodenreider},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=z9CqYTwOiO}\n}", "github": "", "project": "", "reviewers": "F1a9;SKdh;wXDg", "site": "https://openreview.net/forum?id=z9CqYTwOiO", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;5;4", "excitement": "3;3;4", "reproducibility": "3;4;4", "correctness": "4;3;4", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0003-0593-5377;;0000-0003-4769-4217;", "linkedin": ";vinhtknguyen/;;;;bernal-jimenez/", "aff_unique_index": "0;1;2;0;3", "aff_unique_norm": "National Library of Medicine;National Institutes of Health;Microsoft;Ohio State University", "aff_unique_dep": ";;Microsoft Corporation;", "aff_unique_url": "https://www.nlm.nih.gov;https://www.nih.gov;https://www.microsoft.com;https://www.osu.edu", "aff_unique_abbr": "NLM;NIH;Microsoft;OSU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "z9l6nHpTyT", "title": "Adapter-TST: A Parameter Efficient Method for Multiple-Attribute Text Style Transfer", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Adapting a large language model for multiple-attribute text style transfer via fine-tuning can be challenging due to the substantial amount of computational resources and labeled data required for the specific downstream task. In this paper, we address this challenge by introducing \\textsf{Adapter-TST}, a framework that freezes the pre-trained model's original parameters and enables the development of a multiple-attribute text style transfer model. Using BART as the backbone model, \\textsf{Adapter-TST} utilizes different neural adapters to model different types of attribute information, similar to a plug-in connected to BART. Our method allows control over multiple attributes (e.g. sentiment, tense, active or passive voice) and configures the adapters' architecture to generate multiple outputs in respect to attributes or compositional editing on the same sentence. We evaluate the proposed model on both traditional sentiment transfer and multiple-attribute transfer tasks. The experiment results demonstrate that \\textsf{Adapter-TST} outperforms all the state-of-the-art baselines with significantly less computational resources. We have also empirically shown that each adapter is able to characterize specific stylistic attributes effectively and can be configured to perform compositional editing.", "keywords": "Text style transfer;Parameter-efficient;Adapter", "primary_area": "", "supplementary_material": "", "author": "Zhiqiang Hu;Nancy F. 
Chen;Roy Ka-Wei Lee", "authorids": "~Zhiqiang_Hu3;~Nancy_F._Chen1;~Roy_Ka-Wei_Lee1", "gender": ";;M", "homepage": "https://hzq950419.github.io/HomePage/;http://alum.mit.edu/www/nancychen;https://www.socialai.studio/team", "dblp": ";84/8761;139/2266", "google_scholar": "vjQQUnwAAAAJ;https://scholar.google.com.sg/citations?user=K3Z9UiAAAAAJ;https://scholar.google.com.sg/citations?user=uQxdOlsAAAAJ", "or_profile": "~Zhiqiang_Hu3;~Nancy_F._Chen1;~Roy_Ka-Wei_Lee1", "aff": "Singapore University of Technology and Design;I2R, A*STAR;Singapore University of Technology and Design", "aff_domain": "sutd.edu.sg;i2r.a-star.edu.sg;sutd.edu.sg", "position": "PhD student;Principal Researcher;Assistant Professor", "bibtex": "@inproceedings{\nhu2023adaptertst,\ntitle={Adapter-{TST}: A Parameter Efficient Method for Multiple-Attribute Text Style Transfer},\nauthor={Zhiqiang Hu and Nancy F. Chen and Roy Ka-Wei Lee},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=z9l6nHpTyT}\n}", "github": "", "project": "", "reviewers": "f2rF;Jb8X;2k94", "site": "https://openreview.net/forum?id=z9l6nHpTyT", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;5;5", "excitement": "3;2;3", "reproducibility": "4;4;4", "correctness": "4;3;4", "rating_avg": 3.0, "confidence_avg": 4.666666666666667, "excitement_avg": 2.6666666666666665, "reproducibility_avg": 4.0, "correctness_avg": 3.6666666666666665, "replies_avg": 10, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0003-0872-5877;0000-0002-1986-7750", "linkedin": ";nancy-chen-4644865/?originalSubdomain=sg;", "aff_unique_index": "0;1;0", "aff_unique_norm": "Singapore University of Technology and Design;A*STAR", "aff_unique_dep": ";Institute for Infocomm Research", "aff_unique_url": "https://www.sutd.edu.sg;https://www.a-star.edu.sg", "aff_unique_abbr": "SUTD;A*STAR", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Singapore" }, { "id": "zByqDt16qZ", "title": "Evaluating the Rationale Understanding of Critical Reasoning in Logical Reading Comprehension", "track": "main", "status": "Long Main", "tldr": "", "abstract": "To precisely evaluate a language model's capability for logical reading comprehension, we present a dataset for testing the understanding of the rationale behind critical reasoning.\nFor questions taken from an existing multiple-choice logical reading comprehension dataset, we crowdsource rationale texts that explain why we should select or eliminate answer options, resulting in 3,003 multiple-choice subquestions that are associated with 943 main questions.\nExperiments on our dataset show that recent large language models (e.g., InstructGPT) struggle to answer the subquestions even if they are able to answer the main questions correctly.\nWe find that the models perform particularly poorly in answering subquestions written for the incorrect options of the main questions, implying that the models have a limited capability for explaining why incorrect alternatives should be eliminated.\nThese results suggest that our dataset encourages further investigation into the critical reasoning ability of language models while focusing on the elimination process of relevant alternatives.", "keywords": "natural language understanding;reading comprehension;evaluation;dataset;rationale", "primary_area": "", "supplementary_material": "", "author": "Akira Kawabata;Saku Sugawara", 
"authorids": "~Akira_Kawabata1;~Saku_Sugawara1", "gender": "M;", "homepage": ";https://penzant.net", "dblp": ";195/8158", "google_scholar": ";1nun9kgAAAAJ", "or_profile": "~Akira_Kawabata1;~Saku_Sugawara1", "aff": "Nara Institute of Science and Technology, Japan;National Institute of Informatics", "aff_domain": "naist.jp;nii.ac.jp", "position": "MS student;Assistant Professor", "bibtex": "@inproceedings{\nkawabata2023evaluating,\ntitle={Evaluating the Rationale Understanding of Critical Reasoning in Logical Reading Comprehension},\nauthor={Akira Kawabata and Saku Sugawara},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=zByqDt16qZ}\n}", "github": "", "project": "", "reviewers": "X3Ra;i432;ivgz", "site": "https://openreview.net/forum?id=zByqDt16qZ", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;4;4", "excitement": "4;4;4", "reproducibility": "3;5;4", "correctness": "4;4;4", "rating_avg": 5.0, "confidence_avg": 4.0, "excitement_avg": 4.0, "reproducibility_avg": 4.0, "correctness_avg": 4.0, "replies_avg": 10, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0002-0061-0680", "linkedin": "akira-kawabata-4b0bb7203/;", "aff_unique_index": "0;1", "aff_unique_norm": "Nara Institute of Science and Technology;National Institute of Informatics", "aff_unique_dep": ";", "aff_unique_url": "https://www.nist.jp;https://www.nii.ac.jp/", "aff_unique_abbr": "NIST;NII", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Japan" }, { "id": "zEJFYWWmbG", "title": "Primacy Effect of ChatGPT", "track": "main", "status": "Short Main", "tldr": "", "abstract": "Instruction-tuned large language models (LLMs), such as ChatGPT, have led to promising zero-shot performance in discriminative natural language understanding (NLU) tasks. This involves querying the LLM using a prompt containing the question, and the candidate labels to choose from. The question-answering capabilities of ChatGPT arise from its pre-training on large amounts of human-written text, as well as its subsequent fine-tuning on human preferences, which motivates us to ask: Does ChatGPT also inherit humans' cognitive biases? In this paper, we study the primacy effect of ChatGPT: the tendency of selecting the labels at earlier positions as the answer. We have two main findings: i) ChatGPT's decision is sensitive to the order of labels in the prompt; ii) ChatGPT has a clearly higher chance to select the labels at earlier positions as the answer. We hope that our experiments and analyses provide additional insights into building more reliable ChatGPT-based solutions. 
We release the source code at https://github.com/wangywUST/PrimacyEffectGPT.", "keywords": "Primacy Effect;ChatGPT;Large Language Models;Natural Language Understanding", "primary_area": "", "supplementary_material": "", "author": "Yiwei Wang;Yujun Cai;Muhao Chen;Yuxuan Liang;Bryan Hooi", "authorids": "~Yiwei_Wang2;~Yujun_Cai1;~Muhao_Chen1;~Yuxuan_Liang1;~Bryan_Hooi1", "gender": "M;F;M;M;", "homepage": ";;https://muhaochen.github.io/;https://yuxuanliang.com;http://bhooi.github.io", "dblp": "50/5889-1;227/4399;173/2608;183/0977;169/9975", "google_scholar": "https://scholar.google.com.hk/citations?user=Sh9QvBkAAAAJ;https://scholar.google.com/citations?hl=en;k79yEZkAAAAJ;n9cODgcAAAAJ;", "or_profile": "~Yiwei_Wang2;~Yujun_Cai1;~Muhao_Chen1;~Yuxuan_Liang1;~Bryan_Hooi1", "aff": "National University of Singapore;Meta Facebook;University of Southern California;The Hong Kong University of Science and Technology (Guangzhou);National University of Singapore", "aff_domain": "u.nus.edu;fb.com;usc.edu;hkust-gz.edu.cn;nus.edu.sg", "position": "PhD student;Researcher;Assistant Research Professor;Assistant Professor;Assistant Professor", "bibtex": "@inproceedings{\nwang2023primacy,\ntitle={Primacy Effect of Chat{GPT}},\nauthor={Yiwei Wang and Yujun Cai and Muhao Chen and Yuxuan Liang and Bryan Hooi},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=zEJFYWWmbG}\n}", "github": "", "project": "", "reviewers": "4BGH;Zv7g;SvBZ", "site": "https://openreview.net/forum?id=zEJFYWWmbG", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;4;5", "excitement": "3;4;4", "reproducibility": "4;3;3", "correctness": "3;4;4", "rating_avg": 5.0, "confidence_avg": 4.333333333333333, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0003-0118-3147;0000-0003-2817-7337;0000-0002-5645-1754", "linkedin": ";;;yoshall/;", "aff_unique_index": "0;1;2;3;0", "aff_unique_norm": "National University of Singapore;Meta;University of Southern California;Hong Kong University of Science and Technology", "aff_unique_dep": ";Meta Platforms, Inc.;;", "aff_unique_url": "https://www.nus.edu.sg;https://meta.com;https://www.usc.edu;https://www.ust.hk", "aff_unique_abbr": "NUS;Meta;USC;HKUST", "aff_campus_unique_index": "1;2", "aff_campus_unique": ";Los Angeles;Guangzhou", "aff_country_unique_index": "0;1;1;2;0", "aff_country_unique": "Singapore;United States;China" }, { "id": "zIb2DlqBxm", "title": "PHD: Pixel-Based Language Modeling of Historical Documents", "track": "main", "status": "Long Main", "tldr": "", "abstract": "The digitisation of historical documents has provided historians with unprecedented research opportunities. Yet, the conventional approach to analysing historical documents involves converting them from images to text using OCR, a process that overlooks the potential benefits of treating them as images and introduces high levels of noise. To bridge this gap, we take advantage of recent advancements in pixel-based language models trained to reconstruct masked patches of pixels instead of predicting token distributions. Due to the scarcity of real historical scans, we propose a novel method for generating synthetic scans to resemble real historical documents. 
We then pre-train our model, PHD, on a combination of synthetic scans and real historical newspapers from the 1700-1900 period. Through our experiments, we demonstrate that PHD exhibits high proficiency in reconstructing masked image patches and provide evidence of our model's noteworthy language understanding capabilities. Notably, we successfully apply our model to a historical QA task, highlighting its usefulness in this domain.", "keywords": "Visual language modelling;Historical documents;Multimodal models", "primary_area": "", "supplementary_material": "", "author": "Nadav Borenstein;Phillip Rust;Desmond Elliott;Isabelle Augenstein", "authorids": "~Nadav_Borenstein1;~Phillip_Rust1;~Desmond_Elliott1;~Isabelle_Augenstein1", "gender": "M;;;F", "homepage": "https://nadav.dk;https://phillip.rs;;http://isabelleaugenstein.github.io/", "dblp": ";263/9843;46/7536;93/11424.html", "google_scholar": "uDM-PC0AAAAJ;6MxyDqcAAAAJ;;https://scholar.google.co.uk/citations?user=DjJp0dcAAAAJ", "or_profile": "~Nadav_Borenstein1;~Phillip_Rust1;~Desmond_Elliott1;~Isabelle_Augenstein1", "aff": "University of Copenhagen;University of Copenhagen;University of Copenhagen;University of Copenhagen", "aff_domain": "diku.dk;ku.dk;ku.dk;ku.dk", "position": "PhD student;PhD student;Assistant Professor;Full Professor", "bibtex": "@inproceedings{\nborenstein2023phd,\ntitle={{PHD}: Pixel-Based Language Modeling of Historical Documents},\nauthor={Nadav Borenstein and Phillip Rust and Desmond Elliott and Isabelle Augenstein},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=zIb2DlqBxm}\n}", "github": "", "project": "", "reviewers": "NET7;Uoyc;nvph", "site": "https://openreview.net/forum?id=zIb2DlqBxm", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;4", "excitement": "4;4;4", "reproducibility": "3;4;4", "correctness": "3;4;4", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 4.0, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 3.6666666666666665, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0001-5123-821X;;0000-0003-1562-7909", "linkedin": "nadavbor/;;;isabelle-augenstein-82436b7a/", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "University of Copenhagen", "aff_unique_dep": "", "aff_unique_url": "https://www.ku.dk", "aff_unique_abbr": "UCPH", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "Denmark" }, { "id": "zIgc1Qeceh", "title": "Holistic Inter-Annotator Agreement and Corpus Coherence Estimation in a Large-scale Multilingual Annotation Campaign", "track": "main", "status": "Long Main", "tldr": "", "abstract": "In this paper we report on the complexity of persuasion technique annotation in the context of a large multilingual annotation campaign involving 6 languages and approximately 40 annotators. We highlight the techniques that appear to be difficult for humans to annotate and elaborate on our findings on the causes of this phenomenon.\nWe introduce Holistic IAA, a new word embedding-based annotator agreement metric and we report on various experiments using this metric and its correlation with the traditional Inter Annotator Agreement (IAA) metrics. 
However, given somewhat limited and loose interaction between annotators, i.e., only a few annotators annotate the same document subsets, we try to devise a way to assess the coherence of the entire dataset and strive to find a good proxy for IAA between annotators tasked to annotate different documents and in different languages, for which classical IAA metrics can not be applied.", "keywords": "persuasion techniques;annotation;inter-annotator agreement;data quality;IAA", "primary_area": "", "supplementary_material": "", "author": "Nicolas Stefanovitch;Jakub Piskorski", "authorids": "~Nicolas_Stefanovitch1;~Jakub_Piskorski2", "gender": ";", "homepage": ";", "dblp": ";71/1942", "google_scholar": ";xDQ3yuQAAAAJ", "or_profile": "~Nicolas_Stefanovitch1;~Jakub_Piskorski2", "aff": ";Institute of Computer Science, Polish Academy of Science", "aff_domain": ";ipipan.waw.pl", "position": ";Research Associate", "bibtex": "@inproceedings{\nstefanovitch2023holistic,\ntitle={Holistic Inter-Annotator Agreement and Corpus Coherence Estimation in a Large-scale Multilingual Annotation Campaign},\nauthor={Nicolas Stefanovitch and Jakub Piskorski},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=zIgc1Qeceh}\n}", "github": "", "project": "", "reviewers": "Jcj8;Wmuj;9ysf", "site": "https://openreview.net/forum?id=zIgc1Qeceh", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;4;4", "excitement": "3;3;3", "reproducibility": "3;4;3", "correctness": "4;5;3", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.0, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";", "linkedin": ";", "aff_unique_index": "0", "aff_unique_norm": "Polish Academy of Sciences", "aff_unique_dep": "Institute of Computer Science", "aff_unique_url": "https://www.pan.pl", "aff_unique_abbr": "PAS", "aff_country_unique_index": "0", "aff_country_unique": "Poland" }, { "id": "zLAHDHhgLa", "title": "Fine-grained Conversational Decoding via Isotropic and Proximal Search", "track": "main", "status": "Short Main", "tldr": "", "abstract": "General-purpose text decoding approaches are usually adopted for dialogue response generation. Although the quality of the generated responses can be improved with dialogue-specific encoding methods, conversational decoding methods are still under-explored. Inspired by SimDRC that a good dialogue feature space should follow the rules of locality and isotropy, we present a fine-grained\nconversational decoding method, termed isotropic and proximal search (IPS). Our method is designed to generate the semantic-\nconcentrated response, while still maintaining informativeness and discrimination against the context. Experiments show that our approach\nsignificantly outperforms existing decoding strategies in the dialogue field across both automatic and human evaluation metrics. 
More in-\ndepth analyses further confirm the effectiveness of our approach.", "keywords": "text generation; dialogue system; decoding strategy", "primary_area": "", "supplementary_material": "", "author": "Yuxuan YAO;Han Wu;Qiling Xu;Linqi Song", "authorids": "~Yuxuan_YAO1;~Han_Wu5;~Qiling_Xu1;~Linqi_Song1", "gender": ";M;;M", "homepage": ";https://hahahawu.com/;;https://sites.google.com/site/aisquaredlab/", "dblp": ";13/1864-4;;137/7963.html", "google_scholar": ";https://scholar.google.com.hk/citations?user=1SHXVAIAAAAJ;;UcGN3MoAAAAJ", "or_profile": "~Yuxuan_YAO1;~Han_Wu5;~Qiling_Xu1;~Linqi_Song1", "aff": "City University of Hong Kong;City University of Hong Kong;City University of Hong Kong;City University of Hong Kong", "aff_domain": "cityu.edu.hk;cityu.edu.hk;cityu.edu.hk;cityu.edu.hk", "position": "PhD student;PhD student;PhD student;Assistant Professor", "bibtex": "@inproceedings{\nyao2023finegrained,\ntitle={Fine-grained Conversational Decoding via Isotropic and Proximal Search},\nauthor={Yuxuan YAO and Han Wu and Qiling Xu and Linqi Song},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=zLAHDHhgLa}\n}", "github": "", "project": "", "reviewers": "T3Fd;nWpT;ybjD", "site": "https://openreview.net/forum?id=zLAHDHhgLa", "pdf_size": 0, "rating": "5;5;5", "confidence": "3;3;3", "excitement": "4;4;4", "reproducibility": "4;5;4", "correctness": "4;4;4", "rating_avg": 5.0, "confidence_avg": 3.0, "excitement_avg": 4.0, "reproducibility_avg": 4.333333333333333, "correctness_avg": 4.0, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0009-0009-3955-7272;0000-0002-8008-064X;0009-0005-4400-3179;0000-0003-2756-4984", "linkedin": ";;;", "aff_unique_index": "0;0;0;0", "aff_unique_norm": "City University of Hong Kong", "aff_unique_dep": "", "aff_unique_url": "https://www.cityu.edu.hk", "aff_unique_abbr": "CityU", "aff_campus_unique_index": "0;0;0;0", "aff_campus_unique": "Hong Kong SAR", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "China" }, { "id": "zM3mlyflTt", "title": "Approximating Two-Layer Feedforward Networks for Efficient Transformers", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "How to reduce compute and memory requirements of neural networks (NNs) without sacrificing performance? Many recent works use sparse Mixtures of Experts (MoEs) to build resource-efficient large language models (LMs). Here we introduce several novel perspectives on MoEs, presenting a general framework that *unifies* various methods to *approximate two-layer NNs* (e.g., feedforward blocks of Transformers), including product-key memories (PKMs). Leveraging insights from this framework, we propose methods to improve both MoEs and PKMs. Unlike prior work that compares MoEs with dense baselines under the *compute-equal* condition, our evaluation condition is *parameter-equal*, which is crucial to properly evaluate LMs. We show that our MoEs are competitive with the *dense* Transformer-XL on both the WikiText-103 and enwiki8 datasets at two different scales, while being much more resource efficient. This demonstrates that MoEs are relevant not only to extremely large LMs but also to any-scale resource-efficient LMs. 
Our code is public.", "keywords": "transformers;moe;mixture of experts;pkm;product key memories;approximate computation;efficient transformers;language modelling", "primary_area": "", "supplementary_material": "", "author": "R\u00f3bert Csord\u00e1s;Kazuki Irie;J\u00fcrgen Schmidhuber", "authorids": "~R\u00f3bert_Csord\u00e1s1;~Kazuki_Irie1;~J\u00fcrgen_Schmidhuber1", "gender": "M;;M", "homepage": "https://robertcsordas.github.io/;https://sites.harvard.edu/kazuki-irie/;http://people.idsia.ch/~juergen/", "dblp": "166/4773.html;148/9667;s/JurgenSchmidhuber", "google_scholar": "av1lplwAAAAJ;https://scholar.google.de/citations?user=-gZ-BdwAAAAJ;https://scholar.google.ch/citations?user=gLnCTgIAAAAJ", "or_profile": "~R\u00f3bert_Csord\u00e1s1;~Kazuki_Irie1;~J\u00fcrgen_Schmidhuber1", "aff": "IDSIA;The Swiss AI Lab IDSIA, Dalle Molle Institute for Artificial Intelligence Research;IDSIA", "aff_domain": "idsia.ch;idsia.ch;idsia.ch", "position": "PhD student;Postdoc;Scientific Director", "bibtex": "@inproceedings{\ncsord{\\'a}s2023approximating,\ntitle={Approximating Two-Layer Feedforward Networks for Efficient Transformers},\nauthor={R{\\'o}bert Csord{\\'a}s and Kazuki Irie and J{\\\"u}rgen Schmidhuber},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=zM3mlyflTt}\n}", "github": "", "project": "", "reviewers": "FXRA;QrbU;JndA;d9Y9", "site": "https://openreview.net/forum?id=zM3mlyflTt", "pdf_size": 0, "rating": "3;3;3;3", "confidence": "2;4;2;3", "excitement": "2;3;3;3", "reproducibility": "3;3;4;4", "correctness": "3;4;3;3", "rating_avg": 3.0, "confidence_avg": 2.75, "excitement_avg": 2.75, "reproducibility_avg": 3.5, "correctness_avg": 3.25, "replies_avg": 14, "authors#_avg": 3, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";0000-0003-0923-691X;", "linkedin": "robertcsordas/;;", "aff_unique_index": "0;1;0", "aff_unique_norm": "Institute of Digital Technologies;IDSIA", "aff_unique_dep": ";Swiss AI Lab", "aff_unique_url": "https://www.idsia.ch;https://www.idsia.ch/", "aff_unique_abbr": "IDSIA;IDSIA", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "Switzerland" }, { "id": "zSUOfRVl28", "title": "Decoding the Silent Majority: Inducing Belief Augmented Social Graph with Large Language Model for Response Forecasting", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Automatic response forecasting for news media plays a crucial role in enabling content producers to efficiently predict the impact of news releases and prevent unexpected negative outcomes such as social conflict and moral injury. To effectively forecast responses, it is essential to develop measures that leverage the social dynamics and contextual information surrounding individuals, especially in cases where explicit profiles or historical actions of the users are limited (referred to as lurkers). As shown in a previous study, 97% of all tweets are produced by only the most active 25% of users. However, existing approaches have limited exploration of how to best process and utilize these important features. To address this gap, we propose a novel framework, named SocialSense, that leverages a large language model to induce a belief-centered graph on top of an existent social network, along with graph-based propagation to capture social dynamics. 
We hypothesize that the induced graph that bridges the gap between distant users who share similar beliefs allows the model to effectively capture the response patterns. Our method surpasses existing state-of-the-art in experimental evaluations for both zero-shot and supervised settings, demonstrating its effectiveness in response forecasting. Moreover, the analysis reveals the framework's capability to effectively handle unseen user and lurker scenarios, further highlighting its robustness and practical applicability.", "keywords": "Response Forecasting;Social Media;Social Network;Language Model;ChatGPT;Personalization;Response Prediction", "primary_area": "", "supplementary_material": "", "author": "Chenkai Sun;Jinning Li;Yi Fung;Hou Pong Chan;Tarek Abdelzaher;ChengXiang Zhai;Heng Ji", "authorids": "~Chenkai_Sun1;~Jinning_Li2;~Yi_Fung1;~Hou_Pong_Chan2;~Tarek_Abdelzaher1;~ChengXiang_Zhai1;~Heng_Ji3", "gender": "M;M;F;M;M;M;F", "homepage": "https://chenkaisun.github.io/;https://jinningli.cn;https://mayrfung.github.io;https://kenchan0226.github.io;http://abdelzaher.cs.illinois.edu/;http://czhai.cs.illinois.edu/;http://blender.cs.illinois.edu/hengji.html", "dblp": "251/9509;211/7889-1;223/2782-1.html;178/3691.html;a/TarekFAbdelzaher;z/ChengXiangZhai;", "google_scholar": "ipzG4asAAAAJ;ED8QSJwAAAAJ;eUae2K0AAAAJ;HCljxf0AAAAJ;https://scholar.google.com.tw/citations?user=cA28Zs0AAAAJ;YU-baPIAAAAJ;z7GCqT4AAAAJ", "or_profile": "~Chenkai_Sun1;~Jinning_Li2;~Yi_Fung1;~Hou_Pong_Chan2;~Tarek_Abdelzaher1;~ChengXiang_Zhai1;~Heng_Ji3", "aff": "University of Illinois Urbana Champaign;University of Illinois, Urbana Champaign;University of Illinois, Urbana Champaign;University of Macau;University of Illinois, Urbana Champaign;University of Illinois, Urbana Champaign;University of Illinois, Urbana-Champaign", "aff_domain": "illinois.edu;illinois.edu;illinois.edu;umac.mo;illinois.edu;illinois.edu;uiuc.edu", "position": "PhD student;PhD student;PhD student;Lecturer;Full Professor;Full Professor;Full Professor", "bibtex": "@inproceedings{\nsun2023decoding,\ntitle={Decoding the Silent Majority: Inducing Belief Augmented Social Graph with Large Language Model for Response Forecasting},\nauthor={Chenkai Sun and Jinning Li and Yi Fung and Hou Pong Chan and Tarek Abdelzaher and ChengXiang Zhai and Heng Ji},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=zSUOfRVl28}\n}", "github": "", "project": "", "reviewers": "9BLt;EsDk;HaTs;2MJW", "site": "https://openreview.net/forum?id=zSUOfRVl28", "pdf_size": 0, "rating": "4;4;4;4", "confidence": "4;3;3;3", "excitement": "3;4;4;4", "reproducibility": "3;4;4;3", "correctness": "3;4;4;4", "rating_avg": 4.0, "confidence_avg": 3.25, "excitement_avg": 3.75, "reproducibility_avg": 3.5, "correctness_avg": 3.75, "replies_avg": 14, "authors#_avg": 7, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-7999-6933;0000-0003-1927-9999;;0000-0001-9207-4178;0000-0003-3883-7220;0000-0002-6434-3702;", "linkedin": "chenkaisun/;jinning-li-343168162/;;;tarek-abdelzaher-0216071/;;", "aff_unique_index": "0;0;0;1;0;0;2", "aff_unique_norm": "University of Illinois Urbana-Champaign;University of Macau;University of Illinois", "aff_unique_dep": ";;", "aff_unique_url": "https://illinois.edu;https://www.um.edu.mo;https://illinois.edu", "aff_unique_abbr": "UIUC;UM;UIUC", "aff_campus_unique_index": "0;0;0;1;0;0;0", "aff_campus_unique": "Urbana-Champaign;Macau SAR", 
"aff_country_unique_index": "0;0;0;1;0;0;0", "aff_country_unique": "United States;China" }, { "id": "zVi11zjaPe", "title": "EIT: Enhanced Interactive Transformer", "track": "main", "status": "Reject", "tldr": "", "abstract": "Two principles: the \\textit{complementary principle} and the \\textit{consensus principle} are widely acknowledged in the literature of multi-view learning. However, current design of Multi-head self-attention, an instance of multi-view learning, prioritizes the complementarity while ignoring the consensus. To address this problem, We propose an enhanced multi-head self-attention (EMHA). First, to satisfy the \\textit{complementary principle}, EMHA removes the one-to-one mapping constraint among queries and keys in multiple subspaces and allows each query to attend to multiple keys. On top of that, we develop a method to fully encourage consensus among heads by introducing two interaction models, namely Inner-Subspace Interaction and Cross-Subspace Interaction. Extensive experiments on a wide range of language tasks (e.g. machine translation, abstractive summarization and grammar correction, languages modeling), show its superiority, with a very modest increase in model size. ", "keywords": "Transformer; Multi-head self-attention; Multi-view learning;", "primary_area": "", "supplementary_material": "", "author": "Tong Zheng;Bei Li;Huiwen Bao;Yi Jing;Tong Xiao;JingBo Zhu", "authorids": "~Tong_Zheng1;~Bei_Li1;~Huiwen_Bao1;~Yi_Jing2;~Tong_Xiao4;~JingBo_Zhu2", "gender": "M;M;;M;;F", "homepage": "https://kidzheng.github.io/;https://libeineu.github.io/;https://github.com/qinger521;https://www.nlplab.com/members/xiaotong.html;https://dblp.org/pid/73/2129.html;", "dblp": ";;;05/5091;;", "google_scholar": "https://scholar.google.com/citations?hl=zh-CN;wzbJ5EIAAAAJ;;-fov7zkAAAAJ;;3PnRUyQAAAAJ", "or_profile": "~Tong_Zheng1;~Bei_Li1;~Yi_Jing2;~Tong_Xiao4;~JingBo_Zhu2;~bao_huiwen1", "aff": ";Northeastern University;Northeastern University;Northeastern University;Northeastern University;Northeastern University", "aff_domain": ";neu.edu.cn;neu.edu.cn;mail.neu.edu.cn;mail.neu.edu.cn;neu.edu.cn", "position": ";PhD student;MS student;Full Professor;Full Professor;Intern", "bibtex": "@misc{\nzheng2023eit,\ntitle={{EIT}: Enhanced Interactive Transformer},\nauthor={Tong Zheng and Bei Li and Huiwen Bao and Yi Jing and Tong Xiao and JingBo Zhu},\nyear={2023},\nurl={https://openreview.net/forum?id=zVi11zjaPe}\n}", "github": "", "project": "", "reviewers": "gRpq;Exgp;S8VV;gNnp", "site": "https://openreview.net/forum?id=zVi11zjaPe", "pdf_size": 0, "rating": "4;4;4;4", "confidence": "4;3;3;4", "excitement": "4;4;3;4", "reproducibility": "2;3;4;3", "correctness": "3;4;4;3", "rating_avg": 4.0, "confidence_avg": 3.5, "excitement_avg": 3.75, "reproducibility_avg": 3.0, "correctness_avg": 3.5, "replies_avg": 13, "authors#_avg": 6, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0002-3472-4387;;;;;", "linkedin": ";;;tong-xiao-168bb081/;;", "aff_unique_index": "0;0;0;0;0", "aff_unique_norm": "Northeastern University", "aff_unique_dep": "", "aff_unique_url": "https://www.northeastern.edu", "aff_unique_abbr": "NEU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0", "aff_country_unique": "United States" }, { "id": "zWGDn1AmRH", "title": "ReFSQL: A Retrieval-Augmentation Framework for Text-to-SQL Generation", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Text-to-SQL is the task that aims at translating 
natural language questions into SQL queries.\nExisting methods directly align the natural language with the SQL language and train one encoder-decoder-based model to fit all questions. \nHowever, they underestimate the inherent structural characteristics of SQL, as well as the gap between specific structure knowledge and general knowledge. This leads to structure errors in the generated SQL.\nTo address the above challenges, we propose a retrieval-augmentation framework, namely ReFSQL.\nIt contains two parts: a structure-enhanced retriever and a generator.\nThe structure-enhanced retriever is designed to identify samples with comparable specific knowledge in an unsupervised way. \nSubsequently, we incorporate the retrieved samples\u2019 SQL into the input, enabling the model to acquire prior knowledge of similar SQL grammar. \nTo further bridge the gap between specific and general knowledge, we present a Mahalanobis contrastive learning method, which facilitates the transfer of the sample toward the specific knowledge distribution constructed by the retrieved samples.\nExperimental results on five datasets verify the effectiveness of our approach in improving the accuracy and robustness of Text-to-SQL generation.\nOur framework has achieved improved performance when combined with many other backbone models (including the 11B flan-T5) and also achieved state-of-the-art performance when compared to existing methods that employ the fine-tuning approach.", "keywords": "Text-to-SQL;Retrieval-Augmentation", "primary_area": "", "supplementary_material": "", "author": "Kun Zhang;XieXiong Lin;Yuanzhuo Wang;Xin Zhang;Fei Sun;Cen Jianhe;Hexiang Tan;Xuhui Jiang;Huawei Shen", "authorids": "~Kun_Zhang6;~XieXiong_Lin1;~Yuanzhuo_Wang1;~Xin_Zhang38;~Fei_Sun3;~Cen_Jianhe1;~Hexiang_Tan1;~Xuhui_Jiang1;~Huawei_Shen1", "gender": "M;M;M;;M;M;;M;M", "homepage": ";;https://www.ict.ac.cn/sourcedb/cn/jssrck/201011/t20101122_3025790.html;;http://ofey.me;https://github.com/PureEidolon;;https://github.com/jxh4945777;https://www.ict.ac.cn/sourcedb/cn/jssrck/201402/t20140221_4037648.html", "dblp": ";;65/5018;;51/394-1;;;289/7926;", "google_scholar": "pbq4AXQAAAAJ;keqS9HUAAAAJ;v1KzwYEAAAAJ;;OlRxBhcAAAAJ;;;https://scholar.google.com.hk/citations?user=10GD-YMAAAAJ;", "or_profile": "~Kun_Zhang6;~XieXiong_Lin1;~Yuanzhuo_Wang1;~Xin_Zhang38;~Fei_Sun3;~Cen_Jianhe1;~Hexiang_Tan1;~Xuhui_Jiang1;~Huawei_Shen1", "aff": "Institute of Computing Technology , Chinese Academy of Sciences;;Chinese Academy of Sciences;;Institute of Computing Technology, Chinese Academy of Sciences;Zhengzhou University;;Institute of Computing Technology, Chinese Academy of Sciences;Institute of Computing Technology, Chinese Academy of Sciences", "aff_domain": "ict.ac.cn;;ict.ac.cn;;ict.ac.cn;zzu.edu.cn;;ict.ac.cn;ict.ac.cn", "position": "PhD student;;Full Professor;;Associate Professor;MS student;;PhD student;Full Professor", "bibtex": "@inproceedings{\nzhang2023refsql,\ntitle={Re{FSQL}: A Retrieval-Augmentation Framework for Text-to-{SQL} Generation},\nauthor={Kun Zhang and XieXiong Lin and Yuanzhuo Wang and Xin Zhang and Fei Sun and Cen Jianhe and Hexiang Tan and Xuhui Jiang and Huawei Shen},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=zWGDn1AmRH}\n}", "github": "", "project": "", "reviewers": "wb9q;sEnG;fMpS", "site": "https://openreview.net/forum?id=zWGDn1AmRH", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;3;4", "excitement": "3;3;4", "reproducibility": "3;2;4", 
"correctness": "2;3;4", "rating_avg": 3.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 3.0, "correctness_avg": 3.0, "replies_avg": 9, "authors#_avg": 9, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;0000-0002-6799-1756;;0000-0002-6146-148X;;;0000-0002-1741-0781;0000-0002-1081-8119", "linkedin": ";;;;;;;;", "aff_unique_index": "0;0;0;1;0;0", "aff_unique_norm": "Chinese Academy of Sciences;Zhengzhou University", "aff_unique_dep": "Institute of Computing Technology;", "aff_unique_url": "http://www.ict.ac.cn;http://www.zzu.edu.cn", "aff_unique_abbr": "CAS;ZZU", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "zaBPb6Pu21", "title": "Chinese Lexical Substitution: Dataset and Method", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Existing lexical substitution (LS) benchmarks were collected by asking human annotators to think of substitutes from memory, resulting in benchmarks with limited coverage and relatively small scales. To overcome this problem, we propose a novel annotation method to construct an LS dataset based on human and machine collaboration. Based on our annotation method, we construct the first Chinese LS dataset CHNLS which consists of 33,695 instances and 144,708 substitutes, covering three text genres (News, Novel, and Wikipedia). Specifically, we first combine four unsupervised LS methods as an ensemble method to generate the candidate substitutes, and then let human annotators judge these candidates or add new ones. This collaborative process combines the diversity of machine-generated substitutes with the expertise of human annotators. Experimental results that the ensemble method outperforms other LS methods. 
To our best knowledge, this is the first study for the Chinese LS task.", "keywords": "Lexical substitution;Chinese writing assistance;Substitution generation", "primary_area": "", "supplementary_material": "", "author": "Jipeng Qiang;Kang Liu;LiYing;Yun Li;Yi Zhu;Yun-Hao Yuan;Xiaocheng Hu;Xiaoye Ouyang", "authorids": "~Jipeng_Qiang1;~Kang_Liu6;~LiYing1;~Yun_Li8;~Yi_Zhu5;~Yun-Hao_Yuan1;~Xiaocheng_Hu1;~Xiaoye_Ouyang1", "gender": "M;M;F;M;M;M;M;F", "homepage": "https://qiang2100.github.io/;;https://github.com/Leeying9/;https://xxgcxy.yzu.edu.cn/info/1020/4050.htm;;;;", "dblp": "138/2494;;;https://dblp.uni-trier.de/pid/87/6284-10.html;67/4972-6;51/7436;129/5586;179/2953.html", "google_scholar": "1SgBQM4AAAAJ;;;;jSfAPUwAAAAJ;RTJh0WAAAAAJ;;", "or_profile": "~Jipeng_Qiang1;~Kang_Liu6;~LiYing1;~Yun_Li8;~Yi_Zhu5;~Yun-Hao_Yuan1;~Xiaocheng_Hu1;~Xiaoye_Ouyang1", "aff": "Yangzhou University;Yangzhou University;Yangzhou University;;Yangzhou University;Yangzhou University;China Academy of Electronics and Information Technology;China Academy of Electronics and Information Technology", "aff_domain": "yzu.edu.cn;yzu.edu.cn;yzu.edu.cn;;yzu.edu.cn;yzu.edu.cn;caeit.cetc.com.cn;caeit.cetc.com.cn", "position": "Associate Professor;MS student;Undergrad student;;Associate Professor;Associate Professor;Researcher;Associate Professor", "bibtex": "@inproceedings{\nqiang2023chinese,\ntitle={Chinese Lexical Substitution: Dataset and Method},\nauthor={Jipeng Qiang and Kang Liu and LiYing and Yun Li and Yi Zhu and Yun-Hao Yuan and Xiaocheng Hu and Xiaoye Ouyang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=zaBPb6Pu21}\n}", "github": "", "project": "", "reviewers": "24p3;o7zy;tGuY", "site": "https://openreview.net/forum?id=zaBPb6Pu21", "pdf_size": 0, "rating": "5;5;5", "confidence": "4;3;3", "excitement": "3;4;3", "reproducibility": "5;5;3", "correctness": "3;4;2", "rating_avg": 5.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.3333333333333335, "reproducibility_avg": 4.333333333333333, "correctness_avg": 3.0, "replies_avg": 11, "authors#_avg": 8, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0001-5721-0293;0000-0002-4112-2405;;;0000-0003-3045-2588;;;0000-0002-0283-7790", "linkedin": ";;;;;;;", "aff_unique_index": "0;0;0;0;0;1;1", "aff_unique_norm": "Yangzhou University;China Academy of Electronics and Information Technology", "aff_unique_dep": ";", "aff_unique_url": "https://www.yzu.edu.cn;http://www.cea-iet.cn/", "aff_unique_abbr": "YZU;CAEIT", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0;0;0;0;0", "aff_country_unique": "China" }, { "id": "zdMislOLTv", "title": "Zero-Shot-BERT-Adapters: a Zero-Shot Pipeline for Unknown Intent Detection", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "Intent discovery is a crucial task in natural language processing, and it is increasingly relevant for various of industrial applications.\nIdentifying novel, unseen intents from user inputs remains one of the biggest challenges in this field.\nHerein, we propose Zero-Shot-BERT-Adapters, a two-stage method for multilingual intent discovery relying on a Transformer architecture,\nfine-tuned with Adapters. We train the model for Natural Language Inference (NLI) and later perform unknown intent classification in a zero-shot setting for multiple languages. 
In our evaluation, we first analyze the quality of the model after adaptive fine-tuning on known classes. Secondly, we evaluate its performance in casting intent classification as an NLI task. Lastly, we test the zero-shot performance of the model on unseen classes, showing how Zero-Shot-BERT-Adapters can effectively perform intent discovery by generating intents that are semantically similar, if not identical, to the ground-truth ones. Our experiments show how Zero-Shot-BERT-Adapters outperforms various baselines in two zero-shot settings: known intent classification and unseen intent discovery. The proposed pipeline holds the potential for broad application in customer care. It enables automated dynamic triage using a lightweight model that can be easily deployed and scaled in various business scenarios, unlike large language models. Zero-Shot-BERT-Adapters represents an innovative multi-language approach for intent discovery, enabling the online generation of novel intents.\nA Python package implementing the pipeline and the new datasets we compiled are available at the following link: https://github.com/GT4SD/zero-shot-bert-adapters.", "keywords": "Zero Shot;Intent Detection;Emerging Intents;BERT;Adapters;NLP;Multilingual", "primary_area": "", "supplementary_material": "", "author": "Daniele Comi;Dimitrios Christofidellis;Pier Francesco Piazza;Matteo Manica", "authorids": "~Daniele_Comi1;~Dimitrios_Christofidellis1;~Pier_Francesco_Piazza1;~Matteo_Manica1", "gender": "M;M;M;M", "homepage": "https://comidan.github.io/;;;https://ibm.biz/matteomanica", "dblp": ";;;194/3100", "google_scholar": "5HbIVxAAAAAJ;;;-20KQZQAAAAJ", "or_profile": "~Daniele_Comi1;~Dimitrios_Christofidellis1;~Pier_Francesco_Piazza1;~Matteo_Manica1", "aff": "International Business Machines;Queen's University Belfast;IBM, International Business Machines;International Business Machines", "aff_domain": "ibm.com;qub.ac.uk;us.ibm.com;ibm.com", "position": "Engineer;PhD student;Intern;Senior Research Scientist", "bibtex": "@inproceedings{\ncomi2023zeroshotbertadapters,\ntitle={Zero-Shot-{BERT}-Adapters: a Zero-Shot Pipeline for Unknown Intent Detection},\nauthor={Daniele Comi and Dimitrios Christofidellis and Pier Francesco Piazza and Matteo Manica},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=zdMislOLTv}\n}", "github": "", "project": "", "reviewers": "wrS3;ihi6;cqfr", "site": "https://openreview.net/forum?id=zdMislOLTv", "pdf_size": 0, "rating": "1;1;1", "confidence": "4;2;3", "excitement": "3;2;3", "reproducibility": "4;3;5", "correctness": "3;2;2", "rating_avg": 1.0, "confidence_avg": 3.0, "excitement_avg": 2.6666666666666665, "reproducibility_avg": 4.0, "correctness_avg": 2.3333333333333335, "replies_avg": 11, "authors#_avg": 4, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0001-6647-7299;;;0000-0002-8872-0269", "linkedin": "daniele-comi-05886981/;dimitris-christofidellis-252142127/;pier-francesco-piazza/;matteo-manica-drugilsberg/", "aff_unique_index": "0;1;2;0", "aff_unique_norm": "International Business Machines Corporation;Queen's University Belfast;International Business Machines", "aff_unique_dep": ";;", "aff_unique_url": "https://www.ibm.com;https://www.qub.ac.uk;https://www.ibm.com", "aff_unique_abbr": "IBM;QUB;IBM", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1;0;0", "aff_country_unique": "United States;United Kingdom" }, { "id": "zeGXjQYhXz", "title": 
"Video-Text Retrieval by Supervised Sparse Multi-Grained Learning", "track": "main", "status": "Long Findings", "tldr": "", "abstract": "While recent progress in video-text retrieval has been advanced by the exploration of better representation learning, in this paper, we present a novel multi-grained sparse learning framework, S3MA, to learn an aligned sparse space shared between the video and the text for video-text retrieval. The shared sparse space is initialized with a finite number of sparse concepts, each of which refers to a number of words. With the text data at hand, we learn and update the shared sparse space in a supervised manner using the proposed similarity and alignment losses. Moreover, to enable multi-grained alignment, we incorporate frame representations for better modeling the video modality and calculating fine-grained and coarse-grained similarities. Benefiting from the learned shared sparse space and multi-grained similarities, extensive experiments on several video-text retrieval benchmarks demonstrate the superiority of S3MA over existing methods.", "keywords": "Video-Text Retrieval;Multimodal Learning", "primary_area": "", "supplementary_material": "", "author": "Yimu Wang;Peng Shi", "authorids": "~Yimu_Wang1;~Peng_Shi2", "gender": "M;M", "homepage": "https://yimuwangcs.github.io;", "dblp": "140/7766;", "google_scholar": "TV2vnN8AAAAJ;XTbDLrkAAAAJ", "or_profile": "~Yimu_Wang1;~Peng_Shi2", "aff": "University of Waterloo;Amazon AWS", "aff_domain": "uwaterloo.ca;amazon.com", "position": "PhD student;Researcher", "bibtex": "@inproceedings{\nwang2023videotext,\ntitle={Video-Text Retrieval by Supervised Sparse Multi-Grained Learning},\nauthor={Yimu Wang and Peng Shi},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=zeGXjQYhXz}\n}", "github": "", "project": "", "reviewers": "UH8H;VnYa;Vvmb", "site": "https://openreview.net/forum?id=zeGXjQYhXz", "pdf_size": 0, "rating": "3;3;3", "confidence": "3;4;3", "excitement": "2;4;3", "reproducibility": "3;4;3", "correctness": "1;4;3", "rating_avg": 3.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.0, "reproducibility_avg": 3.3333333333333335, "correctness_avg": 2.6666666666666665, "replies_avg": 11, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";", "linkedin": "yimu-wang-854743151/;", "aff_unique_index": "0;1", "aff_unique_norm": "University of Waterloo;Amazon", "aff_unique_dep": ";Amazon Web Services", "aff_unique_url": "https://uwaterloo.ca;https://aws.amazon.com", "aff_unique_abbr": "UW;AWS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;1", "aff_country_unique": "Canada;United States" }, { "id": "zpayaLaUhL", "title": "Absolute Position Embedding Learns Sinusoid-like Waves for Attention Based on Relative Position", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Attention weight is a clue to interpret how a Transformer-based model makes an inference. In some attention heads, the attention focuses on the neighbors of each token. This allows the output vector of each token to depend on the surrounding tokens and contributes to make the inference context-dependent. We analyze the mechanism behind the concentration of attention on nearby tokens. 
We show that the phenomenon emerges as follows: (1) learned position embedding has sinusoid-like components, (2) such components are transmitted to the query and the key in the self-attention, (3) the attention head shifts the phases of the sinusoid-like components so that the attention concentrates on nearby tokens at specific relative positions. In other words, a certain type of Transformer-based model acquires the sinusoidal positional encoding to some extent on its own through Masked Language Modeling.", "keywords": "position embedding;attention mechanism;Transformer;BERT;RoBERTa", "primary_area": "", "supplementary_material": "", "author": "Yuji Yamamoto;Takuya Matsuzaki", "authorids": "~Yuji_Yamamoto1;~Takuya_Matsuzaki1", "gender": ";M", "homepage": "https://yuji96.github.io/;https://researchmap.jp/mtzk?lang=en", "dblp": ";36/1621", "google_scholar": ";T6O8AdoAAAAJ", "or_profile": "~Yuji_Yamamoto1;~Takuya_Matsuzaki1", "aff": "Tokyo University of Science;Tokyo University of Science", "aff_domain": "tus.ac.jp;tus.ac.jp", "position": "Undergrad student;Full Professor", "bibtex": "@inproceedings{\nyamamoto2023absolute,\ntitle={Absolute Position Embedding Learns Sinusoid-like Waves for Attention Based on Relative Position},\nauthor={Yuji Yamamoto and Takuya Matsuzaki},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=zpayaLaUhL}\n}", "github": "", "project": "", "reviewers": "8B3x;5Jmf;fPAJ", "site": "https://openreview.net/forum?id=zpayaLaUhL", "pdf_size": 0, "rating": "4;4;4", "confidence": "4;3;3", "excitement": "4;4;3", "reproducibility": "3;3;3", "correctness": "5;4;3", "rating_avg": 4.0, "confidence_avg": 3.3333333333333335, "excitement_avg": 3.6666666666666665, "reproducibility_avg": 3.0, "correctness_avg": 4.0, "replies_avg": 9, "authors#_avg": 2, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";", "linkedin": ";", "aff_unique_index": "0;0", "aff_unique_norm": "Tokyo University of Science", "aff_unique_dep": "", "aff_unique_url": "https://www.tus.ac.jp", "aff_unique_abbr": "TUS", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0", "aff_country_unique": "Japan" }, { "id": "zrBrl2iQUr", "title": "Crossing the Aisle: Unveiling Partisan and Counter-Partisan Events in News Reporting", "track": "main", "status": "Short Findings", "tldr": "", "abstract": "News media is expected to uphold unbiased reporting. Yet it may still affect public opinion by selectively including or omitting events that support or contradict its ideological positions. \n\nPrior work in NLP has only studied media bias via linguistic style and word usage. \n\nIn this paper, we study to what degree media balances news reporting and affects consumers through event inclusion or omission. \nWe first introduce the task of detecting both partisan and counter-partisan events: events that support or oppose the author's political ideology. \n\nTo conduct our study, we annotate a high-quality dataset, PAC, containing $8,511$ (counter-)partisan event annotations in $304$ news articles from ideologically diverse media outlets. \n\nWe benchmark PAC to highlight the challenges of this task. \n\nOur findings highlight both the ways in which the news subtly shapes opinion and the need for large language models that better understand events within a broader context. 
Our dataset can be found at https://github.com/launchnlp/Partisan-Event-Dataset.", "keywords": "partisan event;media bias", "primary_area": "", "supplementary_material": "", "author": "Kaijian Zou;Xinliang Frederick Zhang;Winston Wu;Nicholas Beauchamp;Lu Wang", "authorids": "~Kaijian_Zou1;~Xinliang_Frederick_Zhang1;~Winston_Wu1;~Nicholas_Beauchamp1;~Lu_Wang9", "gender": "M;M;;M;F", "homepage": "https://zkjzou.github.io/;https://web.eecs.umich.edu/~xlfzhang/;;http://nickbeauchamp.com;https://web.eecs.umich.edu/~wangluxy/", "dblp": ";277/5381;;220/2037;49/3800-8", "google_scholar": "q2tM5CYAAAAJ;-uGCT5QAAAAJ;;;uczqEdUAAAAJ", "or_profile": "~Kaijian_Zou1;~Xinliang_Frederick_Zhang1;~Winston_Wu1;~Nicholas_Beauchamp1;~Lu_Wang9", "aff": "University of Michigan - Ann Arbor;Bloomberg;;Northeastern University;University of Michigan", "aff_domain": "umich.edu;bloomberg.net;;northeastern.edu;umich.edu", "position": "PhD student;Intern;;Associate Professor;Associate Professor", "bibtex": "@inproceedings{\nzou2023crossing,\ntitle={Crossing the Aisle: Unveiling Partisan and Counter-Partisan Events in News Reporting},\nauthor={Kaijian Zou and Xinliang Frederick Zhang and Winston Wu and Nicholas Beauchamp and Lu Wang},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=zrBrl2iQUr}\n}", "github": "", "project": "", "reviewers": "5vXQ;wiNC;ZkKA", "site": "https://openreview.net/forum?id=zrBrl2iQUr", "pdf_size": 0, "rating": "3;3;3", "confidence": "4;4;4", "excitement": "3;3;2", "reproducibility": "4;4;3", "correctness": "3;3;2", "rating_avg": 3.0, "confidence_avg": 4.0, "excitement_avg": 2.6666666666666665, "reproducibility_avg": 3.6666666666666665, "correctness_avg": 2.6666666666666665, "replies_avg": 10, "authors#_avg": 5, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": ";;;;", "linkedin": "kaijian-kai-zou-19991107/;frederick-x-zhang/?locale=en_US;;;", "aff_unique_index": "0;1;2;0", "aff_unique_norm": "University of Michigan;Bloomberg;Northeastern University", "aff_unique_dep": ";;", "aff_unique_url": "https://www.umich.edu;https://www.bloomberg.com;https://www.northeastern.edu", "aff_unique_abbr": "UM;Bloomberg;NEU", "aff_campus_unique_index": "0", "aff_campus_unique": "Ann Arbor;", "aff_country_unique_index": "0;0;0;0", "aff_country_unique": "United States" }, { "id": "zwqDROxClj", "title": "IAG: Induction-Augmented Generation Framework for Answering Reasoning Questions", "track": "main", "status": "Long Main", "tldr": "", "abstract": "Retrieval-Augmented Generation (RAG), by incorporating external knowledge with the parametric memory of language models, has become the state-of-the-art architecture for open-domain QA tasks. However, common knowledge bases are inherently constrained by limited coverage and noisy information, making retrieval-based approaches inadequate to answer implicit reasoning questions. In this paper, we propose an Induction-Augmented Generation (IAG) framework that utilizes inductive knowledge along with the retrieved documents for implicit reasoning. We leverage large language models (LLMs) for deriving such knowledge via a novel prompting method based on inductive reasoning patterns. On top of this, we implement two versions of IAG named IAG-GPT and IAG-Student, respectively. IAG-GPT directly utilizes the knowledge generated by GPT-3 for answer prediction, while IAG-Student removes the dependency on GPT services at inference time by incorporating a student inductor model. 
The inductor is first trained via knowledge distillation and further optimized by back-propagating the generator feedback via differentiable beam scores. Experimental results show that IAG outperforms RAG baselines as well as ChatGPT on two Open-Domain QA tasks. Notably, our best models have won first place on the official leaderboards of CSQA2.0 (since Nov 1, 2022) and StrategyQA (since Jan 8, 2023).", "keywords": "Open-domain question answering;Inductive reasoning;Prompting", "primary_area": "", "supplementary_material": "", "author": "Zhebin Zhang;Xinyu Zhang;Yuanhang Ren;Saijiang Shi;Meng Han;Yongkang Wu;Ruofei Lai;Zhao Cao", "authorids": "~Zhebin_Zhang3;~Xinyu_Zhang6;~Yuanhang_Ren1;~Saijiang_Shi1;~Meng_Han5;~Yongkang_Wu1;~Ruofei_Lai1;~Zhao_Cao1", "gender": "M;M;M;;F;;M;M", "homepage": ";https://scholar.google.com/citations?hl=en&user=W_WZEQEAAAAJ;https://github.com/ryh95;https://www.zhihu.com/people/shi-xian-sen-76;;;;http://caozhao.hw", "dblp": ";https://dblp.uni-trier.de/pid/58/4582;223/2474;;;;301/9182;69/8078", "google_scholar": ";https://scholar.google.com/citations?hl=en;;;https://scholar.google.com/citations?hl=zh-CN;YYJIxacAAAAJ;;aJmTPaoAAAAJ", "or_profile": "~Zhebin_Zhang3;~Xinyu_Zhang6;~Yuanhang_Ren1;~Saijiang_Shi1;~Meng_Han5;~Yongkang_Wu1;~Ruofei_Lai1;~Zhao_Cao1", "aff": "Huawei Technologies Ltd.;Huawei Technologies Ltd.;;;;;;Huawei Technologies Ltd.", "aff_domain": "huawei.com;huawei.com;;;;;;huawei.com", "position": "Researcher;Principal Researcher;;;;;;Principal Researcher", "bibtex": "@inproceedings{\nzhang2023iag,\ntitle={{IAG}: Induction-Augmented Generation Framework for Answering Reasoning Questions},\nauthor={Zhebin Zhang and Xinyu Zhang and Yuanhang Ren and Saijiang Shi and Meng Han and Yongkang Wu and Ruofei Lai and Zhao Cao},\nbooktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},\nyear={2023},\nurl={https://openreview.net/forum?id=zwqDROxClj}\n}", "github": "", "project": "", "reviewers": "HrRJ;i5uE;bY1q;1Ny9", "site": "https://openreview.net/forum?id=zwqDROxClj", "pdf_size": 0, "rating": "4;4;4;4", "confidence": "4;3;5;4", "excitement": "4;4;4;3", "reproducibility": "4;3;3;4", "correctness": "4;3;4;3", "rating_avg": 4.0, "confidence_avg": 4.0, "excitement_avg": 3.75, "reproducibility_avg": 3.5, "correctness_avg": 3.5, "replies_avg": 12, "authors#_avg": 8, "corr_rating_confidence": 0.0, "corr_rating_correctness": 0.0, "orcid": "0000-0001-9270-5443;0000-0002-6829-4522;;;;;;0000-0002-4214-7858", "linkedin": ";;;;;;;", "aff_unique_index": "0;0;0", "aff_unique_norm": "Huawei", "aff_unique_dep": "Huawei Technologies", "aff_unique_url": "https://www.huawei.com", "aff_unique_abbr": "Huawei", "aff_campus_unique_index": "", "aff_campus_unique": "", "aff_country_unique_index": "0;0;0", "aff_country_unique": "China" } ]